diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000000..eb7cbe4b12 --- /dev/null +++ b/NOTICE @@ -0,0 +1,39 @@ +======================================================== +DataX 是阿里云 DataWorks数据集成 的开源版本,在阿里巴巴集团内被广泛使用的离线数据同步工具/平台。DataX 实现了包括 MySQL、Oracle、OceanBase、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、Hologres、DRDS 等各种异构数据源之间高效的数据同步功能。 + +DataX is an open source offline data synchronization tool / platform widely used in Alibaba group and other companies. DataX implements efficient data synchronization between heterogeneous data sources including mysql, Oracle, oceanbase, sqlserver, postgre, HDFS, hive, ads, HBase, tablestore (OTS), maxcompute (ODPS), hologres, DRDS, etc. + +Copyright 1999-2022 Alibaba Group Holding Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=================================================================== +文件级别引用,按许可证分组 / File-level references, grouped by license +This product contains various third-party components under other open source licenses. +This section summarizes those components and their licenses. +GNU Lesser General Public License +-------------------------------------- +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java +=================================================================== diff --git a/README.md b/README.md index 3cc8f79950..ec5de7159a 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N | | Elasticsearch | | √ |[写](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md)| | 时间序列数据库 | OpenTSDB | √ | |[读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md)| | | TSDB | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/tsdbreader/doc/tsdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md)| -| | TDengine | √ | √ |[读](https://github.com/taosdata/DataX/blob/master/tdenginereader/doc/tdenginereader.md) 
、[写](https://github.com/taosdata/DataX/blob/master/tdenginewriter/doc/tdenginewriter-CN.md)| +| | TDengine | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/tdenginereader/doc/tdenginereader-CN.md) 、[写](https://github.com/alibaba/DataX/blob/master/tdenginewriter/doc/tdenginewriter-CN.md)| # 阿里云DataWorks数据集成 diff --git a/adbpgwriter/src/main/doc/adbpgwriter.md b/adbpgwriter/src/main/doc/adbpgwriter.md index 804272418e..6d3857bc8c 100644 --- a/adbpgwriter/src/main/doc/adbpgwriter.md +++ b/adbpgwriter/src/main/doc/adbpgwriter.md @@ -65,9 +65,9 @@ COPY命令将数据写入ADB PG数据库中。 "writer": { "name": "adbpgwriter", "parameter": { - "username": "username", - "password": "password", - "host": "host", + "username": "", + "password": "", + "host": "127.0.0.1", "port": "1234", "database": "database", "schema": "schema", diff --git a/common/pom.xml b/common/pom.xml index 1a57cccda9..eafdb5da7d 100755 --- a/common/pom.xml +++ b/common/pom.xml @@ -61,6 +61,14 @@ + + + src/main/java + + **/*.properties + + + maven-compiler-plugin diff --git a/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java b/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java index 7699e152ae..0978074bb9 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java @@ -92,7 +92,13 @@ public Date asDate() { throw DataXException.asDataXException( CommonErrorCode.CONVERT_NOT_SUPPORT, "Bool类型不能转为Date ."); } - + + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Bool类型不能转为Date ."); + } + @Override public byte[] asBytes() { throw DataXException.asDataXException( diff --git a/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java b/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java index d3cc599361..bc1eeb79d3 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java @@ -75,6 +75,12 @@ public Date asDate() { throw DataXException.asDataXException( CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date ."); } + + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date ."); + } @Override public Boolean asBoolean() { diff --git a/common/src/main/java/com/alibaba/datax/common/element/Column.java b/common/src/main/java/com/alibaba/datax/common/element/Column.java index ed68e88d6b..2e093a7af6 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/Column.java +++ b/common/src/main/java/com/alibaba/datax/common/element/Column.java @@ -55,6 +55,8 @@ protected void setByteSize(int byteSize) { public abstract String asString(); public abstract Date asDate(); + + public abstract Date asDate(String dateFormat); public abstract byte[] asBytes(); diff --git a/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java b/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java index 89d0a7c627..85d62ecc1c 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java +++ b/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java @@ -22,6 +22,11 @@ public static Date string2Date(final StringColumn column) throws ParseException { return StringCast.asDate(column); } + + public static Date string2Date(final StringColumn column, String dateFormat) + throws 
ParseException { + return StringCast.asDate(column, dateFormat); + } public static byte[] string2Bytes(final StringColumn column) throws UnsupportedEncodingException { @@ -115,6 +120,16 @@ static Date asDate(final StringColumn column) throws ParseException { } throw e; } + + static Date asDate(final StringColumn column, String dateFormat) throws ParseException { + ParseException e; + try { + return FastDateFormat.getInstance(dateFormat, StringCast.timeZoner).parse(column.asString()); + } catch (ParseException ignored) { + e = ignored; + } + throw e; + } static byte[] asBytes(final StringColumn column) throws UnsupportedEncodingException { diff --git a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java index 6626a6fbdd..f688d1639f 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java @@ -89,6 +89,11 @@ public Date asDate() { return new Date((Long)this.getRawData()); } + + @Override + public Date asDate(String dateFormat) { + return asDate(); + } @Override public byte[] asBytes() { diff --git a/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java index 17170ea6c4..915bd8effb 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java @@ -132,6 +132,12 @@ public Date asDate() { throw DataXException.asDataXException( CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 ."); } + + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 ."); + } @Override public byte[] asBytes() { diff --git a/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java b/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java index d8113f7c05..e0f8d86525 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java @@ -125,6 +125,11 @@ public Date asDate() { } return new Date(this.asLong()); } + + @Override + public Date asDate(String dateFormat) { + return this.asDate(); + } @Override public byte[] asBytes() { diff --git a/common/src/main/java/com/alibaba/datax/common/element/Record.java b/common/src/main/java/com/alibaba/datax/common/element/Record.java index d06d80aafb..7abf45ddaa 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/Record.java +++ b/common/src/main/java/com/alibaba/datax/common/element/Record.java @@ -1,5 +1,7 @@ package com.alibaba.datax.common.element; +import java.util.Map; + /** * Created by jingxing on 14-8-24. 
*/ @@ -20,4 +22,8 @@ public interface Record { public int getMemorySize(); + public void setMeta(Map<String, String> meta); + + public Map<String, String> getMeta(); + } diff --git a/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java b/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java index 11209f4688..c1e7a84e0d 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java @@ -149,6 +149,16 @@ public Date asDate() { String.format("String[\"%s\"]不能转为Date .", this.asString())); } } + + @Override + public Date asDate(String dateFormat) { + try { + return ColumnCast.string2Date(this, dateFormat); + } catch (Exception e) { + throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("String[\"%s\"]不能转为Date .", this.asString())); + } + } @Override public byte[] asBytes() { diff --git a/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java b/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java index f360e69900..09d00adcf1 100755 --- a/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java +++ b/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java @@ -16,6 +16,10 @@ public DataXException(ErrorCode errorCode, String errorMessage) { this.errorCode = errorCode; } + public DataXException(String errorMessage) { + super(errorMessage); + } + private DataXException(ErrorCode errorCode, String errorMessage, Throwable cause) { super(errorCode.toString() + " - " + getMessage(errorMessage) + " - " + getMessage(cause), cause); @@ -26,6 +30,10 @@ public static DataXException asDataXException(ErrorCode errorCode, String messag return new DataXException(errorCode, message); } + public static DataXException asDataXException(String message) { + return new DataXException(message); + } + public static DataXException asDataXException(ErrorCode errorCode, String message, Throwable cause) { if (cause instanceof DataXException) { return (DataXException) cause; diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java index 184ee89ece..0323a97689 100755 --- a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java +++ b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java @@ -3,6 +3,8 @@ import com.alibaba.datax.common.base.BaseObject; import com.alibaba.datax.common.util.Configuration; +import java.util.List; + public abstract class AbstractPlugin extends BaseObject implements Pluginable { //作业的config private Configuration pluginJobConf; @@ -15,6 +17,8 @@ public abstract class AbstractPlugin extends BaseObject implements Pluginable { private String peerPluginName; + private List<Configuration> readerPluginSplitConf; + @Override public String getPluginName() { assert null != this.pluginConf; @@ -84,4 +88,12 @@ public void preHandler(Configuration jobConfiguration){ public void postHandler(Configuration jobConfiguration){ } + + public List<Configuration> getReaderPluginSplitConf(){ + return this.readerPluginSplitConf; + } + + public void setReaderPluginSplitConf(List<Configuration> readerPluginSplitConf){ + this.readerPluginSplitConf = readerPluginSplitConf; + } } diff --git a/common/src/main/java/com/alibaba/datax/common/util/ConfigurationUtil.java b/common/src/main/java/com/alibaba/datax/common/util/ConfigurationUtil.java new file mode 100644 index 0000000000..e5e075475a --- /dev/null +++ 
b/common/src/main/java/com/alibaba/datax/common/util/ConfigurationUtil.java @@ -0,0 +1,37 @@ +package com.alibaba.datax.common.util; + +import java.util.Arrays; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang3.StringUtils; + +public class ConfigurationUtil { + private static final List<String> SENSITIVE_KEYS = Arrays.asList("password", "accessKey", "securityToken", + "AccessKeyId", "AccessKeySecert", "AccessKeySecret", "clientPassword"); + + public static Configuration filterSensitive(Configuration origin) { + // The configuration metric of a shell task may be null. + if (origin == null) { + return origin; + } + // Clone first so the passed-in object is left untouched. + Configuration configuration = origin.clone(); + Set<String> keys = configuration.getKeys(); + for (final String key : keys) { + boolean isSensitive = false; + for (String sensitiveKey : SENSITIVE_KEYS) { + if (StringUtils.endsWithIgnoreCase(key, sensitiveKey)) { + isSensitive = true; + break; + } + } + + if (isSensitive && configuration.get(key) instanceof String) { + configuration.set(key, configuration.getString(key).replaceAll(".", "*")); + } + + } + return configuration; + } +} \ No newline at end of file diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/DESCipher.java b/common/src/main/java/com/alibaba/datax/common/util/DESCipher.java old mode 100644 new mode 100755 similarity index 60% rename from odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/DESCipher.java rename to common/src/main/java/com/alibaba/datax/common/util/DESCipher.java index 82e9719196..0692a7b3e3 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/DESCipher.java +++ b/common/src/main/java/com/alibaba/datax/common/util/DESCipher.java @@ -1,5 +1,5 @@ /** - * (C) 2010-2014 Alibaba Group Holding Limited. + * (C) 2010-2022 Alibaba Group Holding Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,342 +14,216 @@ * limitations under the License. 
*/ -package com.alibaba.datax.plugin.reader.odpsreader.util; +package com.alibaba.datax.common.util; import javax.crypto.Cipher; import javax.crypto.SecretKey; import javax.crypto.SecretKeyFactory; import javax.crypto.spec.DESKeySpec; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.security.SecureRandom; /** - *   * DES加解密,支持与delphi交互(字符串编码需统一为UTF-8) - * - *   * - * - *   * @author wym - * - *    + * DES加解密,支持与delphi交互(字符串编码需统一为UTF-8) + * 将这个工具类抽取到 common 中,方便后续代码复用 */ - public class DESCipher { - + private static Logger LOGGER = LoggerFactory.getLogger(DESCipher.class); /** - *   * 密钥 - * - *    + * 密钥 */ - - public static final String KEY = "DESDES"; - + public static final String KEY = ""; private final static String DES = "DES"; /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @param key - * - *   * 密钥,长度必须是8的倍数 - * - *   * @return 密文(字节) - * - *   * @throws Exception - * - *    + * 加密 + * @param src 明文(字节) + * @param key 密钥,长度必须是8的倍数 + * @return 密文(字节) + * @throws Exception */ - public static byte[] encrypt(byte[] src, byte[] key) throws Exception { - // DES算法要求有一个可信任的随机数源 - SecureRandom sr = new SecureRandom(); - + // 从原始密匙数据创建DESKeySpec对象 - DESKeySpec dks = new DESKeySpec(key); - + // 创建一个密匙工厂,然后用它把DESKeySpec转换成 - // 一个SecretKey对象 - SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); - SecretKey securekey = keyFactory.generateSecret(dks); - + // Cipher对象实际完成加密操作 - Cipher cipher = Cipher.getInstance(DES); // 用密匙初始化Cipher对象 - cipher.init(Cipher.ENCRYPT_MODE, securekey, sr); // 现在,获取数据并加密 - // 正式执行加密操作 - return cipher.doFinal(src); - } /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @param key - * - *   * 密钥,长度必须是8的倍数 - * - *   * @return 明文(字节) - * - *   * @throws Exception - * - *    + * * 解密 + * * @param src + * * 密文(字节) + * * @param key + * * 密钥,长度必须是8的倍数 + * * @return 明文(字节) + * * @throws Exception */ - public static byte[] decrypt(byte[] src, byte[] key) throws Exception { - // DES算法要求有一个可信任的随机数源 - SecureRandom sr = new SecureRandom(); // 从原始密匙数据创建一个DESKeySpec对象 - DESKeySpec dks = new DESKeySpec(key); // 创建一个密匙工厂,然后用它把DESKeySpec对象转换成 - // 一个SecretKey对象 - SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); - SecretKey securekey = keyFactory.generateSecret(dks); // Cipher对象实际完成解密操作 - Cipher cipher = Cipher.getInstance(DES); // 用密匙初始化Cipher对象 - cipher.init(Cipher.DECRYPT_MODE, securekey, sr); // 现在,获取数据并解密 - // 正式执行解密操作 - return cipher.doFinal(src); - } /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @return 密文(字节) - * - *   * @throws Exception - * - *    + * 加密 + * @param src * 明文(字节) + * @return 密文(字节) + * @throws Exception */ - public static byte[] encrypt(byte[] src) throws Exception { - return encrypt(src, KEY.getBytes()); - } /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @return 明文(字节) - * - *   * @throws Exception - * - *    + * 解密 + * @param src 密文(字节) + * @return 明文(字节) + * @throws Exception */ - public static byte[] decrypt(byte[] src) throws Exception { - return decrypt(src, KEY.getBytes()); - } /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字符串) - * - *   * @return 密文(16进制字符串) - * - *   * @throws Exception - * - *    + * 加密 + * @param src 明文(字符串) + * @return 密文(16进制字符串) + * @throws Exception */ - public final static String encrypt(String src) { - try { - return byte2hex(encrypt(src.getBytes(), KEY.getBytes())); - } catch (Exception e) { - - e.printStackTrace(); - + 
LOGGER.warn(e.getMessage(), e); } - return null; - } - + /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字符串) - * - *   * @return 明文(字符串) - * - *   * @throws Exception - * - *    + * 加密 + * @param src 明文(字符串) + * @param encryptKey 加密用的秘钥 + * @return 密文(16进制字符串) + * @throws Exception */ + public final static String encrypt(String src, String encryptKey) { + try { + return byte2hex(encrypt(src.getBytes(), encryptKey.getBytes())); + } catch (Exception e) { + LOGGER.warn(e.getMessage(), e); + } + return null; + } + /** + * 解密 + * @param src 密文(字符串) + * @return 明文(字符串) + * @throws Exception + */ public final static String decrypt(String src) { try { - return new String(decrypt(hex2byte(src.getBytes()), KEY.getBytes())); - } catch (Exception e) { - - e.printStackTrace(); - + LOGGER.warn(e.getMessage(), e); } - return null; - } - + /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @return 密文(16进制字符串) - * - *   * @throws Exception - * - *    + * 解密 + * @param src 密文(字符串) + * @param decryptKey 解密用的秘钥 + * @return 明文(字符串) + * @throws Exception */ + public final static String decrypt(String src, String decryptKey) { + try { + return new String(decrypt(hex2byte(src.getBytes()), decryptKey.getBytes())); + } catch (Exception e) { + LOGGER.warn(e.getMessage(), e); + } + return null; + } + /** + * 加密 + * @param src + * 明文(字节) + * @return 密文(16进制字符串) + * @throws Exception + */ public static String encryptToString(byte[] src) throws Exception { - return encrypt(new String(src)); - } /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @return 明文(字符串) - * - *   * @throws Exception - * - *    + * 解密 + * @param src 密文(字节) + * @return 明文(字符串) + * @throws Exception */ - public static String decryptToString(byte[] src) throws Exception { - return decrypt(new String(src)); - } public static String byte2hex(byte[] b) { - String hs = ""; - String stmp = ""; - for (int n = 0; n < b.length; n++) { - stmp = (Integer.toHexString(b[n] & 0XFF)); - if (stmp.length() == 1) - hs = hs + "0" + stmp; - else - hs = hs + stmp; - } - return hs.toUpperCase(); - } public static byte[] hex2byte(byte[] b) { - if ((b.length % 2) != 0) - - throw new IllegalArgumentException("长度不是偶数"); - + throw new IllegalArgumentException("The length is not an even number"); byte[] b2 = new byte[b.length / 2]; - for (int n = 0; n < b.length; n += 2) { - String item = new String(b, n, 2); - b2[n / 2] = (byte) Integer.parseInt(item, 16); - } return b2; - } - - /* - * public static void main(String[] args) { try { String src = "cheetah"; - * String crypto = DESCipher.encrypt(src); System.out.println("密文[" + src + - * "]:" + crypto); System.out.println("解密后:" + DESCipher.decrypt(crypto)); } - * catch (Exception e) { e.printStackTrace(); } } - */ } diff --git a/common/src/main/java/com/alibaba/datax/common/util/DataXCaseEnvUtil.java b/common/src/main/java/com/alibaba/datax/common/util/DataXCaseEnvUtil.java new file mode 100644 index 0000000000..ca137b94de --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/DataXCaseEnvUtil.java @@ -0,0 +1,33 @@ +package com.alibaba.datax.common.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class DataXCaseEnvUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(DataXCaseEnvUtil.class); + + // datax回归测试效率提升 + private static String DATAX_AUTOTEST_RETRY_TIME = System.getenv("DATAX_AUTOTEST_RETRY_TIME"); + private static String DATAX_AUTOTEST_RETRY_INTERVAL = 
System.getenv("DATAX_AUTOTEST_RETRY_INTERVAL"); + private static String DATAX_AUTOTEST_RETRY_EXPONENTIAL = System.getenv("DATAX_AUTOTEST_RETRY_EXPONENTIAL"); + + public static int getRetryTimes(int retryTimes) { + int actualRetryTimes = DATAX_AUTOTEST_RETRY_TIME != null ? Integer.valueOf(DATAX_AUTOTEST_RETRY_TIME) : retryTimes; + // LOGGER.info("The actualRetryTimes is {}", actualRetryTimes); + return actualRetryTimes; + } + + public static long getRetryInterval(long retryInterval) { + long actualRetryInterval = DATAX_AUTOTEST_RETRY_INTERVAL != null ? Long.valueOf(DATAX_AUTOTEST_RETRY_INTERVAL) : retryInterval; + // LOGGER.info("The actualRetryInterval is {}", actualRetryInterval); + return actualRetryInterval; + } + + public static boolean getRetryExponential(boolean retryExponential) { + boolean actualRetryExponential = DATAX_AUTOTEST_RETRY_EXPONENTIAL != null ? Boolean.valueOf(DATAX_AUTOTEST_RETRY_EXPONENTIAL) : retryExponential; + // LOGGER.info("The actualRetryExponential is {}", actualRetryExponential); + return actualRetryExponential; + } +} diff --git a/common/src/main/java/com/alibaba/datax/common/util/IdAndKeyRollingUtil.java b/common/src/main/java/com/alibaba/datax/common/util/IdAndKeyRollingUtil.java new file mode 100644 index 0000000000..8bab301e6f --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/IdAndKeyRollingUtil.java @@ -0,0 +1,62 @@ +package com.alibaba.datax.common.util; + +import java.util.Map; + +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.alibaba.datax.common.exception.DataXException; + +public class IdAndKeyRollingUtil { + private static Logger LOGGER = LoggerFactory.getLogger(IdAndKeyRollingUtil.class); + public static final String SKYNET_ACCESSID = "SKYNET_ACCESSID"; + public static final String SKYNET_ACCESSKEY = "SKYNET_ACCESSKEY"; + + public final static String ACCESS_ID = "accessId"; + public final static String ACCESS_KEY = "accessKey"; + + public static String parseAkFromSkynetAccessKey() { + Map envProp = System.getenv(); + String skynetAccessID = envProp.get(IdAndKeyRollingUtil.SKYNET_ACCESSID); + String skynetAccessKey = envProp.get(IdAndKeyRollingUtil.SKYNET_ACCESSKEY); + String accessKey = null; + // follow 原有的判断条件 + // 环境变量中,如果存在SKYNET_ACCESSID/SKYNET_ACCESSKEy(只要有其中一个变量,则认为一定是两个都存在的! + // if (StringUtils.isNotBlank(skynetAccessID) || + // StringUtils.isNotBlank(skynetAccessKey)) { + // 检查严格,只有加密串不为空的时候才进去,不过 之前能跑的加密串都不应该为空 + if (StringUtils.isNotBlank(skynetAccessKey)) { + LOGGER.info("Try to get accessId/accessKey from environment SKYNET_ACCESSKEY."); + accessKey = DESCipher.decrypt(skynetAccessKey); + if (StringUtils.isBlank(accessKey)) { + // 环境变量里面有,但是解析不到 + throw DataXException.asDataXException(String.format( + "Failed to get the [accessId]/[accessKey] from the environment variable. 
The [accessId]=[%s]", + skynetAccessID)); + } + } + if (StringUtils.isNotBlank(accessKey)) { + LOGGER.info("Get accessId/accessKey from environment variables SKYNET_ACCESSKEY successfully."); + } + return accessKey; + } + + public static String getAccessIdAndKeyFromEnv(Configuration originalConfig) { + String accessId = null; + Map envProp = System.getenv(); + accessId = envProp.get(IdAndKeyRollingUtil.SKYNET_ACCESSID); + String accessKey = null; + if (StringUtils.isBlank(accessKey)) { + // 老的没有出异常,只是获取不到ak + accessKey = IdAndKeyRollingUtil.parseAkFromSkynetAccessKey(); + } + + if (StringUtils.isNotBlank(accessKey)) { + // 确认使用这个的都是 accessId、accessKey的命名习惯 + originalConfig.set(IdAndKeyRollingUtil.ACCESS_ID, accessId); + originalConfig.set(IdAndKeyRollingUtil.ACCESS_KEY, accessKey); + } + return accessKey; + } +} diff --git a/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java b/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java index d7a5b76462..a381bb90fc 100755 --- a/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java +++ b/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java @@ -6,6 +6,7 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; /** @@ -136,4 +137,25 @@ public static List valueToLowerCase(List aList) { return result; } + + public static Boolean checkIfHasSameValue(List listA, List listB) { + if (null == listA || listA.isEmpty() || null == listB || listB.isEmpty()) { + return false; + } + + for (String oneValue : listA) { + if (listB.contains(oneValue)) { + return true; + } + } + + return false; + } + + public static boolean checkIfAllSameValue(List listA, List listB) { + if (null == listA || listA.isEmpty() || null == listB || listB.isEmpty() || listA.size() != listB.size()) { + return false; + } + return new HashSet<>(listA).containsAll(new HashSet<>(listB)); + } } diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings.properties new file mode 100644 index 0000000000..25661f7a67 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings.properties @@ -0,0 +1,54 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. 
\u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. +configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! 
+configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. +listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! 
+retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1}, STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_en_US.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_en_US.properties new file mode 100644 index 0000000000..2074bbb952 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_en_US.properties @@ -0,0 +1,53 @@ +very_like_yixiao=1{0}2{1}3 + + +configuration.1=Configuration information error. The configuration file [{0}] you provided does not exist. Please check your configuration files. +configuration.2=Configuration information error. Failed to read the configuration file [{0}] you provided. Error reason: {1}. Please check the permission settings of your configuration files. +configuration.3=Please check your configuration files. Failed to read the configuration file you provided. Error reason: {0}. Please check the permission settings of your configuration files. +configuration.4=The configuration file you provided contains errors. [{0}] is a required parameter and cannot be empty or blank. +configuration.5=The configuration file you provided contains errors. [{0}] is a required parameter and cannot be empty or blank. +configuration.6=Task reading configuration file error. Invalid configuration file path [{0}] value. The expected value should be of the character type: {1}. Please check your configuration and make corrections. +configuration.7=The configuration information you provided contains errors. The value [{1}] obtained from [{0}] cannot be converted to the Bool type. Please check the source table configuration and make corrections. +configuration.8=Task reading configuration file error. Invalid configuration file path [{0}] value. The expected value should be of the integer type: {1}. Please check your configuration and make corrections. +configuration.9=Task reading configuration file error. Invalid configuration file path [{0}] value. The expected value should be of the integer type: {1}. Please check your configuration and make corrections. +configuration.10=Task reading configuration file error. Invalid configuration file path [{0}] value. The expected value should be of the floating-point type: {1}. Please check your configuration and make corrections. +configuration.11=The Key [{0}] for the configuration file does not exist. This is a code programming error. Please contact the DataX team. +configuration.12=The value [{0}] cannot adapt to the [{1}] you provided. This exception represents a system programming error. Please contact the DataX developer team. +configuration.13=The path cannot be null. This exception represents a system programming error. Please contact the DataX developer team. +configuration.14=The path [{0}] has an invalid value type [{1}]. This exception represents a system programming error. Please contact the DataX developer team. +configuration.15=This exception represents a system programming error. Please contact the DataX developer team. 
+configuration.16=The configuration file you provided contains errors. The path [{0}] requires you to configure a Map object in JSON format, but the actual type found on the node is [{1}]. Please check your configuration and make corrections. +configuration.17=The configuration file you provided contains errors. The value of the path [{0}] is null and DataX cannot recognize the configuration. Please check your configuration and make corrections. +configuration.18=The configuration file you provided contains errors. The path [{0}] requires you to configure a Map object in JSON format, but the actual type found on the node is [{1}]. Please check your configuration and make corrections. +configuration.19=System programming error. The list subscript must be of the numeric type, but the actual type found on this node is [{0}]. This exception represents a system programming error. Please contact the DataX developer team. +configuration.20=System programming error. This exception represents a system programming error. Please contact the DataX developer team. +configuration.21=System programming error. Invalid path [{0}]. No spaces are allowed between path layers. +configuration.22=Configuration information error. The configuration information you provided is not in a legal JSON format. JSON cannot be blank. Please provide the configuration information in the standard JSON format. +configuration.23=Configuration information error. The configuration information you provided is not in a valid JSON format: {0}. Please provide the configuration information in the standard JSON format. + + +listutil.1=The job configuration you provided contains errors. The list cannot be empty. +listutil.2=The job configuration you provided contains errors. The list cannot be empty. +listutil.3=The job configuration information you provided contains errors. String: [{0}] is not allowed to be repeated in the list: [{1}]. +listutil.4=The job configuration you provided contains errors. The list cannot be empty. +listutil.5=The job configuration you provided contains errors. The list cannot be empty. +listutil.6=The job configuration information you provided contains errors. String: [{0}] does not exist in the list: [{1}]. +listutil.7=The job configuration you provided contains errors. The list cannot be empty. +listutil.8=The job configuration you provided contains errors. The list cannot be empty. + + +rangesplitutil.1=The slice number cannot be less than 1. Here: [expectSliceNumber]=[{0}]. +rangesplitutil.2=The left or right intervals of BigInteger character strings cannot be null when they are sliced. Here: [left]=[{0}], [right]=[{1}]. +rangesplitutil.3=The [bigInteger] parameter cannot be null. +rangesplitutil.4=Only ASCII character strings are supported for character string slicing, but the [{0}] character string is not of the ASCII type. +rangesplitutil.5=The [bigInteger] parameter cannot be null. +rangesplitutil.6=Only ASCII character strings are supported for character string slicing, but the [{0}] character string is not of the ASCII type. + + +retryutil.1=System programming error. The “callable” input parameter cannot be null. +retryutil.2=System programming error. The “retrytime[%d]” input parameter cannot be less than 1. +retryutil.3=Exception when calling callable. Exception Msg: {0} +retryutil.4=Exception when calling callable. Retry Attempt: {0} will start soon. {1} attempts in total. This attempt planned to wait for [{2}]ms, and actually waited for [{3}]ms. Exception Msg: [{4}]. 
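The LocalStrings*.properties files added in this diff follow the standard java.util.ResourceBundle naming convention (a base file plus _en_US/_ja_JP/_zh_CN/_zh_HK variants) and use java.text.MessageFormat placeholders such as {0}; the \uXXXX escapes are there because property bundles have historically been read as ISO-8859-1. The <resources> entry added to common/pom.xml earlier in this diff is what copies them out of src/main/java onto the classpath. Below is a minimal sketch of the plain-JDK lookup this layout is compatible with; the demo class is illustrative, and how DataX wires the lookup internally may differ:

```java
import java.text.MessageFormat;
import java.util.Locale;
import java.util.ResourceBundle;

public class LocalStringsDemo {
    public static void main(String[] args) {
        // Resolves LocalStrings_en_US.properties for Locale.US and falls back
        // to the base LocalStrings.properties for locales without a variant.
        ResourceBundle bundle = ResourceBundle.getBundle(
                "com.alibaba.datax.common.util.LocalStrings", Locale.US);

        // configuration.4 takes a single placeholder: the missing parameter.
        String pattern = bundle.getString("configuration.4");
        System.out.println(MessageFormat.format(pattern, "jdbcUrl"));
        // -> The configuration file you provided contains errors. [jdbcUrl]
        //    is a required parameter and cannot be empty or blank.
    }
}
```
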
+ +httpclientutil.1=Request address: {0}. Request method: {1}. STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=The remote interface returns -1. We will try again \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_ja_JP.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..d4409a8df3 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_ja_JP.properties @@ -0,0 +1,53 @@ +very_like_yixiao=1{0}2{1}3 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. +configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! 
+configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. +listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. 
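The listutil.* messages in these bundles back the validations in ListUtil, which this diff extends with checkIfHasSameValue and checkIfAllSameValue. A short sketch of how the two new helpers behave, with illustrative sample values:

```java
import java.util.Arrays;
import java.util.List;

import com.alibaba.datax.common.util.ListUtil;

public class ListUtilDemo {
    public static void main(String[] args) {
        List<String> a = Arrays.asList("id", "name", "gmt_create");
        List<String> b = Arrays.asList("name", "price");

        // true: "name" occurs in both lists.
        System.out.println(ListUtil.checkIfHasSameValue(a, b));
        // false: sizes differ, so the set comparison is never reached.
        System.out.println(ListUtil.checkIfAllSameValue(a, b));

        // Caveat of the HashSet-based implementation: equal sizes plus equal
        // element *sets* is enough, so duplicate counts are not compared.
        System.out.println(ListUtil.checkIfAllSameValue(
                Arrays.asList("a", "a", "b"),
                Arrays.asList("a", "b", "b"))); // true
    }
}
```
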
+ + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! +retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_CN.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..533dcd52dd --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_CN.properties @@ -0,0 +1,54 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. 
\u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. +configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. 
\u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. +listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! +retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_HK.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..ab26ac52af --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_HK.properties @@ -0,0 +1,104 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. 
\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. +configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. 
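A quick aside on the \uXXXX sequences that fill these LocalStrings files: before Java 9, java.util.Properties and ResourceBundle read .properties files as ISO-8859-1, so every CJK character has to be stored as a Unicode escape (the JDK's native2ascii tool performs this conversion at build time). A minimal sketch of the same escaping; the class name is illustrative:

```java
public class UnicodeEscapeDemo {
    // native2ascii-style escaping: every character outside the ASCII range
    // is written as a \\uXXXX escape, which is how the CJK text in the
    // LocalStrings files above is stored.
    static String escape(String s) {
        StringBuilder sb = new StringBuilder();
        for (char c : s.toCharArray()) {
            if (c < 0x80) {
                sb.append(c);
            } else {
                sb.append(String.format("\\u%04x", (int) c));
            }
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.println(escape("配置信息错误")); // -> \u914d\u7f6e\u4fe1\u606f\u9519\u8bef
    }
}
```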
+configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. +listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. 
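The {0}/{1} placeholders in the entries above are java.text.MessageFormat patterns: arguments are substituted positionally and rendered via toString(). A self-contained illustration; the pattern string is paraphrased from configuration.8, not quoted from it:

```java
import java.text.MessageFormat;

public class MessageFormatDemo {
    public static void main(String[] args) {
        // Positional substitution, as in the LocalStrings entries above.
        String pattern = "Invalid value at config path [{0}], expected an integer: {1}.";
        // MessageFormat.format calls toString() on each argument in turn.
        System.out.println(MessageFormat.format(pattern, "job.setting.speed.channel", "abc"));
        // -> Invalid value at config path [job.setting.speed.channel], expected an integer: abc.
    }
}
```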
+ + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! +retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + +very_like_yixiao=一{0}二{1}三 + + +configuration.1=配置資訊錯誤,您提供的配置檔案[{0}]不存在. 請檢查您的配置檔案. +configuration.2=配置資訊錯誤. 您提供配置檔案[{0}]讀取失敗,錯誤原因: {1}. 請檢查您的配置檔案的權限設定. +configuration.3=請檢查您的配置檔案. 您提供的配置檔案讀取失敗,錯誤原因: {0}. 請檢查您的配置檔案的權限設定. +configuration.4=您提供配置檔案有誤,[{0}]是必填參數,不允許為空或者留白 . +configuration.5=您提供配置檔案有誤,[{0}]是必填參數,不允許為空或者留白 . +configuration.6=任務讀取配置檔案出錯. 因為配置檔案路徑[{0}] 值不合法,期望是字符類型: {1}. 請檢查您的配置並作出修改. +configuration.7=您提供的配置資訊有誤,因為從[{0}]獲取的值[{1}]無法轉換為bool類型. 請檢查源表的配置並且做出相應的修改. +configuration.8=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是整數類型: {1}. 請檢查您的配置並作出修改. +configuration.9=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是整數類型: {1}. 請檢查您的配置並作出修改. +configuration.10=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是浮點類型: {1}. 請檢查您的配置並作出修改. +configuration.11=配置檔案對應Key[{0}]並不存在,該情況是代碼編程錯誤. 請聯絡DataX團隊的同學. +configuration.12=值[{0}]無法適配您提供[{1}], 該異常代表系統編程錯誤, 請聯絡DataX開發團隊! +configuration.13=Path不能為null,該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.14=路徑[{0}]出現不合法值類型[{1}],該異常代表系統編程錯誤, 請聯絡DataX開發團隊! . +configuration.15=該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.16=您提供的配置檔案有誤. 路徑[{0}]需要配置Json格式的Map對象,但該節點發現實際類型是[{1}]. 請檢查您的配置並作出修改. +configuration.17=您提供的配置檔案有誤. 路徑[{0}]值為null,datax無法識別該配置. 請檢查您的配置並作出修改. +configuration.18=您提供的配置檔案有誤. 路徑[{0}]需要配置Json格式的Map對象,但該節點發現實際類型是[{1}]. 請檢查您的配置並作出修改. +configuration.19=系統編程錯誤,清單下標必須為數字類型,但該節點發現實際類型是[{0}] ,該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.20=系統編程錯誤, 該異常代表系統編程錯誤, 請聯絡DataX開發團隊!. +configuration.21=系統編程錯誤, 路徑[{0}]不合法, 路徑層次之間不能出現空白字符 . +configuration.22=配置資訊錯誤. 因為您提供的配置資訊不是合法的JSON格式, JSON不能為空白. 請按照標準json格式提供配置資訊. +configuration.23=配置資訊錯誤. 您提供的配置資訊不是合法的JSON格式: {0}. 請按照標準json格式提供配置資訊. + + +listutil.1=您提供的作業配置有誤,List不能為空. +listutil.2=您提供的作業配置有誤, List不能為空. +listutil.3=您提供的作業配置資訊有誤, String:[{0}]不允許重複出現在清單中: [{1}]. +listutil.4=您提供的作業配置有誤, List不能為空. +listutil.5=您提供的作業配置有誤, List不能為空. +listutil.6=您提供的作業配置資訊有誤, String:[{0}]不存在於清單中:[{1}]. +listutil.7=您提供的作業配置有誤, List不能為空. +listutil.8=您提供的作業配置有誤, List不能為空. + + +rangesplitutil.1=切分份數不能小於1. 此處:expectSliceNumber=[{0}]. +rangesplitutil.2=對 BigInteger 進行切分時,其左右區間不能為 null. 此處:left=[{0}],right=[{1}]. +rangesplitutil.3=參數 bigInteger 不能為空. +rangesplitutil.4=根據字符串進行切分時僅支援 ASCII 字符串,而字符串:[{0}]非 ASCII 字符串. +rangesplitutil.5=參數 bigInteger 不能為空. +rangesplitutil.6=根據字符串進行切分時僅支援 ASCII 字符串,而字符串:[{0}]非 ASCII 字符串. + + +retryutil.1=系統編程錯誤, 入參callable不能為空 ! +retryutil.2=系統編程錯誤, 入參retrytime[%d]不能小於1 ! 
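One inconsistency worth noting in the entry just above: retryutil.2 uses the String.format-style %d rather than MessageFormat's {0}. MessageFormat leaves %d untouched, so this key is presumably rendered through String.format by its caller. A small sketch of the difference:

```java
import java.text.MessageFormat;

public class PlaceholderMismatchDemo {
    public static void main(String[] args) {
        String key = "retrytime[%d] must not be less than 1";
        // MessageFormat substitutes only {n} placeholders, so %d survives:
        System.out.println(MessageFormat.format(key, 3)); // retrytime[%d] must not be less than 1
        // String.format is what actually fills a %d slot:
        System.out.println(String.format(key, 3));        // retrytime[3] must not be less than 1
    }
}
```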
+retryutil.3=Exception when calling callable, 異常Msg:{0} +retryutil.4=Exception when calling callable, 即將嘗試執行第{0}次重試,共計重試{1}次.本次重試計劃等待[{2}]ms,實際等待[{3}]ms, 異常Msg:[{4}] + +httpclientutil.1=\u8ACB\u6C42\u5730\u5740\uFF1A{0}, \u8ACB\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u9060\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C07\u91CD\u8A66 \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_TW.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..89eb1eae92 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_TW.properties @@ -0,0 +1,104 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. 
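Both the zh_HK file that just ended and this zh_TW file define every key twice: an escaped block first, then a raw Traditional Chinese block further down. java.util.Properties (and ResourceBundle's properties loader) keep the last occurrence of a duplicated key, so the second block's values win at load time. A minimal demonstration:

```java
import java.io.StringReader;
import java.util.Properties;

public class DuplicateKeyDemo {
    public static void main(String[] args) throws Exception {
        // Same key twice, as in the LocalStrings_zh_HK/zh_TW files:
        // the later definition overwrites the earlier one on load.
        Properties p = new Properties();
        p.load(new StringReader("engine.1=first definition\nengine.1=second definition\n"));
        System.out.println(p.getProperty("engine.1")); // -> second definition
    }
}
```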
+configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. 
+listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! +retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + +very_like_yixiao=一{0}二{1}三 + + +configuration.1=配置資訊錯誤,您提供的配置檔案[{0}]不存在. 請檢查您的配置檔案. +configuration.2=配置資訊錯誤. 您提供配置檔案[{0}]讀取失敗,錯誤原因: {1}. 請檢查您的配置檔案的權限設定. +configuration.3=請檢查您的配置檔案. 您提供的配置檔案讀取失敗,錯誤原因: {0}. 請檢查您的配置檔案的權限設定. +configuration.4=您提供配置檔案有誤,[{0}]是必填參數,不允許為空或者留白 . +configuration.5=您提供配置檔案有誤,[{0}]是必填參數,不允許為空或者留白 . +configuration.6=任務讀取配置檔案出錯. 因為配置檔案路徑[{0}] 值不合法,期望是字符類型: {1}. 請檢查您的配置並作出修改. +configuration.7=您提供的配置資訊有誤,因為從[{0}]獲取的值[{1}]無法轉換為bool類型. 請檢查源表的配置並且做出相應的修改. +configuration.8=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是整數類型: {1}. 請檢查您的配置並作出修改. +configuration.9=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是整數類型: {1}. 請檢查您的配置並作出修改. +configuration.10=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是浮點類型: {1}. 請檢查您的配置並作出修改. +configuration.11=配置檔案對應Key[{0}]並不存在,該情況是代碼編程錯誤. 請聯絡DataX團隊的同學. +configuration.12=值[{0}]無法適配您提供[{1}], 該異常代表系統編程錯誤, 請聯絡DataX開發團隊! +configuration.13=Path不能為null,該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.14=路徑[{0}]出現不合法值類型[{1}],該異常代表系統編程錯誤, 請聯絡DataX開發團隊! . +configuration.15=該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.16=您提供的配置檔案有誤. 路徑[{0}]需要配置Json格式的Map對象,但該節點發現實際類型是[{1}]. 請檢查您的配置並作出修改. +configuration.17=您提供的配置檔案有誤. 路徑[{0}]值為null,datax無法識別該配置. 請檢查您的配置並作出修改. +configuration.18=您提供的配置檔案有誤. 路徑[{0}]需要配置Json格式的Map對象,但該節點發現實際類型是[{1}]. 請檢查您的配置並作出修改. +configuration.19=系統編程錯誤,清單下標必須為數字類型,但該節點發現實際類型是[{0}] ,該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.20=系統編程錯誤, 該異常代表系統編程錯誤, 請聯絡DataX開發團隊!. +configuration.21=系統編程錯誤, 路徑[{0}]不合法, 路徑層次之間不能出現空白字符 . +configuration.22=配置資訊錯誤. 因為您提供的配置資訊不是合法的JSON格式, JSON不能為空白. 請按照標準json格式提供配置資訊. +configuration.23=配置資訊錯誤. 您提供的配置資訊不是合法的JSON格式: {0}. 請按照標準json格式提供配置資訊. + + +listutil.1=您提供的作業配置有誤,List不能為空. +listutil.2=您提供的作業配置有誤, List不能為空. +listutil.3=您提供的作業配置資訊有誤, String:[{0}]不允許重複出現在清單中: [{1}]. +listutil.4=您提供的作業配置有誤, List不能為空. +listutil.5=您提供的作業配置有誤, List不能為空. +listutil.6=您提供的作業配置資訊有誤, String:[{0}]不存在於清單中:[{1}]. +listutil.7=您提供的作業配置有誤, List不能為空. +listutil.8=您提供的作業配置有誤, List不能為空. + + +rangesplitutil.1=切分份數不能小於1. 此處:expectSliceNumber=[{0}]. +rangesplitutil.2=對 BigInteger 進行切分時,其左右區間不能為 null. 
此處:left=[{0}],right=[{1}].
+rangesplitutil.3=參數 bigInteger 不能為空.
+rangesplitutil.4=根據字符串進行切分時僅支援 ASCII 字符串,而字符串:[{0}]非 ASCII 字符串.
+rangesplitutil.5=參數 bigInteger 不能為空.
+rangesplitutil.6=根據字符串進行切分時僅支援 ASCII 字符串,而字符串:[{0}]非 ASCII 字符串.
+
+
+retryutil.1=系統編程錯誤, 入參callable不能為空 !
+retryutil.2=系統編程錯誤, 入參retrytime[%d]不能小於1 !
+retryutil.3=Exception when calling callable, 異常Msg:{0}
+retryutil.4=Exception when calling callable, 即將嘗試執行第{0}次重試,共計重試{1}次.本次重試計劃等待[{2}]ms,實際等待[{3}]ms, 異常Msg:[{4}]
+
+httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5
\ No newline at end of file
diff --git a/common/src/main/java/com/alibaba/datax/common/util/MessageSource.java b/common/src/main/java/com/alibaba/datax/common/util/MessageSource.java
new file mode 100644
index 0000000000..d2411328da
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/MessageSource.java
@@ -0,0 +1,207 @@
+package com.alibaba.datax.common.util;
+
+import java.text.MessageFormat;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+import java.util.TimeZone;
+
+import org.apache.commons.lang3.LocaleUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class MessageSource {
+    private static final Logger LOG = LoggerFactory.getLogger(MessageSource.class);
+    private static Map<String, ResourceBundle> resourceBundleCache = new HashMap<String, ResourceBundle>();
+    public static Locale locale = null;
+    public static TimeZone timeZone = null;
+    private ResourceBundle resourceBundle = null;
+
+    private MessageSource(ResourceBundle resourceBundle) {
+        this.resourceBundle = resourceBundle;
+    }
+
+    /**
+     * @param baseName
+     *            demo: javax.servlet.http.LocalStrings
+     *
+     * @throws MissingResourceException
+     *             - if no resource bundle for the specified base name can be
+     *             found
+     * */
+    public static MessageSource loadResourceBundle(String baseName) {
+        return loadResourceBundle(baseName, MessageSource.locale,
+                MessageSource.timeZone);
+    }
+
+    /**
+     * @param clazz
+     *            used to derive the resource bundle's package name
+     * */
+    public static MessageSource loadResourceBundle(Class<?> clazz) {
+        return loadResourceBundle(clazz.getPackage().getName());
+    }
+
+    /**
+     * @param clazz
+     *            used to derive the resource bundle's package name
+     * */
+    public static MessageSource loadResourceBundle(Class<?> clazz,
+            Locale locale, TimeZone timeZone) {
+        return loadResourceBundle(clazz.getPackage().getName(), locale,
+                timeZone);
+    }
+
+    /**
+     * warn:
+     * ok: ResourceBundle.getBundle("xxx.LocalStrings", Locale.getDefault(), LoadUtil.getJarLoader(PluginType.WRITER, "odpswriter"))
+     * error: ResourceBundle.getBundle("xxx.LocalStrings", Locale.getDefault(), LoadUtil.getJarLoader(PluginType.WRITER, "odpswriter"))
+     * @param baseName
+     *            demo: javax.servlet.http.LocalStrings
+     *
+     * @throws MissingResourceException
+     *             - if no resource bundle for the specified base name can be
+     *             found
+     *
+     * */
+    public static MessageSource loadResourceBundle(String baseName,
+            Locale locale, TimeZone timeZone) {
+        ResourceBundle resourceBundle = null;
+        if (null == locale) {
+            locale = LocaleUtils.toLocale("en_US");
+        }
+        if (null == timeZone) {
+            timeZone = TimeZone.getDefault();
+        }
+        String resourceBaseName = String.format("%s.LocalStrings", baseName);
+        LOG.debug(
+                "initEnvironment MessageSource.locale[{}], MessageSource.timeZone[{}]",
+                MessageSource.locale, MessageSource.timeZone);
+        LOG.debug(
+                "loadResourceBundle with locale[{}], timeZone[{}], baseName[{}]",
+                locale, timeZone, resourceBaseName);
+        // warn: should this cache also be keyed by Locale? currently it is not
+        if (!MessageSource.resourceBundleCache.containsKey(resourceBaseName)) {
+            ClassLoader clazzLoader = Thread.currentThread()
+                    .getContextClassLoader();
+            LOG.debug("loadResourceBundle classLoader:{}", clazzLoader);
+            resourceBundle = ResourceBundle.getBundle(resourceBaseName, locale,
+                    clazzLoader);
+            MessageSource.resourceBundleCache.put(resourceBaseName,
+                    resourceBundle);
+        } else {
+            resourceBundle = MessageSource.resourceBundleCache
+                    .get(resourceBaseName);
+        }
+
+        return new MessageSource(resourceBundle);
+    }
+
+    public static boolean unloadResourceBundle(Class<?> clazz) {
+        String baseName = clazz.getPackage().getName();
+        String resourceBaseName = String.format("%s.LocalStrings", baseName);
+        if (!MessageSource.resourceBundleCache.containsKey(resourceBaseName)) {
+            return false;
+        } else {
+            MessageSource.resourceBundleCache.remove(resourceBaseName);
+            return true;
+        }
+    }
+
+    public static MessageSource reloadResourceBundle(Class<?> clazz) {
+        MessageSource.unloadResourceBundle(clazz);
+        return MessageSource.loadResourceBundle(clazz);
+    }
+
+    public static void setEnvironment(Locale locale, TimeZone timeZone) {
+        // warn: set as JVM defaults? @2018.03.21 re-enabled these two lines; otherwise i18n with multiple time zones runs into problems
+        Locale.setDefault(locale);
+        TimeZone.setDefault(timeZone);
+        MessageSource.locale = locale;
+        MessageSource.timeZone = timeZone;
+        LOG.info("use Locale: {} timeZone: {}", locale, timeZone);
+    }
+
+    public static void init(final Configuration configuration) {
+        Locale locale2Set = Locale.getDefault();
+        String localeStr = configuration.getString("common.column.locale", "zh_CN");// OS default unless configured
+        if (StringUtils.isNotBlank(localeStr)) {
+            try {
+                locale2Set = LocaleUtils.toLocale(localeStr);
+            } catch (Exception e) {
+                LOG.warn("ignored locale parse exception: {}", e.getMessage());
+            }
+        }
+
+        TimeZone timeZone2Set = TimeZone.getDefault();
+        String timeZoneStr = configuration.getString("common.column.timeZone");// OS default unless configured
+        if (StringUtils.isNotBlank(timeZoneStr)) {
+            try {
+                timeZone2Set = TimeZone.getTimeZone(timeZoneStr);
+            } catch (Exception e) {
+                LOG.warn("ignored timezone parse exception: {}", e.getMessage());
+            }
+        }
+
+        LOG.info("JVM TimeZone: {}, Locale: {}", timeZone2Set.getID(), locale2Set);
+        MessageSource.setEnvironment(locale2Set, timeZone2Set);
+    }
+
+    public static void clearCache() {
+        MessageSource.resourceBundleCache.clear();
+    }
+
+    public String message(String code) {
+        return this.messageWithDefaultMessage(code, null);
+    }
+
+    public String message(String code, String args1) {
+        return this.messageWithDefaultMessage(code, null,
+                new Object[] { args1 });
+    }
+
+    public String message(String code, String args1, String args2) {
+        return this.messageWithDefaultMessage(code, null, new Object[] { args1,
+                args2 });
+    }
+
+    public String message(String code, String args1, String args2, String args3) {
+        return this.messageWithDefaultMessage(code, null, new Object[] { args1,
+                args2, args3 });
+    }
+
+    // the overloads above cover most cases; avoiding this varargs version improves performance
+    public String message(String code, Object... args) {
+        return this.messageWithDefaultMessage(code, null, args);
+    }
+
+    public String messageWithDefaultMessage(String code, String defaultMessage) {
+        return this.messageWithDefaultMessage(code, defaultMessage,
+                new Object[] {});
+    }
+
+    /**
+     * @param args
+     *            MessageFormat formats each argument via its toString() in turn
+     * */
+    public String messageWithDefaultMessage(String code, String defaultMessage,
+            Object... args) {
+        String messageStr = null;
+        try {
+            messageStr = this.resourceBundle.getString(code);
+        } catch (MissingResourceException e) {
+            messageStr = defaultMessage;
+        }
+        if (null != messageStr && null != args && args.length > 0) {
+            // warn: see loadResourceBundle set default locale
+            return MessageFormat.format(messageStr, args);
+        } else {
+            return messageStr;
+        }
+
+    }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java b/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java
index 791f9ea12c..ec353730ab 100755
--- a/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java
+++ b/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java
@@ -206,4 +206,27 @@ private static boolean isPureAscii(String aString) {
         return true;
     }
 
+
+    /**
+     * List splitting helper, mainly used by reader plugins' split logic:
+     * splits the index range [0, size) via doLongSplit, then copies each
+     * index slice into its own non-empty sub-list.
+     * */
+    public static <T> List<List<T>> doListSplit(List<T> objects, int adviceNumber) {
+        List<List<T>> splitLists = new ArrayList<List<T>>();
+        if (null == objects) {
+            return splitLists;
+        }
+        long[] splitPoint = RangeSplitUtil.doLongSplit(0, objects.size(), adviceNumber);
+        for (int startIndex = 0; startIndex < splitPoint.length - 1; startIndex++) {
+            List<T> objectsForTask = new ArrayList<T>();
+            int endIndex = startIndex + 1;
+            for (long i = splitPoint[startIndex]; i < splitPoint[endIndex]; i++) {
+                objectsForTask.add(objects.get((int) i));
+            }
+            if (!objectsForTask.isEmpty()) {
+                splitLists.add(objectsForTask);
+            }
+        }
+        return splitLists;
+    }
+
 }
diff --git a/core/pom.xml b/core/pom.xml
index 174a18d3fa..970f95a6d3 100755
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -100,6 +100,14 @@
+        <resources>
+            <resource>
+                <directory>src/main/java</directory>
+                <includes>
+                    <include>**/*.properties</include>
+                </includes>
+            </resource>
+        </resources>
         <groupId>org.apache.maven.plugins</groupId>
diff --git a/core/src/main/java/com/alibaba/datax/core/Engine.java b/core/src/main/java/com/alibaba/datax/core/Engine.java
index be21512b15..3834253285 100755
--- a/core/src/main/java/com/alibaba/datax/core/Engine.java
+++ b/core/src/main/java/com/alibaba/datax/core/Engine.java
@@ -6,6 +6,7 @@
 import com.alibaba.datax.common.statistics.PerfTrace;
 import com.alibaba.datax.common.statistics.VMInfo;
 import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.MessageSource;
 import com.alibaba.datax.core.job.JobContainer;
 import com.alibaba.datax.core.taskgroup.TaskGroupContainer;
 import com.alibaba.datax.core.util.ConfigParser;
@@ -135,6 +136,9 @@ public static void entry(final String[] args) throws Throwable {
         RUNTIME_MODE = cl.getOptionValue("mode");
 
         Configuration configuration = ConfigParser.parse(jobPath);
+        // bind the i18n message resources
+        MessageSource.init(configuration);
+        MessageSource.reloadResourceBundle(Configuration.class);
 
         long jobId;
         if (!"-1".equalsIgnoreCase(jobIdString)) {
diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings.properties
new file mode 100644
index 0000000000..97d46f07f5
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings.properties
@@ -0,0 +1,5 @@
+very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09
+
+engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684
jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_en_US.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_en_US.properties new file mode 100644 index 0000000000..7ff93838bc --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_en_US.properties @@ -0,0 +1,5 @@ +very_like_yixiao=1{0}2{1}3 + +engine.1=A valid job ID must be provided in the URL for the non-standalone mode. +engine.2=\n\nThrough the intelligent analysis by DataX, the most likely error reason of this task is: \n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_ja_JP.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_ja_JP.properties new file mode 100644 index 0000000000..dfbad97035 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_ja_JP.properties @@ -0,0 +1,5 @@ +very_like_yixiao=1{0}2{1}3 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_CN.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..97d46f07f5 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_CN.properties @@ -0,0 +1,5 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_HK.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..2587e0ab51 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_HK.properties @@ -0,0 +1,10 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + +very_like_yixiao=一{0}二{1}三 + +engine.1=非 standalone 模式必須在 URL 中提供有效的 jobId. +engine.2=\n\n經DataX智能分析,該任務最可能的錯誤原因是:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_TW.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..2587e0ab51 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_TW.properties @@ -0,0 +1,10 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + +very_like_yixiao=一{0}二{1}三 + +engine.1=非 standalone 模式必須在 URL 中提供有效的 jobId. 
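Tying the pieces together: Engine.entry() above initializes MessageSource from the job configuration, and the engine.* keys in these per-locale core bundles are then resolved by package, with ResourceBundle falling back zh_TW -> zh -> the base LocalStrings.properties. A minimal usage sketch, assuming the datax core and common jars are on the classpath; the error text passed to engine.2 is made up:

```java
import com.alibaba.datax.common.util.MessageSource;
import com.alibaba.datax.core.Engine;

public class MessageSourceUsage {
    public static void main(String[] args) {
        // Resolves com/alibaba/datax/core/LocalStrings*.properties for the
        // current MessageSource locale (en_US if none has been set).
        MessageSource ms = MessageSource.loadResourceBundle(Engine.class);
        // engine.2 has one {0} slot for the diagnosed root cause:
        System.out.println(ms.message("engine.2", "java.lang.OutOfMemoryError: Java heap space"));
    }
}
```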
+engine.2=\n\n經DataX智能分析,該任務最可能的錯誤原因是:\n{0}
+
diff --git a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
index fdc5d8215d..1b0d52380c 100755
--- a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
+++ b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
@@ -11,15 +11,18 @@
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
+import java.util.Map;
 
 public class DirtyRecord implements Record {
     private List<Column> columns = new ArrayList<Column>();
+    private Map<String, String> meta;
 
     public static DirtyRecord asDirtyRecord(final Record record) {
         DirtyRecord result = new DirtyRecord();
         for (int i = 0; i < record.getColumnNumber(); i++) {
             result.addColumn(record.getColumn(i));
         }
+        result.setMeta(record.getMeta());
 
         return result;
     }
@@ -65,6 +68,16 @@ public int getMemorySize() {
                 "该方法不支持!");
     }
 
+    @Override
+    public void setMeta(Map<String, String> meta) {
+        this.meta = meta;
+    }
+
+    @Override
+    public Map<String, String> getMeta() {
+        return this.meta;
+    }
+
     public List<Column> getColumns() {
         return columns;
     }
@@ -119,6 +132,12 @@ public Date asDate() {
         throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR,
                 "该方法不支持!");
     }
+
+    @Override
+    public Date asDate(String dateFormat) {
+        throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR,
+                "该方法不支持!");
+    }
 
     @Override
     public byte[] asBytes() {
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java b/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
index 2598bc8c80..c78a2a877c 100755
--- a/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
@@ -27,6 +27,8 @@ public class DefaultRecord implements Record {
     // memory needed by the Record itself, before any column data
     private int memorySize = ClassSize.DefaultRecordHead;
 
+    private Map<String, String> meta;
+
     public DefaultRecord() {
         this.columns = new ArrayList<Column>(RECORD_AVERGAE_COLUMN_NUMBER);
     }
@@ -83,6 +85,16 @@ public int getMemorySize(){
         return memorySize;
     }
 
+    @Override
+    public void setMeta(Map<String, String> meta) {
+        this.meta = meta;
+    }
+
+    @Override
+    public Map<String, String> getMeta() {
+        return this.meta;
+    }
+
     private void decrByteSize(final Column column) {
         if (null == column) {
             return;
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/record/TerminateRecord.java b/core/src/main/java/com/alibaba/datax/core/transport/record/TerminateRecord.java
index 928609abda..7cb1cff104 100755
--- a/core/src/main/java/com/alibaba/datax/core/transport/record/TerminateRecord.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/record/TerminateRecord.java
@@ -3,6 +3,8 @@
 import com.alibaba.datax.common.element.Column;
 import com.alibaba.datax.common.element.Record;
 
+import java.util.Map;
+
 /**
  * Marker record signaling that the producer has finished producing.
  *
@@ -41,6 +43,16 @@ public int getMemorySize() {
         return 0;
     }
 
+    @Override
+    public void setMeta(Map<String, String> meta) {
+
+    }
+
+    @Override
+    public Map<String, String> getMeta() {
+        return null;
+    }
+
     @Override
     public void setColumn(int i, Column column) {
         return;
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings.properties
new file mode 100644
index 0000000000..a90f782972
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings.properties
@@ -0,0 +1,58 @@
+configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... Exception:{2} +configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0} +configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0} + +dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}] +dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}] +dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}] + +errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4 +errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570 +errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761. +errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}]. + + +errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 . +errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . +errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684 +errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF +errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . 
+errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C . +errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3 +errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519. +errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25 +errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4. + + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1}, STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 + + +secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519 +secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519 +secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519 +secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6 +secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519 +secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5 +secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}] 
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E
+
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_en_US.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_en_US.properties
new file mode 100644
index 0000000000..8e01b15386
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_en_US.properties
@@ -0,0 +1,58 @@
+configparser.1=Failed to load the plug-in [{0},{1}]. We will retry in 1s... Exception: {2}
+configparser.2=Failed to obtain the job configuration information: {0}
+configparser.3=Failed to obtain the job configuration information: {0}
+configparser.4=Failed to obtain the job configuration information: {0}
+configparser.5=Failed to load the plug-in. Loading of the specified plug-in: {0} was not completed
+configparser.6=Failed to load the plug-in. A duplicate plug-in exists: {0}
+
+dataxserviceutil.1=Exception in creating signature. NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=Exception in creating signature. InvalidKeyException, [{0}]
+dataxserviceutil.3=Exception in creating signature. UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=The dirty data percentage limit should be within [0.0, 1.0]
+errorrecordchecker.2=The dirty data entry limit must be a non-negative integer
+errorrecordchecker.3=The dirty data entry count check did not pass. The limit is [{0}] entries, but [{1}] entries were actually captured.
+errorrecordchecker.4=The dirty data percentage check did not pass. The limit is [{0}], but [{1}] of dirty data was actually captured.
+
+
+errorcode.install_error=Error in installing DataX engine. Please contact your O&M team to solve the problem.
+errorcode.argument_error=Error in running DataX engine. This problem is generally caused by an internal programming error. Please contact the DataX developer team to solve the problem.
+errorcode.runtime_error=The DataX engine encountered an error during running. For the specific cause, refer to the error diagnosis after DataX stops running.
+errorcode.config_error=Error in DataX engine configuration. This problem is generally caused by a DataX installation error. Please contact your O&M team to solve the problem.
+errorcode.secret_error=Error in DataX engine encryption or decryption. This problem is generally caused by a DataX key configuration error. Please contact your O&M team to solve the problem.
+errorcode.hook_load_error=Error in loading the external hook. This problem is generally caused by the DataX installation.
+errorcode.hook_fail_error=Error in executing the external hook
+errorcode.plugin_install_error=Error in installing DataX plug-in. This problem is generally caused by a DataX installation error. Please contact your O&M team to solve the problem.
+errorcode.plugin_not_found=Error in DataX plug-in configuration. This problem is generally caused by a DataX installation error. Please contact your O&M team to solve the problem.
+errorcode.plugin_init_error=Error in DataX plug-in initialization. This problem is generally caused by a DataX installation error. Please contact your O&M team to solve the problem.
+errorcode.plugin_runtime_error=The DataX plug-in encountered an error during running. For the specific cause, refer to the error diagnosis after DataX stops running.
+errorcode.plugin_dirty_data_limit_exceed=The dirty data transmitted by DataX exceeds user expectations. This error often occurs when a lot of dirty data exists in the source data. Please carefully check the dirty data log information reported by DataX, or you can raise the dirty data threshold appropriately.
+errorcode.plugin_split_error=Error in DataX plug-in splitting. This problem is generally caused by a programming error in some DataX plug-in. Please contact the DataX developer team to solve the problem.
+errorcode.kill_job_timeout_error=Killing the task timed out. Please contact the PE to solve the problem
+errorcode.start_taskgroup_error=Failed to start the task group. Please contact the DataX developer team to solve the problem
+errorcode.call_datax_service_failed=Error in requesting DataX Service.
+errorcode.call_remote_failed=Remote call failure
+errorcode.killed_exit_value=The job has received a Kill command.
+
+
+httpclientutil.1=Request address: {0}. Request method: {1}. STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=The remote interface returned -1. We will retry
+
+
+secretutil.1=System programming error. Unsupported encryption type
+secretutil.2=System programming error. Unsupported encryption type
+secretutil.3=RSA encryption error
+secretutil.4=RSA decryption error
+secretutil.5=Triple DES encryption error
+secretutil.6=RSA decryption error
+secretutil.7=Error in building Triple DES key
+secretutil.8=DataX configuration requires encryption and decryption, but unable to find the key configuration file
+secretutil.9=Error in reading the encryption and decryption configuration file
+secretutil.10=The version of the DataX-configured key is [{0}], but there is no configuration in the system. Error in task key configuration. The key version you configured does not exist
+secretutil.11=The version of the DataX-configured key is [{0}], but there is no configuration in the system. There may be an error in task key configuration, or a problem in system maintenance
+secretutil.12=The version of the DataX-configured key is [{0}], but there is no configuration in the system. Error in task key configuration. The key version you configured does not exist
+secretutil.13=The version of the DataX-configured key is [{0}], but there is no configuration in the system. There may be an error in task key configuration, or a problem in system maintenance
+secretutil.14=DataX configuration requires encryption and decryption, but some key in the configured key version [{0}] is empty
+secretutil.15=DataX configuration requires encryption and decryption, but some configured public/private key pairs are empty and the version is [{0}]
+secretutil.16=DataX configuration requires encryption and decryption, but the encryption and decryption configuration cannot be found
+
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_ja_JP.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_ja_JP.properties
new file mode 100644
index 0000000000..7a0c95ac9d
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_ja_JP.properties
@@ -0,0 +1,58 @@
+configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5...
Exception:{2} +configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0} +configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0} + +dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}] +dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}] +dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}] + +errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4 +errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570 +errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761. +errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}]. + + +errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 . +errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . +errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684 +errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF +errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . 
+errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C . +errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3 +errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519. +errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25 +errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4. + + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 + + +secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519 +secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519 +secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519 +secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6 +secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519 +secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5 +secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}] 
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E + diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_CN.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_CN.properties new file mode 100644 index 0000000000..7a0c95ac9d --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_CN.properties @@ -0,0 +1,58 @@ +configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... Exception:{2} +configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0} +configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0} + +dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}] +dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}] +dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}] + +errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4 +errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570 +errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761. +errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}]. + + +errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 . +errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . +errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684 +errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF +errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . 
+errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . +errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C . +errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3 +errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519. +errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25 +errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4. 
+ + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 + + +secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519 +secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519 +secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519 +secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6 +secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519 +secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5 +secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}] +secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E + diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_HK.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_HK.properties new file mode 100644 index 0000000000..59ce9fd943 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_HK.properties @@ -0,0 +1,116 @@ +configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... 
Exception:{2} +configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0} +configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0} + +dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}] +dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}] +dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}] + +errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4 +errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570 +errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761. +errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}]. + + +errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 . +errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . +errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684 +errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF +errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . 
+errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C . +errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3 +errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519. +errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25 +errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4. + + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 + + +secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519 +secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519 +secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519 +secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6 +secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519 +secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5 +secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}] 
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E + +configparser.1=\u5916\u639B\u7A0B\u5F0F[{0},{1}]\u8F09\u5165\u5931\u6557\uFF0C1s\u5F8C\u91CD\u8A66... Exception:{2} +configparser.2=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0} +configparser.3=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0} +configparser.4=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0} +configparser.5=\u5916\u639B\u7A0B\u5F0F\u8F09\u5165\u5931\u6557\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u5916\u639B\u7A0B\u5F0F\u8F09\u5165:{0} +configparser.6=\u5916\u639B\u7A0B\u5F0F\u8F09\u5165\u5931\u6557,\u5B58\u5728\u91CD\u8907\u5916\u639B\u7A0B\u5F0F:{0} + +dataxserviceutil.1=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38NoSuchAlgorithmException, [{0}] +dataxserviceutil.2=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38InvalidKeyException, [{0}] +dataxserviceutil.3=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38UnsupportedEncodingException, [{0}] + +errorrecordchecker.1=\u9AD2\u6578\u64DA\u767E\u5206\u6BD4\u9650\u5236\u61C9\u8A72\u5728[0.0, 1.0]\u4E4B\u9593 +errorrecordchecker.2=\u9AD2\u6578\u64DA\u689D\u6578\u73FE\u5728\u61C9\u8A72\u70BA\u975E\u8CA0\u6574\u6578 +errorrecordchecker.3=\u9AD2\u6578\u64DA\u689D\u6578\u6AA2\u67E5\u4E0D\u901A\u904E\uFF0C\u9650\u5236\u662F[{0}]\u689D\uFF0C\u4F46\u5BE6\u969B\u4E0A\u6355\u7372\u4E86[{1}]\u689D. +errorrecordchecker.4=\u9AD2\u6578\u64DA\u767E\u5206\u6BD4\u6AA2\u67E5\u4E0D\u901A\u904E\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5BE6\u969B\u4E0A\u6355\u7372\u5230[{1}]. + + +errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88DD\u932F\u8AA4, \u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.argument_error=DataX\u5F15\u64CE\u904B\u884C\u932F\u8AA4\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BC\u5167\u90E8\u7DE8\u7A0B\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A . +errorcode.runtime_error=DataX\u5F15\u64CE\u904B\u884C\u904E\u7A0B\u51FA\u932F\uFF0C\u5177\u9AD4\u539F\u56E0\u8ACB\u53C3\u770BDataX\u904B\u884C\u7D50\u675F\u6642\u7684\u932F\u8AA4\u8A3A\u65B7\u8CC7\u8A0A . +errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u932F\u8AA4\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u932F\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.hook_load_error=\u8F09\u5165\u5916\u90E8Hook\u51FA\u73FE\u932F\u8AA4\uFF0C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u5F15\u8D77\u7684 +errorcode.hook_fail_error=\u57F7\u884C\u5916\u90E8Hook\u51FA\u73FE\u932F\u8AA4 +errorcode.plugin_install_error=DataX\u5916\u639B\u7A0B\u5F0F\u5B89\u88DD\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.plugin_not_found=DataX\u5916\u639B\u7A0B\u5F0F\u914D\u7F6E\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . 
+errorcode.plugin_init_error=DataX\u5916\u639B\u7A0B\u5F0F\u521D\u59CB\u5316\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.plugin_runtime_error=DataX\u5916\u639B\u7A0B\u5F0F\u904B\u884C\u6642\u51FA\u932F, \u5177\u9AD4\u539F\u56E0\u8ACB\u53C3\u770BDataX\u904B\u884C\u7D50\u675F\u6642\u7684\u932F\u8AA4\u8A3A\u65B7\u8CC7\u8A0A . +errorcode.plugin_dirty_data_limit_exceed=DataX\u50B3\u8F38\u9AD2\u6578\u64DA\u8D85\u904E\u7528\u6236\u9810\u671F\uFF0C\u8A72\u932F\u8AA4\u901A\u5E38\u662F\u7531\u65BC\u6E90\u7AEF\u6578\u64DA\u5B58\u5728\u8F03\u591A\u696D\u52D9\u9AD2\u6578\u64DA\u5C0E\u81F4\uFF0C\u8ACB\u4ED4\u7D30\u6AA2\u67E5DataX\u5F59\u5831\u7684\u9AD2\u6578\u64DA\u65E5\u8A8C\u8CC7\u8A0A, \u6216\u8005\u60A8\u53EF\u4EE5\u9069\u7576\u8ABF\u5927\u9AD2\u6578\u64DA\u95BE\u503C . +errorcode.plugin_split_error=DataX\u5916\u639B\u7A0B\u5F0F\u5207\u5206\u51FA\u932F, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5404\u500B\u5916\u639B\u7A0B\u5F0F\u7DE8\u7A0B\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A +errorcode.kill_job_timeout_error=kill \u4EFB\u52D9\u903E\u6642\uFF0C\u8ACB\u806F\u7D61PE\u89E3\u6C7A +errorcode.start_taskgroup_error=taskGroup\u555F\u52D5\u5931\u6557,\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A +errorcode.call_datax_service_failed=\u8ACB\u6C42 DataX Service \u51FA\u932F. +errorcode.call_remote_failed=\u9060\u7A0B\u8ABF\u7528\u5931\u6557 +errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4. + + +httpclientutil.1=\u8ACB\u6C42\u5730\u5740\uFF1A{0}, \u8ACB\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u9060\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C07\u91CD\u8A66 + + +secretutil.1=\u7CFB\u7D71\u7DE8\u7A0B\u932F\u8AA4,\u4E0D\u652F\u63F4\u7684\u52A0\u5BC6\u985E\u578B +secretutil.2=\u7CFB\u7D71\u7DE8\u7A0B\u932F\u8AA4,\u4E0D\u652F\u63F4\u7684\u52A0\u5BC6\u985E\u578B +secretutil.3=rsa\u52A0\u5BC6\u51FA\u932F +secretutil.4=rsa\u89E3\u5BC6\u51FA\u932F +secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u932F +secretutil.6=rsa\u89E3\u5BC6\u51FA\u932F +secretutil.7=\u69CB\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u932F +secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u7121\u6CD5\u627E\u5230\u5BC6\u9470\u7684\u914D\u7F6E\u6A94\u6848 +secretutil.9=\u8B80\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6A94\u6848\u51FA\u932F +secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C +secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7D71\u7DAD\u8B77\u554F\u984C +secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C 
+secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7D71\u7DAD\u8B77\u554F\u984C +secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u9470\u70BA\u7A7A\u7684\u60C5\u6CC1 +secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u9470\u5C0D\u5B58\u5728\u70BA\u7A7A\u7684\u60C5\u6CC1\uFF0C\u7248\u672C[{0}] +secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u7121\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E + diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_TW.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_TW.properties new file mode 100644 index 0000000000..59ce9fd943 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_TW.properties @@ -0,0 +1,116 @@ +configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... Exception:{2} +configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0} +configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0} +configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0} + +dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}] +dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}] +dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}] + +errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4 +errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570 +errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761. +errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}]. + + +errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 . +errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . +errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . 
+errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684 +errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF +errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 . +errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F . +errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C . +errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3 +errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 +errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519. +errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25 +errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4. 
+ + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 + + +secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B +secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519 +secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519 +secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519 +secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519 +secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6 +secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519 +secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C +secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898 +secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5 +secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}] +secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E + +configparser.1=\u5916\u639B\u7A0B\u5F0F[{0},{1}]\u8F09\u5165\u5931\u6557\uFF0C1s\u5F8C\u91CD\u8A66... 
Exception:{2} +configparser.2=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0} +configparser.3=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0} +configparser.4=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0} +configparser.5=\u5916\u639B\u7A0B\u5F0F\u8F09\u5165\u5931\u6557\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u5916\u639B\u7A0B\u5F0F\u8F09\u5165:{0} +configparser.6=\u5916\u639B\u7A0B\u5F0F\u8F09\u5165\u5931\u6557,\u5B58\u5728\u91CD\u8907\u5916\u639B\u7A0B\u5F0F:{0} + +dataxserviceutil.1=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38NoSuchAlgorithmException, [{0}] +dataxserviceutil.2=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38InvalidKeyException, [{0}] +dataxserviceutil.3=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38UnsupportedEncodingException, [{0}] + +errorrecordchecker.1=\u9AD2\u6578\u64DA\u767E\u5206\u6BD4\u9650\u5236\u61C9\u8A72\u5728[0.0, 1.0]\u4E4B\u9593 +errorrecordchecker.2=\u9AD2\u6578\u64DA\u689D\u6578\u73FE\u5728\u61C9\u8A72\u70BA\u975E\u8CA0\u6574\u6578 +errorrecordchecker.3=\u9AD2\u6578\u64DA\u689D\u6578\u6AA2\u67E5\u4E0D\u901A\u904E\uFF0C\u9650\u5236\u662F[{0}]\u689D\uFF0C\u4F46\u5BE6\u969B\u4E0A\u6355\u7372\u4E86[{1}]\u689D. +errorrecordchecker.4=\u9AD2\u6578\u64DA\u767E\u5206\u6BD4\u6AA2\u67E5\u4E0D\u901A\u904E\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5BE6\u969B\u4E0A\u6355\u7372\u5230[{1}]. + + +errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88DD\u932F\u8AA4, \u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.argument_error=DataX\u5F15\u64CE\u904B\u884C\u932F\u8AA4\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BC\u5167\u90E8\u7DE8\u7A0B\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A . +errorcode.runtime_error=DataX\u5F15\u64CE\u904B\u884C\u904E\u7A0B\u51FA\u932F\uFF0C\u5177\u9AD4\u539F\u56E0\u8ACB\u53C3\u770BDataX\u904B\u884C\u7D50\u675F\u6642\u7684\u932F\u8AA4\u8A3A\u65B7\u8CC7\u8A0A . +errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u932F\u8AA4\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u932F\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.hook_load_error=\u8F09\u5165\u5916\u90E8Hook\u51FA\u73FE\u932F\u8AA4\uFF0C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u5F15\u8D77\u7684 +errorcode.hook_fail_error=\u57F7\u884C\u5916\u90E8Hook\u51FA\u73FE\u932F\u8AA4 +errorcode.plugin_install_error=DataX\u5916\u639B\u7A0B\u5F0F\u5B89\u88DD\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.plugin_not_found=DataX\u5916\u639B\u7A0B\u5F0F\u914D\u7F6E\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.plugin_init_error=DataX\u5916\u639B\u7A0B\u5F0F\u521D\u59CB\u5316\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A . +errorcode.plugin_runtime_error=DataX\u5916\u639B\u7A0B\u5F0F\u904B\u884C\u6642\u51FA\u932F, \u5177\u9AD4\u539F\u56E0\u8ACB\u53C3\u770BDataX\u904B\u884C\u7D50\u675F\u6642\u7684\u932F\u8AA4\u8A3A\u65B7\u8CC7\u8A0A . 
+errorcode.plugin_dirty_data_limit_exceed=DataX\u50B3\u8F38\u9AD2\u6578\u64DA\u8D85\u904E\u7528\u6236\u9810\u671F\uFF0C\u8A72\u932F\u8AA4\u901A\u5E38\u662F\u7531\u65BC\u6E90\u7AEF\u6578\u64DA\u5B58\u5728\u8F03\u591A\u696D\u52D9\u9AD2\u6578\u64DA\u5C0E\u81F4\uFF0C\u8ACB\u4ED4\u7D30\u6AA2\u67E5DataX\u5F59\u5831\u7684\u9AD2\u6578\u64DA\u65E5\u8A8C\u8CC7\u8A0A, \u6216\u8005\u60A8\u53EF\u4EE5\u9069\u7576\u8ABF\u5927\u9AD2\u6578\u64DA\u95BE\u503C . +errorcode.plugin_split_error=DataX\u5916\u639B\u7A0B\u5F0F\u5207\u5206\u51FA\u932F, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5404\u500B\u5916\u639B\u7A0B\u5F0F\u7DE8\u7A0B\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A +errorcode.kill_job_timeout_error=kill \u4EFB\u52D9\u903E\u6642\uFF0C\u8ACB\u806F\u7D61PE\u89E3\u6C7A +errorcode.start_taskgroup_error=taskGroup\u555F\u52D5\u5931\u6557,\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A +errorcode.call_datax_service_failed=\u8ACB\u6C42 DataX Service \u51FA\u932F. +errorcode.call_remote_failed=\u9060\u7A0B\u8ABF\u7528\u5931\u6557 +errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4. + + +httpclientutil.1=\u8ACB\u6C42\u5730\u5740\uFF1A{0}, \u8ACB\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u9060\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C07\u91CD\u8A66 + + +secretutil.1=\u7CFB\u7D71\u7DE8\u7A0B\u932F\u8AA4,\u4E0D\u652F\u63F4\u7684\u52A0\u5BC6\u985E\u578B +secretutil.2=\u7CFB\u7D71\u7DE8\u7A0B\u932F\u8AA4,\u4E0D\u652F\u63F4\u7684\u52A0\u5BC6\u985E\u578B +secretutil.3=rsa\u52A0\u5BC6\u51FA\u932F +secretutil.4=rsa\u89E3\u5BC6\u51FA\u932F +secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u932F +secretutil.6=rsa\u89E3\u5BC6\u51FA\u932F +secretutil.7=\u69CB\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u932F +secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u7121\u6CD5\u627E\u5230\u5BC6\u9470\u7684\u914D\u7F6E\u6A94\u6848 +secretutil.9=\u8B80\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6A94\u6848\u51FA\u932F +secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C +secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7D71\u7DAD\u8B77\u554F\u984C +secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C +secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7D71\u7DAD\u8B77\u554F\u984C +secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u9470\u70BA\u7A7A\u7684\u60C5\u6CC1 +secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u9470\u5C0D\u5B58\u5728\u70BA\u7A7A\u7684\u60C5\u6CC1\uFF0C\u7248\u672C[{0}] 
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u7121\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E + diff --git a/hbase11xsqlreader/src/main/resources/plugin.json b/hbase11xsqlreader/src/main/resources/plugin.json index e245ca2742..162f571288 100644 --- a/hbase11xsqlreader/src/main/resources/plugin.json +++ b/hbase11xsqlreader/src/main/resources/plugin.json @@ -2,6 +2,6 @@ "name": "hbase11xsqlreader", "class": "com.alibaba.datax.plugin.reader.hbase11xsqlreader.HbaseSQLReader", "description": "useScene: prod. mechanism: Scan to read data.", - "developer": "liwei.li, bug reported to : liwei.li@alibaba-inc.com" + "developer": "alibaba" } diff --git a/hdfsreader/pom.xml b/hdfsreader/pom.xml index 5d07dc256b..a5c2da2c4f 100644 --- a/hdfsreader/pom.xml +++ b/hdfsreader/pom.xml @@ -16,6 +16,17 @@ 2.7.1 + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -51,6 +62,11 @@ hadoop-yarn-common ${hadoop.version} + + com.aliyun.oss + hadoop-aliyun + 2.7.2 + org.apache.hadoop hadoop-mapreduce-client-core diff --git a/hdfswriter/pom.xml b/hdfswriter/pom.xml index 15b3780a60..741159cb5e 100644 --- a/hdfswriter/pom.xml +++ b/hdfswriter/pom.xml @@ -19,6 +19,17 @@ + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -30,6 +41,11 @@ + + com.aliyun.oss + hadoop-aliyun + 2.7.2 + org.slf4j slf4j-api @@ -132,4 +148,4 @@ - \ No newline at end of file + diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java index c8bfa50b6c..1ecdb57831 100644 --- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java +++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java @@ -6,10 +6,13 @@ import com.alibaba.datax.common.plugin.RecordReceiver; import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.unstructuredstorage.util.ColumnTypeUtil; +import com.alibaba.datax.plugin.unstructuredstorage.util.HdfsUtil; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; import org.apache.commons.lang3.tuple.MutablePair; import org.apache.hadoop.fs.*; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; @@ -24,6 +27,10 @@ import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.Types; + import java.io.IOException; import java.text.SimpleDateFormat; import java.util.*; @@ -556,4 +563,67 @@ public static MutablePair, Boolean> transportOneRecord( transportResult.setLeft(recordList); return transportResult; } + + + public static String generateParquetSchemaFromColumnAndType(List columns) { + Map decimalColInfo = new HashMap<>(16); + ColumnTypeUtil.DecimalInfo PARQUET_DEFAULT_DECIMAL_INFO = new ColumnTypeUtil.DecimalInfo(10, 2); + Types.MessageTypeBuilder typeBuilder = Types.buildMessage(); + for (Configuration column : columns) { + String name = column.getString("name"); + String colType = column.getString("type"); + Validate.notNull(name, "column.name can't be null"); + 
Validate.notNull(colType, "column.type can't be null");
+            switch (colType.toLowerCase()) {
+                case "tinyint":
+                case "smallint":
+                case "int":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.INT32).named(name);
+                    break;
+                case "bigint":
+                case "long":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.INT64).named(name);
+                    break;
+                case "float":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.FLOAT).named(name);
+                    break;
+                case "double":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.DOUBLE).named(name);
+                    break;
+                case "binary":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BINARY).named(name);
+                    break;
+                case "char":
+                case "varchar":
+                case "string":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
+                    break;
+                case "boolean":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BOOLEAN).named(name);
+                    break;
+                case "timestamp":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.INT96).named(name);
+                    break;
+                case "date":
+                    typeBuilder.optional(PrimitiveType.PrimitiveTypeName.INT32).as(OriginalType.DATE).named(name);
+                    break;
+                default:
+                    if (ColumnTypeUtil.isDecimalType(colType)) {
+                        ColumnTypeUtil.DecimalInfo decimalInfo = ColumnTypeUtil.getDecimalInfo(colType, PARQUET_DEFAULT_DECIMAL_INFO);
+                        typeBuilder.optional(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+                                .as(OriginalType.DECIMAL)
+                                .precision(decimalInfo.getPrecision())
+                                .scale(decimalInfo.getScale())
+                                .length(HdfsUtil.computeMinBytesForPrecision(decimalInfo.getPrecision()))
+                                .named(name);
+
+                        decimalColInfo.put(name, decimalInfo);
+                    } else {
+                        typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BINARY).named(name);
+                    }
+                    break;
+            }
+        }
+        return typeBuilder.named("m").toString();
+    }
 }
diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java
index 853613a225..59ec6d18ea 100644
--- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java
+++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java
@@ -9,9 +9,11 @@
 import org.apache.commons.io.Charsets;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.Validate;
 import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import parquet.schema.MessageTypeParser;
 
 import java.util.*;
 
@@ -323,8 +325,55 @@ private String buildTmpFilePath(String userPath) {
         }
         return tmpFilePath;
     }
+    public void unitizeParquetConfig(Configuration writerSliceConfig) {
+        String parquetSchema = writerSliceConfig.getString(Key.PARQUET_SCHEMA);
+        if (StringUtils.isNotBlank(parquetSchema)) {
+            LOG.info("parquetSchema has config. use parquetSchema:\n{}", parquetSchema);
+            return;
+        }
+
+        List<Configuration> columns = writerSliceConfig.getListConfiguration(Key.COLUMN);
+        if (columns == null || columns.isEmpty()) {
+            throw DataXException.asDataXException("parquetSchema or column can't be blank!");
+        }
+
+        parquetSchema = generateParquetSchemaFromColumn(columns);
+        // Kept for compatibility with the historical logic; if the schema it
+        // produces fails to parse, fall back to the new type-aware generation.
+        try {
+            MessageTypeParser.parseMessageType(parquetSchema);
+        } catch (Throwable e) {
+            LOG.warn("The generated parquetSchema {} is illegal, try to generate parquetSchema in another way", parquetSchema);
+            parquetSchema = HdfsHelper.generateParquetSchemaFromColumnAndType(columns);
+            LOG.info("The last generated parquet schema is {}", parquetSchema);
+        }
+        writerSliceConfig.set(Key.PARQUET_SCHEMA, parquetSchema);
+        LOG.info("dataxParquetMode use default fields.");
+        writerSliceConfig.set(Key.DATAX_PARQUET_MODE, "fields");
+    }
+
+    private String generateParquetSchemaFromColumn(List<Configuration> columns) {
+        StringBuffer parquetSchemaStringBuffer = new StringBuffer();
+        parquetSchemaStringBuffer.append("message m {");
+        for (Configuration column : columns) {
+            String name = column.getString("name");
+            Validate.notNull(name, "column.name can't be null");
+
+            String type = column.getString("type");
+            Validate.notNull(type, "column.type can't be null");
+
+            String parquetColumn = String.format("optional %s %s;", type, name);
+            parquetSchemaStringBuffer.append(parquetColumn);
+        }
+        parquetSchemaStringBuffer.append("}");
+        String parquetSchema = parquetSchemaStringBuffer.toString();
+        LOG.info("generate parquetSchema:\n{}", parquetSchema);
+        return parquetSchema;
+    }
+
 }
+
+
     public static class Task extends Writer.Task {
         private static final Logger LOG = LoggerFactory.getLogger(Task.class);
diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/Key.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/Key.java
index f1f6309689..2b1fab9802 100644
--- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/Key.java
+++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/Key.java
@@ -33,4 +33,17 @@ public class Key {
     public static final String KERBEROS_PRINCIPAL = "kerberosPrincipal";
     // hadoop config
     public static final String HADOOP_CONFIG = "hadoopConfig";
+
+    // useOldRawDataTransf
+    public final static String PARQUET_FILE_USE_RAW_DATA_TRANSF = "useRawDataTransf";
+
+    public final static String DATAX_PARQUET_MODE = "dataxParquetMode";
+
+    // hdfs username, defaults to admin
+    public final static String HDFS_USERNAME = "hdfsUsername";
+
+    public static final String PROTECTION = "protection";
+
+    public static final String PARQUET_SCHEMA = "parquetSchema";
+    public static final String PARQUET_MERGE_RESULT = "parquetMergeResult";
 }
diff --git a/hologresjdbcwriter/doc/hologresjdbcwriter.md b/hologresjdbcwriter/doc/hologresjdbcwriter.md
new file mode 100644
index 0000000000..8b163017be
--- /dev/null
+++ b/hologresjdbcwriter/doc/hologresjdbcwriter.md
@@ -0,0 +1,204 @@
+# DataX HologresJdbcWriter
+
+
+---
+
+
+## 1 Quick Introduction
+
+The HologresJdbcWriter plugin writes data into a target Hologres table. Under the hood it connects to the remote Hologres database over JDBC and executes the corresponding `insert into ... on conflict` SQL statements to load the data, committing to the database in batches.
+
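+For intuition, the upsert that ultimately reaches Hologres has roughly the shape sketched below. This is an illustration only: the actual statement is generated internally by the underlying holo-client, and the table and columns (`test`, `id`, `name`) are placeholders borrowed from the sample job in 3.1.
+
+```java
+public class UpsertShapeSketch {
+    // Illustrative shape for a table test(id primary key, name).
+    // With writeMode=IGNORE the tail would instead be "ON CONFLICT (id) DO NOTHING".
+    static final String UPSERT_SHAPE =
+            "INSERT INTO test (id, name) VALUES (?, ?) "
+          + "ON CONFLICT (id) DO UPDATE SET name = excluded.name";
+}
+```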
+
+* HologresJdbcWriter supports single-table synchronization only.
+
+## 2 Implementation Principle
+
+HologresJdbcWriter obtains the protocol data produced by the Reader through the DataX framework and, according to your configuration, generates the corresponding SQL insert statement:
+
+* `insert into ... on conflict`
+
+
+## 3 Function Description
+
+### 3.1 Sample Configuration
+
+* This job generates data in memory and imports it via HologresJdbcWriter.
+
+```json
+{
+    "job": {
+        "setting": {
+            "speed": {
+                "channel": 1
+            }
+        },
+        "content": [
+            {
+                "reader": {
+                    "name": "streamreader",
+                    "parameter": {
+                        "column" : [
+                            {
+                                "value": "DataX",
+                                "type": "string"
+                            },
+                            {
+                                "value": 19880808,
+                                "type": "long"
+                            },
+                            {
+                                "value": "1988-08-08 08:08:08",
+                                "type": "date"
+                            },
+                            {
+                                "value": true,
+                                "type": "bool"
+                            },
+                            {
+                                "value": "test",
+                                "type": "bytes"
+                            }
+                        ],
+                        "sliceRecordCount": 1000
+                    }
+                },
+                "writer": {
+                    "name": "hologresjdbcwriter",
+                    "parameter": {
+                        "username": "xx",
+                        "password": "xx",
+                        "column": [
+                            "id",
+                            "name"
+                        ],
+                        "preSql": [
+                            "delete from test"
+                        ],
+                        "connection": [
+                            {
+                                "jdbcUrl": "jdbc:postgresql://127.0.0.1:3002/datax",
+                                "table": [
+                                    "test"
+                                ]
+                            }
+                        ],
+                        "writeMode" : "REPLACE",
+                        "client" : {
+                            "writeThreadSize" : 3
+                        }
+                    }
+                }
+            }
+        ]
+    }
+}
+
+```
+
+
+### 3.2 Parameter Description
+
+* **jdbcUrl**
+
+    * Description: JDBC connection information of the target database. jdbcUrl must be placed inside a connection configuration unit.
+
+      Note: 1. Only one value may be configured per database. 2. jdbcUrl follows the official PostgreSQL format and may carry additional connection parameters; see the official PostgreSQL documentation or consult your DBA for details.
+
+    * Required: yes
+
+    * Default: none
+
+* **username**
+
+    * Description: user name for the target database
+
+    * Required: yes
+
+    * Default: none
+
+* **password**
+
+    * Description: password for the target database
+
+    * Required: yes
+
+    * Default: none
+
+* **table**
+
+    * Description: name of the target table. Only one table can be written to.
+
+      Note: table and jdbcUrl must be placed inside a connection configuration unit.
+
+    * Required: yes
+
+    * Default: none
+
+* **column**
+
+    * Description: the fields of the target table that data is written into, separated by commas, e.g. "column": ["id","name","age"]. To write all columns in order, use \*, e.g. "column": ["\*"].
+
+      Note: 1. We strongly discourage the \* form, because your job may run incorrectly or fail once the field count or types of the target table change. 2. column must not contain any constant values.
+
+    * Required: yes
+
+    * Default: none
+
+* **preSql**
+
+    * Description: standard SQL statements executed before data is written to the target table. If a statement needs to refer to the target table, write `@table`; the variable is replaced with the actual table name when the statement is executed (see the sketch after this parameter).
+
+    * Required: no
+
+    * Default: none
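+A minimal sketch of the `@table` substitution, assuming the `renderPreOrPostSqls` helper from this plugin's `WriterUtil` (the same call BaseWriter makes, further down in this patch); the table name `test` is a placeholder:
+
+```java
+import java.util.Arrays;
+import java.util.List;
+
+import com.alibaba.datax.plugin.writer.hologresjdbcwriter.util.WriterUtil;
+
+public class PreSqlRenderingSketch {
+    public static void main(String[] args) {
+        // "@table" is replaced by the actual table name before the preSql runs.
+        List<String> rendered = WriterUtil.renderPreOrPostSqls(
+                Arrays.asList("delete from @table"), "test");
+        System.out.println(rendered); // expected: [delete from test]
+    }
+}
+```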
+
+* **postSql**
+
+    * Description: standard SQL statements executed after data has been written to the target table (same mechanism as preSql).
+
+    * Required: no
+
+    * Default: none
+
+* **batchSize**
+
+    * Description: number of records submitted in one batch. A larger value greatly reduces the number of network round-trips between DataX and Hologres and improves overall throughput, but setting it too high may cause the DataX process to run out of memory (OOM).
+
+    * Required: no
+
+    * Default: 512
+
+* **writeMode**
+
+    * Description: controls what happens on a primary-key conflict when writing to a Hologres table that has a primary key. REPLACE: all fields of the conflicting Hologres row are overwritten (fields not configured in the writer are filled with null). UPDATE: only the fields configured in the writer are overwritten. IGNORE: the new data is discarded and nothing is overwritten. See the mapping sketch after this parameter.
+
+    * Required: no
+
+    * Default: REPLACE
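+The sketch below condenses how writeMode is translated to holo-client's WriteMode enum; it mirrors the ternary in BaseWriter.Task.init later in this patch.
+
+```java
+import com.alibaba.hologres.client.model.WriteMode;
+
+public class WriteModeMappingSketch {
+    // IGNORE  -> INSERT_OR_IGNORE   (keep the existing row, drop the new one)
+    // UPDATE  -> INSERT_OR_UPDATE   (overwrite only the columns configured in the writer)
+    // REPLACE -> INSERT_OR_REPLACE  (overwrite all columns; unconfigured ones become null)
+    static WriteMode toHoloWriteMode(String writeMode) {
+        switch (writeMode.toUpperCase()) {
+            case "IGNORE":
+                return WriteMode.INSERT_OR_IGNORE;
+            case "UPDATE":
+                return WriteMode.INSERT_OR_UPDATE;
+            default:
+                return WriteMode.INSERT_OR_REPLACE;
+        }
+    }
+}
+```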
+
+* **client.writeThreadSize**
+
+    * Description: size of the connection pool used to write to Hologres; multiple connections write data in parallel (see the end-to-end sketch after this list).
+
+    * Required: no
+
+    * Default: 1
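+Putting the parameters above together, here is a minimal sketch of the write path this plugin drives. The calls mirror the BaseWriter class later in this patch; `client.*` keys are applied to HoloConfig via ConfLoader, so `client.writeThreadSize` is assumed here to correspond to `HoloConfig#setWriteThreadSize`. The endpoint, credentials and table are placeholders from 3.1.
+
+```java
+import com.alibaba.hologres.client.HoloClient;
+import com.alibaba.hologres.client.HoloConfig;
+import com.alibaba.hologres.client.Put;
+import com.alibaba.hologres.client.model.TableSchema;
+import com.alibaba.hologres.client.model.WriteMode;
+
+public class HologresWriteSketch {
+    public static void main(String[] args) throws Exception {
+        HoloConfig config = new HoloConfig();
+        config.setJdbcUrl("jdbc:postgresql://127.0.0.1:3002/datax"); // placeholder
+        config.setUsername("xx");
+        config.setPassword("xx");
+        config.setWriteMode(WriteMode.INSERT_OR_REPLACE); // writeMode=REPLACE
+        config.setWriteBatchSize(512);                    // batchSize
+        config.setWriteThreadSize(3);                     // client.writeThreadSize (assumed setter)
+
+        try (HoloClient client = new HoloClient(config)) {
+            TableSchema schema = client.getTableSchema("test");
+            Put put = new Put(schema);
+            put.setObject(schema.getColumnIndex("id"), 1L);
+            put.setObject(schema.getColumnIndex("name"), "DataX");
+            client.put(put);  // buffered and submitted in batches
+            client.flush();   // push out whatever is still buffered
+        }
+    }
+}
+```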
+
+### 3.3 Type Conversion
+
+HologresJdbcWriter currently supports most Hologres types, but a few are not covered; please check your types.
+
+Conversion table used by HologresJdbcWriter for Hologres types:
+
+| DataX internal type | Hologres data type |
+| -------- | ----- |
+| Long | bigint, integer, smallint |
+| Double | double precision, money, numeric, real |
+| String | varchar, char, text, bit |
+| Date | date, time, timestamp |
+| Boolean | bool |
+| Bytes | bytea |
diff --git a/hologresjdbcwriter/pom.xml b/hologresjdbcwriter/pom.xml
new file mode 100644
index 0000000000..a908dfed86
--- /dev/null
+++ b/hologresjdbcwriter/pom.xml
@@ -0,0 +1,90 @@
+ + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + hologresjdbcwriter + hologresjdbcwriter + jar + writer data into hologres using jdbc + + + 1.8 + + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + + org.slf4j + slf4j-api + + + + ch.qos.logback + logback-classic + + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + + com.alibaba.hologres + holo-client + 2.1.0 + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + \ No newline at end of file
diff --git a/hologresjdbcwriter/src/main/assembly/package.xml b/hologresjdbcwriter/src/main/assembly/package.xml
new file mode 100755
index 0000000000..db8100e16f
--- /dev/null
+++ b/hologresjdbcwriter/src/main/assembly/package.xml
@@ -0,0 +1,35 @@
+ + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/hologresjdbcwriter + + + target/ + + hologresjdbcwriter-0.0.1-SNAPSHOT.jar + + plugin/writer/hologresjdbcwriter + + + + + + false + plugin/writer/hologresjdbcwriter/libs + runtime + + +
diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/BaseWriter.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/BaseWriter.java
new file mode 100644
index 0000000000..2c390bcb1b
--- /dev/null
+++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/BaseWriter.java
@@ -0,0 +1,526 @@
+package com.alibaba.datax.plugin.writer.hologresjdbcwriter;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.DateColumn;
+import com.alibaba.datax.common.element.LongColumn;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.plugin.TaskPluginCollector;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.RetryUtil;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.datax.plugin.writer.hologresjdbcwriter.util.ConfLoader;
+import com.alibaba.datax.plugin.writer.hologresjdbcwriter.util.OriginalConfPretreatmentUtil;
+import com.alibaba.datax.plugin.writer.hologresjdbcwriter.util.WriterUtil;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.hologres.client.HoloClient;
+import com.alibaba.hologres.client.HoloConfig;
+import com.alibaba.hologres.client.Put;
+import com.alibaba.hologres.client.exception.HoloClientWithDetailsException;
+import com.alibaba.hologres.client.model.TableSchema;
+import
org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Time; +import java.sql.Timestamp; +import java.sql.Types; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class BaseWriter { + + protected static final Set ignoreConfList; + + static { + ignoreConfList = new HashSet<>(); + ignoreConfList.add("jdbcUrl"); + ignoreConfList.add("username"); + ignoreConfList.add("password"); + ignoreConfList.add("writeMode"); + } + + enum WriteMode { + IGNORE, + UPDATE, + REPLACE + } + + private static WriteMode getWriteMode(String text) { + text = text.toUpperCase(); + switch (text) { + case "IGNORE": + return WriteMode.IGNORE; + case "UPDATE": + return WriteMode.UPDATE; + case "REPLACE": + return WriteMode.REPLACE; + default: + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "writeMode只支持IGNORE,UPDATE,REPLACE,无法识别 " + text); + } + } + + public static class Job { + private DataBaseType dataBaseType; + + private static final Logger LOG = LoggerFactory + .getLogger(BaseWriter.Job.class); + + public Job(DataBaseType dataBaseType) { + this.dataBaseType = dataBaseType; + OriginalConfPretreatmentUtil.DATABASE_TYPE = this.dataBaseType; + } + + public void init(Configuration originalConfig) { + OriginalConfPretreatmentUtil.doPretreatment(originalConfig, this.dataBaseType); + checkConf(originalConfig); + LOG.debug("After job init(), originalConfig now is:[\n{}\n]", + originalConfig.toJSON()); + } + + private void checkConf(Configuration originalConfig) { + getWriteMode(originalConfig.getString(Key.WRITE_MODE, "REPLACE")); + List userConfiguredColumns = originalConfig.getList(Key.COLUMN, String.class); + List conns = originalConfig.getList(Constant.CONN_MARK, + JSONObject.class); + if (conns.size() > 1) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "只支持单表同步"); + } + int tableNumber = originalConfig.getInt(Constant.TABLE_NUMBER_MARK); + if (tableNumber > 1) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "只支持单表同步"); + } + JSONObject connConf = conns.get(0); + String jdbcUrl = connConf.getString(Key.JDBC_URL); + String username = originalConfig.getString(Key.USERNAME); + String password = originalConfig.getString(Key.PASSWORD); + + String table = connConf.getJSONArray(Key.TABLE).getString(0); + + Map clientConf = originalConfig.getMap("client"); + + HoloConfig config = new HoloConfig(); + config.setJdbcUrl(jdbcUrl); + config.setUsername(username); + config.setPassword(password); + if (clientConf != null) { + try { + config = ConfLoader.load(clientConf, config, ignoreConfList); + } catch (Exception e) { + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + "配置解析失败."); + } + } + + try (HoloClient client = new HoloClient(config)) { + TableSchema schema = client.getTableSchema(table); + LOG.info("table {} column info:", schema.getTableNameObj().getFullName()); + for (com.alibaba.hologres.client.model.Column column : schema.getColumnSchema()) { + LOG.info("name:{},type:{},typeName:{},nullable:{},defaultValue:{}", column.getName(), column.getType(), column.getTypeName(), column.getAllowNull(), column.getDefaultValue()); + } + for (String userColumn : userConfiguredColumns) { + if (schema.getColumnIndex(userColumn) == null) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, "配置的列 " + userColumn + " 不存在"); 
+ } + } + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.CONN_DB_ERROR, "获取表schema失败", e); + } + + } + + // 一般来说,是需要推迟到 task 中进行pre 的执行(单表情况例外) + public void prepare(Configuration originalConfig) { + + try { + String username = originalConfig.getString(Key.USERNAME); + String password = originalConfig.getString(Key.PASSWORD); + + List conns = originalConfig.getList(Constant.CONN_MARK, + Object.class); + Configuration connConf = Configuration.from(conns.get(0) + .toString()); + + String jdbcUrl = connConf.getString(Key.JDBC_URL); + originalConfig.set(Key.JDBC_URL, jdbcUrl); + + String table = connConf.getList(Key.TABLE, String.class).get(0); + originalConfig.set(Key.TABLE, table); + + List preSqls = originalConfig.getList(Key.PRE_SQL, + String.class); + List renderedPreSqls = WriterUtil.renderPreOrPostSqls( + preSqls, table); + + originalConfig.remove(Constant.CONN_MARK); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + // 说明有 preSql 配置,则此处删除掉 + originalConfig.remove(Key.PRE_SQL); + String tempJdbcUrl = jdbcUrl.replace("postgresql", "hologres"); + try (Connection conn = DriverManager.getConnection( + tempJdbcUrl, username, password)) { + LOG.info("Begin to execute preSqls:[{}]. context info:{}.", + StringUtils.join(renderedPreSqls, ";"), tempJdbcUrl); + + WriterUtil.executeSqls(conn, renderedPreSqls, tempJdbcUrl, dataBaseType); + } + } + LOG.debug("After job prepare(), originalConfig now is:[\n{}\n]", + originalConfig.toJSON()); + } catch (SQLException e) { + throw DataXException.asDataXException(DBUtilErrorCode.SQL_EXECUTE_FAIL, e); + } + } + + public List split(Configuration originalConfig, + int mandatoryNumber) { + return WriterUtil.doSplit(originalConfig, mandatoryNumber); + } + + // 一般来说,是需要推迟到 task 中进行post 的执行(单表情况例外) + public void post(Configuration originalConfig) { + + String username = originalConfig.getString(Key.USERNAME); + String password = originalConfig.getString(Key.PASSWORD); + + String jdbcUrl = originalConfig.getString(Key.JDBC_URL); + + String table = originalConfig.getString(Key.TABLE); + + List postSqls = originalConfig.getList(Key.POST_SQL, + String.class); + List renderedPostSqls = WriterUtil.renderPreOrPostSqls( + postSqls, table); + + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + // 说明有 postSql 配置,则此处删除掉 + originalConfig.remove(Key.POST_SQL); + String tempJdbcUrl = jdbcUrl.replace("postgresql", "hologres"); + Connection conn = DBUtil.getConnection(this.dataBaseType, + tempJdbcUrl, username, password); + + LOG.info( + "Begin to execute postSqls:[{}]. 
+ String tempJdbcUrl = jdbcUrl.replace("postgresql", "hologres"); + Connection conn = DBUtil.getConnection(this.dataBaseType, + tempJdbcUrl, username, password); + + LOG.info( + "Begin to execute postSqls:[{}]. context info:{}.", + StringUtils.join(renderedPostSqls, ";"), tempJdbcUrl); + WriterUtil.executeSqls(conn, renderedPostSqls, tempJdbcUrl, dataBaseType); + DBUtil.closeDBResources(null, null, conn); + } + + } + + public void destroy(Configuration originalConfig) { + } + + } + + public static class Task { + protected static final Logger LOG = LoggerFactory + .getLogger(BaseWriter.Task.class); + + protected DataBaseType dataBaseType; + + protected String username; + protected String password; + protected String jdbcUrl; + protected String table; + protected List<String> columns; + protected int batchSize; + protected int batchByteSize; + protected int columnNumber = 0; + protected TaskPluginCollector taskPluginCollector; + + // common context to attach to log messages: which connection the message relates to and which table the operation targets + protected static String BASIC_MESSAGE; + + protected WriteMode writeMode; + protected String arrayDelimiter; + protected boolean emptyAsNull; + + protected HoloConfig config; + + public Task(DataBaseType dataBaseType) { + this.dataBaseType = dataBaseType; + } + + public void init(Configuration writerSliceConfig) { + this.username = writerSliceConfig.getString(Key.USERNAME); + this.password = writerSliceConfig.getString(Key.PASSWORD); + this.jdbcUrl = writerSliceConfig.getString(Key.JDBC_URL); + this.table = writerSliceConfig.getString(Key.TABLE); + + this.columns = writerSliceConfig.getList(Key.COLUMN, String.class); + this.columnNumber = this.columns.size(); + + this.arrayDelimiter = writerSliceConfig.getString(Key.Array_Delimiter); + + this.batchSize = writerSliceConfig.getInt(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE); + this.batchByteSize = writerSliceConfig.getInt(Key.BATCH_BYTE_SIZE, Constant.DEFAULT_BATCH_BYTE_SIZE); + + writeMode = getWriteMode(writerSliceConfig.getString(Key.WRITE_MODE, "REPLACE")); + emptyAsNull = writerSliceConfig.getBool(Key.EMPTY_AS_NULL, true); + + Map<String, Object> clientConf = writerSliceConfig.getMap("client"); + + config = new HoloConfig(); + config.setJdbcUrl(this.jdbcUrl); + config.setUsername(username); + config.setPassword(password); + config.setWriteMode(writeMode == WriteMode.IGNORE ? com.alibaba.hologres.client.model.WriteMode.INSERT_OR_IGNORE : (writeMode == WriteMode.UPDATE ? com.alibaba.hologres.client.model.WriteMode.INSERT_OR_UPDATE : com.alibaba.hologres.client.model.WriteMode.INSERT_OR_REPLACE));
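+ // IGNORE keeps the existing row on primary-key conflict, UPDATE merges the written columns into it, REPLACE overwrites the whole row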
+ config.setWriteBatchSize(this.batchSize); + config.setWriteBatchTotalByteSize(this.batchByteSize); + config.setMetaCacheTTL(3600000L); + config.setEnableDefaultForNotNullColumn(false); + config.setRetryCount(5); + config.setAppName("datax"); + + if (clientConf != null) { + try { + config = ConfLoader.load(clientConf, config, ignoreConfList); + } catch (Exception e) { + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + "配置解析失败."); + } + } + + BASIC_MESSAGE = String.format("jdbcUrl:[%s], table:[%s]", + this.jdbcUrl, this.table); + } + + public void prepare(Configuration writerSliceConfig) { + + } + + public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCollector taskPluginCollector) { + this.taskPluginCollector = taskPluginCollector; + + try (HoloClient client = new HoloClient(config)) { + Record record; + TableSchema schema = RetryUtil.executeWithRetry(() -> client.getTableSchema(this.table), 3, 5000L, true); + while ((record = recordReceiver.getFromReader()) != null) { + if (record.getColumnNumber() != this.columnNumber) { + // the number of fields read from the source differs from the number of columns to write; fail fast + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + String.format( + "列配置信息有错误. 因为您配置的任务中,源头读取字段数:%s 与 目的表要写入的字段数:%s 不相等. 请检查您的配置并作出修改.", + record.getColumnNumber(), + this.columnNumber)); + } + Put put = convertToPut(record, schema); + if (null != put) { + try { + client.put(put); + } catch (HoloClientWithDetailsException detail) { + handleDirtyData(detail); + } + } + } + try { + client.flush(); + } catch (HoloClientWithDetailsException detail) { + handleDirtyData(detail); + } + } catch (Exception e) { + throw DataXException.asDataXException( + DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + // HoloClientWithDetailsException carries the individual failed records; their attachments are the original DataX records, which are reported as dirty data instead of failing the whole task + private void handleDirtyData(HoloClientWithDetailsException detail) { + for (int i = 0; i < detail.size(); ++i) { + com.alibaba.hologres.client.model.Record failRecord = detail.getFailRecord(i); + if (failRecord.getAttachmentList() != null) { + for (Object obj : failRecord.getAttachmentList()) { + taskPluginCollector.collectDirtyRecord((Record) obj, detail.getException(i)); + } + } + } + } + + public void startWrite(RecordReceiver recordReceiver, + TaskPluginCollector taskPluginCollector) { + startWriteWithConnection(recordReceiver, taskPluginCollector); + } + + public void post(Configuration writerSliceConfig) { + + } + + public void destroy(Configuration writerSliceConfig) { + } + + // uses the instance state columnNumber and columns directly + protected Put convertToPut(Record record, TableSchema schema) { + try { + Put put = new Put(schema); + put.getRecord().addAttachment(record); + for (int i = 0; i < this.columnNumber; i++) { + fillColumn(put, schema, schema.getColumnIndex(this.columns.get(i)), record.getColumn(i)); + } + return put; + } catch (Exception e) { + taskPluginCollector.collectDirtyRecord(record, e); + return null; + } + + } + + protected void fillColumn(Put data, TableSchema schema, int index, Column column) throws SQLException { + com.alibaba.hologres.client.model.Column holoColumn = schema.getColumn(index); + switch (holoColumn.getType()) { + case Types.CHAR: + case Types.NCHAR: + case Types.CLOB: + case Types.NCLOB: + case Types.VARCHAR: + case Types.LONGVARCHAR: + case Types.NVARCHAR: + case Types.LONGNVARCHAR: + String value = column.asString(); + if (emptyAsNull && value != null && value.length() == 0) { + data.setObject(index, null); + }
else { + data.setObject(index, value); + } + break; + + case Types.SMALLINT: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigInteger().shortValue()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.INTEGER: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigInteger().intValue()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.BIGINT: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigInteger().longValue()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.NUMERIC: + case Types.DECIMAL: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigDecimal()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.FLOAT: + case Types.REAL: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigDecimal().floatValue()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.DOUBLE: + if (column.getByteSize() > 0) { + data.setObject(index, column.asDouble()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.TIME: + if (column.getByteSize() > 0) { + if (column instanceof LongColumn || column instanceof DateColumn) { + data.setObject(index, new Time(column.asLong())); + } else { + data.setObject(index, column.asString()); + } + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.DATE: + if (column.getByteSize() > 0) { + if (column instanceof LongColumn || column instanceof DateColumn) { + data.setObject(index, column.asLong()); + } else { + data.setObject(index, column.asString()); + } + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.TIMESTAMP: + if (column.getByteSize() > 0) { + if (column instanceof LongColumn || column instanceof DateColumn) { + data.setObject(index, new Timestamp(column.asLong())); + } else { + data.setObject(index, column.asString()); + } + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + + case Types.BINARY: + case Types.VARBINARY: + case Types.BLOB: + case Types.LONGVARBINARY: + String byteValue = column.asString(); + if (null != byteValue) { + data.setObject(index, column + .asBytes()); + } + break; + case Types.BOOLEAN: + case Types.BIT: + if (column.getByteSize() == 0) { + break; + } + try { + Boolean boolValue = column.asBoolean(); + data.setObject(index, boolValue); + } catch (Exception e) { + data.setObject(index, !"0".equals(column.asString())); + } + break; + case Types.ARRAY: + String arrayString = column.asString(); + Object arrayObject = null; + if (null == arrayString || (emptyAsNull && "".equals(arrayString))) { + data.setObject(index, null); + break; + } else if (arrayDelimiter != null && arrayDelimiter.length() > 0) { + arrayObject = arrayString.split(this.arrayDelimiter); + } else { + arrayObject = JSONArray.parseArray(arrayString); + } + data.setObject(index, arrayObject); + break; + default: + throw DataXException + .asDataXException( + DBUtilErrorCode.UNSUPPORTED_TYPE, + String.format( + "您的配置文件中的列配置信息有误. 因为DataX 不支持数据库写入这种字段类型. 字段名:[%s], 字段类型:[%d], 字段Java类型:[%s]. 
请修改表中该字段的类型或者不同步该字段.", + holoColumn.getName(), + holoColumn.getType(), + holoColumn.getTypeName())); + } + } + } +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Constant.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Constant.java new file mode 100755 index 0000000000..eb51d0267a --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Constant.java @@ -0,0 +1,15 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter; + +/** + * Constants used as markers (MARK) while the plugin parses user configuration. + */ +public final class Constant { + public static final int DEFAULT_BATCH_SIZE = 512; + + public static final int DEFAULT_BATCH_BYTE_SIZE = 50 * 1024 * 1024; + + public static String CONN_MARK = "connection"; + + public static String TABLE_NUMBER_MARK = "tableNumber"; + +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/HologresJdbcWriter.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/HologresJdbcWriter.java new file mode 100755 index 0000000000..811a2e11b3 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/HologresJdbcWriter.java @@ -0,0 +1,78 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter; + +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; + +import java.util.List; + +public class HologresJdbcWriter extends Writer { + private static final DataBaseType DATABASE_TYPE = DataBaseType.PostgreSQL; + + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private BaseWriter.Job baseWriterMaster; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + this.baseWriterMaster = new BaseWriter.Job(DATABASE_TYPE); + this.baseWriterMaster.init(this.originalConfig); + } + + @Override + public void prepare() { + this.baseWriterMaster.prepare(this.originalConfig); + } + + @Override + public List<Configuration> split(int mandatoryNumber) { + return this.baseWriterMaster.split(this.originalConfig, mandatoryNumber); + } + + @Override + public void post() { + this.baseWriterMaster.post(this.originalConfig); + } + + @Override + public void destroy() { + this.baseWriterMaster.destroy(this.originalConfig); + } + + } + + public static class Task extends Writer.Task { + private Configuration writerSliceConfig; + private BaseWriter.Task baseWriterSlave; + + @Override + public void init() { + this.writerSliceConfig = super.getPluginJobConf(); + this.baseWriterSlave = new BaseWriter.Task(DATABASE_TYPE); + this.baseWriterSlave.init(this.writerSliceConfig); + } + + @Override + public void prepare() { + this.baseWriterSlave.prepare(this.writerSliceConfig); + } + + @Override + public void startWrite(RecordReceiver recordReceiver) { + this.baseWriterSlave.startWrite(recordReceiver, super.getTaskPluginCollector()); + } + + @Override + public void post() { + this.baseWriterSlave.post(this.writerSliceConfig); + } + + @Override + public void destroy() { + this.baseWriterSlave.destroy(this.writerSliceConfig); + } + + } + +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Key.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Key.java new file mode 100755 index 0000000000..3bd5d1e2c5 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Key.java @@ -0,0 +1,32 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter; + +public final class Key { + public final static String JDBC_URL = "jdbcUrl"; + + public final static String USERNAME = "username"; + + public final static String PASSWORD = "password"; + + public final static String TABLE = "table"; + + public final static String COLUMN = "column"; + + public final static String Array_Delimiter = "arrayDelimiter"; +
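+ // accepted values: IGNORE / UPDATE / REPLACE (see BaseWriter.WriteMode)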
+ public final static String WRITE_MODE = "writeMode"; + + public final static String PRE_SQL = "preSql"; + + public final static String POST_SQL = "postSql"; + + // default: 512 (Constant.DEFAULT_BATCH_SIZE) + public final static String BATCH_SIZE = "batchSize"; + + // default: 50MB (Constant.DEFAULT_BATCH_BYTE_SIZE) + public final static String BATCH_BYTE_SIZE = "batchByteSize"; + + public final static String EMPTY_AS_NULL = "emptyAsNull"; + + +} \ No newline at end of file diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/ConfLoader.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/ConfLoader.java new file mode 100644 index 0000000000..48d7584e98 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/ConfLoader.java @@ -0,0 +1,60 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter.util; + +import com.alibaba.hologres.client.model.WriteMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Field; +import java.util.Map; +import java.util.Set; + +public class ConfLoader { + public static Logger LOG = LoggerFactory.getLogger(ConfLoader.class); + + public static <T> T load(Map<String, Object> props, T config, Set<String> ignoreList) throws Exception { + Field[] fields = config.getClass().getDeclaredFields(); + for (Map.Entry<String, Object> entry : props.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue().toString(); + if (ignoreList.contains(key)) { + LOG.info("Config Skip {}", key); + continue; + } + boolean match = false; + for (Field field : fields) { + if (field.getName().equals(key)) { + match = true; + field.setAccessible(true); + Class<?> type = field.getType(); + if (type.equals(String.class)) { + field.set(config, value); + } else if (type.equals(int.class)) { + field.set(config, Integer.parseInt(value)); + } else if (type.equals(long.class)) { + field.set(config, Long.parseLong(value)); + } else if (type.equals(boolean.class)) { + field.set(config, Boolean.parseBoolean(value)); + } else if (WriteMode.class.equals(type)) { + field.set(config, WriteMode.valueOf(value)); + } else { + throw new Exception("invalid type " + type + " for param " + key); + } + if ("password".equals(key)) { + // mask the password in the log, one '*' per character + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < value.length(); ++i) { + sb.append("*"); + } + LOG.info("Config {}={}", key, sb.toString()); + } else { + LOG.info("Config {}={}", key, value); + } + } + } + if (!match) { + throw new Exception("param " + key + " not found in HoloConfig"); + } + } + return config; + } +}
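Because the loader above resolves keys by reflection, a job's optional `client` map is simply a set of HoloConfig field names; anything listed in ignoreConfList (jdbcUrl, username, password, writeMode) is always skipped. A minimal sketch of such a map, assuming only the field names implied by the setters BaseWriter.Task.init already calls (the values are illustrative, not recommendations):

```json
"client": {
    "writeBatchSize": 1024,
    "writeBatchTotalByteSize": 104857600,
    "metaCacheTTL": 3600000,
    "retryCount": 3,
    "enableDefaultForNotNullColumn": true
}
```

Unknown keys fail fast with "param ... not found in HoloConfig", so a typo surfaces at init time rather than becoming a silently ignored setting.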
diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/OriginalConfPretreatmentUtil.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/OriginalConfPretreatmentUtil.java new file mode 100755 index 0000000000..70176b912d --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/OriginalConfPretreatmentUtil.java @@ -0,0 +1,82 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter.util; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.TableExpandUtil; +import com.alibaba.datax.plugin.writer.hologresjdbcwriter.Constant; +import com.alibaba.datax.plugin.writer.hologresjdbcwriter.Key; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public final class OriginalConfPretreatmentUtil { + private static final Logger LOG = LoggerFactory + .getLogger(OriginalConfPretreatmentUtil.class); + + public static DataBaseType DATABASE_TYPE; + + public static void doPretreatment(Configuration originalConfig, DataBaseType dataBaseType) { + // username/password are required + originalConfig.getNecessaryValue(Key.USERNAME, DBUtilErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(Key.PASSWORD, DBUtilErrorCode.REQUIRED_VALUE); + + doCheckBatchSize(originalConfig); + simplifyConf(originalConfig); + } + + public static void doCheckBatchSize(Configuration originalConfig) { + // batchSize is optional; fall back to the default when it is absent + int batchSize = originalConfig.getInt(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE); + if (batchSize < 1) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, String.format( + "您的batchSize配置有误. 您所配置的写入数据库表的 batchSize:%s 不能小于1. 推荐配置范围为:[256-1024] (保持128的倍数), 该值越大, 内存溢出可能性越大. 请检查您的配置并作出修改.", + batchSize)); + } + + originalConfig.set(Key.BATCH_SIZE, batchSize); + } + + public static void simplifyConf(Configuration originalConfig) { + List<Object> connections = originalConfig.getList(Constant.CONN_MARK, + Object.class); + + int tableNum = 0; + + for (int i = 0, len = connections.size(); i < len; i++) { + Configuration connConf = Configuration.from(connections.get(i).toString()); + + String jdbcUrl = connConf.getString(Key.JDBC_URL); + if (StringUtils.isBlank(jdbcUrl)) { + throw DataXException.asDataXException(DBUtilErrorCode.REQUIRED_VALUE, "您未配置写入数据库表的 jdbcUrl."); + } + + List<String> tables = connConf.getList(Key.TABLE, String.class); + + if (null == tables || tables.isEmpty()) { + throw DataXException.asDataXException(DBUtilErrorCode.REQUIRED_VALUE, + "您未配置写入数据库表的表名称. 根据配置DataX找不到您配置的表. 请检查您的配置并作出修改."); + } + + // expand the table entries configured on each connection + List<String> expandedTables = TableExpandUtil + .expandTableConf(DATABASE_TYPE, tables); + + if (null == expandedTables || expandedTables.isEmpty()) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, + "您配置的写入数据库表名称错误. 
DataX找不到您配置的表,请检查您的配置并作出修改."); + } + + tableNum += expandedTables.size(); + + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, + i, Key.TABLE), expandedTables); + } + + originalConfig.set(Constant.TABLE_NUMBER_MARK, tableNum); + } + +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/WriterUtil.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/WriterUtil.java new file mode 100755 index 0000000000..d35194e833 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/WriterUtil.java @@ -0,0 +1,111 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter.util; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.RdbmsException; +import com.alibaba.datax.plugin.rdbms.writer.Constant; +import com.alibaba.datax.plugin.rdbms.writer.Key; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public final class WriterUtil { + private static final Logger LOG = LoggerFactory.getLogger(WriterUtil.class); + + // TODO: improve error reporting when splitting fails + public static List<Configuration> doSplit(Configuration simplifiedConf, + int adviceNumber) { + + List<Configuration> splitResultConfigs = new ArrayList<Configuration>(); + + int tableNumber = simplifiedConf.getInt(Constant.TABLE_NUMBER_MARK); + + // single-table case + if (tableNumber == 1) { + // table and jdbcUrl were already extracted during the job-level prepare, so each slice is just a clone + for (int j = 0; j < adviceNumber; j++) { + splitResultConfigs.add(simplifiedConf.clone()); + } + + return splitResultConfigs; + } + + if (tableNumber != adviceNumber) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, + String.format("您的配置文件中的列配置信息有误. 您要写入的目的端的表个数是:%s , 但是根据系统建议需要切分的份数是:%s. 
请检查您的配置并作出修改.", + tableNumber, adviceNumber)); + } + + String jdbcUrl; + List<String> preSqls = simplifiedConf.getList(Key.PRE_SQL, String.class); + List<String> postSqls = simplifiedConf.getList(Key.POST_SQL, String.class); + + List<Object> conns = simplifiedConf.getList(Constant.CONN_MARK, + Object.class); + + for (Object conn : conns) { + Configuration sliceConfig = simplifiedConf.clone(); + + Configuration connConf = Configuration.from(conn.toString()); + jdbcUrl = connConf.getString(Key.JDBC_URL); + sliceConfig.set(Key.JDBC_URL, jdbcUrl); + + sliceConfig.remove(Constant.CONN_MARK); + + List<String> tables = connConf.getList(Key.TABLE, String.class); + + for (String table : tables) { + Configuration tempSlice = sliceConfig.clone(); + tempSlice.set(Key.TABLE, table); + tempSlice.set(Key.PRE_SQL, renderPreOrPostSqls(preSqls, table)); + tempSlice.set(Key.POST_SQL, renderPreOrPostSqls(postSqls, table)); + + splitResultConfigs.add(tempSlice); + } + + } + + return splitResultConfigs; + } + + public static List<String> renderPreOrPostSqls(List<String> preOrPostSqls, String tableName) { + if (null == preOrPostSqls) { + return Collections.emptyList(); + } + + List<String> renderedSqls = new ArrayList<String>(); + for (String sql : preOrPostSqls) { + // skip blank entries so they are not queued for execution + if (StringUtils.isNotBlank(sql)) { + renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName)); + } + } + + return renderedSqls; + } + + public static void executeSqls(Connection conn, List<String> sqls, String basicMessage, DataBaseType dataBaseType) { + Statement stmt = null; + String currentSql = null; + try { + stmt = conn.createStatement(); + for (String sql : sqls) { + currentSql = sql; + DBUtil.executeSqlWithoutResultSet(stmt, sql); + } + } catch (Exception e) { + throw RdbmsException.asQueryException(dataBaseType, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(null, stmt, null); + } + } +} diff --git a/hologresjdbcwriter/src/main/resources/plugin.json b/hologresjdbcwriter/src/main/resources/plugin.json new file mode 100644 index 0000000000..d46f216b15 --- /dev/null +++ b/hologresjdbcwriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "hologresjdbcwriter", + "class": "com.alibaba.datax.plugin.writer.hologresjdbcwriter.HologresJdbcWriter", + "description": "", + "developer": "alibaba" +} diff --git a/hologresjdbcwriter/src/main/resources/plugin_job_template.json b/hologresjdbcwriter/src/main/resources/plugin_job_template.json new file mode 100644 index 0000000000..656971c3e8 --- /dev/null +++ b/hologresjdbcwriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,15 @@ +{ + "name": "hologresjdbcwriter", + "parameter": { + "username": "", + "password": "", + "column": [], + "writeMode": "REPLACE", + "connection": [ + { + "jdbcUrl": "", + "table": [] + } + ] + } +} diff --git a/license.txt b/license.txt index 00b845b43b..2f293c0fe7 100644 --- a/license.txt +++ b/license.txt @@ -1,4 +1,4 @@ -Copyright 1999-2017 Alibaba Group Holding Ltd. +Copyright 1999-2022 Alibaba Group Holding Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
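Taken together with the template above, a sketch of a writer block that exercises these keys (the connection layout mirrors what BaseWriter.Job.checkConf expects; the URL, table and column names are placeholders):

```json
"writer": {
    "name": "hologresjdbcwriter",
    "parameter": {
        "username": "",
        "password": "",
        "column": ["id", "name", "score"],
        "writeMode": "REPLACE",
        "emptyAsNull": true,
        "preSql": ["delete from @table"],
        "connection": [
            {
                "jdbcUrl": "jdbc:postgresql://127.0.0.1:1234/db_name",
                "table": ["schema_name.table_name"]
            }
        ]
    }
}
```

In preSql/postSql, the @table placeholder (the rdbms writer's Constant.TABLE_NAME_PLACEHOLDER, assuming its usual value) is substituted with the target table name by renderPreOrPostSqls before execution.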
diff --git a/mongodbwriter/doc/mongodbwriter.md b/mongodbwriter/doc/mongodbwriter.md index b3bfece47d..93f502903e 100644 --- a/mongodbwriter/doc/mongodbwriter.md +++ b/mongodbwriter/doc/mongodbwriter.md @@ -116,10 +116,10 @@ MongoDBWriter通过Datax框架获取Reader生成的数据,然后将Datax支持 "type": "int" } ], - "writeMode": { - "isReplace": "true", - "replaceKey": "unique_id" - } + "writeMode": { + "isReplace": "true", + "replaceKey": "unique_id" + } } } } diff --git a/odpsreader/pom.xml b/odpsreader/pom.xml index 5762a57b79..3f2c4acbd4 100755 --- a/odpsreader/pom.xml +++ b/odpsreader/pom.xml @@ -36,18 +36,18 @@ guava 16.0.1 - - org.bouncycastle - bcprov-jdk15on - 1.52 - system - ${basedir}/src/main/libs/bcprov-jdk15on-1.52.jar - - - com.aliyun.odps - odps-sdk-core - 0.20.7-public - + + org.xerial + sqlite-jdbc + 3.34.0 + + + + + com.aliyun.odps + odps-sdk-core + 0.38.4-public + org.mockito @@ -87,29 +87,22 @@ 1.4.10 test - - org.mockito - mockito-core - 1.8.5 - test - - - org.powermock - powermock-api-mockito - 1.4.10 - test - - - - org.powermock - powermock-module-junit4 - 1.4.10 - test + commons-codec + commons-codec + 1.8 + + + src/main/java + + **/*.properties + + + diff --git a/odpsreader/src/main/assembly/package.xml b/odpsreader/src/main/assembly/package.xml index 9ec3309e6e..db659a1796 100755 --- a/odpsreader/src/main/assembly/package.xml +++ b/odpsreader/src/main/assembly/package.xml @@ -23,13 +23,6 @@ plugin/reader/odpsreader - - src/main/libs - - *.* - - plugin/reader/odpsreader/libs - diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Constant.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Constant.java index c3c674ddd1..dee2ef5c27 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Constant.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Constant.java @@ -31,5 +31,7 @@ public class Constant { public static final String PARTITION_COLUMNS = "partitionColumns"; public static final String PARSED_COLUMNS = "parsedColumns"; - + + public static final String PARTITION_FILTER_HINT = "/*query*/"; + } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Key.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Key.java index 9537cb9397..2cee65d17f 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Key.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Key.java @@ -5,6 +5,8 @@ public class Key { public final static String ACCESS_ID = "accessId"; public final static String ACCESS_KEY = "accessKey"; + + public final static String SECURITY_TOKEN = "securityToken"; public static final String PROJECT = "project"; @@ -30,5 +32,14 @@ public class Key { public final static String IS_COMPRESS = "isCompress"; public final static String MAX_RETRY_TIME = "maxRetryTime"; + + // 分区不存在时 + public final static String SUCCESS_ON_NO_PATITION="successOnNoPartition"; + // preSql + public final static String PRE_SQL="preSql"; + + // postSql + public final static String POST_SQL="postSql"; + } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/LocalStrings.properties b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/LocalStrings.properties new file mode 100644 index 0000000000..ef1830022d --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/LocalStrings.properties @@ -0,0 +1,64 @@ 
+description.DATAX_R_ODPS_001=\u7F3A\u5C11\u5FC5\u586B\u53C2\u6570 +description.DATAX_R_ODPS_002=\u914D\u7F6E\u503C\u4E0D\u5408\u6CD5 +description.DATAX_R_ODPS_003=\u521B\u5EFAODPS Session\u5931\u8D25 +description.DATAX_R_ODPS_004=\u83B7\u53D6ODPS Session\u5931\u8D25 +description.DATAX_R_ODPS_005=\u8BFB\u53D6ODPS\u6570\u636E\u5931\u8D25 +description.DATAX_R_ODPS_006=\u83B7\u53D6AK\u5931\u8D25 +description.DATAX_R_ODPS_007=\u8BFB\u53D6\u6570\u636E\u53D1\u751F\u5F02\u5E38 +description.DATAX_R_ODPS_008=\u6253\u5F00RecordReader\u5931\u8D25 +description.DATAX_R_ODPS_009=ODPS\u9879\u76EE\u4E0D\u5B58\u5728 +description.DATAX_R_ODPS_010=\u8868\u4E0D\u5B58\u5728 +description.DATAX_R_ODPS_011=AK\u4E0D\u5B58\u5728 +description.DATAX_R_ODPS_012=AK\u975E\u6CD5 +description.DATAX_R_ODPS_013=AK\u62D2\u7EDD\u8BBF\u95EE +description.DATAX_R_ODPS_014=splitMode\u914D\u7F6E\u9519\u8BEF +description.DATAX_R_ODPS_015=ODPS\u8D26\u53F7\u7C7B\u578B\u9519\u8BEF +description.DATAX_R_ODPS_016=\u4E0D\u652F\u6301\u89C6\u56FE +description.DATAX_R_ODPS_017=\u5206\u533A\u914D\u7F6E\u9519\u8BEF +description.DATAX_R_ODPS_018=\u5206\u533A\u4E0D\u5B58\u5728 +description.DATAX_R_ODPS_019=\u6267\u884CODPS SQL\u5931\u8D25 +description.DATAX_R_ODPS_020=\u6267\u884CODPS SQL\u53D1\u751F\u5F02\u5E38 + + +solution.DATAX_R_ODPS_001=\u8BF7\u4FEE\u6539\u914D\u7F6E\u6587\u4EF6 +solution.DATAX_R_ODPS_002=\u8BF7\u4FEE\u6539\u914D\u7F6E\u503C +solution.DATAX_R_ODPS_003=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684AK\u6216\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_004=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_005=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_006=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684AK +solution.DATAX_R_ODPS_007=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_008=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_009=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684\u9879\u76EE\u540D +solution.DATAX_R_ODPS_010=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684\u8868\u540D +solution.DATAX_R_ODPS_011=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684AK +solution.DATAX_R_ODPS_012=\u8BF7\u4FEE\u6539AK +solution.DATAX_R_ODPS_013=\u8BF7\u786E\u5B9AAK\u5728\u9879\u76EE\u4E2D\u7684\u6743\u9650 +solution.DATAX_R_ODPS_014=\u8BF7\u4FEE\u6539splitMode\u503C +solution.DATAX_R_ODPS_015=\u8BF7\u4FEE\u6539\u8D26\u53F7\u7C7B\u578B +solution.DATAX_R_ODPS_016=\u8BF7\u4FEE\u6539\u914D\u7F6E\u6587\u4EF6 +solution.DATAX_R_ODPS_017=\u8BF7\u4FEE\u6539\u5206\u533A\u503C +solution.DATAX_R_ODPS_018=\u8BF7\u4FEE\u6539\u914D\u7F6E\u7684\u5206\u533A\u503C +solution.DATAX_R_ODPS_019=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_020=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 + +odpsreader.1=\u6E90\u5934\u8868:{0} \u662F\u865A\u62DF\u89C6\u56FE\uFF0CDataX \u4E0D\u652F\u6301\u8BFB\u53D6\u865A\u62DF\u89C6\u56FE. +odpsreader.2=\u60A8\u6240\u914D\u7F6E\u7684 splitMode:{0} \u4E0D\u6B63\u786E. splitMode \u4EC5\u5141\u8BB8\u914D\u7F6E\u4E3A record \u6216\u8005 partition. +odpsreader.3=\u5206\u533A\u4FE1\u606F\u6CA1\u6709\u914D\u7F6E.\u7531\u4E8E\u6E90\u5934\u8868:{0} \u4E3A\u5206\u533A\u8868, \u6240\u4EE5\u60A8\u9700\u8981\u914D\u7F6E\u5176\u62BD\u53D6\u7684\u8868\u7684\u5206\u533A\u4FE1\u606F. \u683C\u5F0F\u5F62\u5982:pt=hello,ds=hangzhou\uFF0C\u8BF7\u60A8\u53C2\u8003\u6B64\u683C\u5F0F\u4FEE\u6539\u8BE5\u914D\u7F6E\u9879. +odpsreader.4=\u5206\u533A\u4FE1\u606F\u914D\u7F6E\u9519\u8BEF.\u6E90\u5934\u8868:{0} \u867D\u7136\u4E3A\u5206\u533A\u8868, \u4F46\u5176\u5B9E\u9645\u5206\u533A\u503C\u5E76\u4E0D\u5B58\u5728. 
\u8BF7\u786E\u8BA4\u6E90\u5934\u8868\u5DF2\u7ECF\u751F\u6210\u8BE5\u5206\u533A\uFF0C\u518D\u8FDB\u884C\u6570\u636E\u62BD\u53D6. +odpsreader.5=\u5206\u533A\u914D\u7F6E\u9519\u8BEF\uFF0C\u6839\u636E\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u6CA1\u6709\u5339\u914D\u5230\u6E90\u5934\u8868\u4E2D\u7684\u5206\u533A. \u6E90\u5934\u8868\u6240\u6709\u5206\u533A\u662F:[\n{0}\n], \u60A8\u914D\u7F6E\u7684\u5206\u533A\u662F:[\n{1}\n]. \u8BF7\u60A8\u6839\u636E\u5B9E\u9645\u60C5\u51B5\u518D\u4F5C\u51FA\u4FEE\u6539. +odpsreader.6=\u5206\u533A\u914D\u7F6E\u9519\u8BEF\uFF0C\u6E90\u5934\u8868:{0} \u4E3A\u975E\u5206\u533A\u8868, \u60A8\u4E0D\u80FD\u914D\u7F6E\u5206\u533A. \u8BF7\u60A8\u5220\u9664\u8BE5\u914D\u7F6E\u9879. +odpsreader.7=\u6E90\u5934\u8868:{0} \u7684\u6240\u6709\u5206\u533A\u5217\u662F:[{1}] +odpsreader.8=\u5206\u533A\u914D\u7F6E\u9519\u8BEF, \u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u7EA7\u6570\u548C\u8BE5\u8868\u7684\u5B9E\u9645\u60C5\u51B5\u4E0D\u4E00\u81F4, \u6BD4\u5982\u5206\u533A:[{0}] \u662F {1} \u7EA7\u5206\u533A, \u800C\u5206\u533A:[{2}] \u662F {3} \u7EA7\u5206\u533A. DataX \u662F\u901A\u8FC7\u82F1\u6587\u9017\u53F7\u5224\u65AD\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u7EA7\u6570\u7684. \u6B63\u786E\u7684\u683C\u5F0F\u5F62\u5982\"pt=$'{bizdate'}, type=0\" \uFF0C\u8BF7\u60A8\u53C2\u8003\u793A\u4F8B\u4FEE\u6539\u8BE5\u914D\u7F6E\u9879. +odpsreader.9=\u5206\u533A\u914D\u7F6E\u9519\u8BEF, \u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A:{0} \u7684\u7EA7\u6570:{1} \u4E0E\u60A8\u8981\u8BFB\u53D6\u7684 ODPS \u6E90\u5934\u8868\u7684\u5206\u533A\u7EA7\u6570:{2} \u4E0D\u76F8\u7B49. DataX \u662F\u901A\u8FC7\u82F1\u6587\u9017\u53F7\u5224\u65AD\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u7EA7\u6570\u7684.\u6B63\u786E\u7684\u683C\u5F0F\u5F62\u5982\"pt=$'{bizdate'}, type=0\" \uFF0C\u8BF7\u60A8\u53C2\u8003\u793A\u4F8B\u4FEE\u6539\u8BE5\u914D\u7F6E\u9879. +odpsreader.10=\u6E90\u5934\u8868:{0} \u7684\u6240\u6709\u5B57\u6BB5\u662F:[{1}] +odpsreader.11=\u8FD9\u662F\u4E00\u6761\u8B66\u544A\u4FE1\u606F\uFF0C\u60A8\u914D\u7F6E\u7684 ODPS \u8BFB\u53D6\u7684\u5217\u4E3A*\uFF0C\u8FD9\u662F\u4E0D\u63A8\u8350\u7684\u884C\u4E3A\uFF0C\u56E0\u4E3A\u5F53\u60A8\u7684\u8868\u5B57\u6BB5\u4E2A\u6570\u3001\u7C7B\u578B\u6709\u53D8\u52A8\u65F6\uFF0C\u53EF\u80FD\u5F71\u54CD\u4EFB\u52A1\u6B63\u786E\u6027\u751A\u81F3\u4F1A\u8FD0\u884C\u51FA\u9519. \u5EFA\u8BAE\u60A8\u628A\u6240\u6709\u9700\u8981\u62BD\u53D6\u7684\u5217\u90FD\u914D\u7F6E\u4E0A. +odpsreader.12=\u6E90\u5934\u8868:{0} \u7684\u5206\u533A:{1} \u6CA1\u6709\u5185\u5BB9\u53EF\u62BD\u53D6, \u8BF7\u60A8\u77E5\u6653. +odpsreader.13=\u6E90\u5934\u8868:{0} \u7684\u5206\u533A:{1} \u8BFB\u53D6\u884C\u6570\u4E3A\u8D1F\u6570, \u8BF7\u8054\u7CFB ODPS \u7BA1\u7406\u5458\u67E5\u770B\u8868\u72B6\u6001! +odpsreader.14=\u6E90\u5934\u8868:{0} \u7684\u5206\u533A:{1} \u8BFB\u53D6\u5931\u8D25, \u8BF7\u8054\u7CFB ODPS \u7BA1\u7406\u5458\u67E5\u770B\u9519\u8BEF\u8BE6\u60C5. + + +readerproxy.1=odps-read-exception, \u91CD\u8BD5\u7B2C{0}\u6B21 +readerproxy.2=\u60A8\u7684\u5206\u533A [{0}] \u89E3\u6790\u51FA\u73B0\u9519\u8BEF,\u89E3\u6790\u540E\u6B63\u786E\u7684\u914D\u7F6E\u65B9\u5F0F\u7C7B\u4F3C\u4E3A [ pt=1,dt=1 ]. +readerproxy.3=\u8868\u6240\u6709\u5206\u533A\u4FE1\u606F\u4E3A: {0} \u5176\u4E2D\u627E\u4E0D\u5230 [{1}] \u5BF9\u5E94\u7684\u5206\u533A\u503C. +readerproxy.4=\u60A8\u8BFB\u53D6\u5206\u533A [{0}] \u51FA\u73B0\u65E5\u671F\u8F6C\u6362\u5F02\u5E38, \u65E5\u671F\u7684\u5B57\u7B26\u4E32\u8868\u793A\u4E3A [{1}]. 
+readerproxy.5=DataX \u62BD\u53D6 ODPS \u6570\u636E\u4E0D\u652F\u6301\u5B57\u6BB5\u7C7B\u578B\u4E3A:[{0}]. \u76EE\u524D\u652F\u6301\u62BD\u53D6\u7684\u5B57\u6BB5\u7C7B\u578B\u6709\uFF1Abigint, boolean, datetime, double, decimal, string. \u60A8\u53EF\u4EE5\u9009\u62E9\u4E0D\u62BD\u53D6 DataX \u4E0D\u652F\u6301\u7684\u5B57\u6BB5\u6216\u8005\u8054\u7CFB ODPS \u7BA1\u7406\u5458\u5BFB\u6C42\u5E2E\u52A9. diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReader.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReader.java index f5cf10ca28..8cb7ba31a6 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReader.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReader.java @@ -5,44 +5,44 @@ import com.alibaba.datax.common.spi.Reader; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.common.util.FilterUtil; -import com.alibaba.datax.plugin.reader.odpsreader.util.IdAndKeyUtil; -import com.alibaba.datax.plugin.reader.odpsreader.util.OdpsSplitUtil; -import com.alibaba.datax.plugin.reader.odpsreader.util.OdpsUtil; -import com.aliyun.odps.*; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.reader.odpsreader.util.*; +import com.alibaba.fastjson.JSON; +import com.aliyun.odps.Column; +import com.aliyun.odps.Odps; +import com.aliyun.odps.Table; +import com.aliyun.odps.TableSchema; import com.aliyun.odps.tunnel.TableTunnel.DownloadSession; - +import com.aliyun.odps.type.TypeInfo; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.MutablePair; import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; public class OdpsReader extends Reader { public static class Job extends Reader.Job { private static final Logger LOG = LoggerFactory - .getLogger(Job.class); - - private static boolean IS_DEBUG = LOG.isDebugEnabled(); + .getLogger(Job.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsReaderErrorCode.class, Locale.ENGLISH, MessageSource.timeZone); private Configuration originalConfig; + private boolean successOnNoPartition; private Odps odps; private Table table; + @Override public void preCheck() { this.init(); + this.prepare(); } - @Override public void init() { this.originalConfig = super.getPluginJobConf(); + this.successOnNoPartition = this.originalConfig.getBool(Key.SUCCESS_ON_NO_PATITION, false); //如果用户没有配置accessId/accessKey,尝试从环境变量获取 String accountType = originalConfig.getString(Key.ACCOUNT_TYPE, Constant.DEFAULT_ACCOUNT_TYPE); @@ -59,17 +59,21 @@ public void init() { dealSplitMode(this.originalConfig); this.odps = OdpsUtil.initOdps(this.originalConfig); + + } + + private void initOdpsTableInfo() { String tableName = this.originalConfig.getString(Key.TABLE); String projectName = this.originalConfig.getString(Key.PROJECT); this.table = OdpsUtil.getTable(this.odps, projectName, tableName); this.originalConfig.set(Constant.IS_PARTITIONED_TABLE, - OdpsUtil.isPartitionedTable(table)); + OdpsUtil.isPartitionedTable(table)); boolean isVirtualView = this.table.isVirtualView(); if (isVirtualView) { throw DataXException.asDataXException(OdpsReaderErrorCode.VIRTUAL_VIEW_NOT_SUPPORT, - String.format("源头表:%s 是虚拟视图,DataX 不支持读取虚拟视图.", 
tableName)); + MESSAGE_SOURCE.message("odpsreader.1", tableName)); } this.dealPartition(this.table); @@ -79,11 +83,11 @@ public void init() { private void dealSplitMode(Configuration originalConfig) { String splitMode = originalConfig.getString(Key.SPLIT_MODE, Constant.DEFAULT_SPLIT_MODE).trim(); if (splitMode.equalsIgnoreCase(Constant.DEFAULT_SPLIT_MODE) || - splitMode.equalsIgnoreCase(Constant.PARTITION_SPLIT_MODE)) { + splitMode.equalsIgnoreCase(Constant.PARTITION_SPLIT_MODE)) { originalConfig.set(Key.SPLIT_MODE, splitMode); } else { throw DataXException.asDataXException(OdpsReaderErrorCode.SPLIT_MODE_ERROR, - String.format("您所配置的 splitMode:%s 不正确. splitMode 仅允许配置为 record 或者 partition.", splitMode)); + MESSAGE_SOURCE.message("odpsreader.2", splitMode)); } } @@ -98,7 +102,7 @@ private void dealSplitMode(Configuration originalConfig) { */ private void dealPartition(Table table) { List userConfiguredPartitions = this.originalConfig.getList( - Key.PARTITION, String.class); + Key.PARTITION, String.class); boolean isPartitionedTable = this.originalConfig.getBool(Constant.IS_PARTITIONED_TABLE); List partitionColumns = new ArrayList(); @@ -107,60 +111,140 @@ private void dealPartition(Table table) { // 分区表,需要配置分区 if (null == userConfiguredPartitions || userConfiguredPartitions.isEmpty()) { throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区信息没有配置.由于源头表:%s 为分区表, 所以您需要配置其抽取的表的分区信息. 格式形如:pt=hello,ds=hangzhou,请您参考此格式修改该配置项.", - table.getName())); + MESSAGE_SOURCE.message("odpsreader.3", table.getName())); } else { - List allPartitions = OdpsUtil.getTableAllPartitions(table); - - if (null == allPartitions || allPartitions.isEmpty()) { - throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区信息配置错误.源头表:%s 虽然为分区表, 但其实际分区值并不存在. 请确认源头表已经生成该分区,再进行数据抽取.", - table.getName())); + // 获取分区列名, 支持用户配置分区列同步 + for (Column column : table.getSchema().getPartitionColumns()) { + partitionColumns.add(column.getName()); } - List parsedPartitions = expandUserConfiguredPartition( - allPartitions, userConfiguredPartitions); + List allPartitions = OdpsUtil.getTableAllPartitions(table); + List parsedPartitions = expandUserConfiguredPartition( + table, allPartitions, userConfiguredPartitions, partitionColumns.size()); if (null == parsedPartitions || parsedPartitions.isEmpty()) { - throw DataXException.asDataXException( - OdpsReaderErrorCode.PARTITION_ERROR, + if (!this.successOnNoPartition) { + // PARTITION_NOT_EXISTS_ERROR 这个异常ErrorCode在AdsWriter有使用,用户判断空分区Load Data任务不报错 + // 其他类型的异常不要使用这个错误码 + throw DataXException.asDataXException( + OdpsReaderErrorCode.PARTITION_NOT_EXISTS_ERROR, + MESSAGE_SOURCE.message("odpsreader.5", + StringUtils.join(allPartitions, "\n"), + StringUtils.join(userConfiguredPartitions, "\n"))); + } else { + LOG.warn( String.format( - "分区配置错误,根据您所配置的分区没有匹配到源头表中的分区. 源头表所有分区是:[\n%s\n], 您配置的分区是:[\n%s\n]. 请您根据实际情况在作出修改. ", - StringUtils.join(allPartitions, "\n"), - StringUtils.join(userConfiguredPartitions, "\n"))); + "The partition configuration is wrong, " + + "but you have configured the successOnNoPartition to be true to ignore the error. " + + "According to the partition you have configured, it does not match the partition in the source table. " + + "All the partitions in the source table are:[\n%s\n], the partition you configured is:[\n%s\n]. 
" + + "please revise it according to the actual situation.", + StringUtils.join(allPartitions, "\n"), + StringUtils.join(userConfiguredPartitions, "\n"))); + } } + LOG.info(String + .format("expand user configured partitions are : %s", JSON.toJSONString(parsedPartitions))); this.originalConfig.set(Key.PARTITION, parsedPartitions); - - for (Column column : table.getSchema() - .getPartitionColumns()) { - partitionColumns.add(column.getName()); - } } } else { // 非分区表,则不能配置分区 if (null != userConfiguredPartitions - && !userConfiguredPartitions.isEmpty()) { + && !userConfiguredPartitions.isEmpty()) { throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区配置错误,源头表:%s 为非分区表, 您不能配置分区. 请您删除该配置项. ", table.getName())); + MESSAGE_SOURCE.message("odpsreader.6", table.getName())); } } - + this.originalConfig.set(Constant.PARTITION_COLUMNS, partitionColumns); if (isPartitionedTable) { - LOG.info("{源头表:{} 的所有分区列是:[{}]}", table.getName(), - StringUtils.join(partitionColumns, ",")); + LOG.info(MESSAGE_SOURCE.message("odpsreader.7", table.getName(), + StringUtils.join(partitionColumns, ","))); } } - private List expandUserConfiguredPartition( - List allPartitions, List userConfiguredPartitions) { + /** + * 将用户配置的分区(可能是直接的分区配置 dt=20170101, 可能是简单正则dt=201701*, 也可能是区间过滤条件 dt>=20170101 and dt<20170130) 和ODPS + * table所有的分区进行匹配,过滤出用户希望同步的分区集合 + * + * @param table odps table + * @param allPartitions odps table所有的分区 + * @param userConfiguredPartitions 用户配置的分区 + * @param tableOriginalPartitionDepth odps table分区级数(一级分区,二级分区,三级分区等) + * @return 返回过滤出的分区 + */ + private List expandUserConfiguredPartition(Table table, + List allPartitions, + List userConfiguredPartitions, + int tableOriginalPartitionDepth) { + + UserConfiguredPartitionClassification userConfiguredPartitionClassification = OdpsUtil + .classifyUserConfiguredPartitions(userConfiguredPartitions); + + if (userConfiguredPartitionClassification.isIncludeHintPartition()) { + List expandUserConfiguredPartitionResult = new ArrayList(); + + // 处理不包含/*query*/的分区过滤 + if (!userConfiguredPartitionClassification.getUserConfiguredNormalPartition().isEmpty()) { + expandUserConfiguredPartitionResult.addAll(expandNoHintUserConfiguredPartition(allPartitions, + userConfiguredPartitionClassification.getUserConfiguredNormalPartition(), + tableOriginalPartitionDepth)); + } + if (!allPartitions.isEmpty()) { + expandUserConfiguredPartitionResult.addAll(expandHintUserConfiguredPartition(table, + allPartitions, userConfiguredPartitionClassification.getUserConfiguredHintPartition())); + } + return expandUserConfiguredPartitionResult; + } else { + return expandNoHintUserConfiguredPartition(allPartitions, userConfiguredPartitions, + tableOriginalPartitionDepth); + } + } + + /** + * 匹配包含 HINT 条件的过滤 + * + * @param table odps table + * @param allPartitions odps table所有的分区 + * @param userHintConfiguredPartitions 用户配置的分区 + * @return 返回过滤出的分区 + */ + private List expandHintUserConfiguredPartition(Table table, + List allPartitions, + List userHintConfiguredPartitions) { + try { + // load odps table all partitions into sqlite memory database + SqliteUtil sqliteUtil = new SqliteUtil(); + sqliteUtil.loadAllPartitionsIntoSqlite(table, allPartitions); + return sqliteUtil.selectUserConfiguredPartition(userHintConfiguredPartitions); + } catch (Exception ex) { + throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, + String.format("Expand user configured partition has exception: %s", ex.getMessage()), ex); + } + } + + /** + * 匹配没有 HINT 条件的过滤,包括 
简单正则匹配(dt=201701*) 和 直接匹配(dt=20170101) + * + * @param allPartitions odps table所有的分区 + * @param userNormalConfiguredPartitions 用户配置的分区 + * @param tableOriginalPartitionDepth odps table分区级数(一级分区,二级分区,三级分区等) + * @return 返回过滤出的分区 + */ + private List expandNoHintUserConfiguredPartition(List allPartitions, + List userNormalConfiguredPartitions, + int tableOriginalPartitionDepth) { // 对odps 本身的所有分区进行特殊字符的处理 + LOG.info("format partition with rules: remove all space; remove all '; replace / to ,"); + // 表里面已有分区量比较大,有些任务无关,没有打印 List allStandardPartitions = OdpsUtil - .formatPartitions(allPartitions); + .formatPartitions(allPartitions); // 对用户自身配置的所有分区进行特殊字符的处理 List allStandardUserConfiguredPartitions = OdpsUtil - .formatPartitions(userConfiguredPartitions); + .formatPartitions(userNormalConfiguredPartitions); + LOG.info("user configured partition: {}", JSON.toJSONString(userNormalConfiguredPartitions)); + LOG.info("formated partition: {}", JSON.toJSONString(allStandardUserConfiguredPartitions)); /** * 对配置的分区级数(深度)进行检查 @@ -177,20 +261,20 @@ private List expandUserConfiguredPartition( comparedPartitionDepth = comparedPartition.split(",").length; if (comparedPartitionDepth != firstPartitionDepth) { throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区配置错误, 您所配置的分区级数和该表的实际情况不一致, 比如分区:[%s] 是 %s 级分区, 而分区:[%s] 是 %s 级分区. DataX 是通过英文逗号判断您所配置的分区级数的. 正确的格式形如\"pt=${bizdate}, type=0\" ,请您参考示例修改该配置项. ", - firstPartition, firstPartitionDepth, comparedPartition, comparedPartitionDepth)); + MESSAGE_SOURCE + .message("odpsreader.8", firstPartition, firstPartitionDepth, comparedPartition, + comparedPartitionDepth)); } } - int tableOriginalPartitionDepth = allStandardPartitions.get(0).split(",").length; if (firstPartitionDepth != tableOriginalPartitionDepth) { throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区配置错误, 您所配置的分区:%s 的级数:%s 与您要读取的 ODPS 源头表的分区级数:%s 不相等. DataX 是通过英文逗号判断您所配置的分区级数的.正确的格式形如\"pt=${bizdate}, type=0\" ,请您参考示例修改该配置项.", - firstPartition, firstPartitionDepth, tableOriginalPartitionDepth)); + MESSAGE_SOURCE + .message("odpsreader.9", firstPartition, firstPartitionDepth, tableOriginalPartitionDepth)); } List retPartitions = FilterUtil.filterByRegulars(allStandardPartitions, - allStandardUserConfiguredPartitions); + allStandardUserConfiguredPartitions); return retPartitions; } @@ -198,11 +282,11 @@ private List expandUserConfiguredPartition( private void dealColumn(Table table) { // 用户配置的 column 之前已经确保其不为空 List userConfiguredColumns = this.originalConfig.getList( - Key.COLUMN, String.class); + Key.COLUMN, String.class); List allColumns = OdpsUtil.getTableAllColumns(table); List allNormalColumns = OdpsUtil - .getTableOriginalColumnNameList(allColumns); + .getTableOriginalColumnNameList(allColumns); StringBuilder columnMeta = new StringBuilder(); for (Column column : allColumns) { @@ -210,26 +294,26 @@ private void dealColumn(Table table) { } columnMeta.setLength(columnMeta.length() - 1); - LOG.info("源头表:{} 的所有字段是:[{}]", table.getName(), columnMeta.toString()); + LOG.info(MESSAGE_SOURCE.message("odpsreader.10", table.getName(), columnMeta.toString())); if (1 == userConfiguredColumns.size() - && "*".equals(userConfiguredColumns.get(0))) { - LOG.warn("这是一条警告信息,您配置的 ODPS 读取的列为*,这是不推荐的行为,因为当您的表字段个数、类型有变动时,可能影响任务正确性甚至会运行出错. 建议您把所有需要抽取的列都配置上. 
"); + && "*".equals(userConfiguredColumns.get(0))) { + LOG.warn(MESSAGE_SOURCE.message("odpsreader.11")); this.originalConfig.set(Key.COLUMN, allNormalColumns); } userConfiguredColumns = this.originalConfig.getList( - Key.COLUMN, String.class); + Key.COLUMN, String.class); /** * warn: 字符串常量需要与表原生字段tableOriginalColumnNameList 分开存放 demo: * ["id","'id'","name"] */ List allPartitionColumns = this.originalConfig.getList( - Constant.PARTITION_COLUMNS, String.class); + Constant.PARTITION_COLUMNS, String.class); List> parsedColumns = OdpsUtil - .parseColumns(allNormalColumns, allPartitionColumns, - userConfiguredColumns); + .parseColumns(allNormalColumns, allPartitionColumns, + userConfiguredColumns); this.originalConfig.set(Constant.PARSED_COLUMNS, parsedColumns); @@ -238,7 +322,7 @@ private void dealColumn(Table table) { for (int i = 0, len = parsedColumns.size(); i < len; i++) { Pair pair = parsedColumns.get(i); sb.append(String.format(" %s : %s", pair.getLeft(), - pair.getRight())); + pair.getRight())); if (i != len - 1) { sb.append(","); } @@ -247,9 +331,36 @@ private void dealColumn(Table table) { LOG.info("parsed column details: {} .", sb.toString()); } - @Override public void prepare() { + List preSqls = this.originalConfig.getList(Key.PRE_SQL, String.class); + if (preSqls != null && !preSqls.isEmpty()) { + LOG.info( + String.format("Beigin to exectue preSql : %s. \n Attention: these preSqls must be idempotent!!!", + JSON.toJSONString(preSqls))); + long beginTime = System.currentTimeMillis(); + + StringBuffer preSqlBuffer = new StringBuffer(); + for (String preSql : preSqls) { + preSql = preSql.trim(); + if (StringUtils.isNotBlank(preSql) && !preSql.endsWith(";")) { + preSql = String.format("%s;", preSql); + } + if (StringUtils.isNotBlank(preSql)) { + preSqlBuffer.append(preSql); + } + } + if (StringUtils.isNotBlank(preSqlBuffer.toString())) { + OdpsUtil.runSqlTaskWithRetry(this.odps, preSqlBuffer.toString(), "preSql"); + } else { + LOG.info("skip to execute the preSql: {}", JSON.toJSONString(preSqls)); + } + long endTime = System.currentTimeMillis(); + + LOG.info( + String.format("Exectue odpsreader preSql successfully! cost time: %s ms.", (endTime - beginTime))); + } + this.initOdpsTableInfo(); } @Override @@ -259,6 +370,33 @@ public List split(int adviceNumber) { @Override public void post() { + List postSqls = this.originalConfig.getList(Key.POST_SQL, String.class); + + if (postSqls != null && !postSqls.isEmpty()) { + LOG.info( + String.format("Beigin to exectue postSql : %s. \n Attention: these postSqls must be idempotent!!!", + JSON.toJSONString(postSqls))); + long beginTime = System.currentTimeMillis(); + StringBuffer postSqlBuffer = new StringBuffer(); + for (String postSql : postSqls) { + postSql = postSql.trim(); + if (StringUtils.isNotBlank(postSql) && !postSql.endsWith(";")) { + postSql = String.format("%s;", postSql); + } + if (StringUtils.isNotBlank(postSql)) { + postSqlBuffer.append(postSql); + } + } + if (StringUtils.isNotBlank(postSqlBuffer.toString())) { + OdpsUtil.runSqlTaskWithRetry(this.odps, postSqlBuffer.toString(), "postSql"); + } else { + LOG.info("skip to execute the postSql: {}", JSON.toJSONString(postSqls)); + } + + long endTime = System.currentTimeMillis(); + LOG.info( + String.format("Exectue odpsreader postSql successfully! 
cost time: %s ms.", (endTime - beginTime))); + } } @Override @@ -268,6 +406,7 @@ public void destroy() { public static class Task extends Reader.Task { private static final Logger LOG = LoggerFactory.getLogger(Task.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsReader.class); private Configuration readerSliceConf; private String tunnelServer; @@ -278,32 +417,35 @@ public static class Task extends Reader.Task { private boolean isPartitionedTable; private String sessionId; private boolean isCompress; + private boolean successOnNoPartition; @Override public void init() { this.readerSliceConf = super.getPluginJobConf(); this.tunnelServer = this.readerSliceConf.getString( - Key.TUNNEL_SERVER, null); + Key.TUNNEL_SERVER, null); this.odps = OdpsUtil.initOdps(this.readerSliceConf); this.projectName = this.readerSliceConf.getString(Key.PROJECT); this.tableName = this.readerSliceConf.getString(Key.TABLE); this.table = OdpsUtil.getTable(this.odps, projectName, tableName); this.isPartitionedTable = this.readerSliceConf - .getBool(Constant.IS_PARTITIONED_TABLE); + .getBool(Constant.IS_PARTITIONED_TABLE); this.sessionId = this.readerSliceConf.getString(Constant.SESSION_ID, null); - - - this.isCompress = this.readerSliceConf.getBool(Key.IS_COMPRESS, false); + this.successOnNoPartition = this.readerSliceConf.getBool(Key.SUCCESS_ON_NO_PATITION, false); // sessionId 为空的情况是:切分级别只到 partition 的情况 - if (StringUtils.isBlank(this.sessionId)) { + String partition = this.readerSliceConf.getString(Key.PARTITION); + + // 没有分区读取时, 是没有sessionId这些的 + if (this.isPartitionedTable && StringUtils.isBlank(partition) && this.successOnNoPartition) { + LOG.warn("Partition is blank, but you config successOnNoPartition[true] ,don't need to create session"); + } else if (StringUtils.isBlank(this.sessionId)) { DownloadSession session = OdpsUtil.createMasterSessionForPartitionedTable(odps, - tunnelServer, projectName, tableName, this.readerSliceConf.getString(Key.PARTITION)); + tunnelServer, projectName, tableName, this.readerSliceConf.getString(Key.PARTITION)); this.sessionId = session.getId(); } - LOG.info("sessionId:{}", this.sessionId); } @@ -316,68 +458,72 @@ public void startRead(RecordSender recordSender) { DownloadSession downloadSession = null; String partition = this.readerSliceConf.getString(Key.PARTITION); + if (this.isPartitionedTable && StringUtils.isBlank(partition) && this.successOnNoPartition) { + LOG.warn(String.format( + "Partition is blank,not need to be read")); + recordSender.flush(); + return; + } + if (this.isPartitionedTable) { downloadSession = OdpsUtil.getSlaveSessionForPartitionedTable(this.odps, this.sessionId, - this.tunnelServer, this.projectName, this.tableName, partition); + this.tunnelServer, this.projectName, this.tableName, partition); } else { downloadSession = OdpsUtil.getSlaveSessionForNonPartitionedTable(this.odps, this.sessionId, - this.tunnelServer, this.projectName, this.tableName); + this.tunnelServer, this.projectName, this.tableName); } long start = this.readerSliceConf.getLong(Constant.START_INDEX, 0); long count = this.readerSliceConf.getLong(Constant.STEP_COUNT, - downloadSession.getRecordCount()); + downloadSession.getRecordCount()); if (count > 0) { LOG.info(String.format( - "Begin to read ODPS table:%s, partition:%s, startIndex:%s, count:%s.", - this.tableName, partition, start, count)); + "Begin to read ODPS table:%s, partition:%s, startIndex:%s, count:%s.", + this.tableName, partition, start, count)); } else if (count == 0) { - 
LOG.warn(String.format("源头表:%s 的分区:%s 没有内容可抽取, 请您知晓.", - this.tableName, partition)); + LOG.warn(MESSAGE_SOURCE.message("odpsreader.12", this.tableName, partition)); return; } else { throw DataXException.asDataXException(OdpsReaderErrorCode.READ_DATA_FAIL, - String.format("源头表:%s 的分区:%s 读取行数为负数, 请联系 ODPS 管理员查看表状态!", - this.tableName, partition)); + MESSAGE_SOURCE.message("odpsreader.13", this.tableName, partition)); } - + TableSchema tableSchema = this.table.getSchema(); Set allColumns = new HashSet(); allColumns.addAll(tableSchema.getColumns()); allColumns.addAll(tableSchema.getPartitionColumns()); - Map columnTypeMap = new HashMap(); + Map columnTypeMap = new HashMap(); for (Column column : allColumns) { - columnTypeMap.put(column.getName(), column.getType()); + columnTypeMap.put(column.getName(), column.getTypeInfo()); } try { List parsedColumnsTmp = this.readerSliceConf - .getListConfiguration(Constant.PARSED_COLUMNS); + .getListConfiguration(Constant.PARSED_COLUMNS); List> parsedColumns = new ArrayList>(); for (int i = 0; i < parsedColumnsTmp.size(); i++) { Configuration eachColumnConfig = parsedColumnsTmp.get(i); String columnName = eachColumnConfig.getString("left"); ColumnType columnType = ColumnType - .asColumnType(eachColumnConfig.getString("right")); + .asColumnType(eachColumnConfig.getString("right")); parsedColumns.add(new MutablePair( - columnName, columnType)); + columnName, columnType)); } ReaderProxy readerProxy = new ReaderProxy(recordSender, downloadSession, columnTypeMap, parsedColumns, partition, this.isPartitionedTable, - start, count, this.isCompress); + start, count, this.isCompress, this.readerSliceConf); readerProxy.doRead(); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.READ_DATA_FAIL, - String.format("源头表:%s 的分区:%s 读取失败, 请联系 ODPS 管理员查看错误详情.", this.tableName, partition), e); + MESSAGE_SOURCE.message("odpsreader.14", this.tableName, partition), e); } } - @Override public void post() { } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReaderErrorCode.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReaderErrorCode.java index cdda6ac862..8311d4efc3 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReaderErrorCode.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReaderErrorCode.java @@ -1,45 +1,53 @@ package com.alibaba.datax.plugin.reader.odpsreader; import com.alibaba.datax.common.spi.ErrorCode; +import com.alibaba.datax.common.util.MessageSource; public enum OdpsReaderErrorCode implements ErrorCode { - REQUIRED_VALUE("OdpsReader-00", "您缺失了必须填写的参数值."), - ILLEGAL_VALUE("OdpsReader-01", "您配置的值不合法."), - CREATE_DOWNLOADSESSION_FAIL("OdpsReader-03", "创建 ODPS 的 downloadSession 失败."), - GET_DOWNLOADSESSION_FAIL("OdpsReader-04", "获取 ODPS 的 downloadSession 失败."), - READ_DATA_FAIL("OdpsReader-05", "读取 ODPS 源头表失败."), - GET_ID_KEY_FAIL("OdpsReader-06", "获取 accessId/accessKey 失败."), + REQUIRED_VALUE("DATAX_R_ODPS_001", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_001"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_001")), + ILLEGAL_VALUE("DATAX_R_ODPS_002", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_002"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_002")), + CREATE_DOWNLOADSESSION_FAIL("DATAX_R_ODPS_003", 
MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_003"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_003")), + GET_DOWNLOADSESSION_FAIL("DATAX_R_ODPS_004", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_004"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_004")), + READ_DATA_FAIL("DATAX_R_ODPS_005", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_005"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_005")), + GET_ID_KEY_FAIL("DATAX_R_ODPS_006", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_006"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_006")), - ODPS_READ_EXCEPTION("OdpsReader-07", "读取 odps 异常"), - OPEN_RECORD_READER_FAILED("OdpsReader-08", "打开 recordReader 失败."), + ODPS_READ_EXCEPTION("DATAX_R_ODPS_007", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_007"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_007")), + OPEN_RECORD_READER_FAILED("DATAX_R_ODPS_008", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_008"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_008")), - ODPS_PROJECT_NOT_FOUNT("OdpsReader-10", "您配置的值不合法, odps project 不存在."), //ODPS-0420111: Project not found + ODPS_PROJECT_NOT_FOUNT("DATAX_R_ODPS_009", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_009"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_009")), //ODPS-0420111: Project not found - ODPS_TABLE_NOT_FOUNT("OdpsReader-12", "您配置的值不合法, odps table 不存在."), // ODPS-0130131:Table not found + ODPS_TABLE_NOT_FOUNT("DATAX_R_ODPS_010", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_010"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_010")), // ODPS-0130131:Table not found - ODPS_ACCESS_KEY_ID_NOT_FOUND("OdpsReader-13", "您配置的值不合法, odps accessId,accessKey 不存在."), //ODPS-0410051:Invalid credentials - accessKeyId not found + ODPS_ACCESS_KEY_ID_NOT_FOUND("DATAX_R_ODPS_011", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_011"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_011")), //ODPS-0410051:Invalid credentials - accessKeyId not found - ODPS_ACCESS_KEY_INVALID("OdpsReader-14", "您配置的值不合法, odps accessKey 错误."), //ODPS-0410042:Invalid signature value - User signature dose not match + ODPS_ACCESS_KEY_INVALID("DATAX_R_ODPS_012", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_012"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_012")), //ODPS-0410042:Invalid signature value - User signature dose not match - ODPS_ACCESS_DENY("OdpsReader-15", "拒绝访问, 您不在 您配置的 project 中."), //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project + ODPS_ACCESS_DENY("DATAX_R_ODPS_013", 
MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_013"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_013")), //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project - SPLIT_MODE_ERROR("OdpsReader-30", "splitMode配置错误."), + SPLIT_MODE_ERROR("DATAX_R_ODPS_014", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_014"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_014")), - ACCOUNT_TYPE_ERROR("OdpsReader-31", "odps 账号类型错误."), + ACCOUNT_TYPE_ERROR("DATAX_R_ODPS_015", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_015"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_015")), - VIRTUAL_VIEW_NOT_SUPPORT("OdpsReader-32", "Datax 不支持 读取虚拟视图."), + VIRTUAL_VIEW_NOT_SUPPORT("DATAX_R_ODPS_016", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_016"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_016")), - PARTITION_ERROR("OdpsReader-33", "分区配置错误."), + PARTITION_ERROR("DATAX_R_ODPS_017", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_017"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_017")), + PARTITION_NOT_EXISTS_ERROR("DATAX_R_ODPS_018", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_018"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_018")), + + RUN_SQL_FAILED("DATAX_R_ODPS_019", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_019"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_019")), + + RUN_SQL_ODPS_EXCEPTION("DATAX_R_ODPS_020", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_020"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_020")), ; private final String code; private final String description; + private final String solution; - private OdpsReaderErrorCode(String code, String description) { + private OdpsReaderErrorCode(String code, String description,String solution) { this.code = code; this.description = description; + this.solution = solution; } @Override @@ -52,9 +60,12 @@ public String getDescription() { return this.description; } + public String getSolution() { + return solution; + } + @Override public String toString() { - return String.format("Code:[%s], Description:[%s]. ", this.code, - this.description); + return String.format("Code:%s:%s, Solution:[%s]. 
", this.code,this.description,this.solution); } } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ReaderProxy.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ReaderProxy.java index 8e069ef568..31d0d605d6 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ReaderProxy.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ReaderProxy.java @@ -3,28 +3,37 @@ import com.alibaba.datax.common.element.*; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.plugin.reader.odpsreader.util.OdpsUtil; +import com.alibaba.fastjson.JSON; +import com.aliyun.odps.Column; import com.aliyun.odps.OdpsType; +import com.aliyun.odps.data.*; import com.aliyun.odps.data.Record; -import com.aliyun.odps.data.RecordReader; import com.aliyun.odps.tunnel.TableTunnel; +import com.aliyun.odps.type.ArrayTypeInfo; +import com.aliyun.odps.type.MapTypeInfo; +import com.aliyun.odps.type.TypeInfo; +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.text.ParseException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.text.SimpleDateFormat; +import java.util.*; public class ReaderProxy { private static final Logger LOG = LoggerFactory .getLogger(ReaderProxy.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ReaderProxy.class); private static boolean IS_DEBUG = LOG.isDebugEnabled(); private RecordSender recordSender; private TableTunnel.DownloadSession downloadSession; - private Map columnTypeMap; + private Map columnTypeMap; private List> parsedColumns; private String partition; private boolean isPartitionTable; @@ -32,11 +41,38 @@ public class ReaderProxy { private long start; private long count; private boolean isCompress; + + private static final String NULL_INDICATOR = null; + // TODO 没有支持用户可配置 + // TODO 没有timezone + private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + // 读取 jvm 默认时区 + private Calendar calendarForDate = null; + private boolean useDateWithCalendar = true; + + private Calendar initCalendar(Configuration config) { + // 理论上不会有其他选择,有配置化可以随时应急 + String calendarType = config.getString("calendarType", "iso8601"); + Boolean lenient = config.getBool("calendarLenient", true); + + // 默认jvm时区 + TimeZone timeZone = TimeZone.getDefault(); + String timeZoneStr = config.getString("calendarTimeZone"); + if (StringUtils.isNotBlank(timeZoneStr)) { + // 如果用户明确指定使用用户指定的 + timeZone = TimeZone.getTimeZone(timeZoneStr); + } + + Calendar calendarForDate = new Calendar.Builder().setCalendarType(calendarType).setLenient(lenient) + .setTimeZone(timeZone).build(); + return calendarForDate; + } public ReaderProxy(RecordSender recordSender, TableTunnel.DownloadSession downloadSession, - Map columnTypeMap, - List> parsedColumns, String partition, - boolean isPartitionTable, long start, long count, boolean isCompress) { + Map columnTypeMap, + List> parsedColumns, String partition, + boolean isPartitionTable, long start, long count, boolean isCompress, Configuration taskConfig) { this.recordSender = recordSender; this.downloadSession = downloadSession; this.columnTypeMap = 
columnTypeMap; @@ -46,14 +82,24 @@ public ReaderProxy(RecordSender recordSender, TableTunnel.DownloadSession downlo this.start = start; this.count = count; this.isCompress = isCompress; + + this.calendarForDate = this.initCalendar(taskConfig); + this.useDateWithCalendar = taskConfig.getBool("useDateWithCalendar", true); } // warn: odps 分区列和正常列不能重名, 所有列都不不区分大小写 public void doRead() { try { LOG.info("start={}, count={}",start, count); - //RecordReader recordReader = downloadSession.openRecordReader(start, count, isCompress); - RecordReader recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress); + List userConfigNormalColumns = OdpsUtil.getNormalColumns(this.parsedColumns, this.columnTypeMap); + RecordReader recordReader = null; + // fix #ODPS-52184/10332469, updateColumnsSize表示如果用户指定的读取源表列数100列以内的话,则进行列裁剪优化; + int updateColumnsSize = 100; + if(userConfigNormalColumns.size() <= updateColumnsSize){ + recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress, userConfigNormalColumns); + } else { + recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress); + } Record odpsRecord; Map partitionMap = this @@ -72,7 +118,7 @@ public void doRead() { } catch (InterruptedException ignored) { } recordReader = downloadSession.openRecordReader(start, count, isCompress); - LOG.warn("odps-read-exception, 重试第{}次", retryTimes); + LOG.warn(MESSAGE_SOURCE.message("readerproxy.1", retryTimes)); retryTimes++; continue; } else { @@ -144,9 +190,7 @@ private Map parseCurrentPartitionValue() { throw DataXException .asDataXException( OdpsReaderErrorCode.ILLEGAL_VALUE, - String.format( - "您的分区 [%s] 解析出现错误,解析后正确的配置方式类似为 [ pt=1,dt=1 ].", - eachPartition)); + MESSAGE_SOURCE.message("readerproxy.2", eachPartition)); } // warn: translate to lower case, it's more comfortable to // compare whit user's input columns @@ -168,8 +212,7 @@ private String getPartitionColumnValue(Map partitionMap, partitionColumnName = partitionColumnName.toLowerCase(); // it's will never happen, but add this checking if (!partitionMap.containsKey(partitionColumnName)) { - String errorMessage = String.format( - "表所有分区信息为: %s 其中找不到 [%s] 对应的分区值.", + String errorMessage = MESSAGE_SOURCE.message("readerproxy.3", com.alibaba.fastjson.JSON.toJSONString(partitionMap), partitionColumnName); throw DataXException.asDataXException( @@ -190,7 +233,7 @@ private String getPartitionColumnValue(Map partitionMap, * every line record of odps table * @param dataXRecord * every datax record, to be send to writer. 
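The doRead above only pushes the configured column list down to the tunnel when it holds at most 100 normal columns (updateColumnsSize); wider projections fall back to full-row reads. A hedged sketch of the pruned call path, reusing the same openRecordReader overload (the column names are invented):

import java.util.Arrays;
import java.util.List;
import com.aliyun.odps.Column;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.data.RecordReader;
import com.aliyun.odps.tunnel.TableTunnel;

public final class PrunedReadSketch {
    static RecordReader open(TableTunnel.DownloadSession session,
                             long start, long count, boolean compress) throws Exception {
        // Only the columns DataX actually needs; the tunnel skips the rest.
        List<Column> wanted = Arrays.asList(
                new Column("id", OdpsType.BIGINT),
                new Column("name", OdpsType.STRING));
        return wanted.size() <= 100
                ? session.openRecordReader(start, count, compress, wanted)
                : session.openRecordReader(start, count, compress);
    }
}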
method getXXX() case sensitive - * @param type + * @param typeInfo * odps column type * @param columnNameValue * for partition column it's column value, for normal column it's @@ -199,83 +242,681 @@ private String getPartitionColumnValue(Map partitionMap, * true means partition column and false means normal column * */ private void odpsColumnToDataXField(Record odpsRecord, - com.alibaba.datax.common.element.Record dataXRecord, OdpsType type, + com.alibaba.datax.common.element.Record dataXRecord, TypeInfo typeInfo, String columnNameValue, boolean isPartitionColumn) { + + ArrayRecord record = (ArrayRecord) odpsRecord; + + OdpsType type = typeInfo.getOdpsType(); + switch (type) { - case BIGINT: { - if (isPartitionColumn) { - dataXRecord.addColumn(new LongColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new LongColumn(odpsRecord - .getBigint(columnNameValue))); + case BIGINT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new LongColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new LongColumn(record + .getBigint(columnNameValue))); + } + break; } - break; - } - case BOOLEAN: { - if (isPartitionColumn) { - dataXRecord.addColumn(new BoolColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new BoolColumn(odpsRecord - .getBoolean(columnNameValue))); + case BOOLEAN: { + if (isPartitionColumn) { + dataXRecord.addColumn(new BoolColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new BoolColumn(record + .getBoolean(columnNameValue))); + } + break; } - break; - } - case DATETIME: { - if (isPartitionColumn) { - try { - dataXRecord.addColumn(new DateColumn(ColumnCast + case DATE: + case DATETIME: { + // odps分区列,目前支持TINYINT、SMALLINT、INT、BIGINT、VARCHAR和STRING类型 + if (isPartitionColumn) { + try { + dataXRecord.addColumn(new DateColumn(ColumnCast + .string2Date(new StringColumn(columnNameValue)))); + } catch (ParseException e) { + String errMessage = MESSAGE_SOURCE.message("readerproxy.4", + this.partition, columnNameValue); + LOG.error(errMessage); + throw DataXException.asDataXException( + OdpsReaderErrorCode.READ_DATA_FAIL, errMessage, e); + } + } else { + if (com.aliyun.odps.OdpsType.DATETIME == type) { + dataXRecord.addColumn(new DateColumn(record + .getDatetime(columnNameValue))); + } else { + if (this.useDateWithCalendar) { + dataXRecord.addColumn(new DateColumn(record. + getDate(columnNameValue, this.calendarForDate))); + } else { + dataXRecord.addColumn(new DateColumn(record + .getDate(columnNameValue))); + } + + } + } + + break; + } + case DOUBLE: { + if (isPartitionColumn) { + dataXRecord.addColumn(new DoubleColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new DoubleColumn(record + .getDouble(columnNameValue))); + } + break; + } + case DECIMAL: { + if(isPartitionColumn) { + dataXRecord.addColumn(new DoubleColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new DoubleColumn(record.getDecimal(columnNameValue))); + } + break; + } + case STRING: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new StringColumn(record + .getString(columnNameValue))); + } + break; + } + case TINYINT: + if (isPartitionColumn) { + dataXRecord.addColumn(new LongColumn(columnNameValue)); + } else { + Byte value = record.getTinyint(columnNameValue); + Integer intValue = value != null ? 
value.intValue() : null; + dataXRecord.addColumn(new LongColumn(intValue)); + } + break; + case SMALLINT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new LongColumn(columnNameValue)); + } else { + Short value = record.getSmallint(columnNameValue); + Long valueInLong = null; + if (null != value) { + valueInLong = value.longValue(); + } + dataXRecord.addColumn(new LongColumn(valueInLong)); + } + break; + } + case INT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new LongColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new LongColumn(record + .getInt(columnNameValue))); + } + break; + } + case FLOAT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new DoubleColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new DoubleColumn(record + .getFloat(columnNameValue))); + } + break; + } + case VARCHAR: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + Varchar value = record.getVarchar(columnNameValue); + String columnValue = value != null ? value.getValue() : null; + dataXRecord.addColumn(new StringColumn(columnValue)); + } + break; + } + case TIMESTAMP: { + if (isPartitionColumn) { + try { + dataXRecord.addColumn(new DateColumn(ColumnCast .string2Date(new StringColumn(columnNameValue)))); - } catch (ParseException e) { - LOG.error(String.format("", this.partition)); - String errMessage = String.format( - "您读取分区 [%s] 出现日期转换异常, 日期的字符串表示为 [%s].", + } catch (ParseException e) { + String errMessage = MESSAGE_SOURCE.message("readerproxy.4", this.partition, columnNameValue); - LOG.error(errMessage); - throw DataXException.asDataXException( + LOG.error(errMessage); + throw DataXException.asDataXException( OdpsReaderErrorCode.READ_DATA_FAIL, errMessage, e); + } + } else { + dataXRecord.addColumn(new DateColumn(record + .getTimestamp(columnNameValue))); } - } else { - dataXRecord.addColumn(new DateColumn(odpsRecord - .getDatetime(columnNameValue))); - } - break; - } - case DOUBLE: { - if (isPartitionColumn) { - dataXRecord.addColumn(new DoubleColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new DoubleColumn(odpsRecord - .getDouble(columnNameValue))); + break; } - break; + case BINARY: { + if (isPartitionColumn) { + dataXRecord.addColumn(new BytesColumn(columnNameValue.getBytes())); + } else { +// dataXRecord.addColumn(new BytesColumn(record +// .getBinary(columnNameValue).data())); + Binary binaryData = record.getBinary(columnNameValue); + if (null == binaryData) { + dataXRecord.addColumn(new BytesColumn(null)); + } else { + dataXRecord.addColumn(new BytesColumn(binaryData.data())); + } + } + break; + } + case ARRAY: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + List arrayValue = record.getArray(columnNameValue); + if (arrayValue == null) { + dataXRecord.addColumn(new StringColumn(null)); + } else { + dataXRecord.addColumn(new StringColumn(JSON.toJSONString(transOdpsArrayToJavaList(arrayValue, (ArrayTypeInfo)typeInfo)))); + } + } + break; + } + case MAP: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + Map mapValue = record.getMap(columnNameValue); + if (mapValue == null) { + dataXRecord.addColumn(new StringColumn(null)); + } else { + dataXRecord.addColumn(new StringColumn(JSON.toJSONString(transOdpsMapToJavaMap(mapValue, (MapTypeInfo)typeInfo)))); + } + } + break; + } + case STRUCT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + Struct 
structValue = record.getStruct(columnNameValue); + if (structValue == null) { + dataXRecord.addColumn(new StringColumn(null)); + } else { + dataXRecord.addColumn(new StringColumn(JSON.toJSONString(transOdpsStructToJavaMap(structValue)))); + } + } + break; + } + default: + throw DataXException.asDataXException( + OdpsReaderErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("readerproxy.5", type)); } - case DECIMAL: { - if(isPartitionColumn) { - dataXRecord.addColumn(new DoubleColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new DoubleColumn(odpsRecord.getDecimal(columnNameValue))); + } + + private List transOdpsArrayToJavaList(List odpsArray, ArrayTypeInfo typeInfo) { + TypeInfo eleType = typeInfo.getElementTypeInfo(); + List result = new ArrayList(); + switch (eleType.getOdpsType()) { + // warn:array [1.2, 3.4] 被转为了:"["1.2", "3.4"]", 本来应该被转换成 "[1.2, 3.4]" + // 注意回归Case覆盖 + case BIGINT: + case DOUBLE: + case INT: + case FLOAT: + case DECIMAL: + case TINYINT: + case SMALLINT: + for (Object item : odpsArray) { + Object object = item; + result.add(object == null ? NULL_INDICATOR : object); + } + return result; + case BOOLEAN: // 未调整array 问题 + case STRING: + case VARCHAR: + case CHAR: + case TIMESTAMP: + case DATE: + for (Object item : odpsArray) { + Object object = item; + result.add(object == null ? NULL_INDICATOR : object.toString()); + } + return result; + /** + * 日期类型 + */ + case DATETIME: + for (Object item : odpsArray) { + Date dateVal = (Date) item; + result.add(dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + } + return result; + /** + * 字节数组 + */ + case BINARY: + for (Object item : odpsArray) { + Binary binaryVal = (Binary) item; + result.add(binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + } + return result; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (Object item : odpsArray) { + IntervalDayTime dayTimeVal = (IntervalDayTime) item; + result.add(dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + } + return result; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (Object item : odpsArray) { + IntervalYearMonth yearMonthVal = (IntervalYearMonth) item; + result.add(yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + } + return result; + /** + * 结构体 + */ + case STRUCT: + for (Object item : odpsArray) { + Struct structVal = (Struct) item; + result.add(structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); } - break; + return result; + /** + * MAP类型 + */ + case MAP: + for (Object item : odpsArray) { + Map mapVal = (Map) item; + result.add(mapVal == null ? NULL_INDICATOR : + transOdpsMapToJavaMap(mapVal, (MapTypeInfo) eleType)); + } + return result; + /** + * ARRAY类型 + */ + case ARRAY: + for (Object item : odpsArray) { + List arrayVal = (List) item; + result.add(arrayVal == null ? NULL_INDICATOR : + transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) eleType)); + } + return result; + default: + throw new IllegalArgumentException("decode record failed. 
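For the complex types, the net effect of transOdpsArrayToJavaList and its siblings is that an ARRAY/MAP/STRUCT column reaches the DataX channel as a JSON string, built by fastjson from plain Java collections. A tiny illustration of the resulting shapes (values invented):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSON;

public class ComplexTypeJsonSketch {
    public static void main(String[] args) {
        List<Object> array = new ArrayList<Object>();
        array.add(1L);
        array.add(2L); // numeric element types stay numeric, per the comment above

        Map<Object, Object> map = new HashMap<Object, Object>();
        map.put("k", "v"); // scalar map values are rendered via toString

        System.out.println(JSON.toJSONString(array)); // [1,2]
        System.out.println(JSON.toJSONString(map));   // {"k":"v"}
    }
}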
column type: " + eleType.getTypeName()); } - case STRING: { - if (isPartitionColumn) { - dataXRecord.addColumn(new StringColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new StringColumn(odpsRecord - .getString(columnNameValue))); + } + + private Map transOdpsMapToJavaMap(Map odpsMap, MapTypeInfo typeInfo) { + TypeInfo keyType = typeInfo.getKeyTypeInfo(); + TypeInfo valueType = typeInfo.getValueTypeInfo(); + Map result = new HashMap(); + Set entrySet = null; + switch (valueType.getOdpsType()) { + case BIGINT: + case DOUBLE: + case BOOLEAN: + case STRING: + case DECIMAL: + case TINYINT: + case SMALLINT: + case INT: + case FLOAT: + case CHAR: + case VARCHAR: + case DATE: + case TIMESTAMP: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Object value = item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), value == null ? NULL_INDICATOR : value.toString()); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Object value = item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + value == null ? NULL_INDICATOR : value.toString()); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Object value = item.getValue(); + result.put(item.getKey(), value == null ? NULL_INDICATOR : value.toString()); + } + return result; } - break; + /** + * 日期类型 + */ + case DATETIME: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Date dateVal = (Date) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), + dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Date dateVal = (Date) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Date dateVal = (Date) item.getValue(); + result.put(item.getKey(), dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + } + return result; + } + /** + * 字节数组 + */ + case BINARY: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Binary binaryVal = (Binary) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Binary binaryVal = (Binary) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Binary binaryVal = (Binary) item.getValue(); + result.put(item.getKey(), binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + } + return result; + } + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalDayTime dayTimeVal = (IntervalDayTime) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), dayTimeVal == null ? 
NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalDayTime dayTimeVal = (IntervalDayTime) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalDayTime dayTimeVal = (IntervalDayTime) item.getValue(); + result.put(item.getKey(), dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + } + return result; + } + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalYearMonth yearMonthVal = (IntervalYearMonth) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalYearMonth yearMonthVal = (IntervalYearMonth) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalYearMonth yearMonthVal = (IntervalYearMonth) item.getValue(); + result.put(item.getKey(), yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + } + return result; + } + /** + * 结构体 + */ + case STRUCT: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Struct structVal = (Struct) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Struct structVal = (Struct) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Struct structVal = (Struct) item.getValue(); + result.put(item.getKey(), structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + } + return result; + } + /** + * MAP类型 + */ + case MAP: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Map mapVal = (Map) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()),mapVal == null ? NULL_INDICATOR : + transOdpsMapToJavaMap(mapVal, (MapTypeInfo) valueType)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Map mapVal = (Map) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + mapVal == null ? NULL_INDICATOR : transOdpsMapToJavaMap(mapVal, (MapTypeInfo) valueType)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Map mapVal = (Map) item.getValue(); + result.put(item.getKey(), mapVal == null ? 
NULL_INDICATOR : + transOdpsMapToJavaMap(mapVal, (MapTypeInfo) valueType)); + } + return result; + } + /** + * ARRAY类型 + */ + case ARRAY: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + List arrayVal = (List) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()),arrayVal == null ? NULL_INDICATOR : + transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) valueType)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + List arrayVal = (List) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + arrayVal == null ? NULL_INDICATOR : transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) valueType)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + List arrayVal = (List) item.getValue(); + result.put(item.getKey(), arrayVal == null ? NULL_INDICATOR : + transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) valueType)); + } + return result; + } + default: + throw new IllegalArgumentException("decode record failed. column type: " + valueType.getTypeName()); } - default: - throw DataXException - .asDataXException( - OdpsReaderErrorCode.ILLEGAL_VALUE, - String.format( - "DataX 抽取 ODPS 数据不支持字段类型为:[%s]. 目前支持抽取的字段类型有:bigint, boolean, datetime, double, decimal, string. " - + "您可以选择不抽取 DataX 不支持的字段或者联系 ODPS 管理员寻求帮助.", - type)); + } + + private Map transIntervalDayTimeToJavaMap(IntervalDayTime dayTime) { + Map result = new HashMap(); + result.put("totalSeconds", dayTime.getTotalSeconds()); + result.put("nanos", (long)dayTime.getNanos()); + return result; + } + + private Map transOdpsStructToJavaMap(Struct odpsStruct) { + Map result = new HashMap(); + for (int i = 0; i < odpsStruct.getFieldCount(); i++) { + String fieldName = odpsStruct.getFieldName(i); + Object fieldValue = odpsStruct.getFieldValue(i); + TypeInfo fieldType = odpsStruct.getFieldTypeInfo(i); + switch (fieldType.getOdpsType()) { + case BIGINT: + case DOUBLE: + case BOOLEAN: + case STRING: + case DECIMAL: + case TINYINT: + case SMALLINT: + case INT: + case FLOAT: + case VARCHAR: + case CHAR: + case TIMESTAMP: + case DATE: + result.put(fieldName, fieldValue == null ? NULL_INDICATOR : fieldValue.toString()); + break; + /** + * 日期类型 + */ + case DATETIME: + Date dateVal = (Date) fieldValue; + result.put(fieldName, dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + break; + /** + * 字节数组 + */ + case BINARY: + Binary binaryVal = (Binary) fieldValue; + result.put(fieldName, binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + break; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + IntervalDayTime dayTimeVal = (IntervalDayTime) fieldValue; + result.put(fieldName, dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + break; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + IntervalYearMonth yearMonthVal = (IntervalYearMonth) fieldValue; + result.put(fieldName, yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + break; + /** + * 结构体 + */ + case STRUCT: + Struct structVal = (Struct) fieldValue; + result.put(fieldName, structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + break; + /** + * MAP类型 + */ + case MAP: + Map mapVal = (Map) fieldValue; + result.put(fieldName, mapVal == null ? 
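Since INTERVAL values have no JSON-native form, transIntervalDayTimeToJavaMap and transIntervalYearMonthToJavaMap flatten them into two-field maps before serialization. Assuming an interval of 90 seconds plus 1 nanosecond, and one of 1 year and 2 months, the serialized shapes would look like:

import java.util.HashMap;
import java.util.Map;
import com.alibaba.fastjson.JSON;

public class IntervalShapeSketch {
    public static void main(String[] args) {
        Map<String, Long> dayTime = new HashMap<String, Long>();
        dayTime.put("totalSeconds", 90L); // IntervalDayTime.getTotalSeconds()
        dayTime.put("nanos", 1L);         // IntervalDayTime.getNanos(), widened to long

        Map<String, Integer> yearMonth = new HashMap<String, Integer>();
        yearMonth.put("years", 1);        // IntervalYearMonth.getYears()
        yearMonth.put("months", 2);       // IntervalYearMonth.getMonths()

        // e.g. {"nanos":1,"totalSeconds":90} and {"months":2,"years":1}
        System.out.println(JSON.toJSONString(dayTime));
        System.out.println(JSON.toJSONString(yearMonth));
    }
}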
NULL_INDICATOR : + transOdpsMapToJavaMap(mapVal, (MapTypeInfo) fieldType)); + break; + /** + * ARRAY类型 + */ + case ARRAY: + List arrayVal = (List) fieldValue; + result.put(fieldName, arrayVal == null ? NULL_INDICATOR : + transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) fieldType)); + break; + default: + throw new IllegalArgumentException("decode record failed. column type: " + fieldType.getTypeName()); + } } - } + return result; + } + + private Map transIntervalYearMonthToJavaMap(IntervalYearMonth yearMonth) { + Map result = new HashMap(); + result.put("years", yearMonth.getYears()); + result.put("months", yearMonth.getMonths()); + return result; + } + } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/IdAndKeyUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/IdAndKeyUtil.java index faa90a987d..05722b59f9 100644 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/IdAndKeyUtil.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/IdAndKeyUtil.java @@ -1,5 +1,5 @@ /** - * (C) 2010-2014 Alibaba Group Holding Limited. + * (C) 2010-2022 Alibaba Group Holding Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,11 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.datax.plugin.reader.odpsreader.Constant; +import com.alibaba.datax.common.util.IdAndKeyRollingUtil; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.plugin.reader.odpsreader.Key; import com.alibaba.datax.plugin.reader.odpsreader.OdpsReaderErrorCode; + import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,6 +31,7 @@ public class IdAndKeyUtil { private static Logger LOG = LoggerFactory.getLogger(IdAndKeyUtil.class); + private static MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(IdAndKeyUtil.class); public static Configuration parseAccessIdAndKey(Configuration originalConfig) { String accessId = originalConfig.getString(Key.ACCESS_ID); @@ -50,36 +53,13 @@ public static Configuration parseAccessIdAndKey(Configuration originalConfig) { private static Configuration getAccessIdAndKeyFromEnv(Configuration originalConfig, Map envProp) { - String accessId = null; - String accessKey = null; - - String skynetAccessID = envProp.get(Constant.SKYNET_ACCESSID); - String skynetAccessKey = envProp.get(Constant.SKYNET_ACCESSKEY); - - if (StringUtils.isNotBlank(skynetAccessID) - || StringUtils.isNotBlank(skynetAccessKey)) { - /** - * 环境变量中,如果存在SKYNET_ACCESSID/SKYNET_ACCESSKEy(只要有其中一个变量,则认为一定是两个都存在的!), - * 则使用其值作为odps的accessId/accessKey(会解密) - */ - - LOG.info("Try to get accessId/accessKey from environment."); - accessId = skynetAccessID; - accessKey = DESCipher.decrypt(skynetAccessKey); - if (StringUtils.isNotBlank(accessKey)) { - originalConfig.set(Key.ACCESS_ID, accessId); - originalConfig.set(Key.ACCESS_KEY, accessKey); - LOG.info("Get accessId/accessKey from environment variables successfully."); - } else { - throw DataXException.asDataXException(OdpsReaderErrorCode.GET_ID_KEY_FAIL, - String.format("从环境变量中获取accessId/accessKey 失败, accessId=[%s]", accessId)); - } - } else { + // 如果获取到ak,在getAccessIdAndKeyFromEnv中已经设置到originalConfig了 + String accessKey = IdAndKeyRollingUtil.getAccessIdAndKeyFromEnv(originalConfig); + if 
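IdAndKeyUtil now delegates the environment fallback to the shared IdAndKeyRollingUtil; the removed lines show the behaviour being preserved: when SKYNET_ACCESSID/SKYNET_ACCESSKEY are present, the encrypted key is decrypted and both values are written back into the job configuration. A rough, hypothetical equivalent of that lookup (the decryption step is elided; the helper and its return convention are made up for illustration):

import java.util.Map;

public class EnvAkFallbackSketch {
    /** Returns {accessId, accessKey} from the environment, or null when absent. */
    static String[] fromEnv(Map<String, String> env) {
        String id = env.get("SKYNET_ACCESSID");
        String key = env.get("SKYNET_ACCESSKEY");
        if (id == null || id.isEmpty() || key == null || key.isEmpty()) {
            return null; // caller must fail: AK neither configured nor in env
        }
        // The real code first decrypts the key, e.g. via DESCipher.decrypt(key).
        return new String[] { id, key };
    }
}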
(StringUtils.isBlank(accessKey)) { // 无处获取(既没有配置在作业中,也没用在环境变量中) throw DataXException.asDataXException(OdpsReaderErrorCode.GET_ID_KEY_FAIL, - "无法获取到accessId/accessKey. 它们既不存在于您的配置中,也不存在于环境变量中."); + MESSAGE_SOURCE.message("idandkeyutil.2")); } - return originalConfig; } } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/LocalStrings.properties b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/LocalStrings.properties new file mode 100644 index 0000000000..897ce23244 --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/LocalStrings.properties @@ -0,0 +1,25 @@ +descipher.1=\u957F\u5EA6\u4E0D\u662F\u5076\u6570 + +idandkeyutil.1=\u4ECE\u73AF\u5883\u53D8\u91CF\u4E2D\u83B7\u53D6accessId/accessKey \u5931\u8D25, accessId=[{0}] +idandkeyutil.2=\u65E0\u6CD5\u83B7\u53D6\u5230accessId/accessKey. \u5B83\u4EEC\u65E2\u4E0D\u5B58\u5728\u4E8E\u60A8\u7684\u914D\u7F6E\u4E2D\uFF0C\u4E5F\u4E0D\u5B58\u5728\u4E8E\u73AF\u5883\u53D8\u91CF\u4E2D. + + +odpssplitutil.1=\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u4E0D\u80FD\u4E3A\u7A7A\u767D. +odpssplitutil.2=\u5207\u5206\u7684 recordCount \u4E0D\u80FD\u4E3A\u8D1F\u6570.recordCount={0} +odpssplitutil.3=\u5207\u5206\u7684 adviceNum \u4E0D\u80FD\u4E3A\u8D1F\u6570.adviceNum={0} +odpssplitutil.4=\u6CE8\u610F: \u7531\u4E8E\u60A8\u914D\u7F6E\u4E86successOnNoPartition\u503C\u4E3Atrue (\u5373\u5F53\u5206\u533A\u503C\u4E0D\u5B58\u5728\u65F6, \u540C\u6B65\u4EFB\u52A1\u4E0D\u62A5\u9519), \u60A8\u8BBE\u7F6E\u7684\u5206\u533A\u65E0\u6CD5\u5339\u914D\u5230ODPS\u8868\u4E2D\u5BF9\u5E94\u7684\u5206\u533A, \u540C\u6B65\u4EFB\u52A1\u7EE7\u7EED... + +odpsutil.1=datax\u83B7\u53D6\u4E0D\u5230\u6E90\u8868\u7684\u5217\u4FE1\u606F\uFF0C \u7531\u4E8E\u60A8\u672A\u914D\u7F6E\u8BFB\u53D6\u6E90\u5934\u8868\u7684\u5217\u4FE1\u606F. datax\u65E0\u6CD5\u77E5\u9053\u8BE5\u62BD\u53D6\u8868\u7684\u54EA\u4E9B\u5B57\u6BB5\u7684\u6570\u636E\uFF0C \u6B63\u786E\u7684\u914D\u7F6E\u65B9\u5F0F\u662F\u7ED9 column \u914D\u7F6E\u4E0A\u60A8\u9700\u8981\u8BFB\u53D6\u7684\u5217\u540D\u79F0,\u7528\u82F1\u6587\u9017\u53F7\u5206\u9694. +odpsutil.2=\u60A8\u6240\u914D\u7F6E\u7684maxRetryTime \u503C\u9519\u8BEF. \u8BE5\u503C\u4E0D\u80FD\u5C0F\u4E8E1, \u4E14\u4E0D\u80FD\u5927\u4E8E {0}. \u63A8\u8350\u7684\u914D\u7F6E\u65B9\u5F0F\u662F\u7ED9maxRetryTime \u914D\u7F6E1-11\u4E4B\u95F4\u7684\u67D0\u4E2A\u503C. \u8BF7\u60A8\u68C0\u67E5\u914D\u7F6E\u5E76\u505A\u51FA\u76F8\u5E94\u4FEE\u6539. +odpsutil.3=\u4E0D\u652F\u6301\u7684\u8D26\u53F7\u7C7B\u578B:[{0}]. \u8D26\u53F7\u7C7B\u578B\u76EE\u524D\u4EC5\u652F\u6301aliyun, taobao. +odpsutil.4=\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u4E0D\u80FD\u4E3A\u7A7A\u767D. +odpsutil.5=\u6E90\u5934\u8868\u7684\u5217\u914D\u7F6E\u9519\u8BEF. \u60A8\u6240\u914D\u7F6E\u7684\u5217 [{0}] \u4E0D\u5B58\u5728. +odpsutil.6=open RecordReader\u5931\u8D25. \u8BF7\u8054\u7CFB ODPS \u7BA1\u7406\u5458\u5904\u7406. +odpsutil.7=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [project] \u662F\u5426\u6B63\u786E. +odpsutil.8=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [table] \u662F\u5426\u6B63\u786E. +odpsutil.9=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [accessId] [accessKey]\u662F\u5426\u6B63\u786E. +odpsutil.10=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. 
\u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [accessKey] \u662F\u5426\u6B63\u786E. +odpsutil.11=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [accessId] [accessKey] [project]\u662F\u5426\u5339\u914D. +odpsutil.12=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 project,table,accessId,accessKey,odpsServer\u7B49\u503C. +odpsutil.13=\u6267\u884C ODPS SQL\u5931\u8D25, \u8FD4\u56DE\u503C\u4E3A:{0}. \u8BF7\u4ED4\u7EC6\u68C0\u67E5ODPS SQL\u662F\u5426\u6B63\u786E, \u5982\u679C\u68C0\u67E5\u65E0\u8BEF, \u8BF7\u8054\u7CFB ODPS \u503C\u73ED\u540C\u5B66\u5904\u7406. SQL \u5185\u5BB9\u4E3A:[\n{1}\n]. +odpsutil.14=\u6267\u884C ODPS SQL \u65F6\u629B\u51FA\u5F02\u5E38, \u8BF7\u4ED4\u7EC6\u68C0\u67E5ODPS SQL\u662F\u5426\u6B63\u786E, \u5982\u679C\u68C0\u67E5\u65E0\u8BEF, \u8BF7\u8054\u7CFB ODPS \u503C\u73ED\u540C\u5B66\u5904\u7406. SQL \u5185\u5BB9\u4E3A:[\n{0}\n]. \ No newline at end of file diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsSplitUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsSplitUtil.java index b7f4f1aaf3..2030033da7 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsSplitUtil.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsSplitUtil.java @@ -2,19 +2,26 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.common.util.RangeSplitUtil; import com.alibaba.datax.plugin.reader.odpsreader.Constant; import com.alibaba.datax.plugin.reader.odpsreader.Key; import com.alibaba.datax.plugin.reader.odpsreader.OdpsReaderErrorCode; import com.aliyun.odps.Odps; import com.aliyun.odps.tunnel.TableTunnel.DownloadSession; + import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; public final class OdpsSplitUtil { + private static final Logger LOG = LoggerFactory.getLogger(OdpsSplitUtil.class); + + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsSplitUtil.class); public static List doSplit(Configuration originalConfig, Odps odps, int adviceNum) { @@ -36,9 +43,17 @@ private static List splitPartitionedTable(Odps odps, Configuratio List partitions = originalConfig.getList(Key.PARTITION, String.class); + if ((null == partitions || partitions.isEmpty()) && originalConfig.getBool(Key.SUCCESS_ON_NO_PATITION, false)) { + Configuration tempConfig = originalConfig.clone(); + tempConfig.set(Key.PARTITION, null); + splittedConfigs.add(tempConfig); + LOG.warn(MESSAGE_SOURCE.message("odpssplitutil.4")); + return splittedConfigs; + } + if (null == partitions || partitions.isEmpty()) { throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, - "您所配置的分区不能为空白."); + MESSAGE_SOURCE.message("odpssplitutil.1")); } //splitMode 默认为 record @@ -141,11 +156,11 @@ private static List splitOnePartition(Odps odps, */ private static List> splitRecordCount(long recordCount, int adviceNum) { if(recordCount<0){ - throw new IllegalArgumentException("切分的 recordCount 不能为负数.recordCount=" + recordCount); + throw new 
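The LocalStrings.properties bundle above holds the {0}-style templates that MessageSource.loadResourceBundle(...) resolves; judging by the usage, it is a thin wrapper over the standard ResourceBundle/MessageFormat machinery. A sketch of that underlying lookup, assuming the default locale (locale selection is elided):

import java.text.MessageFormat;
import java.util.ResourceBundle;

public class MessageLookupSketch {
    public static void main(String[] args) {
        // Loads LocalStrings[_locale].properties from the class's package.
        ResourceBundle bundle = ResourceBundle.getBundle(
                "com.alibaba.datax.plugin.reader.odpsreader.util.LocalStrings");
        // odpssplitutil.2 carries one {0} placeholder for the bad recordCount.
        String msg = MessageFormat.format(bundle.getString("odpssplitutil.2"), -1L);
        System.out.println(msg);
    }
}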
IllegalArgumentException(MESSAGE_SOURCE.message("odpssplitutil.2", recordCount)); } if(adviceNum<1){ - throw new IllegalArgumentException("切分的 adviceNum 不能为负数.adviceNum=" + adviceNum); + throw new IllegalArgumentException(MESSAGE_SOURCE.message("odpssplitutil.3", adviceNum)); } List> result = new ArrayList>(); diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsUtil.java index 2aa3f66e4a..0103a3832c 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsUtil.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsUtil.java @@ -2,16 +2,22 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.DataXCaseEnvUtil; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.common.util.RetryUtil; import com.alibaba.datax.plugin.reader.odpsreader.ColumnType; import com.alibaba.datax.plugin.reader.odpsreader.Constant; import com.alibaba.datax.plugin.reader.odpsreader.Key; import com.alibaba.datax.plugin.reader.odpsreader.OdpsReaderErrorCode; import com.aliyun.odps.*; +import com.aliyun.odps.Column; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; +import com.aliyun.odps.account.StsAccount; import com.aliyun.odps.data.RecordReader; +import com.aliyun.odps.task.SQLTask; import com.aliyun.odps.tunnel.TableTunnel; +import com.aliyun.odps.type.TypeInfo; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.MutablePair; @@ -19,13 +25,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.*; import java.util.concurrent.Callable; public final class OdpsUtil { private static final Logger LOG = LoggerFactory.getLogger(OdpsUtil.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsUtil.class); public static int MAX_RETRY_TIME = 10; @@ -37,8 +42,8 @@ public static void checkNecessaryConfig(Configuration originalConfig) { if (null == originalConfig.getList(Key.COLUMN) || originalConfig.getList(Key.COLUMN, String.class).isEmpty()) { - throw DataXException.asDataXException(OdpsReaderErrorCode.REQUIRED_VALUE, "datax获取不到源表的列信息, 由于您未配置读取源头表的列信息. datax无法知道该抽取表的哪些字段的数据 " + - "正确的配置方式是给 column 配置上您需要读取的列名称,用英文逗号分隔."); + throw DataXException.asDataXException(OdpsReaderErrorCode.REQUIRED_VALUE, + MESSAGE_SOURCE.message("odpsutil.1")); } } @@ -47,8 +52,8 @@ public static void dealMaxRetryTime(Configuration originalConfig) { int maxRetryTime = originalConfig.getInt(Key.MAX_RETRY_TIME, OdpsUtil.MAX_RETRY_TIME); if (maxRetryTime < 1 || maxRetryTime > OdpsUtil.MAX_RETRY_TIME) { - throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, "您所配置的maxRetryTime 值错误. 该值不能小于1, 且不能大于 " + OdpsUtil.MAX_RETRY_TIME + - ". 推荐的配置方式是给maxRetryTime 配置1-11之间的某个值. 
请您检查配置并做出相应修改."); + throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("odpsutil.2", OdpsUtil.MAX_RETRY_TIME)); } MAX_RETRY_TIME = maxRetryTime; } @@ -59,11 +64,12 @@ public static Odps initOdps(Configuration originalConfig) { String accessId = originalConfig.getString(Key.ACCESS_ID); String accessKey = originalConfig.getString(Key.ACCESS_KEY); String project = originalConfig.getString(Key.PROJECT); + String securityToken = originalConfig.getString(Key.SECURITY_TOKEN); String packageAuthorizedProject = originalConfig.getString(Key.PACKAGE_AUTHORIZED_PROJECT); String defaultProject; - if(StringUtils.isBlank(packageAuthorizedProject)) { + if (StringUtils.isBlank(packageAuthorizedProject)) { defaultProject = project; } else { defaultProject = packageAuthorizedProject; @@ -74,21 +80,26 @@ public static Odps initOdps(Configuration originalConfig) { Account account = null; if (accountType.equalsIgnoreCase(Constant.DEFAULT_ACCOUNT_TYPE)) { - account = new AliyunAccount(accessId, accessKey); + if (StringUtils.isNotBlank(securityToken)) { + account = new StsAccount(accessId, accessKey, securityToken); + } else { + account = new AliyunAccount(accessId, accessKey); + } } else { throw DataXException.asDataXException(OdpsReaderErrorCode.ACCOUNT_TYPE_ERROR, - String.format("不支持的账号类型:[%s]. 账号类型目前仅支持aliyun, taobao.", accountType)); + MESSAGE_SOURCE.message("odpsutil.3", accountType)); } Odps odps = new Odps(account); boolean isPreCheck = originalConfig.getBool("dryRun", false); - if(isPreCheck) { + if (isPreCheck) { odps.getRestClient().setConnectTimeout(3); odps.getRestClient().setReadTimeout(3); odps.getRestClient().setRetryTimes(2); } odps.setDefaultProject(defaultProject); odps.setEndpoint(odpsServer); + odps.setUserAgent("DATAX"); return odps; } @@ -103,7 +114,7 @@ public Table call() throws Exception { table.reload(); return table; } - }, 3, 1000, false); + }, DataXCaseEnvUtil.getRetryTimes(3), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(false)); } catch (Exception e) { throwDataXExceptionWhenReloadTable(e, tableName); } @@ -154,7 +165,7 @@ public static List getTableOriginalColumnNameList( public static String formatPartition(String partition) { if (StringUtils.isBlank(partition)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, - "您所配置的分区不能为空白."); + MESSAGE_SOURCE.message("odpsutil.4")); } else { return partition.trim().replaceAll(" *= *", "=") .replaceAll(" */ *", ",").replaceAll(" *, *", ",") @@ -175,6 +186,35 @@ public static List formatPartitions(List partitions) { } } + /** + * 将用户配置的分区分类成两类: + * (1) 包含 HINT 的区间过滤; + * (2) 不包含 HINT 的普通模式 + * @param userConfiguredPartitions + * @return + */ + public static UserConfiguredPartitionClassification classifyUserConfiguredPartitions(List userConfiguredPartitions){ + UserConfiguredPartitionClassification userConfiguredPartitionClassification = new UserConfiguredPartitionClassification(); + + List userConfiguredHintPartition = new ArrayList(); + List userConfiguredNormalPartition = new ArrayList(); + boolean isIncludeHintPartition = false; + for (String userConfiguredPartition : userConfiguredPartitions){ + if (StringUtils.isNotBlank(userConfiguredPartition)){ + if (userConfiguredPartition.trim().toLowerCase().startsWith(Constant.PARTITION_FILTER_HINT)) { + userConfiguredHintPartition.add(userConfiguredPartition.trim()); + isIncludeHintPartition = true; + }else { + userConfiguredNormalPartition.add(userConfiguredPartition.trim()); + } + } + } + 
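One behavioural addition in initOdps above is STS support: when the job carries a securityToken alongside the AK pair, an StsAccount is built instead of a plain AliyunAccount. The selection logic in isolation, mirroring the patch (credential values are placeholders):

import org.apache.commons.lang3.StringUtils;
import com.aliyun.odps.Odps;
import com.aliyun.odps.account.Account;
import com.aliyun.odps.account.AliyunAccount;
import com.aliyun.odps.account.StsAccount;

public class AccountSelectionSketch {
    static Odps build(String accessId, String accessKey, String securityToken) {
        Account account = StringUtils.isNotBlank(securityToken)
                ? new StsAccount(accessId, accessKey, securityToken) // temporary STS credentials
                : new AliyunAccount(accessId, accessKey);            // long-lived AK pair
        Odps odps = new Odps(account);
        odps.setUserAgent("DATAX");
        return odps;
    }
}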
userConfiguredPartitionClassification.setIncludeHintPartition(isIncludeHintPartition); + userConfiguredPartitionClassification.setUserConfiguredHintPartition(userConfiguredHintPartition); + userConfiguredPartitionClassification.setUserConfiguredNormalPartition(userConfiguredNormalPartition); + return userConfiguredPartitionClassification; + } + public static List> parseColumns( List allNormalColumns, List allPartitionColumns, List userConfiguredColumns) { @@ -182,7 +222,7 @@ public static List> parseColumns( // warn: upper & lower case for (String column : userConfiguredColumns) { MutablePair pair = new MutablePair(); - + // if constant column if (OdpsUtil.checkIfConstantColumn(column)) { // remove first and last ' @@ -213,14 +253,14 @@ public static List> parseColumns( // not exist column throw DataXException.asDataXException( OdpsReaderErrorCode.ILLEGAL_VALUE, - String.format("源头表的列配置错误. 您所配置的列 [%s] 不存在.", column)); + MESSAGE_SOURCE.message("odpsutil.5", column)); } return parsededColumns; } - + private static int indexOfIgnoreCase(List columnCollection, - String column) { + String column) { int index = -1; for (int i = 0; i < columnCollection.size(); i++) { if (columnCollection.get(i).equalsIgnoreCase(column)) { @@ -255,7 +295,7 @@ public TableTunnel.DownloadSession call() throws Exception { return tunnel.createDownloadSession( projectName, tableName); } - }, MAX_RETRY_TIME, 1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.CREATE_DOWNLOADSESSION_FAIL, e); } @@ -276,7 +316,7 @@ public TableTunnel.DownloadSession call() throws Exception { return tunnel.getDownloadSession( projectName, tableName, sessionId); } - }, MAX_RETRY_TIME ,1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.GET_DOWNLOADSESSION_FAIL, e); } @@ -299,7 +339,7 @@ public TableTunnel.DownloadSession call() throws Exception { return tunnel.createDownloadSession( projectName, tableName, partitionSpec); } - }, MAX_RETRY_TIME, 1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.CREATE_DOWNLOADSESSION_FAIL, e); } @@ -321,58 +361,152 @@ public TableTunnel.DownloadSession call() throws Exception { return tunnel.getDownloadSession( projectName, tableName, partitionSpec, sessionId); } - }, MAX_RETRY_TIME, 1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.GET_DOWNLOADSESSION_FAIL, e); } } + /** + * odpsreader采用的直接读取所有列的downloadSession + */ + public static RecordReader getRecordReader(final TableTunnel.DownloadSession downloadSession, final long start, final long count, + final boolean isCompress) { + try { + return RetryUtil.executeWithRetry(new Callable() { + @Override + public RecordReader call() throws Exception { + return downloadSession.openRecordReader(start, count, isCompress); + } + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), 
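All session and reader calls above are now funnelled through RetryUtil.executeWithRetry, with DataXCaseEnvUtil letting test environments override attempt count, sleep interval, and exponential backoff. The call shape on a generic flaky operation looks like this (mightFail is a stand-in for a tunnel call):

import java.util.concurrent.Callable;
import com.alibaba.datax.common.util.RetryUtil;

public class RetrySketch {
    static String fetchWithRetry() throws Exception {
        // 3 attempts, 1s initial sleep, exponential backoff enabled.
        return RetryUtil.executeWithRetry(new Callable<String>() {
            @Override
            public String call() throws Exception {
                return mightFail();
            }
        }, 3, 1000L, true);
    }

    static String mightFail() { return "ok"; } // stand-in for the real operation
}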
DataXCaseEnvUtil.getRetryExponential(true)); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsReaderErrorCode.OPEN_RECORD_READER_FAILED, + MESSAGE_SOURCE.message("odpsutil.6"), e); + } + } + /** + * odpsreader采用的指定读取某些列的downloadSession + */ public static RecordReader getRecordReader(final TableTunnel.DownloadSession downloadSession, final long start, final long count, - final boolean isCompress) { + final boolean isCompress, final List columns) { try { return RetryUtil.executeWithRetry(new Callable() { @Override public RecordReader call() throws Exception { - return downloadSession.openRecordReader(start, count, isCompress); + return downloadSession.openRecordReader(start, count, isCompress, columns); } - }, MAX_RETRY_TIME, 1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.OPEN_RECORD_READER_FAILED, - "open RecordReader失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.6"), e); } } + /** * table.reload() 方法抛出的 odps 异常 转化为更清晰的 datax 异常 抛出 */ public static void throwDataXExceptionWhenReloadTable(Exception e, String tableName) { - if(e.getMessage() != null) { - if(e.getMessage().contains(OdpsExceptionMsg.ODPS_PROJECT_NOT_FOUNT)) { + if (e.getMessage() != null) { + if (e.getMessage().contains(OdpsExceptionMsg.ODPS_PROJECT_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_PROJECT_NOT_FOUNT, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [project] 是否正确.", tableName), e); - } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_TABLE_NOT_FOUNT)) { + MESSAGE_SOURCE.message("odpsutil.7", tableName), e); + } else if (e.getMessage().contains(OdpsExceptionMsg.ODPS_TABLE_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_TABLE_NOT_FOUNT, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [table] 是否正确.", tableName), e); - } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_ID_NOT_FOUND)) { + MESSAGE_SOURCE.message("odpsutil.8", tableName), e); + } else if (e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_ID_NOT_FOUND)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_ACCESS_KEY_ID_NOT_FOUND, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [accessId] [accessKey]是否正确.", tableName), e); - } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_INVALID)) { + MESSAGE_SOURCE.message("odpsutil.9", tableName), e); + } else if (e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_INVALID)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_ACCESS_KEY_INVALID, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [accessKey] 是否正确.", tableName), e); - } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_DENY)) { + MESSAGE_SOURCE.message("odpsutil.10", tableName), e); + } else if (e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_DENY)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_ACCESS_DENY, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [accessId] [accessKey] [project]是否匹配.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.11", tableName), e); } } throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, - String.format("加载 ODPS 源头表:%s 失败. 
" + - "请检查您配置的 ODPS 源头表的 project,table,accessId,accessKey,odpsServer等值.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.12", tableName), e); + } + + public static List getNormalColumns(List> parsedColumns, + Map columnTypeMap) { + List userConfigNormalColumns = new ArrayList(); + Set columnNameSet = new HashSet(); + for (Pair columnInfo : parsedColumns) { + if (columnInfo.getValue() == ColumnType.NORMAL) { + String columnName = columnInfo.getKey(); + if (!columnNameSet.contains(columnName)) { + Column column = new Column(columnName, columnTypeMap.get(columnName)); + userConfigNormalColumns.add(column); + columnNameSet.add(columnName); + } + } + } + return userConfigNormalColumns; + } + + /** + * 执行odps preSql和postSql + * + * @param odps: odps client + * @param sql : 要执行的odps sql语句, 因为会有重试, 所以sql 必须为幂等的 + * @param tag : "preSql" or "postSql" + */ + public static void runSqlTaskWithRetry(final Odps odps, final String sql, final String tag){ + //重试次数 + int retryTimes = 10; + //重试间隔(ms) + long sleepTimeInMilliSecond = 1000L; + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Void call() throws Exception { + long beginTime = System.currentTimeMillis(); + + runSqlTask(odps, sql, tag); + + long endIime = System.currentTimeMillis(); + LOG.info(String.format("exectue odps sql: %s finished, cost time : %s ms", + sql, (endIime - beginTime))); + return null; + } + }, DataXCaseEnvUtil.getRetryTimes(retryTimes), DataXCaseEnvUtil.getRetryInterval(sleepTimeInMilliSecond), DataXCaseEnvUtil.getRetryExponential(true)); + } catch (Exception e) { + String errMessage = String.format("Retry %s times to exectue sql :[%s] failed! Exception: %s", + retryTimes, e.getMessage()); + throw DataXException.asDataXException(OdpsReaderErrorCode.RUN_SQL_ODPS_EXCEPTION, errMessage, e); + } + } + + public static void runSqlTask(Odps odps, String sql, String tag) { + if (StringUtils.isBlank(sql)) { + return; + } + + String taskName = String.format("datax_odpsreader_%s_%s", tag, UUID.randomUUID().toString().replace('-', '_')); + + LOG.info("Try to start sqlTask:[{}] to run odps sql:[\n{}\n] .", taskName, sql); + + Instance instance; + Instance.TaskStatus status; + try { + Map hints = new HashMap(); + hints.put("odps.sql.submit.mode", "script"); + instance = SQLTask.run(odps, odps.getDefaultProject(), sql, taskName, hints, null); + instance.waitForSuccess(); + status = instance.getTaskStatus().get(taskName); + if (!Instance.TaskStatus.Status.SUCCESS.equals(status.getStatus())) { + throw DataXException.asDataXException(OdpsReaderErrorCode.RUN_SQL_FAILED, + MESSAGE_SOURCE.message("odpsutil.13", sql)); + } + } catch (DataXException e) { + throw e; + } catch (Exception e) { + throw DataXException.asDataXException(OdpsReaderErrorCode.RUN_SQL_ODPS_EXCEPTION, + MESSAGE_SOURCE.message("odpsutil.14", sql), e); + } } } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java new file mode 100644 index 0000000000..70c2226728 --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java @@ -0,0 +1,103 @@ +package com.alibaba.datax.plugin.reader.odpsreader.util; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; + +import 
diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java
new file mode 100644
index 0000000000..70c2226728
--- /dev/null
+++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java
@@ -0,0 +1,103 @@
+package com.alibaba.datax.plugin.reader.odpsreader.util;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.alibaba.datax.plugin.reader.odpsreader.Constant;
+import com.aliyun.odps.Partition;
+import com.aliyun.odps.Table;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class SqliteUtil {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(SqliteUtil.class);
+
+    private Connection connection = null;
+    private Statement stmt = null;
+
+    private String partitionName = "partitionName";
+
+    private String createSQLTemplate = "Create Table DataXODPSReaderPPR (" + partitionName + " String, %s)";
+    private String insertSQLTemplate = "Insert Into DataXODPSReaderPPR Values (%s)";
+    private String selectSQLTemplate = "Select * From DataXODPSReaderPPR Where %s";
+
+    public SqliteUtil() throws ClassNotFoundException, SQLException {
+        Class.forName("org.sqlite.JDBC");
+        this.connection = DriverManager.getConnection("jdbc:sqlite::memory:");
+        this.stmt = this.connection.createStatement();
+    }
+
+    public void loadAllPartitionsIntoSqlite(Table table, List<String> allOriginPartitions) throws SQLException {
+        List<String> partitionColumnList = new ArrayList<String>();
+        String partition = allOriginPartitions.get(0);
+        String[] partitionSpecs = partition.split(",");
+        List<String> partitionKeyList = new ArrayList<String>();
+        for (String partitionKeyValue : partitionSpecs) {
+            String partitionKey = partitionKeyValue.split("=")[0];
+            partitionColumnList.add(String.format("%s String", partitionKey));
+            partitionKeyList.add(partitionKey);
+        }
+        String createSQL = String.format(createSQLTemplate, StringUtils.join(partitionColumnList.toArray(), ","));
+        LOGGER.info(createSQL);
+        this.stmt.execute(createSQL);
+
+        insertAllOriginPartitionIntoSqlite(table, partitionKeyList);
+    }
+
+    /**
+     * Select the partitions matching the user-configured filter conditions from sqlite.
+     * @param userHintConfiguredPartitions
+     * @return
+     */
+    public List<String> selectUserConfiguredPartition(List<String> userHintConfiguredPartitions) throws SQLException {
+        List<String> selectedPartitionsFromSqlite = new ArrayList<String>();
+        for (String partitionWhereConditions : userHintConfiguredPartitions) {
+            String selectUserConfiguredPartitionsSql = String.format(selectSQLTemplate,
+                    StringUtils.remove(partitionWhereConditions, Constant.PARTITION_FILTER_HINT));
+            LOGGER.info(selectUserConfiguredPartitionsSql);
+            ResultSet rs = stmt.executeQuery(selectUserConfiguredPartitionsSql);
+            while (rs.next()) {
+                selectedPartitionsFromSqlite.add(getPartitionsValue(rs));
+            }
+        }
+        return selectedPartitionsFromSqlite;
+    }
+
+    private String getPartitionsValue(ResultSet rs) throws SQLException {
+        List<String> partitions = new ArrayList<String>();
+        ResultSetMetaData rsMetaData = rs.getMetaData();
+        int columnCounter = rsMetaData.getColumnCount();
+        // start from column 2: column 1 holds the full partition spec (partitionName)
+        for (int columnIndex = 2; columnIndex <= columnCounter; columnIndex++) {
+            partitions.add(String.format("%s=%s", rsMetaData.getColumnName(columnIndex), rs.getString(columnIndex)));
+        }
+        return StringUtils.join(partitions, ",");
+    }
+
+    /**
+     * Load every partition value of the odps table into sqlite.
+     * @param table
+     * @param partitionKeyList
+     * @throws SQLException
+     */
+    private void insertAllOriginPartitionIntoSqlite(Table table, List<String> partitionKeyList) throws SQLException {
+        List<Partition> partitions = table.getPartitions();
+        for (Partition partition : partitions) {
+            List<String> partitionColumnValue = new ArrayList<String>();
+            partitionColumnValue.add("\"" + partition.getPartitionSpec().toString() + "\"");
+            for (String partitionKey : partitionKeyList) {
+                partitionColumnValue.add("\"" + partition.getPartitionSpec().get(partitionKey) + "\"");
+            }
+            String insertPartitionValueSql =
String.format(insertSQLTemplate, StringUtils.join(partitionColumnValue, ",")); + this.stmt.execute(insertPartitionValueSql); + } + } +} diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/UserConfiguredPartitionClassification.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/UserConfiguredPartitionClassification.java new file mode 100644 index 0000000000..1a979969fc --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/UserConfiguredPartitionClassification.java @@ -0,0 +1,39 @@ +package com.alibaba.datax.plugin.reader.odpsreader.util; + +import java.util.List; + +public class UserConfiguredPartitionClassification { + + //包含/*query*/的partition, 例如: /*query*/ dt>=20170101 and dt<= 20170109 + private List userConfiguredHintPartition; + + //不包含/*query*/的partition, 例如: dt=20170101 或者 dt=201701* + private List userConfiguredNormalPartition; + + //是否包含hint的partition + private boolean isIncludeHintPartition; + + public List getUserConfiguredHintPartition() { + return userConfiguredHintPartition; + } + + public void setUserConfiguredHintPartition(List userConfiguredHintPartition) { + this.userConfiguredHintPartition = userConfiguredHintPartition; + } + + public List getUserConfiguredNormalPartition() { + return userConfiguredNormalPartition; + } + + public void setUserConfiguredNormalPartition(List userConfiguredNormalPartition) { + this.userConfiguredNormalPartition = userConfiguredNormalPartition; + } + + public boolean isIncludeHintPartition() { + return isIncludeHintPartition; + } + + public void setIncludeHintPartition(boolean includeHintPartition) { + isIncludeHintPartition = includeHintPartition; + } +} diff --git a/odpsreader/src/main/libs/bcprov-jdk15on-1.52.jar b/odpsreader/src/main/libs/bcprov-jdk15on-1.52.jar deleted file mode 100644 index 6c54dd901c..0000000000 Binary files a/odpsreader/src/main/libs/bcprov-jdk15on-1.52.jar and /dev/null differ diff --git a/odpswriter/pom.xml b/odpswriter/pom.xml index 8073ec43d3..c253e3fc7e 100755 --- a/odpswriter/pom.xml +++ b/odpswriter/pom.xml @@ -31,17 +31,10 @@ logback-classic - org.bouncycastle - bcprov-jdk15on - 1.52 - system - ${basedir}/src/main/libs/bcprov-jdk15on-1.52.jar + com.aliyun.odps + odps-sdk-core + 0.38.4-public - - com.aliyun.odps - odps-sdk-core - 0.20.7-public - @@ -51,6 +44,14 @@ + + + + org.mockito mockito-core @@ -70,9 +71,30 @@ test + + + org.aspectj + aspectjweaver + 1.8.10 + + + + commons-codec + commons-codec + 1.8 + + + + + src/main/java + + **/*.properties + + + diff --git a/odpswriter/src/main/assembly/package.xml b/odpswriter/src/main/assembly/package.xml index 7d3c91b51b..0ef0b43b18 100755 --- a/odpswriter/src/main/assembly/package.xml +++ b/odpswriter/src/main/assembly/package.xml @@ -23,13 +23,6 @@ plugin/writer/odpswriter - - src/main/libs - - *.* - - plugin/writer/odpswriter/libs - diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java index 22bcc16cb3..f4d9734b9c 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java @@ -12,4 +12,34 @@ public class Constant { public static final String COLUMN_POSITION = "columnPosition"; + /* + * 每个task独立维护一个proxy列表,一共会生成 task并发量 * 分区数量 的proxy,每个proxy会创建 blocksizeInMB(一般是64M) 大小的数组 + * 因此极易OOM, + * 假设默认情况下768M的内存,实际最多只能创建 12 
个proxy,8G内存最多只能创建126个proxy,所以最多只允许创建一定数量的proxy,对应到分区数量 1:1 + * + * blockSizeInMB 减小可以减少内存消耗,但是意味着更高频率的网络请求,会对odps服务器造成较大压力 + * + * 另外,可以考虑proxy不用常驻内存,但是需要增加复杂的控制逻辑 + * 但是一般情况下用户作为分区值得数据是有规律的,比如按照时间,2020-08的数据已经同步完成了,并且后面没有这个分区的数据了,对应的proxy还放在内存中, + * 会造成很大的内存浪费。所以有必要对某些proxy进行回收。 + * + * 这里采用是否回收某个proxy的标准是:在最近时间内是否有过数据传输。 + * + * + * 需要注意的问题! + * 多个任务公用一个proxy,写入时需要抢锁,多并发的性能会受到很大影响,相当于单个分区时串行写入 + * 这个对性能影响很大,需要避免这种方式,还是尽量各个task有独立的proxy,只是需要去控制内存的使用,只能是控制每个task保有的proxy数量了 + * + * 还可以考虑修改proxy的数组大小,但是设置太小不确定会不会影响性能。可以测试一下 + */ + + public static final Long PROXY_MAX_IDLE_TIME_MS =60 * 1000L; // 60s没有动作就回收 + + public static final Long MAX_PARTITION_CNT = 200L; + + public static final int UTF8_ENCODED_CHAR_MAX_SIZE = 6; + + public static final int DEFAULT_FIELD_MAX_SIZE = 8 * 1024 * 1024; + + } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java new file mode 100644 index 0000000000..dedc9eccda --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java @@ -0,0 +1,57 @@ +package com.alibaba.datax.plugin.writer.odpswriter; + +public class DateTransForm { + /** + * 列名称 + */ + private String colName; + + /** + * 之前是什么格式 + */ + private String fromFormat; + + /** + * 要转换成什么格式 + */ + private String toFormat; + + public DateTransForm(String colName, String fromFormat, String toFormat) { + this.colName = colName; + this.fromFormat = fromFormat; + this.toFormat = toFormat; + } + + public String getColName() { + return colName; + } + + public void setColName(String colName) { + this.colName = colName; + } + + public String getFromFormat() { + return fromFormat; + } + + public void setFromFormat(String fromFormat) { + this.fromFormat = fromFormat; + } + + public String getToFormat() { + return toFormat; + } + + public void setToFormat(String toFormat) { + this.toFormat = toFormat; + } + + @Override + public String toString() { + return "DateTransForm{" + + "colName='" + colName + '\'' + + ", fromFormat='" + fromFormat + '\'' + + ", toFormat='" + toFormat + '\'' + + '}'; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java index f578d72d9a..7ee11128ff 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java @@ -11,6 +11,8 @@ public final class Key { public final static String ACCESS_KEY = "accessKey"; + public final static String SECURITY_TOKEN = "securityToken"; + public final static String PROJECT = "project"; public final static String TABLE = "table"; @@ -31,4 +33,58 @@ public final class Key { public final static String ACCOUNT_TYPE = "accountType"; public final static String IS_COMPRESS = "isCompress"; + + // preSql + public final static String PRE_SQL="preSql"; + + // postSql + public final static String POST_SQL="postSql"; + + public final static String CONSISTENCY_COMMIT = "consistencyCommit"; + + public final static String UPLOAD_ID = "uploadId"; + + public final static String TASK_COUNT = "taskCount"; + + /** + * support dynamic partition,支持动态分区,即根据读取到的record的某一列或几列来确定该record应该存入哪个分区 + * 1. 如何确定根据哪些列:根据目的表哪几列是分区列,再根据对应的column来路由 + * 2. 何时创建upload session:由于是动态分区,因此无法在初始化时确定分区,也就无法在初始化时创建 upload session,只有再读取到具体record之后才能创建 + * 3. 
缓存 upload session:每当出现新的分区,则创建新的session,同时将该分区对应的session缓存下来,以备下次又有需要存入该分区的记录
+     * 4. 参数检查:无需检查分区是否配置
+     */
+    public final static String SUPPORT_DYNAMIC_PARTITION = "supportDynamicPartition";
+
+    /**
+     * 动态分区下,用户如果将源表的某一个时间列映射到分区列,存在如下需求场景:源表的该时间列精确到秒,但是同步到odps表时,只想保留到天,并存入对应的天分区
+     * 格式:
+     * "partitionColumnMapping":[
+     *     {
+     *         "name":"pt",                           // 必填
+     *         "srcDateFormat":"yyyy-MM-dd HH:mm:ss", // 可选,可能源表中的时间列是 String 类型,此时必须通过 srcDateFormat 来指定源表中该列的日期格式
+     *         "dateFormat":"yyyy-MM-dd"              // 必填
+     *     },
+     *     {
+     *         ...
+     *     },
+     *     ...
+     * ]
+     */
+    public final static String PARTITION_COL_MAPPING = "partitionColumnMapping";
+    public final static String PARTITION_COL_MAPPING_NAME = "name";
+    public final static String PARTITION_COL_MAPPING_SRC_COL_DATEFORMAT = "srcDateFormat";
+    public final static String PARTITION_COL_MAPPING_DATEFORMAT = "dateFormat";
+    public final static String WRITE_TIMEOUT_IN_MS = "writeTimeoutInMs";
+
+    public static final String OVER_LENGTH_RULE = "overLengthRule";
+    // 截断后保留的最大长度
+    public static final String MAX_FIELD_LENGTH = "maxFieldLength";
+    // odps本身支持的最大长度
+    public static final String MAX_ODPS_FIELD_LENGTH = "maxOdpsFieldLength";
+    public static final String ENABLE_OVER_LENGTH_OUTPUT = "enableOverLengthOutput";
+    public static final String MAX_OVER_LENGTH_OUTPUT_COUNT = "maxOverLengthOutputCount";
+
+    // 动态分区写入模式下,内存使用率达到80%时的flush时间间隔,单位分钟
+    public static final String DYNAMIC_PARTITION_MEM_USAGE_FLUSH_INTERVAL_IN_MINUTE = "dynamicPartitionMemUsageFlushIntervalInMinute";
 }
diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties
new file mode 100644
index 0000000000..be7862af38
--- /dev/null
+++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties
@@ -0,0 +1,34 @@
+errorcode.required_value=\u60a8\u7f3a\u5931\u4e86\u5fc5\u987b\u586b\u5199\u7684\u53c2\u6570\u503c.
+errorcode.illegal_value=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5.
+errorcode.unsupported_column_type=DataX \u4e0d\u652f\u6301\u5199\u5165 ODPS \u7684\u76ee\u7684\u8868\u7684\u6b64\u79cd\u6570\u636e\u7c7b\u578b.
+errorcode.table_truncate_error=\u6e05\u7a7a ODPS \u76ee\u7684\u8868\u65f6\u51fa\u9519.
+errorcode.create_master_upload_fail=\u521b\u5efa ODPS \u7684 uploadSession \u5931\u8d25.
+errorcode.get_slave_upload_fail=\u83b7\u53d6 ODPS \u7684 uploadSession \u5931\u8d25.
+errorcode.get_id_key_fail=\u83b7\u53d6 accessId/accessKey \u5931\u8d25.
+errorcode.get_partition_fail=\u83b7\u53d6 ODPS \u76ee\u7684\u8868\u7684\u6240\u6709\u5206\u533a\u5931\u8d25.
+errorcode.add_partition_failed=\u6dfb\u52a0\u5206\u533a\u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25.
+errorcode.writer_record_fail=\u5199\u5165\u6570\u636e\u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25.
+errorcode.commit_block_fail=\u63d0\u4ea4 block \u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25.
+errorcode.run_sql_failed=\u6267\u884c ODPS Sql \u5931\u8d25.
+errorcode.check_if_partitioned_table_failed=\u68c0\u67e5 ODPS \u76ee\u7684\u8868:%s \u662f\u5426\u4e3a\u5206\u533a\u8868\u5931\u8d25.
+errorcode.run_sql_odps_exception=\u6267\u884c ODPS Sql \u65f6\u629b\u51fa\u5f02\u5e38, \u53ef\u91cd\u8bd5
+errorcode.account_type_error=\u8d26\u53f7\u7c7b\u578b\u9519\u8bef.
+errorcode.partition_error=\u5206\u533a\u914d\u7f6e\u9519\u8bef.
+errorcode.column_not_exist=\u7528\u6237\u914d\u7f6e\u7684\u5217\u4e0d\u5b58\u5728.
+errorcode.odps_project_not_fount=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps project \u4e0d\u5b58\u5728. +errorcode.odps_table_not_fount=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps table \u4e0d\u5b58\u5728 +errorcode.odps_access_key_id_not_found=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps accessId,accessKey \u4e0d\u5b58\u5728 +errorcode.odps_access_key_invalid=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps accessKey \u9519\u8bef +errorcode.odps_access_deny=\u62d2\u7edd\u8bbf\u95ee, \u60a8\u4e0d\u5728 \u60a8\u914d\u7f6e\u7684 project \u4e2d + + +odpswriter.1=\u8d26\u53f7\u7c7b\u578b\u9519\u8bef\uff0c\u56e0\u4e3a\u4f60\u7684\u8d26\u53f7 [{0}] \u4e0d\u662fdatax\u76ee\u524d\u652f\u6301\u7684\u8d26\u53f7\u7c7b\u578b\uff0c\u76ee\u524d\u4ec5\u652f\u6301aliyun, taobao\u8d26\u53f7\uff0c\u8bf7\u4fee\u6539\u60a8\u7684\u8d26\u53f7\u4fe1\u606f. +odpswriter.2=\u8fd9\u662f\u4e00\u6761\u9700\u8981\u6ce8\u610f\u7684\u4fe1\u606f \u7531\u4e8e\u60a8\u7684\u4f5c\u4e1a\u914d\u7f6e\u4e86\u5199\u5165 ODPS \u7684\u76ee\u7684\u8868\u65f6emptyAsNull=true, \u6240\u4ee5 DataX\u5c06\u4f1a\u628a\u957f\u5ea6\u4e3a0\u7684\u7a7a\u5b57\u7b26\u4e32\u4f5c\u4e3a java \u7684 null \u5199\u5165 ODPS. +odpswriter.3=\u60a8\u914d\u7f6e\u7684blockSizeInMB:{0} \u53c2\u6570\u9519\u8bef. \u6b63\u786e\u7684\u914d\u7f6e\u662f[1-512]\u4e4b\u95f4\u7684\u6574\u6570. \u8bf7\u4fee\u6539\u6b64\u53c2\u6570\u7684\u503c\u4e3a\u8be5\u533a\u95f4\u5185\u7684\u6570\u503c +odpswriter.4=\u5199\u5165 ODPS \u76ee\u7684\u8868\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. + + +odpswriterproxy.1=\u4eb2\uff0c\u914d\u7f6e\u4e2d\u7684\u6e90\u8868\u7684\u5217\u4e2a\u6570\u548c\u76ee\u7684\u7aef\u8868\u4e0d\u4e00\u81f4\uff0c\u6e90\u8868\u4e2d\u60a8\u914d\u7f6e\u7684\u5217\u6570\u662f:{0} \u5927\u4e8e\u76ee\u7684\u7aef\u7684\u5217\u6570\u662f:{1} , \u8fd9\u6837\u4f1a\u5bfc\u81f4\u6e90\u5934\u6570\u636e\u65e0\u6cd5\u6b63\u786e\u5bfc\u5165\u76ee\u7684\u7aef, \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4fee\u6539. +odpswriterproxy.2=\u6e90\u8868\u7684\u5217\u4e2a\u6570\u5c0f\u4e8e\u76ee\u7684\u8868\u7684\u5217\u4e2a\u6570\uff0c\u6e90\u8868\u5217\u6570\u662f:{0} \u76ee\u7684\u8868\u5217\u6570\u662f:{1} , \u6570\u76ee\u4e0d\u5339\u914d. DataX \u4f1a\u628a\u76ee\u7684\u7aef\u591a\u51fa\u7684\u5217\u7684\u503c\u8bbe\u7f6e\u4e3a\u7a7a\u503c. \u5982\u679c\u8fd9\u4e2a\u9ed8\u8ba4\u914d\u7f6e\u4e0d\u7b26\u5408\u60a8\u7684\u671f\u671b\uff0c\u8bf7\u4fdd\u6301\u6e90\u8868\u548c\u76ee\u7684\u8868\u914d\u7f6e\u7684\u5217\u6570\u76ee\u4fdd\u6301\u4e00\u81f4. +odpswriterproxy.3=Odps decimal \u7c7b\u578b\u7684\u6574\u6570\u4f4d\u4e2a\u6570\u4e0d\u80fd\u8d85\u8fc735 +odpswriterproxy.4=\u5199\u5165 ODPS \u76ee\u7684\u8868\u65f6\u9047\u5230\u4e86\u810f\u6570\u636e: \u7b2c[{0}]\u4e2a\u5b57\u6bb5 {1} \u7684\u6570\u636e\u51fa\u73b0\u9519\u8bef\uff0c\u8bf7\u68c0\u67e5\u8be5\u6570\u636e\u5e76\u4f5c\u51fa\u4fee\u6539 \u6216\u8005\u60a8\u53ef\u4ee5\u589e\u5927\u9600\u503c\uff0c\u5ffd\u7565\u8fd9\u6761\u8bb0\u5f55. 
\ No newline at end of file diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java index 60deb5dd30..c82fcef4f3 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java @@ -8,29 +8,49 @@ import com.alibaba.datax.common.statistics.PerfRecord; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.common.util.ListUtil; -import com.alibaba.datax.plugin.writer.odpswriter.util.IdAndKeyUtil; -import com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil; - +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.writer.odpswriter.model.PartitionInfo; +import com.alibaba.datax.plugin.writer.odpswriter.model.UserDefinedFunction; +import com.alibaba.datax.plugin.writer.odpswriter.util.*; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; import com.aliyun.odps.Odps; import com.aliyun.odps.Table; import com.aliyun.odps.TableSchema; import com.aliyun.odps.tunnel.TableTunnel; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryUsage; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import static com.alibaba.datax.plugin.writer.odpswriter.util.CustomPartitionUtils.getListWithJson; /** * 已修改为:每个 task 各自创建自己的 upload,拥有自己的 uploadId,并在 task 中完成对对应 block 的提交。 */ public class OdpsWriter extends Writer { + public static HashSet partitionsDealedTruncate = new HashSet<>(); + static final Object lockForPartitionDealedTruncate = new Object(); + public static AtomicInteger partitionCnt = new AtomicInteger(0); + public static Long maxPartitionCnt; + public static AtomicLong globalTotalTruncatedRecordNumber = new AtomicLong(0); + public static Long maxOutputOverLengthRecord; + public static int maxOdpsFieldLength = Constant.DEFAULT_FIELD_MAX_SIZE; + public static class Job extends Writer.Job { private static final Logger LOG = LoggerFactory .getLogger(Job.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriter.class); private static final boolean IS_DEBUG = LOG.isDebugEnabled(); @@ -47,6 +67,8 @@ public static class Job extends Writer.Job { private String uploadId; private TableTunnel.UploadSession masterUpload; private int blockSizeInMB; + private boolean consistencyCommit; + private boolean supportDynamicPartition; public void preCheck() { this.init(); @@ -54,74 +76,76 @@ public void preCheck() { } public void doPreCheck() { - //检查accessId,accessKey配置 - if (Constant.DEFAULT_ACCOUNT_TYPE - .equalsIgnoreCase(this.accountType)) { - this.originalConfig = IdAndKeyUtil.parseAccessIdAndKey(this.originalConfig); - String accessId = this.originalConfig.getString(Key.ACCESS_ID); - String accessKey = this.originalConfig.getString(Key.ACCESS_KEY); - if (IS_DEBUG) { - LOG.debug("accessId:[{}], accessKey:[{}] .", accessId, - accessKey); - } - LOG.info("accessId:[{}] .", accessId); - } - // init odps config - this.odps = 
OdpsUtil.initOdpsProject(this.originalConfig); - - //检查表等配置是否正确 - this.table = OdpsUtil.getTable(odps,this.projectName,this.tableName); //检查列信息是否正确 List allColumns = OdpsUtil.getAllColumns(this.table.getSchema()); LOG.info("allColumnList: {} .", StringUtils.join(allColumns, ',')); - dealColumn(this.originalConfig, allColumns); + List allPartColumns = OdpsUtil.getAllPartColumns(this.table.getSchema()); + LOG.info("allPartColumnsList: {} .", StringUtils.join(allPartColumns, ',')); + dealColumn(this.originalConfig, allColumns, allPartColumns); //检查分区信息是否正确 - OdpsUtil.preCheckPartition(this.odps, this.table, this.partition, this.truncate); + if (!supportDynamicPartition) { + OdpsUtil.preCheckPartition(this.odps, this.table, this.partition, this.truncate); + } } @Override public void init() { this.originalConfig = super.getPluginJobConf(); + OdpsUtil.checkNecessaryConfig(this.originalConfig); OdpsUtil.dealMaxRetryTime(this.originalConfig); + + this.projectName = this.originalConfig.getString(Key.PROJECT); this.tableName = this.originalConfig.getString(Key.TABLE); this.tunnelServer = this.originalConfig.getString(Key.TUNNEL_SERVER, null); + this.dealAK(); + + // init odps config + this.odps = OdpsUtil.initOdpsProject(this.originalConfig); + + //检查表等配置是否正确 + this.table = OdpsUtil.getTable(odps, this.projectName, this.tableName); + + // 处理动态分区参数,以及动态分区相关配置是否合法,如果没有配置动态分区,则根据列映射配置决定是否启用 + this.dealDynamicPartition(); + //check isCompress this.originalConfig.getBool(Key.IS_COMPRESS, false); - this.partition = OdpsUtil.formatPartition(this.originalConfig - .getString(Key.PARTITION, "")); - this.originalConfig.set(Key.PARTITION, this.partition); - - this.accountType = this.originalConfig.getString(Key.ACCOUNT_TYPE, - Constant.DEFAULT_ACCOUNT_TYPE); - if (!Constant.DEFAULT_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType) && - !Constant.TAOBAO_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.ACCOUNT_TYPE_ERROR, - String.format("账号类型错误,因为你的账号 [%s] 不是datax目前支持的账号类型,目前仅支持aliyun, taobao账号,请修改您的账号信息.", accountType)); + // 如果不是动态分区写入,则检查分区配置,动态分区写入不用检查 + if (!this.supportDynamicPartition) { + this.partition = OdpsUtil.formatPartition(this.originalConfig + .getString(Key.PARTITION, ""), true); + this.originalConfig.set(Key.PARTITION, this.partition); } - this.originalConfig.set(Key.ACCOUNT_TYPE, this.accountType); this.truncate = this.originalConfig.getBool(Key.TRUNCATE); + this.consistencyCommit = this.originalConfig.getBool(Key.CONSISTENCY_COMMIT, false); + boolean emptyAsNull = this.originalConfig.getBool(Key.EMPTY_AS_NULL, false); this.originalConfig.set(Key.EMPTY_AS_NULL, emptyAsNull); if (emptyAsNull) { - LOG.warn("这是一条需要注意的信息 由于您的作业配置了写入 ODPS 的目的表时emptyAsNull=true, 所以 DataX将会把长度为0的空字符串作为 java 的 null 写入 ODPS."); + LOG.warn(MESSAGE_SOURCE.message("odpswriter.2")); } this.blockSizeInMB = this.originalConfig.getInt(Key.BLOCK_SIZE_IN_MB, 64); - if(this.blockSizeInMB < 8) { + if (this.blockSizeInMB < 8) { this.blockSizeInMB = 8; } this.originalConfig.set(Key.BLOCK_SIZE_IN_MB, this.blockSizeInMB); LOG.info("blockSizeInMB={}.", this.blockSizeInMB); + maxPartitionCnt = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() / 1024 / 1024 / this.blockSizeInMB; + if (maxPartitionCnt < Constant.MAX_PARTITION_CNT) { + maxPartitionCnt = Constant.MAX_PARTITION_CNT; + } + LOG.info("maxPartitionCnt={}", maxPartitionCnt); if (IS_DEBUG) { LOG.debug("After master init(), job config now is: [\n{}\n] .", @@ -129,6 +153,92 @@ public void init() { } } + 
private void dealAK() {
+            this.accountType = this.originalConfig.getString(Key.ACCOUNT_TYPE,
+                    Constant.DEFAULT_ACCOUNT_TYPE);
+
+            if (!Constant.DEFAULT_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType) &&
+                    !Constant.TAOBAO_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType)) {
+                throw DataXException.asDataXException(OdpsWriterErrorCode.ACCOUNT_TYPE_ERROR,
+                        MESSAGE_SOURCE.message("odpswriter.1", accountType));
+            }
+            this.originalConfig.set(Key.ACCOUNT_TYPE, this.accountType);
+
+            //检查accessId,accessKey配置
+            if (Constant.DEFAULT_ACCOUNT_TYPE
+                    .equalsIgnoreCase(this.accountType)) {
+                this.originalConfig = IdAndKeyUtil.parseAccessIdAndKey(this.originalConfig);
+                String accessId = this.originalConfig.getString(Key.ACCESS_ID);
+                String accessKey = this.originalConfig.getString(Key.ACCESS_KEY);
+                if (IS_DEBUG) {
+                    LOG.debug("accessId:[{}], accessKey:[{}] .", accessId,
+                            accessKey);
+                }
+                LOG.info("accessId:[{}] .", accessId);
+            }
+        }
+
+        private void dealDynamicPartition() {
+            /*
+             * If supportDynamicPartition is configured explicitly, that setting wins.
+             * If it is absent, enable it when the table is partitioned and the column
+             * mapping contains all of its partition columns.
+             */
+            List<String> partitionCols = OdpsUtil.getAllPartColumns(this.table.getSchema());
+            List<String> configCols = this.originalConfig.getList(Key.COLUMN, String.class);
+            LOG.info("partition columns:{}", partitionCols);
+            LOG.info("config columns:{}", configCols);
+            LOG.info("support dynamic partition:{}", this.originalConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION));
+            LOG.info("partition format type:{}", this.originalConfig.getString("partitionFormatType"));
+            if (this.originalConfig.getKeys().contains(Key.SUPPORT_DYNAMIC_PARTITION)) {
+                this.supportDynamicPartition = this.originalConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION);
+                if (supportDynamicPartition) {
+                    // custom partition mode
+                    if ("custom".equalsIgnoreCase(originalConfig.getString("partitionFormatType"))) {
+                        List<PartitionInfo> partitions = getListWithJson(originalConfig, "customPartitionColumns", PartitionInfo.class);
+                        // the custom partition config must match the table's real partition columns exactly
+                        if (!ListUtil.checkIfAllSameValue(partitions.stream().map(item -> item.getName()).collect(Collectors.toList()), partitionCols)) {
+                            throw DataXException.asDataXException("custom partition config is not same as real partition info.");
+                        }
+                    } else {
+                        // dynamic partitioning requested: every partition column must appear in the column mapping
+                        if (!ListUtil.checkIfBInA(configCols, partitionCols, false)) {
+                            throw DataXException.asDataXException("You config supportDynamicPartition as true, but didn't config all partition columns");
+                        }
+                    }
+                } else {
+                    // dynamic partitioning disabled: the column mapping must not contain any partition column
+                    if (ListUtil.checkIfHasSameValue(configCols, partitionCols)) {
+                        throw DataXException.asDataXException("You should config all partition columns in column param, or you can specify a static partition param");
+                    }
+                }
+            } else {
+                if (OdpsUtil.isPartitionedTable(table)) {
+                    // partitioned table: the partition columns must either all be mapped, or none of them
+                    if (ListUtil.checkIfBInA(configCols, partitionCols, false)) {
+                        // every partition column is present in column
+                        this.supportDynamicPartition = true;
+                    } else {
+                        // not all are present; if only some are, that is a configuration error
+                        if (ListUtil.checkIfHasSameValue(configCols, partitionCols)) {
+                            throw DataXException.asDataXException("You should config all partition columns in column param, or you can specify a static partition param");
+                        }
+                        // none of the partition columns are mapped: disable dynamic mode
+                        this.supportDynamicPartition = false;
+                    }
+                } else {
+                    LOG.info("{} is not a partitioned table, set supportDynamicPartition to false", this.tableName);
+                    this.supportDynamicPartition = false;
+                }
+            }
+
+            // dynamic partition writing is not supported in distribute mode; fail fast there
+            LOG.info("current run mode: {}", System.getProperty("datax.executeMode"));
+            if (supportDynamicPartition && StringUtils.equalsIgnoreCase("distribute", System.getProperty("datax.executeMode"))) {
+                LOG.error("Distribute mode doesn't support dynamic partition writing");
+                System.exit(1);
+            }
+        }
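+        /*
+         * Worked example of the auto-detection above, for a table partitioned by
+         * (pt, region) — column names are illustrative:
+         *   column: ["id", "name", "pt", "region"] -> supportDynamicPartition = true
+         *   column: ["id", "name"]                 -> false, a static "partition" param is needed
+         *   column: ["id", "name", "pt"]           -> error: the partition columns must be
+         *                                             mapped either all together or not at all
+         */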
 
         @Override
         public void prepare() {
             String accessId = null;
@@ -148,10 +258,29 @@ public void prepare() {
             // init odps config
             this.odps = OdpsUtil.initOdpsProject(this.originalConfig);
 
+            List<String> preSqls = this.originalConfig.getList(Key.PRE_SQL, String.class);
+            if (preSqls != null && !preSqls.isEmpty()) {
+                LOG.info(String.format("Begin to execute preSql : %s. \n Attention: these preSqls must be idempotent!!!",
+                        JSON.toJSONString(preSqls)));
+                long beginTime = System.currentTimeMillis();
+                for (String preSql : preSqls) {
+                    preSql = preSql.trim();
+                    if (!preSql.endsWith(";")) {
+                        preSql = String.format("%s;", preSql);
+                    }
+                    OdpsUtil.runSqlTaskWithRetry(this.odps, preSql, "preSql");
+                }
+                long endTime = System.currentTimeMillis();
+                LOG.info(String.format("Execute odpswriter preSql successfully! cost time: %s ms.", (endTime - beginTime)));
+            }
+
             //检查表等配置是否正确
-            this.table = OdpsUtil.getTable(odps,this.projectName,this.tableName);
+            this.table = OdpsUtil.getTable(odps, this.projectName, this.tableName);
 
-            OdpsUtil.dealTruncate(this.odps, this.table, this.partition, this.truncate);
+            // In dynamic partition mode no partition is configured up front, so truncate
+            // cannot happen at job initialization either
+            if (!supportDynamicPartition) {
+                OdpsUtil.dealTruncate(this.odps, this.table, this.partition, this.truncate);
+            }
         }
 
         /**
@@ -169,20 +298,34 @@ public List<Configuration> split(int mandatoryNumber) {
                 tableTunnel.setEndpoint(tunnelServer);
             }
 
-            this.masterUpload = OdpsUtil.createMasterTunnelUpload(
-                    tableTunnel, this.projectName, this.tableName, this.partition);
-            this.uploadId = this.masterUpload.getId();
-            LOG.info("Master uploadId:[{}].", this.uploadId);
-
-            TableSchema schema = this.masterUpload.getSchema();
+            TableSchema schema = this.table.getSchema();
             List<String> allColumns = OdpsUtil.getAllColumns(schema);
             LOG.info("allColumnList: {} .", StringUtils.join(allColumns, ','));
-
-            dealColumn(this.originalConfig, allColumns);
+            List<String> allPartColumns = OdpsUtil.getAllPartColumns(this.table.getSchema());
+            LOG.info("allPartColumnsList: {} .", StringUtils.join(allPartColumns, ','));
+            dealColumn(this.originalConfig, allColumns, allPartColumns);
+            this.originalConfig.set("allColumns", allColumns);
+
+            // In dynamic partition mode sessions cannot be created per partition ahead of time
+            if (!supportDynamicPartition) {
+                this.masterUpload = OdpsUtil.createMasterTunnelUpload(
+                        tableTunnel, this.projectName, this.tableName, this.partition);
+                this.uploadId = this.masterUpload.getId();
+                LOG.info("Master uploadId:[{}].", this.uploadId);
+            }
 
             for (int i = 0; i < mandatoryNumber; i++) {
                 Configuration tempConfig = this.originalConfig.clone();
 
+                // Outside dynamic partition mode, with consistent commit enabled, the master
+                // upload session is shared; otherwise each task works independently
+                if (!supportDynamicPartition && this.consistencyCommit) {
+                    tempConfig.set(Key.UPLOAD_ID, uploadId);
+                    tempConfig.set(Key.TASK_COUNT, mandatoryNumber);
+                }
+
+                // propagate the supportDynamicPartition decision to each task
+                tempConfig.set(Key.SUPPORT_DYNAMIC_PARTITION, this.supportDynamicPartition);
+
                 configurations.add(tempConfig);
             }
@@ -190,14 +333,18 @@ public List<Configuration> split(int mandatoryNumber) {
                 LOG.debug("After master split, the job config now is:[\n{}\n].",
                         this.originalConfig);
             }
 
-            this.masterUpload = null;
-
             return configurations;
         }
 
-        private void dealColumn(Configuration originalConfig, List<String> allColumns) {
+        private void dealColumn(Configuration originalConfig, List<String> allColumns, List<String> allPartColumns) {
             //之前已经检查了userConfiguredColumns 一定不为空
             List<String> userConfiguredColumns = originalConfig.getList(Key.COLUMN, String.class);
+
+            // in dynamic partition mode "*" is not allowed in column
+            if (supportDynamicPartition && userConfiguredColumns.contains("*")) {
+                throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE,
+                        "In dynamic partition write mode you can't specify column with *.");
+            }
             if (1 == userConfiguredColumns.size() && "*".equals(userConfiguredColumns.get(0))) {
                 userConfiguredColumns = allColumns;
                 originalConfig.set(Key.COLUMN, allColumns);
@@ -206,15 +353,51 @@ private void dealColumn(Configuration originalConfig, List<String> allColumns)
                 ListUtil.makeSureNoValueDuplicate(userConfiguredColumns, false);
 
                 //检查列是否存在,大小写不敏感
-                ListUtil.makeSureBInA(allColumns, userConfiguredColumns, false);
+                if (supportDynamicPartition) {
+                    List<String> allColumnList = new ArrayList<String>();
+                    allColumnList.addAll(allColumns);
+                    allColumnList.addAll(allPartColumns);
+                    ListUtil.makeSureBInA(allColumnList, userConfiguredColumns, false);
+                } else {
+                    ListUtil.makeSureBInA(allColumns, userConfiguredColumns, false);
+                }
             }
 
-            List<Integer> columnPositions = OdpsUtil.parsePosition(allColumns, userConfiguredColumns);
+            // resolve each configured column to its index among the table's data columns;
+            // -1 marks a partition column
+            List<Integer> columnPositions = OdpsUtil.parsePosition(allColumns, allPartColumns, userConfiguredColumns);
             originalConfig.set(Constant.COLUMN_POSITION, columnPositions);
         }
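+        /*
+         * Worked example of parsePosition as called above (schema is illustrative):
+         *   data columns      = [id, name, value]
+         *   partition columns = [pt]
+         *   user columns      = [id, pt, value]
+         *   positions         = [0, -1, 2]   // -1 flags the partition column
+         */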
 
         @Override
         public void post() {
+
+            if (supportDynamicPartition) {
+                LOG.info("Total create partition cnt:{}", partitionCnt);
+            }
+
+            if (!supportDynamicPartition && this.consistencyCommit) {
+                LOG.info("Master which uploadId=[{}] begin to commit blocks.", this.uploadId);
+                OdpsUtil.masterComplete(this.masterUpload);
+                LOG.info("Master which uploadId=[{}] commit blocks ok.", this.uploadId);
+            }
+
+            List<String> postSqls = this.originalConfig.getList(Key.POST_SQL, String.class);
+            if (postSqls != null && !postSqls.isEmpty()) {
+                LOG.info(String.format("Begin to execute postSql : %s. \n Attention: these postSqls must be idempotent!!!",
+                        JSON.toJSONString(postSqls)));
+                long beginTime = System.currentTimeMillis();
+                for (String postSql : postSqls) {
+                    postSql = postSql.trim();
+                    if (!postSql.endsWith(";")) {
+                        postSql = String.format("%s;", postSql);
+                    }
+                    OdpsUtil.runSqlTaskWithRetry(this.odps, postSql, "postSql");
+                }
+                long endTime = System.currentTimeMillis();
+                LOG.info(String.format("Execute odpswriter postSql successfully! cost time: %s ms.", (endTime - beginTime)));
+            }
+
+            LOG.info("truncated record count: {}", globalTotalTruncatedRecordNumber.intValue());
         }
 
         @Override
@@ -226,6 +409,7 @@ public void destroy() {
     public static class Task extends Writer.Task {
         private static final Logger LOG = LoggerFactory
                 .getLogger(Task.class);
+        private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriter.class);
 
         private static final boolean IS_DEBUG = LOG.isDebugEnabled();
@@ -246,18 +430,54 @@ public static class Task extends Writer.Task {
         private List<Long> blocks;
         private int blockSizeInMB;
+        private boolean consistencyCommit;
+
+        private int taskId;
+        private int taskCount;
 
         private Integer failoverState = 0; //0 未failover 1准备failover 2已提交,不能failover
         private byte[] lock = new byte[0];
+        private List<String> allColumns;
+
+        /*
+         * Mapping between partitions and upload sessions: when a record is routed to
+         * a partition, it is uploaded through the proxy bound to that partition.
+         * The key is the values of all partition columns joined in configured order.
+         */
+        private HashMap<String, MutablePair<OdpsWriterProxy, List<Long>>> partitionUploadSessionHashMap;
+        private Boolean supportDynamicPartition;
+        private TableTunnel tableTunnel;
+        private Table table;
+
+        /**
+         * Format-conversion rules for partition columns; only Date source columns,
+         * or String columns whose content is a date, are supported.
+         */
+        private HashMap<String, DateTransForm> dateTransFormMap;
+
+        private Long writeTimeOutInMs;
+
+        private String overLengthRule;
+        private int maxFieldLength;
+        private Boolean enableOverLengthOutput;
+
+        /**
+         * In dynamic partition mode, the flush interval (in minutes) applied once heap
+         * usage reaches 80%; kept coarse (code default: 1 minute) to avoid frequent
+         * flushes that would produce many small files.
+         */
+        private int dynamicPartitionMemUsageFlushIntervalInMinute = 1;
+
+        private long latestFlushTime = 0;
 
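+        /*
+         * Example partitionColumnMapping entry feeding dateTransFormMap above (values
+         * are illustrative): a source column "pt" holding "2024-01-01 12:30:45" is
+         * reduced to the day partition "2024-01-01":
+         *   [{"name":"pt","srcDateFormat":"yyyy-MM-dd HH:mm:ss","dateFormat":"yyyy-MM-dd"}]
+         */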
         @Override
         public void init() {
             this.sliceConfig = super.getPluginJobConf();
+            // default write timeout: ten minutes
+            this.writeTimeOutInMs = this.sliceConfig.getLong(Key.WRITE_TIMEOUT_IN_MS, 10 * 60 * 1000);
 
             this.projectName = this.sliceConfig.getString(Key.PROJECT);
             this.tableName = this.sliceConfig.getString(Key.TABLE);
             this.tunnelServer = this.sliceConfig.getString(Key.TUNNEL_SERVER, null);
 
             this.partition = OdpsUtil.formatPartition(this.sliceConfig
-                    .getString(Key.PARTITION, ""));
+                    .getString(Key.PARTITION, ""), true);
             this.sliceConfig.set(Key.PARTITION, this.partition);
 
             this.emptyAsNull = this.sliceConfig.getBool(Key.EMPTY_AS_NULL);
@@ -265,9 +485,49 @@ public void init() {
             this.isCompress = this.sliceConfig.getBool(Key.IS_COMPRESS, false);
             if (this.blockSizeInMB < 1 || this.blockSizeInMB > 512) {
                 throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE,
-                        String.format("您配置的blockSizeInMB:%s 参数错误. 正确的配置是[1-512]之间的整数. 请修改此参数的值为该区间内的数值", this.blockSizeInMB));
+                        MESSAGE_SOURCE.message("odpswriter.3", this.blockSizeInMB));
             }
 
+            this.taskId = this.getTaskId();
+            this.taskCount = this.sliceConfig.getInt(Key.TASK_COUNT, 0);
+
+            this.supportDynamicPartition = this.sliceConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION, false);
+
+            if (!supportDynamicPartition) {
+                this.consistencyCommit = this.sliceConfig.getBool(Key.CONSISTENCY_COMMIT, false);
+                if (consistencyCommit) {
+                    this.uploadId = this.sliceConfig.getString(Key.UPLOAD_ID);
+                    if (this.uploadId == null || this.uploadId.isEmpty()) {
+                        throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE,
+                                MESSAGE_SOURCE.message("odpswriter.3", this.uploadId));
+                    }
+                }
+            } else {
+                this.partitionUploadSessionHashMap = new HashMap<>();
+
+                // initialize dateTransFormMap from the partitionColumnMapping parameter
+                String dateTransListStr = this.sliceConfig.getString(Key.PARTITION_COL_MAPPING);
+                if (StringUtils.isNotBlank(dateTransListStr)) {
+                    this.dateTransFormMap = new HashMap<>();
+                    JSONArray dateTransFormJsonArray = JSONArray.parseArray(dateTransListStr);
+                    for (Object dateTransFormJson : dateTransFormJsonArray) {
+                        DateTransForm dateTransForm = new DateTransForm(
+                                ((JSONObject) dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_NAME),
+                                ((JSONObject) dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_SRC_COL_DATEFORMAT),
+                                ((JSONObject) dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_DATEFORMAT));
+                        this.dateTransFormMap.put(((JSONObject) dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_NAME), dateTransForm);
+                    }
+                }
+            }
+            this.allColumns = this.sliceConfig.getList("allColumns", String.class);
+            this.overLengthRule = this.sliceConfig.getString(Key.OVER_LENGTH_RULE, "keepOn").toUpperCase();
+            this.maxFieldLength = this.sliceConfig.getInt(Key.MAX_FIELD_LENGTH, Constant.DEFAULT_FIELD_MAX_SIZE);
+            this.enableOverLengthOutput = this.sliceConfig.getBool(Key.ENABLE_OVER_LENGTH_OUTPUT, true);
+            maxOutputOverLengthRecord = this.sliceConfig.getLong(Key.MAX_OVER_LENGTH_OUTPUT_COUNT);
+            maxOdpsFieldLength = this.sliceConfig.getInt(Key.MAX_ODPS_FIELD_LENGTH, Constant.DEFAULT_FIELD_MAX_SIZE);
+
+            this.dynamicPartitionMemUsageFlushIntervalInMinute = this.sliceConfig.getInt(Key.DYNAMIC_PARTITION_MEM_USAGE_FLUSH_INTERVAL_IN_MINUTE,
+                    1);
 
             if (IS_DEBUG) {
                 LOG.debug("After init in task, sliceConfig now is:[\n{}\n].", this.sliceConfig);
             }
         }
 
@@ -277,24 +537,32 @@ public void init() {
         @Override
         public void prepare() {
             this.odps = OdpsUtil.initOdpsProject(this.sliceConfig);
+            this.tableTunnel = new TableTunnel(this.odps);
 
-            TableTunnel tableTunnel = new TableTunnel(this.odps);
-            if (StringUtils.isNoneBlank(tunnelServer)) {
-                tableTunnel.setEndpoint(tunnelServer);
+            if (!supportDynamicPartition) {
+                if (StringUtils.isNoneBlank(tunnelServer)) {
+                    tableTunnel.setEndpoint(tunnelServer);
+                }
+                if (this.consistencyCommit) {
+                    this.managerUpload = OdpsUtil.getSlaveTunnelUpload(this.tableTunnel, this.projectName, this.tableName,
+                            this.partition, this.uploadId);
+                } else {
+                    this.managerUpload = OdpsUtil.createMasterTunnelUpload(this.tableTunnel, this.projectName,
+                            this.tableName, this.partition);
+                    this.uploadId = this.managerUpload.getId();
+                }
+                LOG.info("task uploadId:[{}].", this.uploadId);
+                this.workerUpload = OdpsUtil.getSlaveTunnelUpload(this.tableTunnel, this.projectName,
+                        this.tableName, this.partition, uploadId);
+            } else {
+                this.table = OdpsUtil.getTable(this.odps, this.projectName, this.tableName);
             }
-
-            this.managerUpload = OdpsUtil.createMasterTunnelUpload(tableTunnel, this.projectName,
-                    this.tableName, this.partition);
-            this.uploadId = this.managerUpload.getId();
-            LOG.info("task uploadId:[{}].", this.uploadId);
-
-            this.workerUpload = OdpsUtil.getSlaveTunnelUpload(tableTunnel, this.projectName,
-                    this.tableName, this.partition, uploadId);
         }
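+        /*
+         * Dynamic-partition routing used in startWrite below (values illustrative):
+         * the record's partition-column values are joined into a partition spec such
+         * as "pt=2024-01-01,region=cn"; that spec keys partitionUploadSessionHashMap,
+         * so each distinct partition gets exactly one upload session and block list.
+         */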
partition; + if("custom".equalsIgnoreCase(partitionFormatType)){ + List partitions = getListWithJson(sliceConfig,"customPartitionColumns",PartitionInfo.class); + List functions = getListWithJson(sliceConfig,"customPartitionFunctions",UserDefinedFunction.class); + + partition = CustomPartitionUtils.generate(dataXRecord,functions, + partitions,sliceConfig.getList(Key.COLUMN, String.class)); + }else{ + partition = OdpsUtil.getPartColValFromDataXRecord(dataXRecord, columnPositions, + this.sliceConfig.getList(Key.COLUMN, String.class), + this.dateTransFormMap); + partition = OdpsUtil.formatPartition(partition, false); + } + + Pair> proxyBlocksPair = this.partitionUploadSessionHashMap.get(partition); + if (null != proxyBlocksPair) { + proxy = proxyBlocksPair.getLeft(); + currentWriteBlocks = proxyBlocksPair.getRight(); + if (null == proxy || null == currentWriteBlocks) { + throw DataXException.asDataXException("Get OdpsWriterProxy failed."); + } + } else { + /* + * 第一次写入该目标分区:处理truncate + * truncate 为 true,且还没有被truncate过,则truncate,加互斥锁 + */ + Boolean truncate = this.sliceConfig.getBool(Key.TRUNCATE); + if (truncate && !partitionsDealedTruncate.contains(partition)) { + synchronized (lockForPartitionDealedTruncate) { + if (!partitionsDealedTruncate.contains(partition)) { + LOG.info("Start to truncate partition {}", partition); + OdpsUtil.dealTruncate(this.odps, this.table, partition, truncate); + partitionsDealedTruncate.add(partition); + } + /* + * 判断分区是否创建过多,如果创建过多,则报错 + */ + if (partitionCnt.addAndGet(1) > maxPartitionCnt) { + throw new DataXException("Create too many partitions. Please make sure you config the right partition column"); + } + } + } + TableTunnel.UploadSession uploadSession = OdpsUtil.createMasterTunnelUpload(tableTunnel, this.projectName, + this.tableName, partition); + proxy = new OdpsWriterProxy(uploadSession, this.blockSizeInMB, blockId, + columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress, checkWithGetSize, this.allColumns, true, this.writeTimeOutInMs, this.sliceConfig, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + currentWriteBlocks = new ArrayList<>(); + partitionUploadSessionHashMap.put(partition, new MutablePair<>(proxy, currentWriteBlocks)); + } + } + blockCloseUsedTime += proxy.writeOneRecord(dataXRecord, currentWriteBlocks); + + // 动态分区写入模式下,如果内存使用达到一定程度 80%,清理较久没有活动且缓存较多数据的分区 + if (supportDynamicPartition) { + boolean isNeedFush = checkIfNeedFlush(); + if (isNeedFush) { + LOG.info("====The memory used exceed 80%, start to clear...==="); + int releaseCnt = 0; + int remainCnt = 0; + for (String onePartition : partitionUploadSessionHashMap.keySet()) { + OdpsWriterProxy oneIdleProxy = partitionUploadSessionHashMap.get(onePartition) == null ? 
+                        if (isNeedFlush) {
+                            LOG.info("====The memory used exceeds 80%, start to clear...===");
+                            int releaseCnt = 0;
+                            int remainCnt = 0;
+                            for (String onePartition : partitionUploadSessionHashMap.keySet()) {
+                                OdpsWriterProxy oneIdleProxy = partitionUploadSessionHashMap.get(onePartition) == null ? null : partitionUploadSessionHashMap.get(onePartition).getLeft();
+                                if (oneIdleProxy == null) {
+                                    continue;
+                                }
+
+                                Long idleTime = System.currentTimeMillis() - oneIdleProxy.getLastActiveTime();
+                                if (idleTime > Constant.PROXY_MAX_IDLE_TIME_MS || oneIdleProxy.getCurrentTotalBytes() > (this.blockSizeInMB * 1024 * 1024 / 2)) {
+                                    // idle for a while: write out its buffered data first
+                                    LOG.info("partition {} has had no data for the last {} seconds, so release its uploadSession", onePartition, Constant.PROXY_MAX_IDLE_TIME_MS / 1000);
+                                    currentWriteBlocks = partitionUploadSessionHashMap.get(onePartition).getRight();
+                                    blockCloseUsedTime += oneIdleProxy.writeRemainingRecord(currentWriteBlocks);
+                                    // then drop it
+                                    partitionUploadSessionHashMap.put(onePartition, null);
+                                    releaseCnt++;
+                                } else {
+                                    remainCnt++;
+                                }
+                            }
+
+                            // if not enough was released, do a second, unconditional pass
+                            // until half of the proxies are gone
+                            for (String onePartition : partitionUploadSessionHashMap.keySet()) {
+                                if (releaseCnt >= remainCnt) {
+                                    break;
+                                }
+
+                                if (partitionUploadSessionHashMap.get(onePartition) != null) {
+                                    OdpsWriterProxy oneIdleProxy = partitionUploadSessionHashMap.get(onePartition).getLeft();
+                                    currentWriteBlocks = partitionUploadSessionHashMap.get(onePartition).getRight();
+                                    blockCloseUsedTime += oneIdleProxy.writeRemainingRecord(currentWriteBlocks);
+                                    partitionUploadSessionHashMap.put(onePartition, null);
+
+                                    releaseCnt++;
+                                    remainCnt--;
+                                }
+                            }
+
+                            this.latestFlushTime = System.currentTimeMillis();
+                            LOG.info("===complete===");
+                        }
+                    }
                 }
 
-                blockCloseUsedTime += proxy.writeRemainingRecord(blocks);
-                blockClose.end(blockCloseUsedTime);
+                // write out the remaining records of every partition
+                if (supportDynamicPartition) {
+                    for (String partition : partitionUploadSessionHashMap.keySet()) {
+                        if (partitionUploadSessionHashMap.get(partition) == null) {
+                            continue;
+                        }
+                        proxy = partitionUploadSessionHashMap.get(partition).getLeft();
+                        currentWriteBlocks = partitionUploadSessionHashMap.get(partition).getRight();
+                        blockCloseUsedTime += proxy.writeRemainingRecord(currentWriteBlocks);
+                        blockClose.end(blockCloseUsedTime);
+                    }
+                } else {
+                    blockCloseUsedTime += proxy.writeRemainingRecord(blocks);
+                    blockClose.end(blockCloseUsedTime);
+                }
             } catch (Exception e) {
-                throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, "写入 ODPS 目的表失败. 请联系 ODPS 管理员处理.", e);
+                throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, MESSAGE_SOURCE.message("odpswriter.4"), e);
+            }
+        }
+
+        private boolean checkIfNeedFlush() {
+            // check whether the flush interval has elapsed
+            boolean isArriveFlushTime = (System.currentTimeMillis() - this.latestFlushTime) > this.dynamicPartitionMemUsageFlushIntervalInMinute * 60 * 1000;
+            if (!isArriveFlushTime) {
+                // the flush interval has not been reached yet: bail out
+                return false;
             }
+
+            MemoryUsage memoryUsage = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
+            boolean isMemUsageExceed = (double) memoryUsage.getUsed() / memoryUsage.getMax() > 0.8f;
+            return isMemUsageExceed;
         }
 
         @Override
         public void post() {
-            synchronized (lock){
-                if(failoverState==0){
+            synchronized (lock) {
+                if (failoverState == 0) {
                     failoverState = 2;
-                    LOG.info("Slave which uploadId=[{}] begin to commit blocks:[\n{}\n].", this.uploadId,
-                            StringUtils.join(blocks, ","));
-                    OdpsUtil.masterCompleteBlocks(this.managerUpload, blocks.toArray(new Long[0]));
-                    LOG.info("Slave which uploadId=[{}] commit blocks ok.", this.uploadId);
-                }else{
+                    if (!supportDynamicPartition) {
+                        if (!
this.consistencyCommit) { + LOG.info("Slave which uploadId=[{}] begin to commit blocks:[\n{}\n].", this.uploadId, + StringUtils.join(blocks, ",")); + OdpsUtil.masterCompleteBlocks(this.managerUpload, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] commit blocks ok.", this.uploadId); + } else { + LOG.info("Slave which uploadId=[{}] begin to check blocks:[\n{}\n].", this.uploadId, + StringUtils.join(blocks, ",")); + OdpsUtil.checkBlockComplete(this.managerUpload, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] check blocks ok.", this.uploadId); + } + } else { + for (String partition : partitionUploadSessionHashMap.keySet()) { + OdpsWriterProxy proxy = partitionUploadSessionHashMap.get(partition).getLeft(); + List blocks = partitionUploadSessionHashMap.get(partition).getRight(); + TableTunnel.UploadSession uploadSession = proxy.getSlaveUpload(); + LOG.info("Slave which uploadId=[{}] begin to check blocks:[\n{}\n].", uploadSession.getId(), + StringUtils.join(blocks, ",")); + OdpsUtil.masterCompleteBlocks(uploadSession, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] check blocks ok.", uploadSession.getId()); + } + } + + } else { throw DataXException.asDataXException(CommonErrorCode.SHUT_DOWN_TASK, ""); } } @@ -343,9 +788,9 @@ public void destroy() { } @Override - public boolean supportFailOver(){ - synchronized (lock){ - if(failoverState==0){ + public boolean supportFailOver() { + synchronized (lock) { + if (failoverState == 0) { failoverState = 1; return true; } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java index 02020c046e..35f2ed155b 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java @@ -1,42 +1,43 @@ package com.alibaba.datax.plugin.writer.odpswriter; import com.alibaba.datax.common.spi.ErrorCode; +import com.alibaba.datax.common.util.MessageSource; public enum OdpsWriterErrorCode implements ErrorCode { - REQUIRED_VALUE("OdpsWriter-00", "您缺失了必须填写的参数值."), - ILLEGAL_VALUE("OdpsWriter-01", "您配置的值不合法."), - UNSUPPORTED_COLUMN_TYPE("OdpsWriter-02", "DataX 不支持写入 ODPS 的目的表的此种数据类型."), + REQUIRED_VALUE("OdpsWriter-00", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.required_value")), + ILLEGAL_VALUE("OdpsWriter-01", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.illegal_value")), + UNSUPPORTED_COLUMN_TYPE("OdpsWriter-02", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.unsupported_column_type")), - TABLE_TRUNCATE_ERROR("OdpsWriter-03", "清空 ODPS 目的表时出错."), - CREATE_MASTER_UPLOAD_FAIL("OdpsWriter-04", "创建 ODPS 的 uploadSession 失败."), - GET_SLAVE_UPLOAD_FAIL("OdpsWriter-05", "获取 ODPS 的 uploadSession 失败."), - GET_ID_KEY_FAIL("OdpsWriter-06", "获取 accessId/accessKey 失败."), - GET_PARTITION_FAIL("OdpsWriter-07", "获取 ODPS 目的表的所有分区失败."), + TABLE_TRUNCATE_ERROR("OdpsWriter-03", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.table_truncate_error")), + CREATE_MASTER_UPLOAD_FAIL("OdpsWriter-04", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.create_master_upload_fail")), + GET_SLAVE_UPLOAD_FAIL("OdpsWriter-05", 
MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_slave_upload_fail")), + GET_ID_KEY_FAIL("OdpsWriter-06", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_id_key_fail")), + GET_PARTITION_FAIL("OdpsWriter-07", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_partition_fail")), - ADD_PARTITION_FAILED("OdpsWriter-08", "添加分区到 ODPS 目的表失败."), - WRITER_RECORD_FAIL("OdpsWriter-09", "写入数据到 ODPS 目的表失败."), + ADD_PARTITION_FAILED("OdpsWriter-08", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.add_partition_failed")), + WRITER_RECORD_FAIL("OdpsWriter-09", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.writer_record_fail")), - COMMIT_BLOCK_FAIL("OdpsWriter-10", "提交 block 到 ODPS 目的表失败."), - RUN_SQL_FAILED("OdpsWriter-11", "执行 ODPS Sql 失败."), - CHECK_IF_PARTITIONED_TABLE_FAILED("OdpsWriter-12", "检查 ODPS 目的表:%s 是否为分区表失败."), + COMMIT_BLOCK_FAIL("OdpsWriter-10", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.commit_block_fail")), + RUN_SQL_FAILED("OdpsWriter-11", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.run_sql_failed")), + CHECK_IF_PARTITIONED_TABLE_FAILED("OdpsWriter-12", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.check_if_partitioned_table_failed")), - RUN_SQL_ODPS_EXCEPTION("OdpsWriter-13", "执行 ODPS Sql 时抛出异常, 可重试"), + RUN_SQL_ODPS_EXCEPTION("OdpsWriter-13", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.run_sql_odps_exception")), - ACCOUNT_TYPE_ERROR("OdpsWriter-30", "账号类型错误."), + ACCOUNT_TYPE_ERROR("OdpsWriter-30", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.account_type_error")), - PARTITION_ERROR("OdpsWriter-31", "分区配置错误."), + PARTITION_ERROR("OdpsWriter-31", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.partition_error")), - COLUMN_NOT_EXIST("OdpsWriter-32", "用户配置的列不存在."), + COLUMN_NOT_EXIST("OdpsWriter-32", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.column_not_exist")), - ODPS_PROJECT_NOT_FOUNT("OdpsWriter-100", "您配置的值不合法, odps project 不存在."), //ODPS-0420111: Project not found + ODPS_PROJECT_NOT_FOUNT("OdpsWriter-100", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_project_not_fount")), //ODPS-0420111: Project not found - ODPS_TABLE_NOT_FOUNT("OdpsWriter-101", "您配置的值不合法, odps table 不存在"), // ODPS-0130131:Table not found + ODPS_TABLE_NOT_FOUNT("OdpsWriter-101", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_table_not_fount")), // ODPS-0130131:Table not found - ODPS_ACCESS_KEY_ID_NOT_FOUND("OdpsWriter-102", "您配置的值不合法, odps accessId,accessKey 不存在"), //ODPS-0410051:Invalid credentials - accessKeyId not found + ODPS_ACCESS_KEY_ID_NOT_FOUND("OdpsWriter-102", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_key_id_not_found")), //ODPS-0410051:Invalid credentials - accessKeyId not found - ODPS_ACCESS_KEY_INVALID("OdpsWriter-103", "您配置的值不合法, odps accessKey 错误"), //ODPS-0410042:Invalid signature value - User signature dose not match; + ODPS_ACCESS_KEY_INVALID("OdpsWriter-103", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_key_invalid")), //ODPS-0410042:Invalid signature value - User signature dose not match; - ODPS_ACCESS_DENY("OdpsWriter-104", 
"拒绝访问, 您不在 您配置的 project 中") //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project + ODPS_ACCESS_DENY("OdpsWriter-104", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_deny")) //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project ; diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java index 9833616c5d..221aca79ec 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java @@ -3,29 +3,58 @@ import com.alibaba.datax.common.element.StringColumn; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil; - import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; import com.aliyun.odps.OdpsType; import com.aliyun.odps.TableSchema; - +import com.aliyun.odps.data.ArrayRecord; +import com.aliyun.odps.data.Binary; +import com.aliyun.odps.data.Char; +import com.aliyun.odps.data.IntervalDayTime; +import com.aliyun.odps.data.IntervalYearMonth; import com.aliyun.odps.data.Record; - +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.data.Struct; +import com.aliyun.odps.data.Varchar; import com.aliyun.odps.tunnel.TableTunnel; - import com.aliyun.odps.tunnel.TunnelException; import com.aliyun.odps.tunnel.io.ProtobufRecordPack; +import com.aliyun.odps.type.ArrayTypeInfo; +import com.aliyun.odps.type.CharTypeInfo; +import com.aliyun.odps.type.MapTypeInfo; +import com.aliyun.odps.type.StructTypeInfo; +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.type.VarcharTypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; + +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; import java.util.concurrent.atomic.AtomicLong; public class OdpsWriterProxy { - private static final Logger LOG = LoggerFactory - .getLogger(OdpsWriterProxy.class); + private static final Logger LOG = LoggerFactory.getLogger(OdpsWriterProxy.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriterProxy.class); private volatile boolean printColumnLess;// 是否打印对于源头字段数小于 ODPS 目的表的行的日志 @@ -39,18 +68,98 @@ public class OdpsWriterProxy { private AtomicLong blockId; private List columnPositions; - private List tableOriginalColumnTypeList; + private List tableOriginalColumnTypeList; private boolean emptyAsNull; private boolean isCompress; + + private int taskId; + private int taskCOUNT; + private boolean consistencyCommit = false; + private boolean checkWithGetSize = true; + private List allColumns; + private String overLengthRule; + private int maxFieldLength; + private Boolean enableOverLengthOutput; + 
+ /** + * 记录最近一次活动时间,动态分区写入模式下,超过一定时间不活动,则关闭这个proxy + */ + private Long lastActiveTime; + + /** + * 写block超时时间 + */ + private Long writeTimeoutInMs; - public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, - AtomicLong blockId, List columnPositions, - TaskPluginCollector taskPluginCollector, boolean emptyAsNull, boolean isCompress) - throws IOException, TunnelException { + private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + // 读取 jvm 默认时区 + private Calendar calendarForDate = null; + private boolean useDateWithCalendar = true; + + private Calendar initCalendar(Configuration config) { + // 理论上不会有其他选择,有配置化可以随时应急 + String calendarType = config.getString("calendarType", "iso8601"); + Boolean lenient = config.getBool("calendarLenient", true); + + // 默认jvm时区 + TimeZone timeZone = TimeZone.getDefault(); + String timeZoneStr = config.getString("calendarTimeZone"); + if (StringUtils.isNotBlank(timeZoneStr)) { + // 如果用户明确指定使用用户指定的 + timeZone = TimeZone.getTimeZone(timeZoneStr); + } + + Calendar calendarForDate = new Calendar.Builder().setCalendarType(calendarType).setLenient(lenient) + .setTimeZone(timeZone).build(); + return calendarForDate; + } + + public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, AtomicLong blockId, + List columnPositions, TaskPluginCollector taskPluginCollector, boolean emptyAsNull, + boolean isCompress, boolean checkWithGetSize, List allColumns, boolean initBufSizeZero, + Long writeTimeoutInMs, Configuration taskConfig, String overLengthRule, int maxFieldLength, + Boolean enableOverLengthOutput) throws IOException, TunnelException { + this.slaveUpload = slaveUpload; + this.schema = this.slaveUpload.getSchema(); - this.tableOriginalColumnTypeList = OdpsUtil - .getTableOriginalColumnTypeList(this.schema); + this.tableOriginalColumnTypeList = OdpsUtil.getTableOriginalColumnTypeList(this.schema); + + this.blockId = blockId; + this.columnPositions = columnPositions; + this.taskPluginCollector = taskPluginCollector; + this.emptyAsNull = emptyAsNull; + this.isCompress = isCompress; + + // 初始化与 buffer 区相关的值 + this.maxBufferSize = (blockSizeInMB - 4) * 1024 * 1024; + if (initBufSizeZero) { + // 动态分区下初始化为0,随着写入的record变多慢慢增加 + this.protobufCapacity = 0; + } else { + this.protobufCapacity = blockSizeInMB * 1024 * 1024; + } + this.protobufRecordPack = new ProtobufRecordPack(this.schema, null, this.protobufCapacity); + this.printColumnLess = true; + this.checkWithGetSize = checkWithGetSize; + + this.allColumns = allColumns; + this.overLengthRule = overLengthRule; + this.maxFieldLength = maxFieldLength; + this.enableOverLengthOutput = enableOverLengthOutput; + + this.writeTimeoutInMs = writeTimeoutInMs; + + this.calendarForDate = this.initCalendar(taskConfig); + this.useDateWithCalendar = taskConfig.getBool("useDateWithCalendar", true); + } + + public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, AtomicLong blockId, int taskId, + int taskCount, List columnPositions, TaskPluginCollector taskPluginCollector, boolean emptyAsNull, + boolean isCompress, boolean checkWithGetSize, List allColumns, Long writeTimeoutInMs, Configuration taskConfig, + String overLengthRule, int maxFieldLength, Boolean enableOverLengthOutput) throws IOException, TunnelException { + this.slaveUpload = slaveUpload; + this.schema = this.slaveUpload.getSchema(); + this.tableOriginalColumnTypeList = OdpsUtil.getTableOriginalColumnTypeList(this.schema); this.blockId = blockId; this.columnPositions = 
columnPositions; @@ -63,12 +172,38 @@ public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, this.protobufCapacity = blockSizeInMB * 1024 * 1024; this.protobufRecordPack = new ProtobufRecordPack(this.schema, null, this.protobufCapacity); printColumnLess = true; + + this.taskId = taskId; + this.taskCOUNT = taskCount; + this.consistencyCommit = true; + this.checkWithGetSize = checkWithGetSize; + this.allColumns = allColumns; + this.overLengthRule = overLengthRule; + this.maxFieldLength = maxFieldLength; + this.enableOverLengthOutput = enableOverLengthOutput; + + this.writeTimeoutInMs = writeTimeoutInMs; + + this.calendarForDate = this.initCalendar(taskConfig); + this.useDateWithCalendar = taskConfig.getBool("useDateWithCalendar", true); + } + + public long getCurrentBlockId() { + if (this.consistencyCommit) { + return this.taskId + this.taskCOUNT * (this.blockId.get()); + } else { + return this.blockId.get(); + } + } + public TableTunnel.UploadSession getSlaveUpload() { + return this.slaveUpload; } + + public long writeOneRecord(com.alibaba.datax.common.element.Record dataXRecord, List blocks) + throws Exception { - public long writeOneRecord( - com.alibaba.datax.common.element.Record dataXRecord, - List blocks) throws Exception { + this.lastActiveTime = System.currentTimeMillis(); Record record = dataxRecordToOdpsRecord(dataXRecord); @@ -77,12 +212,11 @@ public long writeOneRecord( } protobufRecordPack.append(record); - if (protobufRecordPack.getTotalBytes() >= maxBufferSize) { + if (protobufRecordPack.getProtobufStream().size() >= maxBufferSize) { long startTimeInNs = System.nanoTime(); - OdpsUtil.slaveWriteOneBlock(this.slaveUpload, - protobufRecordPack, blockId.get(), this.isCompress); - LOG.info("write block {} ok.", blockId.get()); - blocks.add(blockId.get()); + OdpsUtil.slaveWriteOneBlock(this.slaveUpload, protobufRecordPack, getCurrentBlockId(), this.writeTimeoutInMs); + LOG.info("write block {} ok.", getCurrentBlockId()); + blocks.add(getCurrentBlockId()); protobufRecordPack.reset(); + this.blockId.incrementAndGet(); return System.nanoTime() - startTimeInNs; @@ -92,13 +226,20 @@ public long writeOneRecord( public long writeRemainingRecord(List blocks) throws Exception { // complete protobuf stream, then write to http - if (protobufRecordPack.getTotalBytes() != 0) { + // protobufRecordPack.getTotalBytes() 慕明: getTotalBytes并不一定保证能拿到写入的字节数,按你们的逻辑应该是用getTotalBytesWritten + // if (protobufRecordPack.getTotalBytes() != 0) { + boolean hasRemainingData = false; + if (this.checkWithGetSize) { + hasRemainingData = protobufRecordPack.getSize() != 0; + } else { + hasRemainingData = protobufRecordPack.getTotalBytes() != 0; + } + if (hasRemainingData) { long startTimeInNs = System.nanoTime(); - OdpsUtil.slaveWriteOneBlock(this.slaveUpload, - protobufRecordPack, blockId.get(), this.isCompress); - LOG.info("write block {} ok.", blockId.get()); + OdpsUtil.slaveWriteOneBlock(this.slaveUpload, protobufRecordPack, getCurrentBlockId(), this.writeTimeoutInMs); + LOG.info("write block {} ok.", getCurrentBlockId()); - blocks.add(blockId.get()); + blocks.add(getCurrentBlockId()); // reset the buffer for next block protobufRecordPack.reset(); return System.nanoTime() - startTimeInNs; @@ -106,85 +247,846 @@ public long writeRemainingRecord(List blocks) throws Exception { return 0; } - public Record dataxRecordToOdpsRecord( - com.alibaba.datax.common.element.Record dataXRecord) throws Exception { + public Record dataxRecordToOdpsRecord(com.alibaba.datax.common.element.Record dataXRecord) throws 
Exception { int sourceColumnCount = dataXRecord.getColumnNumber(); - Record odpsRecord = slaveUpload.newRecord(); + ArrayRecord odpsRecord = (ArrayRecord) slaveUpload.newRecord(); int userConfiguredColumnNumber = this.columnPositions.size(); -//todo + if (sourceColumnCount > userConfiguredColumnNumber) { - throw DataXException - .asDataXException( - OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format( - "亲,配置中的源表的列个数和目的端表不一致,源表中您配置的列数是:%s 大于目的端的列数是:%s , 这样会导致源头数据无法正确导入目的端, 请检查您的配置并修改.", - sourceColumnCount, - userConfiguredColumnNumber)); + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("odpswriterproxy.1", sourceColumnCount, userConfiguredColumnNumber)); } else if (sourceColumnCount < userConfiguredColumnNumber) { if (printColumnLess) { - LOG.warn( - "源表的列个数小于目的表的列个数,源表列数是:{} 目的表列数是:{} , 数目不匹配. DataX 会把目的端多出的列的值设置为空值. 如果这个默认配置不符合您的期望,请保持源表和目的表配置的列数目保持一致.", - sourceColumnCount, userConfiguredColumnNumber); + LOG.warn(MESSAGE_SOURCE.message("odpswriterproxy.2", sourceColumnCount, userConfiguredColumnNumber)); } printColumnLess = false; } - int currentIndex; + int currentIndex = 0; int sourceIndex = 0; try { com.alibaba.datax.common.element.Column columnValue; for (; sourceIndex < sourceColumnCount; sourceIndex++) { + // 跳过分区列 + if (this.columnPositions.get(sourceIndex) == -1) { + continue; + } currentIndex = columnPositions.get(sourceIndex); - OdpsType type = this.tableOriginalColumnTypeList - .get(currentIndex); + TypeInfo typeInfo = this.tableOriginalColumnTypeList.get(currentIndex); + OdpsType type = typeInfo.getOdpsType(); + String typeName = typeInfo.getTypeName(); columnValue = dataXRecord.getColumn(sourceIndex); if (columnValue == null) { continue; } // for compatible dt lib, "" as null - if(this.emptyAsNull && columnValue instanceof StringColumn && "".equals(columnValue.asString())){ + if (this.emptyAsNull && columnValue instanceof StringColumn && "".equals(columnValue.asString())) { continue; } switch (type) { - case STRING: - odpsRecord.setString(currentIndex, columnValue.asString()); - break; - case BIGINT: - odpsRecord.setBigint(currentIndex, columnValue.asLong()); - break; - case BOOLEAN: - odpsRecord.setBoolean(currentIndex, columnValue.asBoolean()); - break; - case DATETIME: - odpsRecord.setDatetime(currentIndex, columnValue.asDate()); - break; - case DOUBLE: - odpsRecord.setDouble(currentIndex, columnValue.asDouble()); - break; - case DECIMAL: - odpsRecord.setDecimal(currentIndex, columnValue.asBigDecimal()); - String columnStr = columnValue.asString(); - if(columnStr != null && columnStr.indexOf(".") >= 36) { - throw new Exception("Odps decimal 类型的整数位个数不能超过35"); + case STRING: + String newValue = (String)OdpsUtil.processOverLengthData(columnValue.asString(), OdpsType.STRING, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + odpsRecord.setString(currentIndex, newValue); + break; + case BIGINT: + odpsRecord.setBigint(currentIndex, columnValue.asLong()); + break; + case BOOLEAN: + odpsRecord.setBoolean(currentIndex, columnValue.asBoolean()); + break; + case DATETIME: + odpsRecord.setDatetime(currentIndex, columnValue.asDate()); +// Date datetimeData = columnValue.asDate(); +// if (null == datetimeData) { +// odpsRecord.setDatetime(currentIndex, null); +// } else { +// Timestamp dateDataForOdps = new Timestamp(datetimeData.getTime()); +// if (datetimeData instanceof java.sql.Timestamp) { +// dateDataForOdps.setNanos(((java.sql.Timestamp)datetimeData).getNanos()); +// } +// 
odpsRecord.setDatetime(currentIndex, dateDataForOdps); +// } + break; + case DATE: + Date dateData = columnValue.asDate(); + if (null == dateData) { + odpsRecord.setDatetime(currentIndex, null); + } else { + if (this.useDateWithCalendar) { + odpsRecord.setDate(currentIndex, new java.sql.Date(dateData.getTime()), this.calendarForDate); + } else { + odpsRecord.setDatetime(currentIndex, new java.sql.Date(dateData.getTime())); } - default: - break; + } + break; + case DOUBLE: + odpsRecord.setDouble(currentIndex, columnValue.asDouble()); + break; + case FLOAT: + Double floatValue = columnValue.asDouble(); + if (null == floatValue) { + ((ArrayRecord) odpsRecord).setFloat(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setFloat(currentIndex, floatValue.floatValue()); + } + break; + case DECIMAL: + odpsRecord.setDecimal(currentIndex, columnValue.asBigDecimal()); + String columnStr = columnValue.asString(); + if (columnStr != null && columnStr.indexOf(".") >= 36) { + throw new Exception(MESSAGE_SOURCE.message("odpswriterproxy.3")); + } + break; + case TINYINT: + Long tinyintValueStr = columnValue.asLong(); + if (null == tinyintValueStr) { + ((ArrayRecord) odpsRecord).setTinyint(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setTinyint(currentIndex, + Byte.valueOf(String.valueOf(tinyintValueStr))); + } + break; + case SMALLINT: + Long smallIntValue = columnValue.asLong(); + if (null == smallIntValue) { + ((ArrayRecord) odpsRecord).setSmallint(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setSmallint(currentIndex, smallIntValue.shortValue()); + } + break; + case INT: + Long intValue = columnValue.asLong(); + if (null == intValue) { + ((ArrayRecord) odpsRecord).setInt(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setInt(currentIndex, intValue.intValue()); + } + break; + case VARCHAR: + // warn: columnValue.asString() 为 null 时 , odps sdk 有 BUG + // 不能用 Varchar 的默认构造函数,不然有 NPE + String varcharValueStr = columnValue.asString(); + Varchar varcharData = null; + if (varcharValueStr != null){ + varcharData = new Varchar(columnValue.asString()); + } + ((ArrayRecord) odpsRecord).setVarchar(currentIndex, varcharData); + break; + case CHAR: + String charValueStr = columnValue.asString(); + Char charData = null; + if (charValueStr != null ){ + charData = new Char(charValueStr); + } + ((ArrayRecord) odpsRecord).setChar(currentIndex, charData); + break; + case TIMESTAMP: + Date timestampData = columnValue.asDate(); + if (null == timestampData) { + ((ArrayRecord) odpsRecord).setTimestamp(currentIndex, null); + } else { + Timestamp timestampDataForOdps = new Timestamp(timestampData.getTime()); + if (timestampData instanceof java.sql.Timestamp) { + // 纳秒 + timestampDataForOdps.setNanos(((java.sql.Timestamp)timestampData).getNanos()); + } + // warn优化:如果原来类型就是Timestamp,直接使用就少创建了一个对象 + ((ArrayRecord) odpsRecord).setTimestamp(currentIndex, timestampDataForOdps); + } + break; + case BINARY: + Binary newBinaryData = (Binary)OdpsUtil.processOverLengthData(new Binary(columnValue.asBytes()), OdpsType.BINARY, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + ((ArrayRecord) odpsRecord).setBinary(currentIndex,columnValue.asBytes() == null ? 
null : newBinaryData); + break; + case ARRAY: + JSONArray arrayJson = JSON.parseArray(columnValue.asString()); + ((ArrayRecord) odpsRecord).setArray(currentIndex, parseArray(arrayJson, (ArrayTypeInfo) typeInfo)); + break; + case MAP: + JSONObject mapJson = JSON.parseObject(columnValue.asString()); + ((ArrayRecord) odpsRecord).setMap(currentIndex, parseMap(mapJson, (MapTypeInfo) typeInfo)); + break; + case STRUCT: + JSONObject structJson = JSON.parseObject(columnValue.asString()); + ((ArrayRecord) odpsRecord).setStruct(currentIndex, + parseStruct(structJson, (StructTypeInfo) typeInfo)); + break; + default: + break; } } return odpsRecord; } catch (Exception e) { - String message = String.format( - "写入 ODPS 目的表时遇到了脏数据: 第[%s]个字段的数据出现错误,请检查该数据并作出修改 或者您可以增大阀值,忽略这条记录.", sourceIndex); - this.taskPluginCollector.collectDirtyRecord(dataXRecord, e, - message); + String dirtyColumnName = ""; + try { + dirtyColumnName = this.allColumns.get(currentIndex); + } catch (Exception ignoreEx) { + // ignore + } + String message = MESSAGE_SOURCE.message("odpswriterproxy.4", sourceIndex, dirtyColumnName); + this.taskPluginCollector.collectDirtyRecord(dataXRecord, e, message); + return null; + } + } + + private List parseArray(JSONArray jsonArray, ArrayTypeInfo arrayTypeInfo) throws ParseException { + if (null == jsonArray) { return null; } + List result = new ArrayList(); + switch (arrayTypeInfo.getElementTypeInfo().getOdpsType()) { + case BIGINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getLong(i)); + } + return result; + /** + * 双精度浮点 + */ + case DOUBLE: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getDouble(i)); + } + return result; + /** + * 布尔型 + */ + case BOOLEAN: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getBoolean(i)); + } + return result; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (int i = 0; i < jsonArray.size(); i++) { + result.add(dateFormat.parse(jsonArray.getString(i))); + } + return result; + /** + * 字符串类型 + */ + case STRING: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getString(i)); + } + return result; + /** + * 精确小数类型 + */ + case DECIMAL: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getBigDecimal(i)); + } + return result; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getByte(i)); + } + return result; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getShort(i)); + } + return result; + /** + * 4字节有符号整型 + */ + case INT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getInteger(i)); + } + return result; + /** + * 单精度浮点 + */ + case FLOAT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getFloat(i)); + } + return result; + /** + * 固定长度字符串 + */ + case CHAR: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(new Char(jsonArray.getString(i), + ((CharTypeInfo) arrayTypeInfo.getElementTypeInfo()).getLength())); + } + return result; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(new Varchar(jsonArray.getString(i), + ((VarcharTypeInfo) arrayTypeInfo.getElementTypeInfo()).getLength())); + } + return result; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (int i = 0; i < jsonArray.size(); i++) { + result.add(java.sql.Date.valueOf(jsonArray.getString(i))); + } + return result; 
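+ // Note: java.sql.Date.valueOf above (and Timestamp.valueOf in the TIMESTAMP
+ // case below) parse yyyy-[m]m-[d]d style literals in the JVM default time
+ // zone, which is what the timezone TODOs refer to.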
+ /** + * 时间戳 + */ + case TIMESTAMP: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(Timestamp.valueOf(jsonArray.getString(i))); + } + return result; + /** + * 字节数组 + */ + case BINARY: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(Base64.decodeBase64(jsonArray.getString(i))); + } + return result; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (int i = 0; i < jsonArray.size(); i++) { + JSONObject json = jsonArray.getJSONObject(i); + result.add(new IntervalDayTime(json.getInteger("totalSeconds"), json.getInteger("nanos"))); + } + return result; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (int i = 0; i < jsonArray.size(); i++) { + JSONObject json = jsonArray.getJSONObject(i); + result.add(new IntervalYearMonth(json.getInteger("years"), json.getInteger("months"))); + } + return result; + /** + * 结构体 + */ + case STRUCT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add( + parseStruct(jsonArray.getJSONObject(i), (StructTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + /** + * MAP类型 + */ + case MAP: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(parseMap(jsonArray.getJSONObject(i), (MapTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + /** + * ARRAY类型 + */ + case ARRAY: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(parseArray(jsonArray.getJSONArray(i), (ArrayTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + + default: + return result; + } + } + + private Map parseMap(JSONObject json, MapTypeInfo typeInfo) throws ParseException { + if (json == null) { + return null; + } + Map keyMap = new HashMap(); + Set keys = json.keySet(); + switch (typeInfo.getKeyTypeInfo().getOdpsType()) { + case BIGINT: + for (String item : keys) { + keyMap.put(Long.parseLong(item), item); + } + break; + /** + * 双精度浮点 + */ + case DOUBLE: + for (String item : keys) { + keyMap.put(Double.parseDouble(item), item); + } + break; + /** + * 布尔型 + */ + case BOOLEAN: + for (String item : keys) { + keyMap.put(Boolean.parseBoolean(item), item); + } + break; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (String item : keys) { + keyMap.put(dateFormat.parse(item), item); + } + break; + /** + * 字符串类型 + */ + case STRING: + for (String item : keys) { + keyMap.put(item, item); + } + break; + /** + * 精确小数类型 + */ + case DECIMAL: + for (String item : keys) { + keyMap.put(new BigDecimal(item), item); + } + break; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (String item : keys) { + keyMap.put(Byte.parseByte(item), item); + } + break; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (String item : keys) { + keyMap.put(Short.parseShort(item), item); + } + break; + /** + * 4字节有符号整型 + */ + case INT: + for (String item : keys) { + keyMap.put(Integer.parseInt(item), item); + } + break; + /** + * 单精度浮点 + */ + case FLOAT: + for (String item : keys) { + keyMap.put(Float.parseFloat(item), item); + } + break; + /** + * 固定长度字符串 + */ + case CHAR: + for (String item : keys) { + keyMap.put(new Char(item, ((CharTypeInfo) typeInfo.getKeyTypeInfo()).getLength()), item); + } + break; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (String item : keys) { + keyMap.put(new Varchar(item, ((VarcharTypeInfo) typeInfo.getKeyTypeInfo()).getLength()), item); + } + break; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (String item : keys) { + keyMap.put(java.sql.Date.valueOf(item), item); + } + break; + /** + * 时间戳 + */ + case TIMESTAMP: + for (String item : keys) { + 
keyMap.put(Timestamp.valueOf(item), item); + } + break; + /** + * 字节数组 + */ + case BINARY: + for (String item : keys) { + keyMap.put(new Binary(Base64.decodeBase64(item)), item); + } + break; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (String item : keys) { + JSONObject jsonObject = JSON.parseObject(item); + keyMap.put(new IntervalDayTime(jsonObject.getInteger("totalSeconds"), jsonObject.getInteger("nanos")), + item); + } + break; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (String item : keys) { + JSONObject jsonObject = JSON.parseObject(item); + keyMap.put(new IntervalYearMonth(jsonObject.getInteger("years"), jsonObject.getInteger("months")), + item); + } + break; + default: + break; + // TODO throw an exception + } + Map result = new HashMap(); + // process map value + switch (typeInfo.getValueTypeInfo().getOdpsType()) { + case BIGINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getLong(item.getValue())); + } + return result; + /** + * 双精度浮点 + */ + case DOUBLE: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getDouble(item.getValue())); + } + return result; + /** + * 布尔型 + */ + case BOOLEAN: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getBoolean(item.getValue())); + } + return result; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), dateFormat.parse(json.getString(item.getValue()))); + } + return result; + /** + * 字符串类型 + */ + case STRING: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getString(item.getValue())); + } + return result; + /** + * 精确小数类型 + */ + case DECIMAL: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getBigDecimal(item.getValue())); + } + return result; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getByte(item.getValue())); + } + return result; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getShort(item.getValue())); + } + return result; + /** + * 4字节有符号整型 + */ + case INT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getInteger(item.getValue())); + } + return result; + /** + * 单精度浮点 + */ + case FLOAT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getFloat(item.getValue())); + } + return result; + /** + * 固定长度字符串 + */ + case CHAR: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new Char(json.getString(item.getValue()), + ((CharTypeInfo) typeInfo.getValueTypeInfo()).getLength())); + } + return result; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new Varchar(json.getString(item.getValue()), + ((VarcharTypeInfo) typeInfo.getValueTypeInfo()).getLength())); + } + return result; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), java.sql.Date.valueOf(json.getString(item.getValue()))); + } + return result; + /** + * 时间戳 + */ + case TIMESTAMP: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), Timestamp.valueOf(json.getString(item.getValue()))); + } + return result; + /** + * 字节数组 + */ + case BINARY: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new 
Binary(Base64.decodeBase64(json.getString(item.getValue())))); + } + return result; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (Map.Entry item : keyMap.entrySet()) { + JSONObject jsonObject = json.getJSONObject(item.getValue()); + result.put(item.getKey(), + new IntervalDayTime(jsonObject.getInteger("totalSeconds"), jsonObject.getInteger("nanos"))); + } + return result; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (Map.Entry item : keyMap.entrySet()) { + JSONObject jsonObject = json.getJSONObject(item.getValue()); + result.put(item.getKey(), + new IntervalYearMonth(jsonObject.getInteger("years"), jsonObject.getInteger("months"))); + } + return result; + /** + * 结构体 + */ + case STRUCT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseStruct(json.getJSONObject(item.getValue()), (StructTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + /** + * MAP类型 + */ + case MAP: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseMap(json.getJSONObject(item.getValue()), (MapTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + /** + * ARRAY类型 + */ + case ARRAY: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseArray(json.getJSONArray(item.getValue()), (ArrayTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + + default: + throw new IllegalArgumentException("decode record failed. column type: " + typeInfo.getTypeName()); + } + } + + public Struct parseStruct(JSONObject json, StructTypeInfo struct) throws ParseException { + if (null == json) { + return null; + } + List fieldNames = struct.getFieldNames(); + List typeInfos = struct.getFieldTypeInfos(); + List structValues = new ArrayList(); + for (int i = 0; i < fieldNames.size(); i++) { + String fieldName = fieldNames.get(i); + switch (typeInfos.get(i).getOdpsType()) { + case BIGINT: + structValues.add(json.getLong(fieldName)); + break; + /** + * 双精度浮点 + */ + case DOUBLE: + structValues.add(json.getDouble(fieldName)); + break; + /** + * 布尔型 + */ + case BOOLEAN: + structValues.add(json.getBoolean(fieldName)); + break; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + structValues.add(dateFormat.parse(json.getString(fieldName))); + break; + /** + * 字符串类型 + */ + case STRING: + structValues.add(json.getString(fieldName)); + break; + /** + * 精确小数类型 + */ + case DECIMAL: + structValues.add(json.getBigDecimal(fieldName)); + break; + /** + * 1字节有符号整型 + */ + case TINYINT: + structValues.add(json.getByte(fieldName)); + break; + /** + * 2字节有符号整型 + */ + case SMALLINT: + structValues.add(json.getShort(fieldName)); + break; + /** + * 4字节有符号整型 + */ + case INT: + structValues.add(json.getInteger(fieldName)); + break; + /** + * 单精度浮点 + */ + case FLOAT: + structValues.add(json.getFloat(fieldName)); + break; + /** + * 固定长度字符串 + */ + case CHAR: + structValues.add(new Char(json.getString(fieldName), ((CharTypeInfo) typeInfos.get(i)).getLength())); + break; + /** + * 可变长度字符串 + */ + case VARCHAR: + structValues + .add(new Varchar(json.getString(fieldName), ((VarcharTypeInfo) typeInfos.get(i)).getLength())); + break; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + structValues.add(java.sql.Date.valueOf(json.getString(fieldName))); + break; + /** + * 时间戳 + */ + case TIMESTAMP: + structValues.add(Timestamp.valueOf(json.getString(fieldName))); + break; + /** + * 字节数组 + */ + case BINARY: + structValues.add(Base64.decodeBase64(json.getString(fieldName))); + break; + /** + * 日期间隔 + */ + 
case INTERVAL_DAY_TIME: + // TODO special process as map object + structValues.add(new IntervalDayTime(json.getInteger("totalSeconds"), json.getInteger("nanos"))); + break; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + structValues.add(new IntervalYearMonth(json.getInteger("years"), json.getInteger("months"))); + break; + /** + * 结构体 + */ + case STRUCT: + structValues.add(parseStruct(json.getJSONObject(fieldName), (StructTypeInfo) typeInfos.get(i))); + break; + /** + * MAP类型 + */ + case MAP: + structValues.add(parseMap(json.getJSONObject(fieldName), (MapTypeInfo) typeInfos.get(i))); + break; + /** + * ARRAY类型 + */ + case ARRAY: + structValues.add(parseArray(json.getJSONArray(fieldName), (ArrayTypeInfo) typeInfos.get(i))); + break; + } + } + + SimpleStruct simpleStruct = new SimpleStruct(struct, structValues); + return simpleStruct; + } + + public Long getLastActiveTime() { + return lastActiveTime; + } + + public void setLastActiveTime(Long lastActiveTime) { + this.lastActiveTime = lastActiveTime; + } + public Long getCurrentTotalBytes() throws IOException { + return this.protobufRecordPack.getTotalBytes(); } } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java new file mode 100644 index 0000000000..f293d8ccbe --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java @@ -0,0 +1,87 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +public class PartitionInfo { + /** + * 字段名 + */ + private String name; + /** + * String + */ + private String type; + /** + * eventTime or function + * yyyy/MM/dd/HH/mm + * 可自定义组合 + */ + private String valueMode; + private String value; + private String comment; + /** + * 自定义分区有效 + * eventTime / constant + * function + */ + private String category; + /** + * 当 partitionType 为function时 + * functionExpression 为 valueMode 对应的expression + */ + private String functionExpression; + + public String getFunctionExpression() { + return functionExpression; + } + + public void setFunctionExpression(String functionExpression) { + this.functionExpression = functionExpression; + } + + public String getCategory() { + return category; + } + + public void setCategory(String category) { + this.category = category; + } + + public String getComment() { + return comment; + } + + public void setComment(String comment) { + this.comment = comment; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getValueMode() { + return valueMode; + } + + public void setValueMode(String valueMode) { + this.valueMode = valueMode; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java new file mode 100644 index 0000000000..55c8a1145c --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java @@ -0,0 +1,44 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +import java.io.Serializable; +import java.util.List; + +public class UserDefinedFunction implements 
Serializable { + private static final long serialVersionUID = 1L; + private String name; + private String expression; + private String inputColumn; + private List variableRule; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getExpression() { + return expression; + } + + public void setExpression(String expression) { + this.expression = expression; + } + + public String getInputColumn() { + return inputColumn; + } + + public void setInputColumn(String inputColumn) { + this.inputColumn = inputColumn; + } + + public List getVariableRule() { + return variableRule; + } + + public void setVariableRule(List variableRule) { + this.variableRule = variableRule; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java new file mode 100644 index 0000000000..5676eb4574 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java @@ -0,0 +1,26 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +import java.io.Serializable; +import java.util.List; + +public class UserDefinedFunctionRule implements Serializable { + private static final long serialVersionUID = 1L; + private String type; + private List params; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public List getParams() { + return params; + } + + public void setParams(List params) { + this.params = params; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java new file mode 100644 index 0000000000..51ad45a151 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java @@ -0,0 +1,54 @@ +package com.alibaba.datax.plugin.writer.odpswriter.util; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.odpswriter.model.PartitionInfo; +import com.alibaba.datax.plugin.writer.odpswriter.model.UserDefinedFunction; +import com.alibaba.fastjson.JSON; +import com.google.common.base.Joiner; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +public class CustomPartitionUtils implements Serializable { + private static final long serialVersionUID = 1L; + protected static Logger logger = LoggerFactory.getLogger(CustomPartitionUtils.class); + + public static List getListWithJson(Configuration config, String path, Class clazz) { + Object object = config.get(path, List.class); + if (null == object) { + return null; + } + + return JSON.parseArray(JSON.toJSONString(object), clazz); + } + + public static String generate(Record record, List functions, List partitions, + List allColumns) { + for (PartitionInfo partitionInfo : partitions) { + partitionInfo.setValue(buildPartitionValue(partitionInfo, functions, record, allColumns)); + } + List partitionList = partitions.stream() + .map(item -> String.format("%s='%s'", item.getName(), item.getValue())) + .collect(Collectors.toList()); + return Joiner.on(",").join(partitionList); + } + 
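+ /** + * Example (hypothetical values): two constant partitions + * {name: "pt", valueMode: "20230101", category: "constant"} and + * {name: "ds", valueMode: "hangzhou", category: "constant"} make + * generate(...) return "pt='20230101',ds='hangzhou'", since + * buildPartitionValue below echoes valueMode for the + * blank/eventTime/constant categories. + */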
+ private static String buildPartitionValue(PartitionInfo partitionInfo, List functions, Record record, + List allColumns) { +// logger.info("try build partition value:partitionInfo:\n{},functions:\n{}", +// JSON.toJSONString(partitionInfo), JSON.toJSONString(functions)); + if (StringUtils.isBlank(partitionInfo.getCategory()) + || "eventTime".equalsIgnoreCase(partitionInfo.getCategory()) + || "constant".equalsIgnoreCase(partitionInfo.getCategory())) { + // 直接输出原样字符串 + return partitionInfo.getValueMode(); +// throw new RuntimeException("not support partition category:" + partitionInfo.getCategory()); + } + throw new RuntimeException("un support partition info type:" + partitionInfo.getCategory()); + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/DESCipher.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/DESCipher.java deleted file mode 100755 index 4afead5219..0000000000 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/DESCipher.java +++ /dev/null @@ -1,355 +0,0 @@ -/** - * (C) 2010-2014 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.alibaba.datax.plugin.writer.odpswriter.util; - -import javax.crypto.Cipher; -import javax.crypto.SecretKey; -import javax.crypto.SecretKeyFactory; -import javax.crypto.spec.DESKeySpec; -import java.security.SecureRandom; - -/** - *   * DES加解密,支持与delphi交互(字符串编码需统一为UTF-8) - * - *   * - * - *   * @author wym - * - *    - */ - -public class DESCipher { - - /** - *   * 密钥 - * - *    - */ - - public static final String KEY = "DESDES"; - - private final static String DES = "DES"; - - /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @param key - * - *   * 密钥,长度必须是8的倍数 - * - *   * @return 密文(字节) - * - *   * @throws Exception - * - *    - */ - - public static byte[] encrypt(byte[] src, byte[] key) throws Exception { - - // DES算法要求有一个可信任的随机数源 - - SecureRandom sr = new SecureRandom(); - - // 从原始密匙数据创建DESKeySpec对象 - - DESKeySpec dks = new DESKeySpec(key); - - // 创建一个密匙工厂,然后用它把DESKeySpec转换成 - - // 一个SecretKey对象 - - SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); - - SecretKey securekey = keyFactory.generateSecret(dks); - - // Cipher对象实际完成加密操作 - - Cipher cipher = Cipher.getInstance(DES); - - // 用密匙初始化Cipher对象 - - cipher.init(Cipher.ENCRYPT_MODE, securekey, sr); - - // 现在,获取数据并加密 - - // 正式执行加密操作 - - return cipher.doFinal(src); - - } - - /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @param key - * - *   * 密钥,长度必须是8的倍数 - * - *   * @return 明文(字节) - * - *   * @throws Exception - * - *    - */ - - public static byte[] decrypt(byte[] src, byte[] key) throws Exception { - - // DES算法要求有一个可信任的随机数源 - - SecureRandom sr = new SecureRandom(); - - // 从原始密匙数据创建一个DESKeySpec对象 - - DESKeySpec dks = new DESKeySpec(key); - - // 创建一个密匙工厂,然后用它把DESKeySpec对象转换成 - - // 一个SecretKey对象 - - SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); - - 
SecretKey securekey = keyFactory.generateSecret(dks); - - // Cipher对象实际完成解密操作 - - Cipher cipher = Cipher.getInstance(DES); - - // 用密匙初始化Cipher对象 - - cipher.init(Cipher.DECRYPT_MODE, securekey, sr); - - // 现在,获取数据并解密 - - // 正式执行解密操作 - - return cipher.doFinal(src); - - } - - /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @return 密文(字节) - * - *   * @throws Exception - * - *    - */ - - public static byte[] encrypt(byte[] src) throws Exception { - - return encrypt(src, KEY.getBytes()); - - } - - /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @return 明文(字节) - * - *   * @throws Exception - * - *    - */ - - public static byte[] decrypt(byte[] src) throws Exception { - - return decrypt(src, KEY.getBytes()); - - } - - /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字符串) - * - *   * @return 密文(16进制字符串) - * - *   * @throws Exception - * - *    - */ - - public final static String encrypt(String src) { - - try { - - return byte2hex(encrypt(src.getBytes(), KEY.getBytes())); - - } catch (Exception e) { - - e.printStackTrace(); - - } - - return null; - - } - - /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字符串) - * - *   * @return 明文(字符串) - * - *   * @throws Exception - * - *    - */ - - public final static String decrypt(String src) { - try { - - return new String(decrypt(hex2byte(src.getBytes()), KEY.getBytes())); - - } catch (Exception e) { - - e.printStackTrace(); - - } - - return null; - - } - - /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @return 密文(16进制字符串) - * - *   * @throws Exception - * - *    - */ - - public static String encryptToString(byte[] src) throws Exception { - - return encrypt(new String(src)); - - } - - /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @return 明文(字符串) - * - *   * @throws Exception - * - *    - */ - - public static String decryptToString(byte[] src) throws Exception { - - return decrypt(new String(src)); - - } - - public static String byte2hex(byte[] b) { - - String hs = ""; - - String stmp = ""; - - for (int n = 0; n < b.length; n++) { - - stmp = (Integer.toHexString(b[n] & 0XFF)); - - if (stmp.length() == 1) - - hs = hs + "0" + stmp; - - else - - hs = hs + stmp; - - } - - return hs.toUpperCase(); - - } - - public static byte[] hex2byte(byte[] b) { - - if ((b.length % 2) != 0) - - throw new IllegalArgumentException("长度不是偶数"); - - byte[] b2 = new byte[b.length / 2]; - - for (int n = 0; n < b.length; n += 2) { - - String item = new String(b, n, 2); - - b2[n / 2] = (byte) Integer.parseInt(item, 16); - - } - return b2; - - } - - /* - * public static void main(String[] args) { try { String src = "cheetah"; - * String crypto = DESCipher.encrypt(src); System.out.println("密文[" + src + - * "]:" + crypto); System.out.println("解密后:" + DESCipher.decrypt(crypto)); } - * catch (Exception e) { e.printStackTrace(); } } - */ -} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java index 95e4b56b54..98c9afdd95 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java @@ -1,5 +1,5 @@ /** - * (C) 2010-2014 Alibaba Group Holding Limited. + * (C) 2010-2022 Alibaba Group Holding Limited. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,11 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.datax.plugin.writer.odpswriter.Constant; +import com.alibaba.datax.common.util.IdAndKeyRollingUtil; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.plugin.writer.odpswriter.Key; import com.alibaba.datax.plugin.writer.odpswriter.OdpsWriterErrorCode; + import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,6 +31,7 @@ public class IdAndKeyUtil { private static Logger LOG = LoggerFactory.getLogger(IdAndKeyUtil.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(IdAndKeyUtil.class); public static Configuration parseAccessIdAndKey(Configuration originalConfig) { String accessId = originalConfig.getString(Key.ACCESS_ID); @@ -50,36 +53,13 @@ public static Configuration parseAccessIdAndKey(Configuration originalConfig) { private static Configuration getAccessIdAndKeyFromEnv(Configuration originalConfig, Map envProp) { - String accessId = null; - String accessKey = null; - - String skynetAccessID = envProp.get(Constant.SKYNET_ACCESSID); - String skynetAccessKey = envProp.get(Constant.SKYNET_ACCESSKEY); - - if (StringUtils.isNotBlank(skynetAccessID) - || StringUtils.isNotBlank(skynetAccessKey)) { - /** - * 环境变量中,如果存在SKYNET_ACCESSID/SKYNET_ACCESSKEy(只要有其中一个变量,则认为一定是两个都存在的!), - * 则使用其值作为odps的accessId/accessKey(会解密) - */ - - LOG.info("Try to get accessId/accessKey from environment."); - accessId = skynetAccessID; - accessKey = DESCipher.decrypt(skynetAccessKey); - if (StringUtils.isNotBlank(accessKey)) { - originalConfig.set(Key.ACCESS_ID, accessId); - originalConfig.set(Key.ACCESS_KEY, accessKey); - LOG.info("Get accessId/accessKey from environment variables successfully."); - } else { - throw DataXException.asDataXException(OdpsWriterErrorCode.GET_ID_KEY_FAIL, - String.format("从环境变量中获取accessId/accessKey 失败, accessId=[%s]", accessId)); - } - } else { - // 无处获取(既没有配置在作业中,也没用在环境变量中) + // 如果获取到ak,在getAccessIdAndKeyFromEnv中已经设置到originalConfig了 + String accessKey = IdAndKeyRollingUtil.getAccessIdAndKeyFromEnv(originalConfig); + if (StringUtils.isBlank(accessKey)) { + // 无处获取(既没有配置在作业中,也没用在环境变量中) throw DataXException.asDataXException(OdpsWriterErrorCode.GET_ID_KEY_FAIL, - "无法获取到accessId/accessKey. 它们既不存在于您的配置中,也不存在于环境变量中."); - } - + MESSAGE_SOURCE.message("idandkeyutil.2")); + } return originalConfig; } } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties new file mode 100644 index 0000000000..289c70fa05 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties @@ -0,0 +1,39 @@ +descipher.1=\u957f\u5ea6\u4e0d\u662f\u5076\u6570 + +idandkeyutil.1=\u4ece\u73af\u5883\u53d8\u91cf\u4e2d\u83b7\u53d6accessId/accessKey \u5931\u8d25, accessId=[{0}] +idandkeyutil.2=\u65e0\u6cd5\u83b7\u53d6\u5230accessId/accessKey. \u5b83\u4eec\u65e2\u4e0d\u5b58\u5728\u4e8e\u60a8\u7684\u914d\u7f6e\u4e2d\uff0c\u4e5f\u4e0d\u5b58\u5728\u4e8e\u73af\u5883\u53d8\u91cf\u4e2d. + +odpsutil.1=\u60a8\u672a\u914d\u7f6e\u5199\u5165 ODPS \u76ee\u7684\u8868\u7684\u5217\u4fe1\u606f. 
\u6b63\u786e\u7684\u914d\u7f6e\u65b9\u5f0f\u662f\u7ed9datax\u7684 column \u9879\u914d\u7f6e\u4e0a\u60a8\u9700\u8981\u8bfb\u53d6\u7684\u5217\u540d\u79f0,\u7528\u82f1\u6587\u9017\u53f7\u5206\u9694 \u4f8b\u5982: \"column\": [\"id\",\"name\"]. +odpsutil.2=[truncate]\u662f\u5fc5\u586b\u914d\u7f6e\u9879, \u610f\u601d\u662f\u5199\u5165 ODPS \u76ee\u7684\u8868\u524d\u662f\u5426\u6e05\u7a7a\u8868/\u5206\u533a. \u8bf7\u60a8\u589e\u52a0 truncate \u7684\u914d\u7f6e\uff0c\u6839\u636e\u4e1a\u52a1\u9700\u8981\u9009\u62e9\u4e0atrue \u6216\u8005 false. +odpsutil.3=\u60a8\u6240\u914d\u7f6e\u7684maxRetryTime \u503c\u9519\u8bef. \u8be5\u503c\u4e0d\u80fd\u5c0f\u4e8e1, \u4e14\u4e0d\u80fd\u5927\u4e8e {0}. \u63a8\u8350\u7684\u914d\u7f6e\u65b9\u5f0f\u662f\u7ed9maxRetryTime \u914d\u7f6e1-11\u4e4b\u95f4\u7684\u67d0\u4e2a\u503c. \u8bf7\u60a8\u68c0\u67e5\u914d\u7f6e\u5e76\u505a\u51fa\u76f8\u5e94\u4fee\u6539. +odpsutil.4=\u4e0d\u652f\u6301\u7684\u8d26\u53f7\u7c7b\u578b:[{0}]. \u8d26\u53f7\u7c7b\u578b\u76ee\u524d\u4ec5\u652f\u6301aliyun, taobao. +odpsutil.5=\u83b7\u53d6 ODPS \u76ee\u7684\u8868:{0} \u7684\u6240\u6709\u5206\u533a\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.6=\u68c0\u67e5 ODPS \u76ee\u7684\u8868:{0} \u662f\u5426\u4e3a\u5206\u533a\u8868\u5931\u8d25, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.7=\u6e05\u7a7a ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.8=\u6dfb\u52a0 ODPS \u76ee\u7684\u8868\u7684\u5206\u533a\u5931\u8d25. \u9519\u8bef\u53d1\u751f\u5728\u6dfb\u52a0 ODPS \u7684\u9879\u76ee:{0} \u7684\u8868:{1} \u7684\u5206\u533a:{2}. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.9=\u521b\u5efaTunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.10=\u521b\u5efaTunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.11=\u83b7\u53d6TunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.12=\u83b7\u53d6TunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.13=Drop ODPS \u76ee\u7684\u8868\u5206\u533a\u5931\u8d25. \u9519\u8bef\u53d1\u751f\u5728\u9879\u76ee:{0} \u7684\u8868:{1} \u7684\u5206\u533a:{2} .\u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.14=ODPS \u76ee\u7684\u8868\u81ea\u8eab\u7684 partition:{0} \u683c\u5f0f\u4e0d\u5bf9. \u6b63\u786e\u7684\u683c\u5f0f\u5f62\u5982: pt=1,ds=hangzhou +odpsutil.15=ODPS \u76ee\u7684\u8868\u5728\u8fd0\u884c ODPS SQL\u5931\u8d25, \u8fd4\u56de\u503c\u4e3a:{0}. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. SQL \u5185\u5bb9\u4e3a:[\n{1}\n]. +odpsutil.16=ODPS \u76ee\u7684\u8868\u5728\u8fd0\u884c ODPS SQL \u65f6\u629b\u51fa\u5f02\u5e38, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. SQL \u5185\u5bb9\u4e3a:[\n{0}\n]. +odpsutil.17=ODPS \u76ee\u7684\u8868\u5728\u63d0\u4ea4 block:[\n{0}\n] \u65f6\u5931\u8d25, uploadId=[{1}]. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.18=ODPS \u76ee\u7684\u8868\u5199 block:{0} \u5931\u8d25\uff0c uploadId=[{1}]. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.19=ODPS \u76ee\u7684\u8868\u7684\u5217\u914d\u7f6e\u9519\u8bef. 
\u7531\u4e8e\u60a8\u6240\u914d\u7f6e\u7684\u5217:{0} \u4e0d\u5b58\u5728\uff0c\u4f1a\u5bfc\u81f4datax\u65e0\u6cd5\u6b63\u5e38\u63d2\u5165\u6570\u636e\uff0c\u8bf7\u68c0\u67e5\u8be5\u5217\u662f\u5426\u5b58\u5728\uff0c\u5982\u679c\u5b58\u5728\u8bf7\u68c0\u67e5\u5927\u5c0f\u5199\u7b49\u914d\u7f6e. +odpsutil.20=DataX \u5199\u5165 ODPS \u8868\u4e0d\u652f\u6301\u8be5\u5b57\u6bb5\u7c7b\u578b:[{0}]. \u76ee\u524d\u652f\u6301\u62bd\u53d6\u7684\u5b57\u6bb5\u7c7b\u578b\u6709\uff1abigint, boolean, datetime, double, string. \u60a8\u53ef\u4ee5\u9009\u62e9\u4e0d\u62bd\u53d6 DataX \u4e0d\u652f\u6301\u7684\u5b57\u6bb5\u6216\u8005\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5bfb\u6c42\u5e2e\u52a9. +odpsutil.21=\u60a8\u6ca1\u6709\u914d\u7f6e\u5206\u533a\u4fe1\u606f\uff0c\u56e0\u4e3a\u4f60\u914d\u7f6e\u7684\u8868\u662f\u5206\u533a\u8868:{0} \u5982\u679c\u9700\u8981\u8fdb\u884c truncate \u64cd\u4f5c\uff0c\u5fc5\u987b\u6307\u5b9a\u9700\u8981\u6e05\u7a7a\u7684\u5177\u4f53\u5206\u533a. \u8bf7\u4fee\u6539\u5206\u533a\u914d\u7f6e\uff0c\u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'} . +odpsutil.22=\u5206\u533a\u4fe1\u606f\u914d\u7f6e\u9519\u8bef\uff0c\u4f60\u7684ODPS\u8868\u662f\u975e\u5206\u533a\u8868:{0} \u8fdb\u884c truncate \u64cd\u4f5c\u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u68c0\u67e5\u60a8\u7684\u5206\u533a\u914d\u7f6e\uff0c\u5220\u9664\u8be5\u914d\u7f6e\u9879\u7684\u503c. +odpsutil.23=\u60a8\u7684\u76ee\u7684\u8868\u662f\u5206\u533a\u8868\uff0c\u5199\u5165\u5206\u533a\u8868:{0} \u65f6\u5fc5\u987b\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u4fee\u6539\u60a8\u7684\u5206\u533a\u914d\u7f6e\u4fe1\u606f\uff0c\u683c\u5f0f\u5f62\u5982 \u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'}. +odpsutil.24=\u60a8\u7684\u76ee\u7684\u8868\u662f\u975e\u5206\u533a\u8868\uff0c\u5199\u5165\u975e\u5206\u533a\u8868:{0} \u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u5220\u9664\u5206\u533a\u914d\u7f6e\u4fe1\u606f +odpsutil.25=\u60a8\u6ca1\u6709\u914d\u7f6e\u5206\u533a\u4fe1\u606f\uff0c\u56e0\u4e3a\u4f60\u914d\u7f6e\u7684\u8868\u662f\u5206\u533a\u8868:{0} \u5982\u679c\u9700\u8981\u8fdb\u884c truncate \u64cd\u4f5c\uff0c\u5fc5\u987b\u6307\u5b9a\u9700\u8981\u6e05\u7a7a\u7684\u5177\u4f53\u5206\u533a. \u8bf7\u4fee\u6539\u5206\u533a\u914d\u7f6e\uff0c\u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'} . +odpsutil.26=\u5206\u533a\u4fe1\u606f\u914d\u7f6e\u9519\u8bef\uff0c\u4f60\u7684ODPS\u8868\u662f\u975e\u5206\u533a\u8868:{0} \u8fdb\u884c truncate \u64cd\u4f5c\u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u68c0\u67e5\u60a8\u7684\u5206\u533a\u914d\u7f6e\uff0c\u5220\u9664\u8be5\u914d\u7f6e\u9879\u7684\u503c. +odpsutil.27=\u60a8\u7684\u76ee\u7684\u8868\u662f\u5206\u533a\u8868\uff0c\u5199\u5165\u5206\u533a\u8868:{0} \u65f6\u5fc5\u987b\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u4fee\u6539\u60a8\u7684\u5206\u533a\u914d\u7f6e\u4fe1\u606f\uff0c\u683c\u5f0f\u5f62\u5982 \u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'}. +odpsutil.28=\u60a8\u7684\u76ee\u7684\u8868\u662f\u975e\u5206\u533a\u8868\uff0c\u5199\u5165\u975e\u5206\u533a\u8868:{0} \u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u5220\u9664\u5206\u533a\u914d\u7f6e\u4fe1\u606f +odpsutil.29=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [project] \u662f\u5426\u6b63\u786e. +odpsutil.30=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. 
\u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [table] \u662f\u5426\u6b63\u786e. +odpsutil.31=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessId] [accessKey]\u662f\u5426\u6b63\u786e. +odpsutil.32=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessKey] \u662f\u5426\u6b63\u786e. +odpsutil.33=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessId] [accessKey] [project]\u662f\u5426\u5339\u914d. +odpsutil.34=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 project,table,accessId,accessKey,odpsServer\u7b49\u503c. \ No newline at end of file diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java index d613eefda9..ae6f275c19 100644 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java @@ -1,8 +1,5 @@ package com.alibaba.datax.plugin.writer.odpswriter.util; -/** - * Created by hongjiao.hj on 2015/6/9. - */ public class OdpsExceptionMsg { public static final String ODPS_PROJECT_NOT_FOUNT = "ODPS-0420111: Project not found"; diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java index 2a401b696c..a663da85b1 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java @@ -1,29 +1,35 @@ package com.alibaba.datax.plugin.writer.odpswriter.util; +import com.alibaba.datax.common.element.*; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.common.util.RetryUtil; -import com.alibaba.datax.plugin.writer.odpswriter.Constant; -import com.alibaba.datax.plugin.writer.odpswriter.Key; - -import com.alibaba.datax.plugin.writer.odpswriter.OdpsWriterErrorCode; +import com.alibaba.datax.plugin.writer.odpswriter.*; import com.aliyun.odps.*; +import com.aliyun.odps.Column; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; +import com.aliyun.odps.data.ResultSet; +import com.aliyun.odps.data.Binary; import com.aliyun.odps.task.SQLTask; import com.aliyun.odps.tunnel.TableTunnel; - import com.aliyun.odps.tunnel.io.ProtobufRecordPack; import com.aliyun.odps.tunnel.io.TunnelRecordWriter; +import com.aliyun.odps.type.TypeInfo; + import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.DateFormatUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.text.SimpleDateFormat; import java.util.*; import java.util.concurrent.Callable; public class OdpsUtil { private static final Logger LOG = LoggerFactory.getLogger(OdpsUtil.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsUtil.class); public static int MAX_RETRY_TIME = 10; @@ -38,15 +44,14 @@ public static void 
checkNecessaryConfig(Configuration originalConfig) { if (null == originalConfig.getList(Key.COLUMN) || originalConfig.getList(Key.COLUMN, String.class).isEmpty()) { - throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, "您未配置写入 ODPS 目的表的列信息. " + - "正确的配置方式是给datax的 column 项配置上您需要读取的列名称,用英文逗号分隔 例如: \"column\": [\"id\",\"name\"]."); + throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("odpsutil.1")); } // getBool 内部要求,值只能为 true,false 的字符串(大小写不敏感),其他一律报错,不再有默认配置 + // 如果是动态分区写入,不进行truncate Boolean truncate = originalConfig.getBool(Key.TRUNCATE); if (null == truncate) { - throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, "[truncate]是必填配置项, 意思是写入 ODPS 目的表前是否清空表/分区. " + - "请您增加 truncate 的配置,根据业务需要选择上true 或者 false."); + throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("odpsutil.2")); } } @@ -54,19 +59,22 @@ public static void dealMaxRetryTime(Configuration originalConfig) { int maxRetryTime = originalConfig.getInt(Key.MAX_RETRY_TIME, OdpsUtil.MAX_RETRY_TIME); if (maxRetryTime < 1 || maxRetryTime > OdpsUtil.MAX_RETRY_TIME) { - throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, "您所配置的maxRetryTime 值错误. 该值不能小于1, 且不能大于 " + OdpsUtil.MAX_RETRY_TIME + - ". 推荐的配置方式是给maxRetryTime 配置1-11之间的某个值. 请您检查配置并做出相应修改."); + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("odpsutil.3", OdpsUtil.MAX_RETRY_TIME)); } MAX_RETRY_TIME = maxRetryTime; } - public static String formatPartition(String partitionString) { + public static String formatPartition(String partitionString, Boolean printLog) { if (null == partitionString) { return null; } - - return partitionString.trim().replaceAll(" *= *", "=").replaceAll(" */ *", ",") + String parsedPartition = partitionString.trim().replaceAll(" *= *", "=").replaceAll(" */ *", ",") .replaceAll(" *, *", ",").replaceAll("'", ""); + if (printLog) { + LOG.info("format partition with rules: remove all spaces; remove all ' characters; replace / with ,"); + LOG.info("original partition {} parsed partition {}", partitionString, parsedPartition); + } + return parsedPartition; } @@ -77,13 +85,18 @@ public static Odps initOdpsProject(Configuration originalConfig) { String odpsServer = originalConfig.getString(Key.ODPS_SERVER); String project = originalConfig.getString(Key.PROJECT); + String securityToken = originalConfig.getString(Key.SECURITY_TOKEN); Account account; if (accountType.equalsIgnoreCase(Constant.DEFAULT_ACCOUNT_TYPE)) { - account = new AliyunAccount(accessId, accessKey); + if (StringUtils.isNotBlank(securityToken)) { + account = new com.aliyun.odps.account.StsAccount(accessId, accessKey, securityToken); + } else { + account = new AliyunAccount(accessId, accessKey); + } } else { throw DataXException.asDataXException(OdpsWriterErrorCode.ACCOUNT_TYPE_ERROR, - String.format("不支持的账号类型:[%s]. 账号类型目前仅支持aliyun, taobao.", accountType)); + MESSAGE_SOURCE.message("odpsutil.4", accountType)); } Odps odps = new Odps(account); @@ -95,6 +108,7 @@ public static Odps initOdpsProject(Configuration originalConfig) { } odps.setDefaultProject(project); odps.setEndpoint(odpsServer); + odps.setUserAgent("DATAX"); return odps; } @@ -124,8 +138,7 @@ public static List<String> listOdpsPartitions(Table table) { parts.add(partition.getPartitionSpec().toString()); } } catch (Exception e) { - throw DataXException.asDataXException(OdpsWriterErrorCode.GET_PARTITION_FAIL, String.format("获取 ODPS 目的表:%s 的所有分区失败. 
请联系 ODPS 管理员处理.", - table.getName()), e); + throw DataXException.asDataXException(OdpsWriterErrorCode.GET_PARTITION_FAIL, MESSAGE_SOURCE.message("odpsutil.5", table.getName()), e); } return parts; } @@ -140,37 +153,45 @@ public static boolean isPartitionedTable(Table table) { } } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.CHECK_IF_PARTITIONED_TABLE_FAILED, - String.format("检查 ODPS 目的表:%s 是否为分区表失败, 请联系 ODPS 管理员处理.", table.getName()), e); + MESSAGE_SOURCE.message("odpsutil.6", table.getName()), e); } return false; } public static void truncateNonPartitionedTable(Odps odps, Table tab) { - String truncateNonPartitionedTableSql = "truncate table " + tab.getName() + ";"; + truncateNonPartitionedTable(odps, tab.getName()); + } + + public static void truncateNonPartitionedTable(Odps odps, String tableName) { + String truncateNonPartitionedTableSql = "truncate table " + tableName + ";"; try { - runSqlTaskWithRetry(odps, truncateNonPartitionedTableSql, MAX_RETRY_TIME, 1000, true); + LOG.info("truncate non partitioned table with sql: {}", truncateNonPartitionedTableSql); + runSqlTaskWithRetry(odps, truncateNonPartitionedTableSql, MAX_RETRY_TIME, 1000, true, "truncate", null); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.TABLE_TRUNCATE_ERROR, - String.format(" 清空 ODPS 目的表:%s 失败, 请联系 ODPS 管理员处理.", tab.getName()), e); + MESSAGE_SOURCE.message("odpsutil.7", tableName), e); } } public static void truncatePartition(Odps odps, Table table, String partition) { if (isPartitionExist(table, partition)) { + LOG.info("partition {} already exists, truncate it to clean old data", partition); dropPart(odps, table, partition); } + LOG.info("begin to add partition {}", partition); addPart(odps, table, partition); } private static boolean isPartitionExist(Table table, String partition) { // check if exist partition 返回值不为 null List<String> odpsParts = OdpsUtil.listOdpsPartitions(table); - int j = 0; for (; j < odpsParts.size(); j++) { if (odpsParts.get(j).replaceAll("'", "").equals(partition)) { + LOG.info("found a partition {} matching the configured partition {} (ignoring ' characters)", + odpsParts.get(j), partition); break; } } @@ -185,11 +206,14 @@ public static void addPart(Odps odps, Table table, String partition) { addPart.append("alter table ").append(table.getName()).append(" add IF NOT EXISTS partition(") .append(partSpec).append(");"); try { - runSqlTaskWithRetry(odps, addPart.toString(), MAX_RETRY_TIME, 1000, true); + Map<String, String> hints = new HashMap<String, String>(); + //开启ODPS SQL TYPE2.0类型 + hints.put("odps.sql.type.system.odps2", "true"); + LOG.info("add partition with sql: {}", addPart.toString()); + runSqlTaskWithRetry(odps, addPart.toString(), MAX_RETRY_TIME, 1000, true, "addPart", hints); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.ADD_PARTITION_FAILED, - String.format("添加 ODPS 目的表的分区失败. 错误发生在添加 ODPS 的项目:%s 的表:%s 的分区:%s. 请联系 ODPS 管理员处理.", - table.getProject(), table.getName(), partition), e); + MESSAGE_SOURCE.message("odpsutil.8", table.getProject(), table.getName(), partition), e); } } @@ -206,7 +230,7 @@ public TableTunnel.UploadSession call() throws Exception { }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.CREATE_MASTER_UPLOAD_FAIL, - "创建TunnelUpload失败. 
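// --------------------------------------------------------------------------
// Illustrative, stand-alone sketch (hypothetical class) of the normalization
// chain formatPartition applies above: trim, collapse spaces around '=',
// turn '/' separators into ',', and strip single quotes.
class FormatPartitionSketch {
    static String formatPartition(String partitionString) {
        return partitionString.trim().replaceAll(" *= *", "=").replaceAll(" */ *", ",")
                .replaceAll(" *, *", ",").replaceAll("'", "");
    }

    public static void main(String[] args) {
        // Prints: pt=20220517,ds=hangzhou
        System.out.println(formatPartition("pt = '20220517' / ds = 'hangzhou'"));
    }
}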
请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.9"), e); } } else { final PartitionSpec partitionSpec = new PartitionSpec(partition); @@ -219,7 +243,7 @@ public TableTunnel.UploadSession call() throws Exception { }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.CREATE_MASTER_UPLOAD_FAIL, - "创建TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.10"), e); } } } @@ -238,7 +262,7 @@ public TableTunnel.UploadSession call() throws Exception { } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.GET_SLAVE_UPLOAD_FAIL, - "获取TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.11"), e); } } else { final PartitionSpec partitionSpec = new PartitionSpec(partition); @@ -252,7 +276,7 @@ public TableTunnel.UploadSession call() throws Exception { } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.GET_SLAVE_UPLOAD_FAIL, - "获取TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.12"), e); } } } @@ -265,11 +289,14 @@ private static void dropPart(Odps odps, Table table, String partition) { .append(" drop IF EXISTS partition(").append(partSpec) .append(");"); try { - runSqlTaskWithRetry(odps, dropPart.toString(), MAX_RETRY_TIME, 1000, true); + Map hints = new HashMap(); + //开启ODPS SQL TYPE2.0类型 + hints.put("odps.sql.type.system.odps2", "true"); + LOG.info("drop partition with sql: {}", dropPart.toString()); + runSqlTaskWithRetry(odps, dropPart.toString(), MAX_RETRY_TIME, 1000, true, "truncate", hints); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.ADD_PARTITION_FAILED, - String.format("Drop ODPS 目的表分区失败. 错误发生在项目:%s 的表:%s 的分区:%s .请联系 ODPS 管理员处理.", - table.getProject(), table.getName(), partition), e); + MESSAGE_SOURCE.message("odpsutil.13", table.getProject(), table.getName(), partition), e); } } @@ -281,7 +308,7 @@ private static String getPartSpec(String partition) { String[] kv = part.split("="); if (kv.length != 2) { throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format("ODPS 目的表自身的 partition:%s 格式不对. 
正确的格式形如: pt=1,ds=hangzhou", partition)); } partSpec.append(kv[0]).append("="); partSpec.append("'").append(kv[1].replace("'", "")).append("'"); @@ -292,6 +319,38 @@ private static String getPartSpec(String partition) { return partSpec.toString(); } + public static Instance runSqlTaskWithRetry(final Odps odps, final String sql, String tag) { + try { + long beginTime = System.currentTimeMillis(); + + Instance instance = runSqlTaskWithRetry(odps, sql, MAX_RETRY_TIME, 1000, true, tag, null); + + long endTime = System.currentTimeMillis(); + LOG.info(String.format("execute odps sql: %s finished, cost time : %s ms", + sql, (endTime - beginTime))); + return instance; + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, + MESSAGE_SOURCE.message("odpsutil.16", sql), e); + } + } + + public static ResultSet getSqlTaskRecordsWithRetry(final Odps odps, final String sql, String tag) { + Instance instance = runSqlTaskWithRetry(odps, sql, tag); + if (instance == null) { + LOG.error("can not get odps instance from sql {}", sql); + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, + MESSAGE_SOURCE.message("odpsutil.16", sql)); + } + try { + return SQLTask.getResultSet(instance, instance.getTaskNames().iterator().next()); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, + MESSAGE_SOURCE.message("odpsutil.16", sql), e); + } + } + + /** * 该方法只有在 sql 为幂等的才可以使用,且odps抛出异常时候才会进行重试 * * @param query 执行sql * @throws Exception */ - public static void runSqlTaskWithRetry(final Odps odps, final String query, int retryTimes, - long sleepTimeInMilliSecond, boolean exponential) throws Exception { + public static Instance runSqlTaskWithRetry(final Odps odps, final String query, int retryTimes, + long sleepTimeInMilliSecond, boolean exponential, String tag, + Map<String, String> hints) throws Exception { for(int i = 0; i < retryTimes; i++) { try { - runSqlTask(odps, query); - return; + return runSqlTask(odps, query, tag, hints); } catch (DataXException e) { if (OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION.equals(e.getErrorCode())) { LOG.debug("Exception when calling callable", e); @@ -337,37 +396,86 @@ public static void runSqlTaskWithRetry(final Odps odps, final String query, int throw e; } } + return null; } - public static void runSqlTask(Odps odps, String query) { + public static Instance runSqlTask(Odps odps, String query, String tag, Map<String, String> hints) { if (StringUtils.isBlank(query)) { - return; + return null; } - String taskName = "datax_odpswriter_trunacte_" + UUID.randomUUID().toString().replace('-', '_'); - + String taskName = String.format("datax_odpswriter_%s_%s", tag, UUID.randomUUID().toString().replace('-', '_')); LOG.info("Try to start sqlTask:[{}] to run odps sql:[\n{}\n] .", taskName, query); //todo:biz_id set (目前ddl先不做) Instance instance; Instance.TaskStatus status; try { - instance = SQLTask.run(odps, odps.getDefaultProject(), query, taskName, null, null); + instance = SQLTask.run(odps, odps.getDefaultProject(), query, taskName, hints, null); instance.waitForSuccess(); status = instance.getTaskStatus().get(taskName); if (!Instance.TaskStatus.Status.SUCCESS.equals(status.getStatus())) { throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED, - String.format("ODPS 目的表在运行 ODPS SQL失败, 返回值为:%s. 请联系 ODPS 管理员处理. 
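// --------------------------------------------------------------------------
// Compact sketch of the retry discipline runSqlTaskWithRetry follows above:
// only idempotent SQL should be retried, and with exponential=true the sleep
// roughly doubles per attempt. Generic helper for illustration; not a DataX API.
import java.util.concurrent.Callable;

class RetrySketch {
    static <T> T withRetry(Callable<T> task, int retryTimes, long sleepMs, boolean exponential) throws Exception {
        Exception last = null;
        for (int i = 0; i < retryTimes; i++) {
            try {
                return task.call();
            } catch (Exception e) {
                last = e;
                // Back off before the next attempt: sleepMs, 2*sleepMs, 4*sleepMs, ...
                Thread.sleep(exponential ? sleepMs << i : sleepMs);
            }
        }
        throw last;
    }

    public static void main(String[] args) throws Exception {
        final int[] calls = {0};
        // Succeeds on the third attempt; sleeps 100 ms then 200 ms in between.
        String result = withRetry(() -> {
            if (++calls[0] < 3) {
                throw new RuntimeException("transient failure");
            }
            return "ok";
        }, 10, 100, true);
        System.out.println(result + " after " + calls[0] + " attempts");
    }
}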
SQL 内容为:[\n%s\n].", instance.getTaskResults().get(taskName), - query)); + MESSAGE_SOURCE.message("odpsutil.15", query)); } + return instance; } catch (DataXException e) { throw e; } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, - String.format("ODPS 目的表在运行 ODPS SQL 时抛出异常, 请联系 ODPS 管理员处理. SQL 内容为:[\n%s\n].", query), e); + MESSAGE_SOURCE.message("odpsutil.16", query), e); } } + + public static String generateTaskName(String tag) { + return String.format("datax_odpswriter_%s_%s", tag, UUID.randomUUID().toString().replace('-', '_')); + } + + public static void checkBlockComplete(final TableTunnel.UploadSession masterUpload, final Long[] blocks) { + Long[] serverBlocks; + try { + serverBlocks = + RetryUtil.executeWithRetry(new Callable() { + @Override + public Long[] call() throws Exception { + return masterUpload.getBlockList(); + } + }, MAX_RETRY_TIME, 1000L, true); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + MESSAGE_SOURCE.message("odpsutil.17", masterUpload.getId()), e); + } + + HashMap serverBlockMap = new HashMap(); + for (Long blockId : serverBlocks) { + serverBlockMap.put(blockId, true); + } + + for (Long blockId : blocks) { + if (!serverBlockMap.containsKey(blockId)) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + "BlockId[" + blockId + "] upload failed!"); + } + } + + } + + public static void masterComplete(final TableTunnel.UploadSession masterUpload) { + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Void call() throws Exception { + masterUpload.commit(); + return null; + } + }, MAX_RETRY_TIME, 1000L, true); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + MESSAGE_SOURCE.message("odpsutil.17", masterUpload.getId()), e); + } + } + public static void masterCompleteBlocks(final TableTunnel.UploadSession masterUpload, final Long[] blocks) { try { RetryUtil.executeWithRetry(new Callable() { @@ -379,30 +487,28 @@ public Void call() throws Exception { }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, - String.format("ODPS 目的表在提交 block:[\n%s\n] 时失败, uploadId=[%s]. 请联系 ODPS 管理员处理.", StringUtils.join(blocks, ","), masterUpload.getId()), e); + MESSAGE_SOURCE.message("odpsutil.17", StringUtils.join(blocks, ","), masterUpload.getId()), e); } } public static void slaveWriteOneBlock(final TableTunnel.UploadSession slaveUpload, final ProtobufRecordPack protobufRecordPack, - final long blockId, final boolean isCompress) { + final long blockId, final Long timeoutInMs) { try { RetryUtil.executeWithRetry(new Callable() { @Override public Void call() throws Exception { - TunnelRecordWriter tunnelRecordWriter = (TunnelRecordWriter)slaveUpload.openRecordWriter(blockId, isCompress); - tunnelRecordWriter.write(protobufRecordPack); - tunnelRecordWriter.close(); + slaveUpload.writeBlock(blockId, protobufRecordPack, timeoutInMs); return null; } }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, - String.format("ODPS 目的表写 block:%s 失败, uploadId=[%s]. 
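// --------------------------------------------------------------------------
// Plain-Java sketch of the consistency check checkBlockComplete performs
// above: every block id the writer produced must appear in the server-side
// block list before the upload session is committed. Illustrative only.
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

class BlockCheckSketch {
    static void checkBlocksComplete(Long[] clientBlocks, Long[] serverBlocks) {
        Set<Long> onServer = new HashSet<>(Arrays.asList(serverBlocks));
        for (Long blockId : clientBlocks) {
            if (!onServer.contains(blockId)) {
                throw new IllegalStateException("BlockId[" + blockId + "] upload failed!");
            }
        }
    }

    public static void main(String[] args) {
        checkBlocksComplete(new Long[]{0L, 1L, 2L}, new Long[]{0L, 1L, 2L}); // passes
        checkBlocksComplete(new Long[]{0L, 3L}, new Long[]{0L});             // throws
    }
}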
请联系 ODPS 管理员处理.", blockId, slaveUpload.getId()), e); + MESSAGE_SOURCE.message("odpsutil.18", blockId, slaveUpload.getId()), e); } } - public static List parsePosition(List allColumnList, + public static List parsePosition(List allColumnList, List allPartColumnList, List userConfiguredColumns) { List retList = new ArrayList(); @@ -416,9 +522,20 @@ public static List parsePosition(List allColumnList, break; } } + + if (null != allPartColumnList) { + for (int i = 0, len = allPartColumnList.size(); i < len; i++) { + if (allPartColumnList.get(i).equalsIgnoreCase(col)) { + retList.add(-1); + hasColumn = true; + break; + } + } + } + if (!hasColumn) { throw DataXException.asDataXException(OdpsWriterErrorCode.COLUMN_NOT_EXIST, - String.format("ODPS 目的表的列配置错误. 由于您所配置的列:%s 不存在,会导致datax无法正常插入数据,请检查该列是否存在,如果存在请检查大小写等配置.", col)); + MESSAGE_SOURCE.message("odpsutil.19", col)); } } return retList; @@ -436,22 +553,81 @@ public static List getAllColumns(TableSchema schema) { for(Column column: columns) { allColumns.add(column.getName()); type = column.getType(); - if (type == OdpsType.ARRAY || type == OdpsType.MAP) { - throw DataXException.asDataXException(OdpsWriterErrorCode.UNSUPPORTED_COLUMN_TYPE, - String.format("DataX 写入 ODPS 表不支持该字段类型:[%s]. 目前支持抽取的字段类型有:bigint, boolean, datetime, double, string. " + - "您可以选择不抽取 DataX 不支持的字段或者联系 ODPS 管理员寻求帮助.", - type)); - } } return allColumns; } - public static List getTableOriginalColumnTypeList(TableSchema schema) { - List tableOriginalColumnTypeList = new ArrayList(); + public static List getAllPartColumns(TableSchema schema) { + if (null == schema) { + throw new IllegalArgumentException("parameter schema can not be null."); + } + + List allPartColumns = new ArrayList<>(); + + List partCols = schema.getPartitionColumns(); + + for (Column column : partCols) { + allPartColumns.add(column.getName()); + } + + return allPartColumns; + } + + public static String getPartColValFromDataXRecord(com.alibaba.datax.common.element.Record dataxRecord, + List positions, List userConfiguredColumns, + Map dateTransFormMap) { + StringBuilder partition = new StringBuilder(); + for (int i = 0, len = dataxRecord.getColumnNumber(); i < len; i++) { + if (positions.get(i) == -1) { + if (partition.length() > 0) { + partition.append(","); + } + String partName = userConfiguredColumns.get(i); + //todo: 这里应该根据分区列的类型做转换,这里先直接toString转换了 + com.alibaba.datax.common.element.Column partitionCol = dataxRecord.getColumn(i); + String partVal = partitionCol.getRawData().toString(); + if (StringUtils.isBlank(partVal)) { + throw new DataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, String.format( + "value of column %s exit null value, it can not be used as partition column", partName)); + } + + // 如果分区列的值的格式是一个日期,并且用户设置列的转换规则 + DateTransForm dateTransForm = null; + if (null != dateTransFormMap) { + dateTransForm = dateTransFormMap.get(partName); + } + if (null != dateTransForm) { + try { + // 日期列 + if (partitionCol.getType().equals(com.alibaba.datax.common.element.Column.Type.DATE)) { + partVal = OdpsUtil.date2StringWithFormat(partitionCol.asDate(), dateTransForm.getToFormat()); + } + // String 列,需要先按照 fromFormat 转换为日期 + if (partitionCol.getType().equals(com.alibaba.datax.common.element.Column.Type.STRING)) { + partVal = OdpsUtil.date2StringWithFormat(partitionCol.asDate(dateTransForm.getFromFormat()), dateTransForm.getToFormat()); + } + } catch (DataXException e) { + LOG.warn("Parse {} with format {} error! Please check the column config and {} config. So user original value '{}'. 
Detail info: {}", + partVal, dateTransForm.toString(), Key.PARTITION_COL_MAPPING, partVal, e); + } + } + + partition.append(partName).append("=").append(partVal); + } + } + return partition.toString(); + } + + public static String date2StringWithFormat(Date date, String dateFormat) { + return DateFormatUtils.format(date, dateFormat, TimeZone.getTimeZone("GMT+8")); + } + + public static List getTableOriginalColumnTypeList(TableSchema schema) { + List tableOriginalColumnTypeList = new ArrayList(); List columns = schema.getColumns(); for (Column column : columns) { - tableOriginalColumnTypeList.add(column.getType()); + tableOriginalColumnTypeList.add(column.getTypeInfo()); } return tableOriginalColumnTypeList; @@ -465,8 +641,7 @@ public static void dealTruncate(Odps odps, Table table, String partition, boolea if (isPartitionedTable) { //分区表 if (StringUtils.isBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("您没有配置分区信息,因为你配置的表是分区表:%s 如果需要进行 truncate 操作,必须指定需要清空的具体分区. 请修改分区配置,格式形如 pt=${bizdate} .", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.21", table.getName())); } else { LOG.info("Try to truncate partition=[{}] in table=[{}].", partition, table.getName()); OdpsUtil.truncatePartition(odps, table, partition); @@ -474,8 +649,7 @@ public static void dealTruncate(Odps odps, Table table, String partition, boolea } else { //非分区表 if (StringUtils.isNotBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("分区信息配置错误,你的ODPS表是非分区表:%s 进行 truncate 操作时不需要指定具体分区值. 请检查您的分区配置,删除该配置项的值.", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.22", table.getName())); } else { LOG.info("Try to truncate table:[{}].", table.getName()); OdpsUtil.truncateNonPartitionedTable(odps, table); @@ -487,7 +661,7 @@ public static void dealTruncate(Odps odps, Table table, String partition, boolea //分区表 if (StringUtils.isBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是分区表,写入分区表:%s 时必须指定具体分区值. 请修改您的分区配置信息,格式形如 格式形如 pt=${bizdate}.", table.getName())); + MESSAGE_SOURCE.message("odpsutil.23", table.getName())); } else { boolean isPartitionExists = OdpsUtil.isPartitionExist(table, partition); if (!isPartitionExists) { @@ -500,7 +674,7 @@ public static void dealTruncate(Odps odps, Table table, String partition, boolea //非分区表 if (StringUtils.isNotBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是非分区表,写入非分区表:%s 时不需要指定具体分区值. 请删除分区配置信息", table.getName())); + MESSAGE_SOURCE.message("odpsutil.24", table.getName())); } } } @@ -523,14 +697,12 @@ public static void preCheckPartition(Odps odps, Table table, String partition, b if (isPartitionedTable) { //分区表 if (StringUtils.isBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("您没有配置分区信息,因为你配置的表是分区表:%s 如果需要进行 truncate 操作,必须指定需要清空的具体分区. 请修改分区配置,格式形如 pt=${bizdate} .", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.25", table.getName())); } } else { //非分区表 if (StringUtils.isNotBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("分区信息配置错误,你的ODPS表是非分区表:%s 进行 truncate 操作时不需要指定具体分区值. 
请检查您的分区配置,删除该配置项的值.", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.26", table.getName())); } } } else { @@ -539,13 +711,13 @@ public static void preCheckPartition(Odps odps, Table table, String partition, b //分区表 if (StringUtils.isBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是分区表,写入分区表:%s 时必须指定具体分区值. 请修改您的分区配置信息,格式形如 格式形如 pt=${bizdate}.", table.getName())); + MESSAGE_SOURCE.message("odpsutil.27", table.getName())); } } else { //非分区表 if (StringUtils.isNotBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是非分区表,写入非分区表:%s 时不需要指定具体分区值. 请删除分区配置信息", table.getName())); + MESSAGE_SOURCE.message("odpsutil.28", table.getName())); } } } @@ -558,29 +730,286 @@ public static void throwDataXExceptionWhenReloadTable(Exception e, String tableN if(e.getMessage() != null) { if(e.getMessage().contains(OdpsExceptionMsg.ODPS_PROJECT_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_PROJECT_NOT_FOUNT, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [project] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.29", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_TABLE_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_TABLE_NOT_FOUNT, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [table] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.30", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_ID_NOT_FOUND)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_KEY_ID_NOT_FOUND, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessId] [accessKey]是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.31", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_INVALID)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_KEY_INVALID, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessKey] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.32", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_DENY)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_DENY, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessId] [accessKey] [project]是否匹配.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.33", tableName), e); } } throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format("加载 ODPS 目的表:%s 失败. 
" + - "请检查您配置的 ODPS 目的表的 project,table,accessId,accessKey,odpsServer等值.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.34", tableName), e); + } + + /** + * count统计数据,自动创建统计表 + * @param tableName 统计表名字 + * @return + */ + public static String getCreateSummaryTableDDL(String tableName) { + return String.format("CREATE TABLE IF NOT EXISTS %s " + + "(src_table_name STRING, " + + "dest_table_name STRING, " + + "src_row_num BIGINT, " + + "src_query_time DATETIME, " + + "read_succeed_records BIGINT," + + "write_succeed_records BIGINT," + + "dest_row_num BIGINT, " + + "write_time DATETIME);", + tableName); + } + + /** + * count统计数据,获取count dml + * @param tableName + * @return + */ + public static String countTableSql(final String tableName, final String partition) { + if (StringUtils.isNotBlank(partition)) { + String[] partitions = partition.split("\\,"); + String p = String.join(" and ", partitions); + return String.format("SELECT COUNT(1) AS odps_num FROM %s WHERE %s;", tableName, p); + } else { + return String.format("SELECT COUNT(1) AS odps_num FROM %s;", tableName); + } + } + + /** + * count统计数据 dml 对应字段,用于查询 + * @return + */ + public static String countName() { + return "odps_num"; + } + + /** + * count统计数据dml + * @param summaryTableName 统计数据写入表 + * @param sourceTableName datax reader 表 + * @param destTableName datax writer 表 + * @param srcCount reader表行数 + * @param queryTime reader表查询时间 + * @param destCount writer 表行书 + * @return insert dml sql + */ + public static String getInsertSummaryTableSql(String summaryTableName, String sourceTableName, String destTableName, + Long srcCount, String queryTime, Number readSucceedRecords, + Number writeSucceedRecords, Long destCount) { + final String sql = "INSERT INTO %s (src_table_name,dest_table_name," + + " src_row_num, src_query_time, read_succeed_records, write_succeed_records, dest_row_num, write_time) VALUES ( %s );"; + + String insertData = String.format("'%s', '%s', %s, %s, %s, %s, %s, getdate()", + sourceTableName, destTableName, srcCount, queryTime, readSucceedRecords, writeSucceedRecords, destCount ); + return String.format(sql, summaryTableName, insertData); + } + + public static void createTable(Odps odps, String tableName, final String sql) { + try { + LOG.info("create table with sql: {}", sql); + runSqlTaskWithRetry(odps, sql, MAX_RETRY_TIME, 1000, true, "create", null); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED, + MESSAGE_SOURCE.message("odpsutil.7", tableName), e); + } + } + + public static void createTableFromTable(Odps odps, String resourceTable, String targetTable) { + TableSchema schema = odps.tables().get(resourceTable).getSchema(); + StringBuilder builder = new StringBuilder(); + Iterator iterator = schema.getColumns().iterator(); + while (iterator.hasNext()) { + Column c = iterator.next(); + builder.append(String.format(" %s %s ", c.getName(), c.getTypeInfo().getTypeName())); + if (iterator.hasNext()) { + builder.append(","); + } + } + String createTableSql = String.format("CREATE TABLE IF NOT EXISTS %s (%s);", targetTable, builder.toString()); + + try { + LOG.info("create table with sql: {}", createTableSql); + runSqlTaskWithRetry(odps, createTableSql, MAX_RETRY_TIME, 1000, true, "create", null); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED, + MESSAGE_SOURCE.message("odpsutil.7", targetTable), e); + } + } + + public static Object truncateSingleFieldData(OdpsType type, Object data, int limit, Boolean 
enableOverLengthOutput) { + if (data == null) { + return data; + } + if (OdpsType.STRING.equals(type)) { + if(enableOverLengthOutput) { + LOG.warn( + "InvalidData: The string's length is more than " + limit + " bytes. content:" + data); + } + LOG.info("before truncate string length:" + ((String) data).length()); + //确保特殊字符场景下的截断 + limit -= Constant.UTF8_ENCODED_CHAR_MAX_SIZE; + data = cutString((String) data, limit); + LOG.info("after truncate string length:" + ((String) data).length()); + } else if (OdpsType.BINARY.equals(type)) { + byte[] oriDataBytes = ((Binary) data).data(); + if(oriDataBytes == null){ + return data; + } + int originLength = oriDataBytes.length; + if (originLength <= limit) { + return data; + } + if(enableOverLengthOutput) { + LOG.warn("InvalidData: The binary's length is more than " + limit + " bytes. content:" + byteArrToHex(oriDataBytes)); + } + LOG.info("before truncate binary length:" + oriDataBytes.length); + byte[] newData = new byte[limit]; + System.arraycopy(oriDataBytes, 0, newData, 0, limit); + LOG.info("after truncate binary length:" + newData.length); + return new Binary(newData); + } + return data; + } + public static Object setNull(OdpsType type,Object data, int limit, Boolean enableOverLengthOutput) { + if (data == null ) { + return null; + } + if (OdpsType.STRING.equals(type)) { + if(enableOverLengthOutput) { + LOG.warn( + "InvalidData: The string's length is more than " + limit + " bytes. content:" + data); + } + return null; + } else if (OdpsType.BINARY.equals(type)) { + byte[] oriDataBytes = ((Binary) data).data(); + int originLength = oriDataBytes.length; + if (originLength > limit) { + if(enableOverLengthOutput) { + LOG.warn("InvalidData: The binary's length is more than " + limit + " bytes. content:" + new String(oriDataBytes)); + } + return null; + } + } + return data; + } + public static boolean validateStringLength(String value, long limit) { + try { + if (value.length() * Constant.UTF8_ENCODED_CHAR_MAX_SIZE > limit + && value.getBytes("utf-8").length > limit) { + return false; + } + } catch (Exception e) { + e.printStackTrace(); + return true; + } + return true; + } + public static String cutString(String sourceString, int cutBytes) { + if (sourceString == null || "".equals(sourceString.trim()) || cutBytes < 1) { + return ""; + } + int lastIndex = 0; + boolean stopFlag = false; + int totalBytes = 0; + for (int i = 0; i < sourceString.length(); i++) { + String s = Integer.toBinaryString(sourceString.charAt(i)); + if (s.length() > 8) { + totalBytes += 3; + } else { + totalBytes += 1; + } + if (!stopFlag) { + if (totalBytes == cutBytes) { + lastIndex = i; + stopFlag = true; + } else if (totalBytes > cutBytes) { + lastIndex = i - 1; + stopFlag = true; + } + } + } + if (!stopFlag) { + return sourceString; + } else { + return sourceString.substring(0, lastIndex + 1); + } + } + public static boolean dataOverLength(OdpsType type, Object data, int limit){ + if (data == null ) { + return false; + } + if (OdpsType.STRING.equals(type)) { + if(!OdpsUtil.validateStringLength((String)data, limit)){ + return true; + } + }else if (OdpsType.BINARY.equals(type)){ + byte[] oriDataBytes = ((Binary) data).data(); + if(oriDataBytes == null){ + return false; + } + int originLength = oriDataBytes.length; + if (originLength > limit) { + return true; + } + } + return false; + } + public static Object processOverLengthData(Object data, OdpsType type, String overLengthRule, int maxFieldLength, Boolean enableOverLengthOutput) { + try{ + //超长数据检查 + 
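// --------------------------------------------------------------------------
// Sketch of the byte-aware truncation cutString implements above: characters
// whose UTF-16 code unit needs more than 8 bits are budgeted as 3 UTF-8 bytes,
// so a multi-byte character is never split in half. Assumes OdpsUtil is on the
// classpath.
class CutStringSketch {
    public static void main(String[] args) {
        // "中" and "文" cost 3 bytes each, 'a' costs 1: a 7-byte budget keeps "中文a".
        System.out.println(OdpsUtil.cutString("中文abc", 7)); // 中文a
        // A string already under budget is returned unchanged.
        System.out.println(OdpsUtil.cutString("abc", 10));    // abc
    }
}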
if(OdpsWriter.maxOutputOverLengthRecord != null && OdpsWriter.globalTotalTruncatedRecordNumber.get() >= OdpsWriter.maxOutputOverLengthRecord){ + enableOverLengthOutput = false; + } + if ("truncate".equalsIgnoreCase(overLengthRule)) { + if (OdpsUtil.dataOverLength(type, data, OdpsWriter.maxOdpsFieldLength)) { + Object newData = OdpsUtil.truncateSingleFieldData(type, data, maxFieldLength, enableOverLengthOutput); + OdpsWriter.globalTotalTruncatedRecordNumber.incrementAndGet(); + return newData; + } + } else if ("setNull".equalsIgnoreCase(overLengthRule)) { + if (OdpsUtil.dataOverLength(type, data, OdpsWriter.maxOdpsFieldLength)) { + OdpsWriter.globalTotalTruncatedRecordNumber.incrementAndGet(); + return OdpsUtil.setNull(type, data, maxFieldLength, enableOverLengthOutput); + } + } + }catch (Throwable e){ + LOG.warn("truncate overLength data failed!", e); + } + return data; + } + private static final char HEX_CHAR_ARR[] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + /** + * 字节数组转十六进制字符串 + * @param btArr + * @return + */ + public static String byteArrToHex(byte[] btArr) { + char strArr[] = new char[btArr.length * 2]; + int i = 0; + for (byte bt : btArr) { + strArr[i++] = HEX_CHAR_ARR[bt>>>4 & 0xf]; + strArr[i++] = HEX_CHAR_ARR[bt & 0xf]; + } + return new String(strArr); + } + public static byte[] hexToByteArr(String hexStr) { + char[] charArr = hexStr.toCharArray(); + byte btArr[] = new byte[charArr.length / 2]; + int index = 0; + for (int i = 0; i < charArr.length; i++) { + // map each hex char to its 4-bit value + int highBit = Character.digit(charArr[i], 16); + int lowBit = Character.digit(charArr[++i], 16); + btArr[index] = (byte) (highBit << 4 | lowBit); + index++; + } + return btArr; } } diff --git a/odpswriter/src/main/libs/bcprov-jdk15on-1.52.jar b/odpswriter/src/main/libs/bcprov-jdk15on-1.52.jar deleted file mode 100644 index 6c54dd901c..0000000000 Binary files a/odpswriter/src/main/libs/bcprov-jdk15on-1.52.jar and /dev/null differ diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java index fe8dce2b99..88822089f2 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java @@ -6,14 +6,30 @@ import java.util.ArrayList; import java.util.HashMap; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:CliQuery - * - * @author Benedict Jin - * @since 2019-04-17 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
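// --------------------------------------------------------------------------
// Round-trip check for the OdpsUtil hex helpers defined earlier (byteArrToHex
// emits lower-case hex; hexToByteArr decodes it via Character.digit).
// Illustrative test class, assumes OdpsUtil is on the classpath.
import java.util.Arrays;

class HexRoundTripSketch {
    public static void main(String[] args) {
        byte[] data = {0x00, 0x7f, (byte) 0xff};
        String hex = OdpsUtil.byteArrToHex(data);
        System.out.println(hex); // 007fff
        System.out.println(Arrays.equals(data, OdpsUtil.hexToByteArr(hex))); // true
    }
}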
final class CliQuery { /** diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java index 97a841cfad..4b75acb9a0 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java @@ -4,14 +4,30 @@ import java.util.List; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Connection for TSDB-like databases - * - * @author Benedict Jin - * @since 2019-03-29 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public interface Connection4TSDB { /** diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java index 1f69024508..64c124aec2 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java @@ -4,14 +4,30 @@ import java.util.Map; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:DataPoint for TSDB - * - * @author Benedict Jin - * @since 2019-04-10 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public class DataPoint4TSDB { private long timestamp; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java index 56ab0bc2ab..4aed1458a3 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java @@ -12,14 +12,30 @@ import java.util.*; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Tool to dump the data straight from HBase - * - * @author Benedict Jin - * @since 2019-04-17 - */ +//This file is part of OpenTSDB. 
+ +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . final class DumpSeries { private static final Logger LOG = LoggerFactory.getLogger(DumpSeries.class); diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java index 9e7f12c93c..939a856f63 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java @@ -7,14 +7,30 @@ import java.util.List; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:OpenTSDB Connection - * - * @author Benedict Jin - * @since 2019-03-29 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public class OpenTSDBConnection implements Connection4TSDB { private String address; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java index 5ed0a314b3..009aa10098 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java @@ -7,14 +7,30 @@ import java.util.Map; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:OpenTSDB Dump - * - * @author Benedict Jin - * @since 2019-04-15 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . final class OpenTSDBDump { private static TSDB TSDB_INSTANCE; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java index 6017d4e5eb..286443de59 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java @@ -1,13 +1,29 @@ package com.alibaba.datax.plugin.reader.opentsdbreader; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Key - * - * @author Benedict Jin - * @since 2019-04-18 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public final class Constant { static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss"; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java index 5b8c4adc82..2d2c284437 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java @@ -1,13 +1,29 @@ package com.alibaba.datax.plugin.reader.opentsdbreader; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Key - * - * @author Benedict Jin - * @since 2019-04-18 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
public class Key { static final String ENDPOINT = "endpoint"; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java index d57456d15e..4cd0476e2d 100755 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java @@ -18,14 +18,30 @@ import java.util.Collections; import java.util.List; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Key - * - * @author Benedict Jin - * @since 2019-04-18 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . @SuppressWarnings("unused") public class OpenTSDBReader extends Reader { diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java index 0d9de4c458..479936c668 100755 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java @@ -2,14 +2,30 @@ import com.alibaba.datax.common.spi.ErrorCode; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:OpenTSDB Reader Error Code - * - * @author Benedict Jin - * @since 2019-04-18 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
public enum OpenTSDBReaderErrorCode implements ErrorCode { REQUIRED_VALUE("OpenTSDBReader-00", "缺失必要的值"), diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java index cdf5c9c1ab..cbd0d7cadd 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java @@ -9,14 +9,30 @@ import java.util.Map; import java.util.concurrent.TimeUnit; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:HttpUtils - * - * @author Benedict Jin - * @since 2019-03-29 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public final class HttpUtils { public final static Charset UTF_8 = Charset.forName("UTF-8"); diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java index 72c7fd62c3..bbfb75cb3c 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java @@ -7,14 +7,30 @@ import java.util.List; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:TSDB Utils - * - * @author Benedict Jin - * @since 2019-03-29 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public final class TSDBUtils { private static final Logger LOG = LoggerFactory.getLogger(TSDBUtils.class); diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java index 9bc11b3630..7d6bd11255 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java @@ -2,14 +2,30 @@ import java.util.concurrent.TimeUnit; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. 
- * Function:TimeUtils - * - * @author Benedict Jin - * @since 2019-04-22 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public final class TimeUtils { private TimeUtils() { diff --git a/ossreader/pom.xml b/ossreader/pom.xml index 1feb42ff9e..d27b6a3dfb 100755 --- a/ossreader/pom.xml +++ b/ossreader/pom.xml @@ -11,6 +11,17 @@ jar + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -43,13 +54,19 @@ com.aliyun.oss aliyun-sdk-oss - 2.2.3 + 3.4.2 junit junit test + + com.alibaba.datax + hdfsreader + 0.0.1-SNAPSHOT + compile + diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/Key.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/Key.java index e836fbbd09..efa953437b 100755 --- a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/Key.java +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/Key.java @@ -18,4 +18,29 @@ public class Key { public static final String CNAME = "cname"; + public static final String SUCCESS_ON_NO_Object = "successOnNoObject"; + + public static final String PROXY_HOST = "proxyHost"; + + public static final String PROXY_PORT = "proxyPort"; + + public static final String PROXY_USERNAME = "proxyUsername"; + + public static final String PROXY_PASSWORD = "proxyPassword"; + + public static final String PROXY_DOMAIN = "proxyDomain"; + + public static final String PROXY_WORKSTATION = "proxyWorkstation"; + + public static final String HDOOP_CONFIG = "hadoopConfig"; + + public static final String FS_OSS_ACCESSID = "fs.oss.accessKeyId"; + + public static final String FS_OSS_ACCESSKEY = "fs.oss.accessKeySecret"; + + public static final String FS_OSS_ENDPOINT = "fs.oss.endpoint"; + + /*判断分片是否均匀的标准,是否有分片长度超出平均值的百分比*/ + public static final String BALANCE_THRESHOLD = "balanceThreshold"; + } diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssInputStream.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssInputStream.java new file mode 100644 index 0000000000..a43146e7cf --- /dev/null +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssInputStream.java @@ -0,0 +1,132 @@ +package com.alibaba.datax.plugin.reader.ossreader; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.RetryUtil; +import com.aliyun.oss.OSSClient; +import com.aliyun.oss.model.GetObjectRequest; +import com.aliyun.oss.model.OSSObject; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.Callable; + +/** + * @Author: guxuan + * @Date 2022-05-17 15:52 + */ +public class 
OssInputStream extends InputStream { + + private final OSSClient ossClient; + private GetObjectRequest getObjectRequest; + + private long startIndex = 0; + private long endIndex = -1; + + private InputStream inputStream; + + /** + * retryTimes : 重试次数, 默认值是60次; + * description: 能够cover住的网络断连时间= retryTimes*(socket_timeout+sleepTime); + * 默认cover住的网络断连时间= 60*(5+5) = 600秒. + */ + private int retryTimes = 60; + + private static final Logger LOG = LoggerFactory.getLogger(OssInputStream.class); + + /** + * 如果start为0, end为1000, inputstream范围是[0,1000],共1001个字节 + * + * @param ossClient + * @param bucket + * @param object + * @param start inputstream start index + * @param end inputstream end index + */ + public OssInputStream(final OSSClient ossClient, final String bucket, final String object, long start, long end) { + this.ossClient = ossClient; + this.getObjectRequest = new GetObjectRequest(bucket, object); + this.startIndex = start; + this.getObjectRequest.setRange(this.startIndex, end); + this.endIndex = end; + try { + RetryUtil.executeWithRetry(new Callable<Boolean>() { + @Override + public Boolean call() throws Exception { + OSSObject ossObject = ossClient.getObject(getObjectRequest); + // 读取InputStream + inputStream = ossObject.getObjectContent(); + return true; + } + }, this.retryTimes, 5000, false); + } catch (Exception e) { + throw DataXException.asDataXException( + OssReaderErrorCode.RUNTIME_EXCEPTION,e.getMessage(), e); + } + } + + public OssInputStream(final OSSClient ossClient, final String bucket, final String object) { + this.ossClient = ossClient; + this.getObjectRequest = new GetObjectRequest(bucket, object); + this.getObjectRequest.setRange(startIndex, -1); + try { + RetryUtil.executeWithRetry(new Callable<Boolean>() { + @Override + public Boolean call() throws Exception { + OSSObject ossObject = ossClient.getObject(getObjectRequest); + // 读取InputStream + inputStream = ossObject.getObjectContent(); + return true; + } + }, this.retryTimes, 5000, false); + } catch (Exception e) { + throw DataXException.asDataXException( + OssReaderErrorCode.RUNTIME_EXCEPTION, e.getMessage(), e); + } + } + + @Override + public int read() throws IOException { + int cbyte; + try { + cbyte = RetryUtil.executeWithRetry(new Callable<Integer>() { + @Override + public Integer call() throws Exception { + try { + int c = inputStream.read(); + startIndex++; + return c; + } catch (Exception e) { + LOG.warn(e.getMessage(),e); + /** + * 必须将inputStream先关闭, 否则会造成连接泄漏 + */ + IOUtils.closeQuietly(inputStream); + // getOssRangeInputStream时,如果网络不连通,则会抛出异常,RetryUtil捕获异常进行重试 + inputStream = getOssRangeInputStream(startIndex); + int c = inputStream.read(); + startIndex++; + return c; + } + } + }, this.retryTimes,5000, false); + return cbyte; + } catch (Exception e) { + throw DataXException.asDataXException( + OssReaderErrorCode.RUNTIME_EXCEPTION, e.getMessage(), e); + } + } + + private InputStream getOssRangeInputStream(final long startIndex) { + LOG.info("Start to retry reading [inputStream] from Byte {}", startIndex); + // endIndex 为 -1 时表示不设置结束的字节位置,读取startIndex及其以后的所有数据 + getObjectRequest.setRange(startIndex, this.endIndex); + // 范围下载 + OSSObject ossObject = ossClient.getObject(getObjectRequest); + // 读取InputStream + return ossObject.getObjectContent(); + } +} diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssReader.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssReader.java index ce4f0875b4..62a1f81fe2 100755 --- 
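// --------------------------------------------------------------------------
// Usage sketch for the resumable range-reader above: stream a bounded byte
// range of an OSS object, with read() transparently re-opening the connection
// from the last offset after a network error. Endpoint, bucket and credential
// values are placeholders.
import com.aliyun.oss.OSSClient;
import java.io.BufferedInputStream;
import java.io.InputStream;

class OssInputStreamSketch {
    public static void main(String[] args) throws Exception {
        OSSClient client = new OSSClient("http://oss-cn-hangzhou.aliyuncs.com",
                "<accessId>", "<accessKey>");
        try (InputStream in = new BufferedInputStream(
                new OssInputStream(client, "my-bucket", "path/to/object.csv", 0, 1023))) {
            int b;
            while ((b = in.read()) != -1) {
                // consume bytes [0, 1023] of the object
            }
        } finally {
            client.shutdown();
        }
    }
}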
--- a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssReader.java
+++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssReader.java
@@ -4,33 +4,37 @@
 import com.alibaba.datax.common.plugin.RecordSender;
 import com.alibaba.datax.common.spi.Reader;
 import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.reader.hdfsreader.HdfsReader;
+import com.alibaba.datax.plugin.reader.ossreader.util.HdfsParquetUtil;
+import com.alibaba.datax.plugin.reader.ossreader.util.OssSplitUtil;
 import com.alibaba.datax.plugin.reader.ossreader.util.OssUtil;
+import com.alibaba.datax.plugin.unstructuredstorage.FileFormat;
 import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.binaryFileUtil.BinaryFileReaderUtil;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.split.StartEndPair;
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.TypeReference;
 import com.aliyun.oss.ClientException;
 import com.aliyun.oss.OSSClient;
 import com.aliyun.oss.OSSException;
 import com.aliyun.oss.model.ListObjectsRequest;
-import com.aliyun.oss.model.OSSObject;
 import com.aliyun.oss.model.OSSObjectSummary;
 import com.aliyun.oss.model.ObjectListing;
-import com.google.common.collect.Sets;
-import org.apache.commons.io.Charsets;
+import com.aliyun.oss.model.ObjectMetadata;
+import org.apache.commons.lang3.tuple.MutablePair;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import java.io.InputStream;
-import java.nio.charset.UnsupportedCharsetException;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Set;
+import java.util.Locale;
 import java.util.regex.Pattern;
-/**
- * Created by mengxin.liumx on 2014/12/7.
- */
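For orc and parquet input the reader does not parse the files itself: init() rewrites the job configuration via HdfsParquetUtil and forwards every lifecycle call (init/prepare/split/post/destroy, and startRead on the task side) to an embedded HdfsReader. A minimal sketch of that delegate-by-format shape; ReaderJob and FormatDispatchingJob are hypothetical stand-ins for the DataX plugin classes:

```java
/** Hypothetical slimmed-down lifecycle, standing in for Reader.Job. */
interface ReaderJob {
    void init();
    void prepare();
    void post();
    void destroy();
}

/** Dispatches the whole lifecycle to one of two delegates, chosen once in init(). */
class FormatDispatchingJob implements ReaderJob {
    private final ReaderJob columnarDelegate; // e.g. an HDFS-based reader for orc/parquet
    private final ReaderJob textDelegate;     // the native text/csv/binary path
    private final String fileFormat;
    private ReaderJob active;

    FormatDispatchingJob(String fileFormat, ReaderJob columnarDelegate, ReaderJob textDelegate) {
        this.fileFormat = fileFormat;
        this.columnarDelegate = columnarDelegate;
        this.textDelegate = textDelegate;
    }

    private static boolean isColumnar(String format) {
        return "orc".equalsIgnoreCase(format) || "parquet".equalsIgnoreCase(format);
    }

    @Override public void init() {
        // pick the delegate once, then hand everything to it
        active = isColumnar(fileFormat) ? columnarDelegate : textDelegate;
        active.init();
    }
    @Override public void prepare() { active.prepare(); }
    @Override public void post()    { active.post(); }
    @Override public void destroy() { active.destroy(); }
}
```

Delegating the complete lifecycle, rather than individual calls, keeps the two code paths from ever mixing state, which is why the real init() returns immediately after hdfsReaderJob.init().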
 public class OssReader extends Reader {
     public static class Job extends Reader.Job {
         private static final Logger LOG = LoggerFactory
@@ -38,194 +42,204 @@ public static class Job extends Reader.Job {
         private Configuration readerOriginConfig = null;
+        private OSSClient ossClient = null;
+        private String endpoint;
+        private String accessId;
+        private String accessKey;
+        private String bucket;
+        private boolean successOnNoObject;
+        private Boolean isBinaryFile;
+
+        private List<String> objects;
+        private List<Pair<String, Long>> objectSizePairs; /* basis for splitting the job into tasks */
+
+        private String fileFormat;
+
+        private HdfsReader.Job hdfsReaderJob;
+        private boolean useHdfsReaderProxy = false;
+
         @Override
         public void init() {
             LOG.debug("init() begin...");
             this.readerOriginConfig = this.getPluginJobConf();
+            this.basicValidateParameter();
+            this.fileFormat = this.readerOriginConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.FILE_FORMAT,
+                    com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_FILE_FORMAT);
+            this.useHdfsReaderProxy = HdfsParquetUtil.isUseHdfsWriterProxy(this.fileFormat);
+            if(useHdfsReaderProxy){
+                HdfsParquetUtil.adaptConfiguration(this.readerOriginConfig);
+                this.hdfsReaderJob = new HdfsReader.Job();
+                this.hdfsReaderJob.setJobPluginCollector(this.getJobPluginCollector());
+                this.hdfsReaderJob.setPeerPluginJobConf(this.getPeerPluginJobConf());
+                this.hdfsReaderJob.setPeerPluginName(this.getPeerPluginName());
+                this.hdfsReaderJob.setPluginJobConf(this.getPluginJobConf());
+                this.hdfsReaderJob.init();
+                return;
+            }
+
+            this.isBinaryFile = FileFormat.getFileFormatByConfiguration(this.readerOriginConfig).isBinary();
             this.validate();
+            UnstructuredStorageReaderUtil.validateCsvReaderConfig(this.readerOriginConfig);
+            this.successOnNoObject = this.readerOriginConfig.getBool(
+                    Key.SUCCESS_ON_NO_Object, false);
             LOG.debug("init() ok and end...");
         }
-        private void validate() {
-            String endpoint = this.readerOriginConfig.getString(Key.ENDPOINT);
+
+        private void basicValidateParameter(){
+            endpoint = this.readerOriginConfig.getString(Key.ENDPOINT);
             if (StringUtils.isBlank(endpoint)) {
                 throw DataXException.asDataXException(
-                        OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
-                        "您需要指定 endpoint");
+                        OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, "invalid endpoint");
             }
-            String accessId = this.readerOriginConfig.getString(Key.ACCESSID);
+            accessId = this.readerOriginConfig.getString(Key.ACCESSID);
             if (StringUtils.isBlank(accessId)) {
                 throw DataXException.asDataXException(
                         OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
-                        "您需要指定 accessId");
+                        "invalid accessId");
             }
-            String accessKey = this.readerOriginConfig.getString(Key.ACCESSKEY);
+            accessKey = this.readerOriginConfig.getString(Key.ACCESSKEY);
             if (StringUtils.isBlank(accessKey)) {
                 throw DataXException.asDataXException(
                         OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
-                        "您需要指定 accessKey");
+                        "invalid accessKey");
             }
+        }
+        // warn: validate endpoint, accessId, accessKey, bucket and object up front
+        private void validate() {
+            ossClient = OssUtil.initOssClient(this.readerOriginConfig);
+
-            String bucket = this.readerOriginConfig.getString(Key.BUCKET);
+            bucket = this.readerOriginConfig.getString(Key.BUCKET);
             if (StringUtils.isBlank(bucket)) {
                 throw DataXException.asDataXException(
                         OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
-                        "您需要指定 endpoint");
-            }
-
-            String object = this.readerOriginConfig.getString(Key.OBJECT);
-            if (StringUtils.isBlank(object)) {
+                        "invalid bucket");
+            }else 
if(!ossClient.doesBucketExist(bucket)){ throw DataXException.asDataXException( OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 object"); + "invalid bucket"); } - String fieldDelimiter = this.readerOriginConfig - .getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.FIELD_DELIMITER); - // warn: need length 1 - if (null == fieldDelimiter || fieldDelimiter.length() == 0) { + String object = this.readerOriginConfig.getString(Key.OBJECT); + if (StringUtils.isBlank(object)) { throw DataXException.asDataXException( OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 fieldDelimiter"); + "invalid object"); } - String encoding = this.readerOriginConfig - .getString( - com.alibaba.datax.plugin.unstructuredstorage.reader.Key.ENCODING, - com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_ENCODING); - try { - Charsets.toCharset(encoding); - } catch (UnsupportedCharsetException uce) { - throw DataXException.asDataXException( - OssReaderErrorCode.ILLEGAL_VALUE, - String.format("不支持的编码格式 : [%s]", encoding), uce); - } catch (Exception e) { - throw DataXException.asDataXException( - OssReaderErrorCode.ILLEGAL_VALUE, - String.format("运行配置异常 : %s", e.getMessage()), e); + if (this.isBinaryFile){ + return; } + UnstructuredStorageReaderUtil.validateParameter(this.readerOriginConfig); + } - // 检测是column 是否为 ["*"] 若是则填为空 - List column = this.readerOriginConfig - .getListConfiguration(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN); - if (null != column - && 1 == column.size() - && ("\"*\"".equals(column.get(0).toString()) || "'*'" - .equals(column.get(0).toString()))) { - readerOriginConfig - .set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN, - new ArrayList()); - } else { - // column: 1. index type 2.value type 3.when type is Data, may - // have - // format - List columns = this.readerOriginConfig - .getListConfiguration(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN); - - if (null == columns || columns.size() == 0) { - throw DataXException.asDataXException( - OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 columns"); - } - - if (null != columns && columns.size() != 0) { - for (Configuration eachColumnConf : columns) { - eachColumnConf - .getNecessaryValue( - com.alibaba.datax.plugin.unstructuredstorage.reader.Key.TYPE, - OssReaderErrorCode.REQUIRED_VALUE); - Integer columnIndex = eachColumnConf - .getInt(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.INDEX); - String columnValue = eachColumnConf - .getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.VALUE); - - if (null == columnIndex && null == columnValue) { - throw DataXException.asDataXException( - OssReaderErrorCode.NO_INDEX_VALUE, - "由于您配置了type, 则至少需要配置 index 或 value"); - } - - if (null != columnIndex && null != columnValue) { - throw DataXException.asDataXException( - OssReaderErrorCode.MIXED_INDEX_VALUE, - "您混合配置了index, value, 每一列同时仅能选择其中一种"); - } - - } - } - } - // only support compress: gzip,bzip2,zip - String compress = this.readerOriginConfig - .getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COMPRESS); - if (StringUtils.isBlank(compress)) { - this.readerOriginConfig - .set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COMPRESS, - null); - } else { - Set supportedCompress = Sets - .newHashSet("gzip", "bzip2", "zip"); - compress = compress.toLowerCase().trim(); - if (!supportedCompress.contains(compress)) { - throw DataXException - .asDataXException( - OssReaderErrorCode.ILLEGAL_VALUE, - String.format( - "仅支持 
gzip, bzip2, zip 文件压缩格式 , 不支持您配置的文件压缩格式: [%s]",
-                                            compress));
-                }
-                this.readerOriginConfig
-                        .set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COMPRESS,
-                                compress);
-            }
-        }
         @Override
         public void prepare() {
-            LOG.debug("prepare()");
+            if(useHdfsReaderProxy){
+                this.hdfsReaderJob.prepare();
+                return;
+            }
+            // treat every single object as one slice
+            this.objectSizePairs = parseOriginObjectSizePairs(readerOriginConfig.getList(Key.OBJECT, String.class));
+            this.objects = parseOriginObjects(readerOriginConfig.getList(Key.OBJECT, String.class));
+            UnstructuredStorageReaderUtil.setSourceFileName(readerOriginConfig, this.objects);
+            UnstructuredStorageReaderUtil.setSourceFile(readerOriginConfig, this.objects);
         }
         @Override
         public void post() {
+            if(useHdfsReaderProxy){
+                this.hdfsReaderJob.post();
+                return;
+            }
             LOG.debug("post()");
         }
         @Override
         public void destroy() {
+            if(useHdfsReaderProxy){
+                this.hdfsReaderJob.destroy();
+                return;
+            }
             LOG.debug("destroy()");
         }
         @Override
         public List<Configuration> split(int adviceNumber) {
             LOG.debug("split() begin...");
-            List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();
+            if(useHdfsReaderProxy){
+                return hdfsReaderJob.split(adviceNumber);
+            }
+            List<Configuration> readerSplitConfigs;
-            // 将每个单独的 object 作为一个 slice
-            List<String> objects = parseOriginObjects(readerOriginConfig
-                    .getList(Constant.OBJECT, String.class));
-            if (0 == objects.size()) {
+            if (0 == objects.size() && this.successOnNoObject) {
+                readerSplitConfigs = new ArrayList<Configuration>();
+                Configuration splitedConfig = this.readerOriginConfig.clone();
+                splitedConfig.set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.SPLIT_SLICE_CONFIG, null);
+                readerSplitConfigs.add(splitedConfig);
+                LOG.info("no OSS object to be read");
+                LOG.debug("split() ok and end...");
+                return readerSplitConfigs;
+            }else if (0 == objects.size()) {
                 throw DataXException.asDataXException(
                         OssReaderErrorCode.EMPTY_BUCKET_EXCEPTION,
-                        String.format(
-                                "未能找到待读取的Object,请确认您的配置项bucket: %s object: %s",
+                        String.format("Unable to find any object to read. Please check your configured [bucket]: %s and [object]: %s",
                                 this.readerOriginConfig.get(Key.BUCKET),
                                 this.readerOriginConfig.get(Key.OBJECT)));
             }
-            for (String object : objects) {
-                Configuration splitedConfig = this.readerOriginConfig.clone();
-                splitedConfig.set(Constant.OBJECT, object);
-                readerSplitConfigs.add(splitedConfig);
-                LOG.info(String.format("OSS object to be read:%s", object));
+            /**
+             * When the files are plain text and not compressed, a single file can be split
+             * internally so that it is read concurrently; users who do not want files to be
+             * split can set fileFormat to csv.
+             *
+             * Note: whether a file is text and whether it is compressed is determined solely
+             * from the job configuration.
+             *
+             * The decision of whether to split is factored out into a dedicated method.
+             * */
+            OssSplitUtil ossFileSplit = new OssSplitUtil(this.ossClient, this.bucket);
+            long t1 = System.currentTimeMillis();
+            readerSplitConfigs = ossFileSplit.getSplitedConfigurations(this.readerOriginConfig, this.objectSizePairs,
+                    adviceNumber);
+            long t2 = System.currentTimeMillis();
+            LOG.info("all split done, cost {}ms", t2 - t1);
+            /**
+             * Tell the user in the log why the number of channels DataX actually runs can be
+             * smaller than the configured number. Note: the reason reported here is not exact;
+             * it only mentions "one file, one task", while the size of a single file
+             * (theoretically one 64M block) also limits the internal split. In practice this
+             * causes few problems.
+             */
+            if(readerSplitConfigs.size() < adviceNumber){
+                LOG.info("[Note]: During OSSReader data synchronization, one file can only be synchronized in one task. You want to synchronize {} files "
+                        + "and the number is less than the number of channels you configured: {}. "
+                        + "Therefore, please take note that DataX will actually have {} sub-tasks, that is, the actual concurrent channels = {}",
+                        objects.size(), adviceNumber, objects.size(), objects.size());
             }
-            LOG.debug("split() ok and end...");
+            LOG.info("split() ok and end...");
             return readerSplitConfigs;
         }
         private List<String> parseOriginObjects(List<String> originObjects) {
-            List<String> parsedObjects = new ArrayList<String>();
+            List<String> objList = new ArrayList<>();
+
+            if (this.objectSizePairs == null) {
+                this.objectSizePairs = parseOriginObjectSizePairs(originObjects);
+            }
+
+            for (Pair<String, Long> objSizePair : this.objectSizePairs) {
+                objList.add(objSizePair.getKey());
+            }
+
+            return objList;
+        }
+
+        private List<Pair<String, Long>> parseOriginObjectSizePairs(List<String> originObjects) {
+            List<Pair<String, Long>> parsedObjectSizePairs = new ArrayList<Pair<String, Long>>();
             for (String object : originObjects) {
                 int firstMetaChar = (object.indexOf('*') > object.indexOf('?')) ? object
@@ -236,52 +250,130 @@ private List<String> parseOriginObjects(List<String> originObjects) {
                             IOUtils.DIR_SEPARATOR, firstMetaChar);
                     String parentDir = object
                             .substring(0, lastDirSeparator + 1);
-                    List<String> remoteObjects = getRemoteObjects(parentDir);
+                    List<Pair<String, Long>> allRemoteObjectSizePairs = getAllRemoteObjectsKeyAndSizeInDir(parentDir);
                     Pattern pattern = Pattern.compile(object.replace("*", ".*")
                             .replace("?", ".?"));
-                    for (String remoteObject : remoteObjects) {
-                        if (pattern.matcher(remoteObject).matches()) {
-                            parsedObjects.add(remoteObject);
+                    for (Pair<String, Long> remoteObjectSizePair : allRemoteObjectSizePairs) {
+                        if (pattern.matcher(remoteObjectSizePair.getKey()).matches()) {
+                            parsedObjectSizePairs.add(remoteObjectSizePair);
+                            LOG.info(String
+                                    .format("add object [%s] as a candidate to be read.",
+                                            remoteObjectSizePair.getKey()));
                         }
                     }
                 } else {
-                    parsedObjects.add(object);
+                    // if no wildcard pattern is configured, verify that the user-specified object actually exists
+                    try{
+                        ossClient.getObject(bucket, object);
+                        ObjectMetadata objMeta = ossClient.getObjectMetadata(bucket, object);
+                        parsedObjectSizePairs.add(new MutablePair<String, Long>(object, objMeta.getContentLength() <= OssSplitUtil.SINGLE_FILE_SPLIT_THRESHOLD_IN_SIZE ? -1L : objMeta.getContentLength()));
+                        LOG.info(String.format(
+                                "add object [%s] as a candidate to be read.",
+                                object));
+                    }catch (Exception e){
+                        trackOssDetailException(e, object);
+                    }
                 }
             }
-            return parsedObjects;
+            return parsedObjectSizePairs;
         }
-        private List<String> getRemoteObjects(String parentDir)
-                throws OSSException, ClientException {
+        // pinpoint the OSS configuration problem from the exception details
+        private void trackOssDetailException(Exception e, String object){
+            String errorMessage = e.getMessage();
+            if(StringUtils.isNotBlank(errorMessage)){
+                if(errorMessage.contains("UnknownHost")){
+                    // endpoint misconfigured
+                    throw DataXException.asDataXException(
+                            OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
+                            "The endpoint you configured is not correct. Please check the endpoint configuration", e);
+                }else if(errorMessage.contains("InvalidAccessKeyId")){
+                    // accessId misconfigured
+                    throw DataXException.asDataXException(
+                            OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
+                            "The accessId you configured is not correct. Please check the accessId configuration", e);
+                }else if(errorMessage.contains("SignatureDoesNotMatch")){
+                    // accessKey misconfigured
+                    throw DataXException.asDataXException(
+                            OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
+                            "The accessKey you configured is not correct. Please check the accessKey configuration", e);
+                }else if(errorMessage.contains("NoSuchKey")){
+                    if (e instanceof OSSException) {
+                        OSSException ossException = (OSSException) e;
+                        if ("NoSuchKey".equalsIgnoreCase(ossException
+                                .getErrorCode()) && this.successOnNoObject) {
+                            LOG.warn(String.format("oss object %s does not exist, skip reading it:", object), e);
+                            return;
+                        }
+                    }
+                    // object misconfigured
+                    throw DataXException.asDataXException(
+                            OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
+                            "The object you configured is not correct. Please check the object configuration");
+                }else{
+                    // other errors
+                    throw DataXException.asDataXException(
+                            OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
+                            String.format("Please check whether the configuration of [endpoint], [accessId], [accessKey], [bucket], and [object] are correct. Error reason: %s", e.getMessage()), e);
+                }
+            }else{
+                throw DataXException.asDataXException(
+                        OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,
+                        "The configured json is invalid", e);
+            }
+        }
+
+        private List<Pair<String, Long>> getAllRemoteObjectsKeyAndSizeInDir(String parentDir)
+                throws OSSException, ClientException{
+            List<Pair<String, Long>> objectSizePairs = new ArrayList<Pair<String, Long>>();
+            List<ObjectListing> objectListings = getRemoteObjectListings(parentDir);
+
+            if (objectListings.size() == 0) {
+                return objectSizePairs;
+            }
+
+            for (ObjectListing objectList : objectListings){
+                for (OSSObjectSummary objectSummary : objectList.getObjectSummaries()) {
+                    Pair<String, Long> objNameSize = new MutablePair<String, Long>(objectSummary.getKey(), objectSummary.getSize() <= OssSplitUtil.SINGLE_FILE_SPLIT_THRESHOLD_IN_SIZE ? -1L : objectSummary.getSize());
+                    objectSizePairs.add(objNameSize);
+                }
+            }
-            LOG.debug(String.format("父文件夹 : %s", parentDir));
+            return objectSizePairs;
+        }
+
+        private List<ObjectListing> getRemoteObjectListings(String parentDir) throws OSSException, ClientException {
+
+            List<ObjectListing> remoteObjectListings = new ArrayList<ObjectListing>();
+
+            LOG.debug("Parent folder: {}", parentDir);
-            List<String> remoteObjects = new ArrayList<String>();
             OSSClient client = OssUtil.initOssClient(readerOriginConfig);
+
             try {
                 ListObjectsRequest listObjectsRequest = new ListObjectsRequest(
                         readerOriginConfig.getString(Key.BUCKET));
                 listObjectsRequest.setPrefix(parentDir);
-                ObjectListing objectList;
+                ObjectListing remoteObjectList;
                 do {
-                    objectList = client.listObjects(listObjectsRequest);
-                    for (OSSObjectSummary objectSummary : objectList
-                            .getObjectSummaries()) {
-                        LOG.debug(String.format("找到文件 : %s",
-                                objectSummary.getKey()));
-                        remoteObjects.add(objectSummary.getKey());
+                    remoteObjectList = client.listObjects(listObjectsRequest);
+                    if (null != remoteObjectList) {
+                        LOG.info("ListObjects prefix: {} requestId: {}", remoteObjectList.getPrefix(), remoteObjectList.getRequestId());
+                    } else {
+                        LOG.info("ListObjectsRequest returned null");
                     }
-                    listObjectsRequest.setMarker(objectList.getNextMarker());
+                    remoteObjectListings.add(remoteObjectList);
+                    listObjectsRequest.setMarker(remoteObjectList.getNextMarker());
                     LOG.debug(listObjectsRequest.getMarker());
-                    LOG.debug(String.valueOf(objectList.isTruncated()));
-
-                } while (objectList.isTruncated());
-            } catch (IllegalArgumentException e) {
-                throw DataXException.asDataXException(
-                        OssReaderErrorCode.OSS_EXCEPTION, e.getMessage());
+                    LOG.debug(String.valueOf(remoteObjectList.isTruncated()));
+                } while (remoteObjectList.isTruncated());
+            } catch (Exception e) {
+                trackOssDetailException(e, null);
             }
-            return remoteObjects;
+            return remoteObjectListings;
         }
     }
@@ -289,30 +381,116 @@ public static class Task extends Reader.Task {
         private static Logger LOG = LoggerFactory.getLogger(Reader.Task.class);
         private 
Configuration readerSliceConfig; + private Boolean isBinaryFile; + private Integer blockSizeInByte; + private List allWorksForTask; + private boolean originSkipHeader; + private OSSClient ossClient; + private String fileFormat; + private HdfsReader.Task hdfsReaderTask; + private boolean useHdfsReaderProxy = false; + + @Override + public void init() { + this.readerSliceConfig = this.getPluginJobConf(); + this.fileFormat = this.readerSliceConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_FILE_FORMAT); + this.useHdfsReaderProxy = HdfsParquetUtil.isUseHdfsWriterProxy(this.fileFormat); + if(useHdfsReaderProxy){ + this.hdfsReaderTask = new HdfsReader.Task(); + this.hdfsReaderTask.setPeerPluginJobConf(this.getPeerPluginJobConf()); + this.hdfsReaderTask.setPeerPluginName(this.getPeerPluginName()); + this.hdfsReaderTask.setPluginJobConf(this.getPluginJobConf()); + this.hdfsReaderTask.setReaderPluginSplitConf(this.getReaderPluginSplitConf()); + this.hdfsReaderTask.setTaskGroupId(this.getTaskGroupId()); + this.hdfsReaderTask.setTaskId(this.getTaskId()); + this.hdfsReaderTask.setTaskPluginCollector(this.getTaskPluginCollector()); + this.hdfsReaderTask.init(); + return; + } + String allWorksForTaskStr = this.readerSliceConfig + .getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.SPLIT_SLICE_CONFIG); + if (StringUtils.isBlank(allWorksForTaskStr)) { + allWorksForTaskStr = "[]"; + } + this.allWorksForTask = JSON.parseObject(allWorksForTaskStr, new TypeReference>() { + }); + this.isBinaryFile = FileFormat.getFileFormatByConfiguration(this.readerSliceConfig).isBinary(); + this.blockSizeInByte = this.readerSliceConfig.getInt( + com.alibaba.datax.plugin.unstructuredstorage.reader.Key.BLOCK_SIZE_IN_BYTE, + com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_BLOCK_SIZE_IN_BYTE); + this.originSkipHeader = this.readerSliceConfig + .getBool(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.SKIP_HEADER, false); + } + + @Override + public void prepare() { + LOG.info("task prepare() begin..."); + if(useHdfsReaderProxy){ + this.hdfsReaderTask.prepare(); + return; + } + } + @Override public void startRead(RecordSender recordSender) { - LOG.debug("read start"); - String object = readerSliceConfig.getString(Key.OBJECT); - OSSClient client = OssUtil.initOssClient(readerSliceConfig); - - OSSObject ossObject = client.getObject( - readerSliceConfig.getString(Key.BUCKET), object); - InputStream objectStream = ossObject.getObjectContent(); - UnstructuredStorageReaderUtil.readFromStream(objectStream, object, - this.readerSliceConfig, recordSender, - this.getTaskPluginCollector()); + if(useHdfsReaderProxy){ + this.hdfsReaderTask.startRead(recordSender); + return; + } + boolean successOnNoObject = this.readerSliceConfig.getBool(Key.SUCCESS_ON_NO_Object, false); + if (this.allWorksForTask.isEmpty() && successOnNoObject) { + recordSender.flush(); + return; + } + String bucket = this.readerSliceConfig.getString(Key.BUCKET); + this.ossClient = OssUtil.initOssClient(this.readerSliceConfig); + for (StartEndPair eachSlice : this.allWorksForTask) { + String object = eachSlice.getFilePath(); + Long start = eachSlice.getStart(); + Long end = eachSlice.getEnd(); + LOG.info(String.format("read bucket=[%s] object=[%s], range: [start=%s, end=%s] start...", bucket, + object, start, end)); + InputStream ossInputStream = new OssInputStream(ossClient, bucket, object, start, end); + // 检查是否要跳过表头, 防止重复跳过首行 
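One detail in this read loop deserves emphasis: a single file may be cut into several byte-range slices, and only the slice starting at byte 0 physically contains the header row, so the configured skipHeader flag has to be re-evaluated for every slice, as the statement below does. A small self-contained illustration; shouldSkipHeader is a hypothetical helper, not part of the plugin:

```java
/** Sketch: apply skipHeader only to the slice that starts at byte 0. */
class SliceHeaderDemo {
    static boolean shouldSkipHeader(boolean userConfiguredSkipHeader, long sliceStart) {
        // Only the first slice of a file physically contains the header line;
        // skipping in a later slice would silently drop a data row instead.
        return userConfiguredSkipHeader && sliceStart == 0L;
    }

    public static void main(String[] args) {
        System.out.println(shouldSkipHeader(true, 0L));        // true  -> skip the header
        System.out.println(shouldSkipHeader(true, 1048576L));  // false -> the first line here is data
    }
}
```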
+ Boolean skipHeaderValue = this.originSkipHeader && (0L == start); + this.readerSliceConfig.set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.SKIP_HEADER, + skipHeaderValue); + try { + if (!this.isBinaryFile) { + UnstructuredStorageReaderUtil.readFromStream(ossInputStream, object, this.readerSliceConfig, + recordSender, this.getTaskPluginCollector()); + } else { + BinaryFileReaderUtil.readFromStream(ossInputStream, object, recordSender, this.blockSizeInByte); + } + } finally { + IOUtils.closeQuietly(ossInputStream); + } + } recordSender.flush(); } @Override - public void init() { - this.readerSliceConfig = this.getPluginJobConf(); + public void post() { + LOG.info("task post() begin..."); + if(useHdfsReaderProxy){ + this.hdfsReaderTask.post(); + return; + } } @Override public void destroy() { - + if(useHdfsReaderProxy){ + this.hdfsReaderTask.destroy(); + return; + } + try { + // this.ossClient.shutdown(); + } catch (Exception e) { + LOG.warn("shutdown ossclient meet a exception:" + e.getMessage(), e); + } } } } diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/HdfsParquetUtil.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/HdfsParquetUtil.java new file mode 100644 index 0000000000..f332bb9514 --- /dev/null +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/HdfsParquetUtil.java @@ -0,0 +1,40 @@ +package com.alibaba.datax.plugin.reader.ossreader.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.ossreader.Key; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; + +/** + * @Author: guxuan + * @Date 2022-05-17 15:46 + */ +public class HdfsParquetUtil { + public static boolean isUseHdfsWriterProxy( String fileFormat){ + if("orc".equalsIgnoreCase(fileFormat) || "parquet".equalsIgnoreCase(fileFormat)){ + return true; + } + return false; + } + + /** + * 配置readerOriginConfig 适配hdfsreader读取oss parquet + * https://help.aliyun.com/knowledge_detail/74344.html + * @param readerOriginConfig + */ + public static void adaptConfiguration(Configuration readerOriginConfig){ + String bucket = readerOriginConfig.getString(Key.BUCKET); + String fs =String.format("oss://%s",bucket); + readerOriginConfig.set(com.alibaba.datax.plugin.reader.hdfsreader.Key.DEFAULT_FS,fs); + readerOriginConfig.set(com.alibaba.datax.plugin.reader.hdfsreader.Key.FILETYPE, + readerOriginConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT)); + /** + * "path"、 "column" 相互一致 + */ + JSONObject hadoopConfig = new JSONObject(); + hadoopConfig.put(Key.FS_OSS_ACCESSID,readerOriginConfig.getString(Key.ACCESSID)); + hadoopConfig.put(Key.FS_OSS_ACCESSKEY,readerOriginConfig.getString(Key.ACCESSKEY)); + hadoopConfig.put(Key.FS_OSS_ENDPOINT,readerOriginConfig.getString(Key.ENDPOINT)); + readerOriginConfig.set(Key.HDOOP_CONFIG,Configuration.from(JSON.toJSONString(hadoopConfig))); + } +} diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/OssSplitUtil.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/OssSplitUtil.java new file mode 100644 index 0000000000..760d8d5ff3 --- /dev/null +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/OssSplitUtil.java @@ -0,0 +1,485 @@ +package com.alibaba.datax.plugin.reader.ossreader.util; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import 
com.alibaba.datax.plugin.reader.ossreader.OssInputStream;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.Key;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.split.StartEndPair;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.split.UnstructuredSplitUtil;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.aliyun.oss.OSSClient;
+import com.aliyun.oss.model.GetObjectRequest;
+import com.aliyun.oss.model.OSSObject;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * @Author: guxuan
+ * @Date 2022-05-17 15:48
+ */
+public class OssSplitUtil extends UnstructuredSplitUtil {
+
+    private static final Logger LOG = LoggerFactory.getLogger(UnstructuredSplitUtil.class);
+    public static final Long SINGLE_FILE_SPLIT_THRESHOLD_IN_SIZE = 64 * 1024 * 1024L; // files no larger than this threshold (64MB) are not split internally
+    private OSSClient ossClient;
+    private String bucketName;
+    private Double balanceThreshold;
+    private Long avgLen = -1L;
+    private Integer splitGroupNum = -1;
+
+    public OssSplitUtil(OSSClient ossClient, String bucketName) {
+        super(false);
+        this.ossClient = ossClient;
+        this.bucketName = bucketName;
+    }
+
+    @Override
+    public Long getFileTotalLength(String filePath) {
+        // total size of the object in bytes
+        GetObjectRequest getObjectRequest = new GetObjectRequest(this.bucketName, filePath);
+        OSSObject ossObject = this.ossClient.getObject(getObjectRequest);
+        return ossObject.getObjectMetadata().getContentLength();
+    }
+
+    @Override
+    public InputStream getFileInputStream(StartEndPair startEndPair) {
+        InputStream inputStream = new OssInputStream(this.ossClient, this.bucketName, startEndPair.getFilePath(),
+                startEndPair.getStart(), startEndPair.getEnd());
+        return inputStream;
+    }
+
+    private Boolean canSplitSingleFile(Configuration jobConfig) {
+        Boolean enableInnerSplit = jobConfig.getBool(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.ENABLE_INNER_SPLIT, true);
+        if (!enableInnerSplit) {
+            return false;
+        }
+
+        // no internal split by default
+        String fileFormat = jobConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.FILE_FORMAT,
+                com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_FILE_FORMAT);
+        String compressType = jobConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COMPRESS);
+
+        // if the file is not "plain text and uncompressed", return false right away
+        if (!StringUtils.equalsIgnoreCase(fileFormat, com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.FILE_FORMAT_TEXT) ||
+                !StringUtils.isBlank(compressType)) {
+            return false;
+        }
+
+        // todo: if the object is a symlink, internal split is not supported; detect this case
+
+        return true;
+    }
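The splitter distributes objects into adviceNumber groups and then checks whether the grouping is balanced: a grouping is accepted once no group's filled length exceeds the average by more than balanceThreshold percent (in the plugin, avgLen is ceil(totalBytes / splitGroupNum) and the threshold defaults to 10.0). A compact sketch of that acceptance test, assuming the group lengths are already known; BalanceCheckDemo and isBalanced are illustrative names only:

```java
import java.util.Arrays;
import java.util.List;

/** Sketch of the balance check: a grouping is "balanced" when no group
 *  exceeds the average length by more than balanceThresholdPercent. */
class BalanceCheckDemo {
    static boolean isBalanced(List<Long> groupLengths, double balanceThresholdPercent) {
        double avg = groupLengths.stream().mapToLong(Long::longValue).sum()
                / (double) groupLengths.size();
        double limit = avg * (1.0 + balanceThresholdPercent / 100.0);
        // one oversized group is enough to trigger another rebalance round
        return groupLengths.stream().allMatch(len -> len <= limit);
    }

    public static void main(String[] args) {
        System.out.println(isBalanced(Arrays.asList(100L, 110L, 90L), 10.0)); // true
        System.out.println(isBalanced(Arrays.asList(100L, 200L, 0L), 10.0));  // false -> rebalance needed
    }
}
```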
+
+    private boolean isGroupsBalance(List<Group> groups) {
+        assert (groups != null);
+
+        if(groups.size() <= 1) {
+            return true;
+        }
+
+        double avg = (double) this.avgLen * (1.0 + this.balanceThreshold/100);
+        for (Group group : groups) {
+            if(group.getFilledLength() > avg) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /*
+     * Distribute allObjKeySizePairs into N groups so that the total file size of each group is as close as possible.
+     * */
+    private List<Group> splitObjectToGroups(List<Pair<String, Long>> allObjKeySizePairs, Integer N) {
+        List<Group> groups;
+
+        // if the number of files <= N, each file gets its own group
+        if(allObjKeySizePairs.size() <= N) {
+            groups = new ArrayList<>();
+            for (Pair<String, Long> pair : allObjKeySizePairs) {
+                // capacity is initialized to avgLen
+                Group group = new Group(avgLen);
+                FileBlock fileBlock = new FileBlock(pair.getKey(), 0L, pair.getValue() - 1);
+                group.fill(fileBlock);
+                groups.add(group);
+            }
+
+            // fewer files than N: pad with empty groups
+            for (int i = groups.size(); i < N; i++) {
+                groups.add(new Group(avgLen));
+            }
+
+            return groups;
+        }
+
+        // number of files > N:
+        // sort allObjKeySizePairs by size in descending order
+        allObjKeySizePairs.sort(new Comparator<Pair<String, Long>>() {
+            @Override
+            public int compare(Pair<String, Long> o1, Pair<String, Long> o2) {
+                if (o1.getValue().compareTo(o2.getValue()) < 0) {
+                    return 1;
+                }
+                if (o1.getValue().equals(o2.getValue())) {
+                    return 0;
+                }
+                return -1;
+            }
+        });
+
+        groups = new ArrayList<>(N);
+
+        for (int i = 0; i < N; i++) {
+            Group group = new Group(avgLen);
+            groups.add(group);
+        }
+
+        for (Pair<String, Long> pair : allObjKeySizePairs) {
+            FileBlock fileBlock = new FileBlock(pair.getKey(), 0L, pair.getValue() - 1);
+
+            // in the extreme case avgLen < 0, simply spread the files over the groups by count
+            if (avgLen > 0 && pair.getValue() >= avgLen) {
+                // if the file size >= avgLen, give it a group of its own (put it into an empty group)
+                for (int index = 0; index < N; index++) {
+                    if (groups.get(index).isEmpty()) {
+                        groups.get(index).fill(fileBlock);
+                        break;
+                    }
+                }
+            } else {
+                // if the file is smaller than the average, put it into a group that can currently
+                // hold it and whose remaining capacity is the tightest fit
+                int selectedIndex = 0, index = 0;
+                // first find the first group that can hold it
+                for (; index < N; index++) {
+                    if (groups.get(index).getCapacity() >= fileBlock.getSize()) {
+                        selectedIndex = index;
+                        break;
+                    }
+                }
+                // then find the fitting group with the smallest remaining capacity
+                for (; index < N; index++) {
+                    if (groups.get(index).getCapacity() >= fileBlock.getSize()
+                            && groups.get(index).getCapacity() < groups.get(selectedIndex).getCapacity()) {
+                        selectedIndex = index;
+                    }
+                }
+                groups.get(selectedIndex).fill(fileBlock);
+            }
+
+        }
+
+        return groups;
+    }
+
+    private void reBalanceGroup(List<Group> groups) {
+        LOG.info("reBalance start");
+        assert (groups != null && groups.size() > 0);
+        // split the files inside some groups further
+        /* 1. pick the groups with the smallest and the largest load */
+        Group groupMinLoad = groups.get(0);
+        Group groupMaxLoad = groups.get(0);
+        for (Group group : groups) {
+            if (group.getFilledLength() > groupMaxLoad.getFilledLength()) {
+                groupMaxLoad = group;
+            }
+
+            if (group.getFilledLength() < groupMinLoad.getFilledLength()) {
+                groupMinLoad = group;
+            }
+        }
+
+        /* 2. cut a part of the biggest file of groupMaxLoad and move it into groupMinLoad;
+         * its size is min{groupMaxLoad.length - mean, mean - groupMinLoad.length} */
+        Long splitLen = Math.min(groupMinLoad.getCapacity(), groupMaxLoad.getOverloadLength());
+        FileBlock splitOutBlock = groupMaxLoad.split(splitLen, this.ossClient, this.bucketName);
+
+        groupMinLoad.fill(splitOutBlock);
+        LOG.info("reBalance end");
+    }
+
+    private Long getTotalLenOfObjList(List<Pair<String, Long>> objKeySizePairs) {
+        Long totalLen = 0L;
+        for (Pair<String, Long> pair : objKeySizePairs) {
+            totalLen += (pair.getValue() < 0 ? 1 : pair.getValue());
+        }
+
+        return totalLen;
+    }
+
+    public List<Configuration> getSplitedConfigurations(Configuration originConfiguration, List<Pair<String, Long>> objKeySizePairs,
+                                                        int adviceNumber) {
+        List<Configuration> configurationList = new ArrayList<>();
+
+        this.splitGroupNum = adviceNumber;
+        this.avgLen = (long)Math.ceil((double)this.getTotalLenOfObjList(objKeySizePairs) / this.splitGroupNum);
+        this.balanceThreshold = originConfiguration.getDouble(com.alibaba.datax.plugin.reader.ossreader.Key.BALANCE_THRESHOLD, 10.0);
+
+        List<Group> groups = this.splitObjectToGroups(objKeySizePairs, this.splitGroupNum);
+
+        // if the groups already have similar lengths, no per-file internal split is needed;
+        // otherwise single files are split further to adjust the balance
+        if (canSplitSingleFile(originConfiguration)) {
+            // bound the loop in case single lines inside a file are huge; in theory at most splitGroupNum adjustments are needed
+            Integer i = 0;
+            Long timeStart = System.currentTimeMillis();
+            while (i++ < splitGroupNum && !this.isGroupsBalance(groups)) {
+                this.reBalanceGroup(groups);
+            }
+            Long timeEnd = System.currentTimeMillis();
+            LOG.info("split groups cost {} ms", timeEnd - timeStart);
+        }
+
+        LOG.info("Split groups:\n");
+        for (Group group : groups) {
+            LOG.info(group.toString());
+        }
+
+        // initialize each slice configuration from the computed groups
+        for (Group group : groups) {
+            Configuration configuration = originConfiguration.clone();
+            List<StartEndPair> startEndPairs = new ArrayList<>();
+            for (FileBlock fileBlock : group.getFileBlocks()) {
+                if (canSplitSingleFile(originConfiguration)) {
+                    startEndPairs.add(new StartEndPair(fileBlock.getStartOffset(), fileBlock.getEndOffset(), fileBlock.getObjName()));
+                } else {
+                    // if internal split is not supported, set the end offset to -1 and read the whole file;
+                    // for symlink files this is required. 30190064
+                    startEndPairs.add(new StartEndPair(fileBlock.getStartOffset(), -1L, fileBlock.getObjName()));
+                }
+            }
+            configuration.set(Key.SPLIT_SLICE_CONFIG, startEndPairs);
+            configurationList.add(configuration);
+        }
+
+        return configurationList;
+    }
+}
+
+class Group {
+    /*
+     * fileBlockList holds the file blocks of this group; a single block is the triple <objName, startOffset, endOffset>
+     * */
+    private List<FileBlock> fileBlockList;
+    private Long capacity;
+    private Long filledLength;
+    private static final Logger LOG = LoggerFactory.getLogger(Group.class);
+
+    Group (Long capacity) {
+        this(new ArrayList<>(), capacity);
+    }
+
+    Group (List<FileBlock> fileBlockList, Long capacity) {
+        this.capacity = capacity;
+        this.fileBlockList = fileBlockList;
+        this.filledLength = 0L;
+        for (FileBlock fileBlock : fileBlockList) {
+            this.filledLength += fileBlock.getSize();
+            this.capacity -= fileBlock.getSize();
+        }
+    }
+
+    void fill(FileBlock fileBlock) {
+        if (null == fileBlock) {
+            return;
+        }
+        this.fileBlockList.add(fileBlock);
+        this.capacity -= fileBlock.getSize();
+        this.filledLength += fileBlock.getSize();
+    }
+
+    void take(FileBlock fileBlock) {
+        this.capacity += fileBlock.getSize();
+        this.filledLength -= fileBlock.getSize();
+        this.fileBlockList.remove(fileBlock);
+    }
+
+    Long getCapacity() {
+        return this.capacity;
+    }
+
+    void setCapacity(Long capacity) {
+        this.capacity = capacity;
+    }
+
+    Long getFilledLength() {
+        return this.filledLength;
+    }
+
+    public boolean isEmpty() {
+        return this.fileBlockList.isEmpty();
+    }
+
+    public boolean isFull() {
+        return this.capacity <= 0;
+    }
+
+    List<FileBlock> getFileBlocks() {
+        return this.fileBlockList;
+    }
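When a group is still overloaded, its biggest block is cut at a tentative byte offset and the cut is then pushed forward to the next '\n' (see split and getLFIndex just below) so that no text record is torn across two slices. A standalone sketch of that newline alignment, using a plain ByteArrayInputStream; distanceToNewline is a hypothetical helper mirroring getLFIndex:

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

/** Sketch: push a tentative cut point forward to the next '\n' so a
 *  byte-range slice always ends on a record boundary. */
class NewlineAlignDemo {
    /** Returns the offset (relative to the stream start) of the first '\n', or -1 if none. */
    static long distanceToNewline(InputStream in) throws IOException {
        long offset = -1;
        int b;
        while ((b = in.read()) != -1) {
            offset++;
            if (b == '\n') {
                return offset;
            }
        }
        return -1;
    }

    public static void main(String[] args) throws IOException {
        byte[] data = "abc\ndef\n".getBytes(StandardCharsets.US_ASCII);
        // Tentative cut after byte 1 ('b'); scan from byte 2 to find the next newline.
        InputStream tail = new ByteArrayInputStream(data, 2, data.length - 2);
        System.out.println(distanceToNewline(tail)); // 1 -> the '\n' sits at absolute offset 3
    }
}
```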
+
+    private Integer getBiggestFileBlock() {
+        Integer index = 0;
+        for (int i = 1; i < this.fileBlockList.size(); i++) {
+            // keep the index of the largest block seen so far
+            if (this.fileBlockList.get(i).getSize() > this.fileBlockList.get(index).getSize()) {
+                index = i;
+            }
+        }
+        return index;
+    }
+
+    /*
+     * Split the group: cut its biggest block so that the first splitLen bytes become a new block.
+     * */
+    FileBlock split(Long splitLen, OSSClient ossClient, String ossBucketName) {
+        Integer bigBlockIndex = this.getBiggestFileBlock();
+        FileBlock bigBlock = this.fileBlockList.get(bigBlockIndex);
+        // if the biggest block does not exceed the split threshold (64MB), do not split; return directly
+        if (bigBlock.getSize() <= OssSplitUtil.SINGLE_FILE_SPLIT_THRESHOLD_IN_SIZE) {
+            return null;
+        }
+
+        FileBlock outBlock;
+        FileBlock remainBlock;
+
+        this.take(bigBlock);
+
+        // if splitLen is not smaller than the biggest block, move the whole block out
+        if (splitLen >= bigBlock.getSize()) {
+            outBlock = new FileBlock(bigBlock);
+        } else {
+            Long originalEnd = bigBlock.getEndOffset();
+            outBlock = new FileBlock(bigBlock.getObjName(), bigBlock.getStartOffset(), bigBlock.getStartOffset() + splitLen - 1);
+
+            // align the end offset of the first block, i.e. push it forward to the next line feed
+            InputStream inputStream = new OssInputStream(ossClient, ossBucketName, outBlock.getObjName(), outBlock.getEndOffset(), originalEnd);
+            Long endForward = this.getLFIndex(inputStream);
+            outBlock.setEndOffset(outBlock.getEndOffset() + endForward);
+
+            // outBlock takes the leading records; after the cut the remainder may be empty,
+            // so only create remainBlock when something is left (outBlock.end < originalEnd).
+            if (outBlock.getEndOffset() < originalEnd) {
+                remainBlock = new FileBlock(bigBlock.getObjName(), outBlock.getEndOffset() + 1, originalEnd);
+                this.fill(remainBlock);
+            }
+        }
+
+        return outBlock;
+    }
+
+    Long getOverloadLength() {
+        return Math.max(0, -this.capacity);
+    }
+
+    /**
+     * Returns the offset of the first '\n' counted from the beginning of the input stream.
+     *
+     * @param inputStream the stream to scan
+     * @return offset of the first line feed, or -1 if none is found
+     */
+    public Long getLFIndex(InputStream inputStream) {
+        Long hasReadByteIndex = -1L;
+        int ch = 0;
+        while (ch != -1) {
+            try {
+                ch = inputStream.read();
+            } catch (IOException e) {
+                throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR,
+                        String.format("inputstream read Byte has exception: %s", e.getMessage()), e);
+            }
+            hasReadByteIndex++;
+
+            if (ch == '\n') {
+                return hasReadByteIndex;
+            }
+        }
+        return -1L;
+    }
+
+    public String toString() {
+        JSONArray fbList = new JSONArray();
+        int index = 0;
+        for (FileBlock fb : this.fileBlockList) {
+            JSONObject jsonObject = new JSONObject();
+            jsonObject.put(String.format("block[%d]", index++), fb.toString());
+            fbList.add(jsonObject);
+        }
+        return fbList.toString();
+    }
+}
+
+class FileBlock {
+    private String objName;
+    private Long startOffset;
+    private Long endOffset;
+    private Long size;
+
+    FileBlock(String objName, Long startOffset, Long endOffset) {
+        assert (StringUtils.isNotBlank(objName) && startOffset >= 0 );
+        assert (endOffset == -1 || startOffset <= endOffset);
+
+        this.objName = objName;
+        this.startOffset = startOffset;
+        // a negative endOffset is normalized to -1 (unknown length), and the size then counts as 1 byte
+        this.endOffset = endOffset < 0 ? -1 : endOffset;
+        this.size = endOffset < 0 ? 
1 : this.endOffset - this.startOffset + 1; + } + + public FileBlock(String objName) { + this(objName, 0L, -1L); + } + + public FileBlock(String objName, Pair starEndPair) { + this(objName, starEndPair.getKey(), starEndPair.getValue()); + } + + public FileBlock(FileBlock fileBlock) { + assert (fileBlock != null); + this.objName = fileBlock.objName; + this.startOffset = fileBlock.startOffset; + this.endOffset = fileBlock.endOffset; + this.size = fileBlock.size; + } + + Long getSize() { + return this.size; + } + + Long getStartOffset() { + return this.startOffset; + } + + void setStartOffset(Long startOffset) { + Long deltaSize = this.startOffset - startOffset; + this.startOffset = startOffset; + this.size += deltaSize; + } + + Long getEndOffset() { + return this.endOffset; + } + + void setEndOffset(Long endOffset) { + Long deltaSize = endOffset - this.endOffset; + this.endOffset = endOffset; + //size随之调整 + this.size += deltaSize; + } + + String getObjName() { + return this.objName; + } + + public String toString() { + return String.format("<%s,%d,%d>", this.objName, this.startOffset, this.endOffset); + } +} diff --git a/osswriter/pom.xml b/osswriter/pom.xml index 90d84c10d0..ac4029e0e9 100644 --- a/osswriter/pom.xml +++ b/osswriter/pom.xml @@ -10,6 +10,17 @@ osswriter jar + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -44,6 +55,60 @@ aliyun-sdk-oss 2.2.3 + + + + org.apache.parquet + parquet-column + 1.8.1 + + + org.apache.parquet + parquet-avro + 1.8.1 + + + org.apache.parquet + parquet-common + 1.8.1 + + + org.apache.parquet + parquet-format + 2.3.1 + + + org.apache.parquet + parquet-jackson + 1.8.1 + + + org.apache.parquet + parquet-encoding + 1.8.1 + + + org.apache.parquet + parquet-hadoop + 1.8.1 + + + com.twitter + parquet-hadoop-bundle + 1.6.0 + + + com.alibaba.datax + hdfswriter + 0.0.1-SNAPSHOT + compile + + + com.alibaba.datax + datax-core + 0.0.1-SNAPSHOT + compile + @@ -77,4 +142,4 @@ - \ No newline at end of file + diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Constant.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Constant.java index 5bf2eb46e3..b5c7110b5c 100644 --- a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Constant.java +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Constant.java @@ -6,4 +6,11 @@ public class Constant { public static final String OBJECT = "object"; public static final int SOCKETTIMEOUT = 5000000; + public static final String DEFAULT_NULL_FORMAT = "null"; + + /** + * 每一个上传的Part都有一个标识它的号码(part number,范围是1-10000) + * https://help.aliyun.com/document_detail/31993.html + */ + public static final int MAX_BLOCK_SIZE = 10000; } diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Key.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Key.java index b922f59c0c..8ce263b052 100644 --- a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Key.java +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Key.java @@ -16,4 +16,55 @@ public class Key { public static final String CNAME = "cname"; + public static final String PARTITION = "partition"; + + /** + * encrypt: 是否需要将数据在oss上加密存储 + */ + public static final String ENCRYPT = "encrypt"; + + public static final String BLOCK_SIZE_IN_MB = "blockSizeInMB"; + + public static final String OSS_CONFIG = "oss"; + public static final String 
POSTGRESQL_CONFIG = "postgresql"; + + public static final String PROXY_HOST = "proxyHost"; + + public static final String PROXY_PORT = "proxyPort"; + + public static final String PROXY_USERNAME = "proxyUsername"; + + public static final String PROXY_PASSWORD = "proxyPassword"; + + public static final String PROXY_DOMAIN = "proxyDomain"; + + public static final String PROXY_WORKSTATION = "proxyWorkstation"; + + public static final String HDOOP_CONFIG = "hadoopConfig"; + + public static final String FS_OSS_ACCESSID = "fs.oss.accessKeyId"; + + public static final String FS_OSS_ACCESSKEY = "fs.oss.accessKeySecret"; + + public static final String FS_OSS_ENDPOINT = "fs.oss.endpoint"; + /** + * 多个task是否写单个object文件: + * false 多个task写多个object(默认是false, 保持向前兼容) + * true 多个task写单个object + */ + public static final String WRITE_SINGLE_OBJECT = "writeSingleObject"; + + public static final String UPLOAD_ID = "uploadId"; + + /** + * Only for parquet or orc fileType + */ + public static final String PATH = "path"; + /** + * Only for parquet or orc fileType + */ + public static final String FILE_NAME = "fileName"; + + public static final String GENERATE_EMPTY_FILE = "generateEmptyFile"; + } diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssSingleObject.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssSingleObject.java new file mode 100644 index 0000000000..415cf39cdf --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssSingleObject.java @@ -0,0 +1,78 @@ +package com.alibaba.datax.plugin.writer.osswriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.aliyun.oss.model.PartETag; +import org.apache.commons.lang3.ArrayUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:30 + */ +public class OssSingleObject { + private static Logger logger = LoggerFactory.getLogger(OssSingleObject.class); + + /** + * 一个uploadId即一个oss对象 + */ + public static String uploadId; + + /** + * 将最后一个未提交的block全部缓存到lastBlockBuffer中 + */ + private static byte[] lastBlockBuffer; + + /** + * 当前part number + */ + public static AtomicInteger currentPartNumber = new AtomicInteger(1); + + /** + * 所有已经提交的block + * 注:allPartETags是线程安全的list + */ + public static List allPartETags = Collections.synchronizedList(new ArrayList()); + + /** + * 将每个task最后未upload的block加入到lastBlockBuffer, + * 如果lastBlockBuffer的大小已经超过blockSizeInByte,则需要upload一次, 防止task过多导致lastBlockBuffer暴增OOM + * + * @param lastBlock + * @param ossWriterProxy + * @param blockSizeInByte + * @param object + */ + public synchronized static void addLastBlockBuffer(byte[] lastBlock, + OssWriterProxy ossWriterProxy, + long blockSizeInByte, + String object, OssWriterProxy.HeaderProvider headerProvider) { + lastBlockBuffer = ArrayUtils.addAll(lastBlockBuffer, lastBlock); + //lastBlockBuffer大小超过blockSizeInByte则需要upload part + if (lastBlockBuffer != null && lastBlockBuffer.length >= blockSizeInByte) { + logger.info("write last block buffer part size [{}] to object [{}], all has uploaded part size:{}, current part number:{}, uploadId:{}", + lastBlockBuffer.length, object, allPartETags.size(), currentPartNumber.intValue(), uploadId); + try { + ossWriterProxy.uploadOnePartForSingleObject(lastBlockBuffer, uploadId, allPartETags, object, headerProvider); + } catch (Exception e) { + 
logger.error("upload part error: {}", e.getMessage(), e); + throw DataXException.asDataXException(e.getMessage()); + } + //currentPartNumber自增 + currentPartNumber.incrementAndGet(); + //清空lastBlockBuffer + lastBlockBuffer = null; + } + + } + + public static byte[] getLastBlockBuffer() { + return lastBlockBuffer; + } + +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriter.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriter.java index 90a34ad7bf..a8aec0e69d 100644 --- a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriter.java +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriter.java @@ -1,18 +1,21 @@ package com.alibaba.datax.plugin.writer.osswriter; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; +import java.io.*; import java.text.DateFormat; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.UUID; +import java.util.*; import java.util.concurrent.Callable; +import com.alibaba.datax.common.element.BytesColumn; +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.util.RangeSplitUtil; +import com.alibaba.datax.plugin.unstructuredstorage.FileFormat; +import com.alibaba.datax.plugin.unstructuredstorage.writer.binaryFileUtil.BinaryFileWriterUtil; +import com.alibaba.datax.plugin.writer.hdfswriter.HdfsWriter; +import com.alibaba.datax.plugin.writer.osswriter.util.HandlerUtil; +import com.alibaba.datax.plugin.writer.osswriter.util.HdfsParquetUtil; +import com.alibaba.fastjson.JSON; +import com.aliyun.oss.model.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -31,50 +34,121 @@ import com.aliyun.oss.ClientException; import com.aliyun.oss.OSSClient; import com.aliyun.oss.OSSException; -import com.aliyun.oss.model.CompleteMultipartUploadRequest; -import com.aliyun.oss.model.CompleteMultipartUploadResult; -import com.aliyun.oss.model.InitiateMultipartUploadRequest; -import com.aliyun.oss.model.InitiateMultipartUploadResult; -import com.aliyun.oss.model.OSSObjectSummary; -import com.aliyun.oss.model.ObjectListing; -import com.aliyun.oss.model.PartETag; -import com.aliyun.oss.model.UploadPartRequest; -import com.aliyun.oss.model.UploadPartResult; + +import static com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.*; /** * Created by haiwei.luo on 15-02-09. */ public class OssWriter extends Writer { + + public static int parseParentPathLength(List path) { + if (path == null || path.size() != 1) { + throw DataXException.asDataXException( + OssWriterErrorCode.CONFIG_INVALID_EXCEPTION, String.format("only support configure one path in binary copy mode, your config: %s", JSON.toJSONString(path))); + } + String eachPath = path.get(0); + int endMark; + for (endMark = 0; endMark < eachPath.length(); endMark++) { + if ('*' != eachPath.charAt(endMark) && '?' 
!= eachPath.charAt(endMark)) { + continue; + } else { + break; + } + } + + int lastDirSeparator = eachPath.lastIndexOf(IOUtils.DIR_SEPARATOR) + 1; + if (endMark < eachPath.length()) { + lastDirSeparator = eachPath.substring(0, endMark).lastIndexOf(IOUtils.DIR_SEPARATOR) + 1; + } + return lastDirSeparator; + } + public static class Job extends Writer.Job { private static final Logger LOG = LoggerFactory.getLogger(Job.class); private Configuration writerSliceConfig = null; private OSSClient ossClient = null; + private Configuration peerPluginJobConf; + private Boolean isBinaryFile; + private String objectDir; + private String syncMode; + private String fileFormat; + private String encoding; + private HdfsWriter.Job hdfsWriterJob; + private boolean useHdfsWriterProxy = false; + private boolean writeSingleObject; + private OssWriterProxy ossWriterProxy; + private String bucket; + private String object; + private List header; + + @Override + public void preHandler(Configuration jobConfiguration) { + HandlerUtil.preHandler(jobConfiguration); + } + @Override public void init() { this.writerSliceConfig = this.getPluginJobConf(); + this.basicValidateParameter(); + this.fileFormat = this.writerSliceConfig.getString( + com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.FILE_FORMAT_TEXT); + this.encoding = this.writerSliceConfig.getString( + com.alibaba.datax.plugin.unstructuredstorage.writer.Key.ENCODING, + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.DEFAULT_ENCODING); + this.useHdfsWriterProxy = HdfsParquetUtil.isUseHdfsWriterProxy(this.fileFormat); + if(useHdfsWriterProxy){ + this.hdfsWriterJob = new HdfsWriter.Job(); + HdfsParquetUtil.adaptConfiguration(this.hdfsWriterJob, this.writerSliceConfig); + + this.hdfsWriterJob.setJobPluginCollector(this.getJobPluginCollector()); + this.hdfsWriterJob.setPeerPluginJobConf(this.getPeerPluginJobConf()); + this.hdfsWriterJob.setPeerPluginName(this.getPeerPluginName()); + this.hdfsWriterJob.setPluginJobConf(this.getPluginJobConf()); + this.hdfsWriterJob.init(); + return; + } + this.peerPluginJobConf = this.getPeerPluginJobConf(); + this.isBinaryFile = FileFormat.getFileFormatByConfiguration(this.peerPluginJobConf).isBinary(); + this.syncMode = this.writerSliceConfig + .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.SYNC_MODE, ""); + this.writeSingleObject = this.writerSliceConfig.getBool(Key.WRITE_SINGLE_OBJECT, false); + this.header = this.writerSliceConfig + .getList(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.HEADER, null, String.class); this.validateParameter(); this.ossClient = OssUtil.initOssClient(this.writerSliceConfig); + this.ossWriterProxy = new OssWriterProxy(this.writerSliceConfig, this.ossClient); + } + + private void basicValidateParameter(){ + this.writerSliceConfig.getNecessaryValue(Key.ENDPOINT, OssWriterErrorCode.REQUIRED_VALUE); + this.writerSliceConfig.getNecessaryValue(Key.ACCESSID, OssWriterErrorCode.REQUIRED_VALUE); + this.writerSliceConfig.getNecessaryValue(Key.ACCESSKEY, OssWriterErrorCode.REQUIRED_VALUE); + this.writerSliceConfig.getNecessaryValue(Key.BUCKET, OssWriterErrorCode.REQUIRED_VALUE); } private void validateParameter() { - this.writerSliceConfig.getNecessaryValue(Key.ENDPOINT, - OssWriterErrorCode.REQUIRED_VALUE); - this.writerSliceConfig.getNecessaryValue(Key.ACCESSID, - OssWriterErrorCode.REQUIRED_VALUE); - this.writerSliceConfig.getNecessaryValue(Key.ACCESSKEY, - 
OssWriterErrorCode.REQUIRED_VALUE); - this.writerSliceConfig.getNecessaryValue(Key.BUCKET, - OssWriterErrorCode.REQUIRED_VALUE); - this.writerSliceConfig.getNecessaryValue(Key.OBJECT, - OssWriterErrorCode.REQUIRED_VALUE); + this.writerSliceConfig.getBool(Key.ENCRYPT); + + if (this.isBinaryFile){ + BinaryFileWriterUtil.validateParameter(this.writerSliceConfig); + return; + } + + if (!this.isPeer2PeerCopyMode()) { + // 非对等拷贝模式下必选 + this.writerSliceConfig.getNecessaryValue(Key.OBJECT, + OssWriterErrorCode.REQUIRED_VALUE); + } + // warn: do not support compress!! String compress = this.writerSliceConfig .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.COMPRESS); if (StringUtils.isNotBlank(compress)) { - String errorMessage = String.format( - "OSS写暂时不支持压缩, 该压缩配置项[%s]不起效用", compress); + String errorMessage = String.format("OSS writes do not support compression for the moment. The compressed item %s does not work", compress); LOG.error(errorMessage); throw DataXException.asDataXException( OssWriterErrorCode.ILLEGAL_VALUE, errorMessage); @@ -82,107 +156,376 @@ private void validateParameter() { } UnstructuredStorageWriterUtil .validateParameter(this.writerSliceConfig); - + LOG.info("writeSingleObject is: {}", this.writeSingleObject); } @Override public void prepare() { LOG.info("begin do prepare..."); - String bucket = this.writerSliceConfig.getString(Key.BUCKET); - String object = this.writerSliceConfig.getString(Key.OBJECT); + if(useHdfsWriterProxy){ + this.hdfsWriterJob.prepare(); + return; + } + this.bucket = this.writerSliceConfig.getString(Key.BUCKET); + this.object = this.writerSliceConfig.getString(Key.OBJECT); String writeMode = this.writerSliceConfig .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.WRITE_MODE); - // warn: bucket is not exists, create it - try { - // warn: do not create bucket for user - if (!this.ossClient.doesBucketExist(bucket)) { - // this.ossClient.createBucket(bucket); - String errorMessage = String.format( - "您配置的bucket [%s] 不存在, 请您确认您的配置项.", bucket); - LOG.error(errorMessage); - throw DataXException.asDataXException( - OssWriterErrorCode.ILLEGAL_VALUE, errorMessage); - } - LOG.info(String.format("access control details [%s].", - this.ossClient.getBucketAcl(bucket).toString())); - - // truncate option handler - if ("truncate".equals(writeMode)) { - LOG.info(String - .format("由于您配置了writeMode truncate, 开始清理 [%s] 下面以 [%s] 开头的Object", - bucket, object)); - // warn: 默认情况下,如果Bucket中的Object数量大于100,则只会返回100个Object - while (true) { - ObjectListing listing = null; - LOG.info("list objects with listObject(bucket, object)"); - listing = this.ossClient.listObjects(bucket, object); - List objectSummarys = listing - .getObjectSummaries(); - for (OSSObjectSummary objectSummary : objectSummarys) { - LOG.info(String.format("delete oss object [%s].", - objectSummary.getKey())); - this.ossClient.deleteObject(bucket, - objectSummary.getKey()); - } - if (objectSummarys.isEmpty()) { - break; + List sourceFileName = this.peerPluginJobConf.getList(SOURCE_FILE_NAME, new ArrayList(), + String.class); + this.objectDir = this.getObjectDir(object); + + // 对等拷贝模式下将源头获取的文件列表在目的端删除 + if (this.isPeer2PeerCopyMode()) { + String fullObjectName = null; + String truncateMode = this.writerSliceConfig.getString("truncateMode", "objectMatch"); + // 前缀删除模式 + if ("prefix".equalsIgnoreCase(truncateMode)) { + BinaryFileWriterUtil.checkFileNameIfRepeatedThrowException(sourceFileName); + if (TRUNCATE.equals(writeMode)) { + LOG.info("You have configured [writeMode] [truncate], 
so the system will start to clear the objects starting with [{}] under [{}]. ", bucket, object); + // warn: 默认情况下,如果Bucket中的Object数量大于100,则只会返回100个Object + while (true) { + ObjectListing listing = null; + LOG.info("list objects with listObject(bucket, object)"); + listing = this.ossClient.listObjects(bucket, object); + List objectSummarys = listing + .getObjectSummaries(); + if (objectSummarys.isEmpty()) { + break; + } + List objects2Delete = new ArrayList(); + for (OSSObjectSummary objectSummary : objectSummarys) { + objects2Delete.add(objectSummary.getKey()); + } + LOG.info(String.format("[prefix truncate mode]delete oss object [%s].", JSON.toJSONString(objects2Delete))); + DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucket); + deleteRequest.setKeys(objects2Delete); + deleteRequest.setQuiet(true);// 简单模式 + DeleteObjectsResult deleteResult = this.ossClient.deleteObjects(deleteRequest); + assert deleteResult.getDeletedObjects().isEmpty(); + LOG.warn("OSS request id:{}, objects delete failed:{}", deleteResult.getRequestId(), + JSON.toJSONString(deleteResult.getDeletedObjects())); } + + }else { + throw DataXException.asDataXException(OssWriterErrorCode.ILLEGAL_VALUE, + "only support truncate writeMode in copy sync mode."); } - } else if ("append".equals(writeMode)) { - LOG.info(String - .format("由于您配置了writeMode append, 写入前不做清理工作, 数据写入Bucket [%s] 下, 写入相应Object的前缀为 [%s]", - bucket, object)); - } else if ("nonConflict".equals(writeMode)) { - LOG.info(String - .format("由于您配置了writeMode nonConflict, 开始检查Bucket [%s] 下面以 [%s] 命名开头的Object", - bucket, object)); - ObjectListing listing = this.ossClient.listObjects(bucket, - object); - if (0 < listing.getObjectSummaries().size()) { - StringBuilder objectKeys = new StringBuilder(); - objectKeys.append("[ "); - for (OSSObjectSummary ossObjectSummary : listing - .getObjectSummaries()) { - objectKeys.append(ossObjectSummary.getKey() + " ,"); + } else { + if (TRUNCATE.equals(writeMode)) { + sourceFileName = this.peerPluginJobConf.getList(com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.SOURCE_FILE, new ArrayList(), + String.class); + List readerPath = this.peerPluginJobConf.getList(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.PATH, new ArrayList(), + String.class); + int parentPathLength = OssWriter.parseParentPathLength(readerPath); + this.writerSliceConfig.set("__parentPathLength", parentPathLength); + BinaryFileWriterUtil.checkFileNameIfRepeatedThrowException(sourceFileName); + + // 原样文件名删除模式 + int splitCount = sourceFileName.size() / 1000 + 1; + List> splitResult = RangeSplitUtil.doListSplit(sourceFileName, splitCount); + for (List eachSlice : splitResult) { + assert eachSlice.size() <= 1000; + if (eachSlice.isEmpty()) { + continue; + } + List ossObjFullPath = new ArrayList(); + for (String eachObj : eachSlice) { + fullObjectName = String.format("%s%s", objectDir, eachObj.substring(parentPathLength, eachObj.length())); + ossObjFullPath.add(fullObjectName); + } + LOG.info(String.format("[origin object name truncate mode]delete oss object [%s].", JSON.toJSONString(ossObjFullPath))); + DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucket); + deleteRequest.setKeys(ossObjFullPath); + deleteRequest.setQuiet(true);// 简单模式 + DeleteObjectsResult deleteResult = this.ossClient.deleteObjects(deleteRequest); + assert deleteResult.getDeletedObjects().isEmpty(); + LOG.warn("OSS request id:{}, objects delete failed:{}", deleteResult.getRequestId(), + JSON.toJSONString(deleteResult.getDeletedObjects())); } - 
objectKeys.append(" ]"); - LOG.info(String.format( - "object with prefix [%s] details: %s", object, - objectKeys.toString())); - throw DataXException - .asDataXException( - OssWriterErrorCode.ILLEGAL_VALUE, - String.format( - "您配置的Bucket: [%s] 下面存在其Object有前缀 [%s].", - bucket, object)); + } else { + throw DataXException.asDataXException(OssWriterErrorCode.ILLEGAL_VALUE, + "only support truncate writeMode in copy sync mode."); + } } - } catch (OSSException e) { - throw DataXException.asDataXException( - OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage()); - } catch (ClientException e) { - throw DataXException.asDataXException( - OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage()); + return; + } else { + // warn: when the source is not semi-structured, or we are not in peer-to-peer copy mode, the prefix delete strategy is used + // warn: bucket is not exists, create it + try { + // warn: do not create bucket for user + if (!this.ossClient.doesBucketExist(bucket)) { + // this.ossClient.createBucket(bucket); + String errorMessage = String.format("The bucket [%s] you configured does not exist. Please check your configuration. ", bucket); + LOG.error(errorMessage); + throw DataXException.asDataXException( + OssWriterErrorCode.ILLEGAL_VALUE, errorMessage); + } + LOG.info(String.format("access control details [%s].", + this.ossClient.getBucketAcl(bucket).toString())); + + if (writeSingleObject) { + doPrepareForSingleObject(bucket, object, writeMode); + } else { + doPrepareForMutliObject(bucket, object, writeMode); + } + } catch (OSSException e) { + throw DataXException.asDataXException( + OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage(), e); + } catch (ClientException e) { + throw DataXException.asDataXException( + OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage(), e); + } + } + } + + /** + * prepare logic for multiple tasks writing one single object + * + * @param bucket + * @param object + * @param writeMode + */ + private void doPrepareForSingleObject(String bucket, String object, String writeMode) { + boolean doesObjectExist = this.ossClient.doesObjectExist(bucket, object); + LOG.info("does object [{}] exist in bucket {} : {}", object, bucket, doesObjectExist); + if (TRUNCATE.equals(writeMode)) { + LOG.info("Because you have configured writeMode truncate and writeSingleObject is true, start cleaning up the existing object [{}] under bucket [{}]", object, bucket); + if (doesObjectExist) { + LOG.info("object [{}] already exists in bucket [{}], delete it!", object, bucket); + this.ossClient.deleteObject(bucket, object); + } + } else if (APPEND.equals(writeMode)) { + throw DataXException + .asDataXException( + OssWriterErrorCode.ILLEGAL_VALUE, + "writeMode append is not supported when writeSingleObject is true."); + } else if (NOCONFLICT.equals(writeMode)) { + LOG.info("Because you have configured writeMode nonConflict and writeSingleObject is true, start checking whether an object named [{}] already exists under bucket [{}]", object, bucket); + if (doesObjectExist) { + throw DataXException + .asDataXException( + OssWriterErrorCode.ILLEGAL_VALUE, + String.format("The bucket you configured: %s already contains an object with the same name: %s", bucket, object)); + } + } + } + + /** + * prepare logic for multiple tasks writing multiple objects; this is the pre-existing osswriter behavior and must stay backward compatible + * + * @param bucket + * @param object + * @param writeMode + */ + private void doPrepareForMutliObject(String bucket, String object, String writeMode) { + // truncate option handler + if (TRUNCATE.equals(writeMode)) { + LOG.info("You have configured [writeMode] [truncate], so the system will start to clear the objects starting with [{}] under [{}]. ", object, bucket); + // warn: by default, if the bucket holds more than 100 objects, a single listing only returns 100 of them + while (true) { + ObjectListing listing = null; + LOG.info("list objects with listObject(bucket, object)"); + listing = this.ossClient.listObjects(bucket, object); + List objectSummarys = listing + .getObjectSummaries(); + for (OSSObjectSummary objectSummary : objectSummarys) { + LOG.info(String.format("delete oss object [%s].", + objectSummary.getKey())); + this.ossClient.deleteObject(bucket, + objectSummary.getKey()); + } + if (objectSummarys.isEmpty()) { + break; + } + } + } else if (APPEND.equals(writeMode)) { + LOG.info("You have configured [writeMode] [append], so the system won't perform the clearing before writing. Data is written to objects with the name prefix of [{}] under the bucket: [{}]. ", object, bucket); + } else if (NOCONFLICT.equals(writeMode)) { + LOG.info("You have configured [writeMode] [nonConflict], so the system will start to check objects whose names start with [{}] under the bucket: [{}]. ", object, bucket); + ObjectListing listing = this.ossClient.listObjects(bucket, + object); + if (0 < listing.getObjectSummaries().size()) { + StringBuilder objectKeys = new StringBuilder(); + objectKeys.append("[ "); + for (OSSObjectSummary ossObjectSummary : listing + .getObjectSummaries()) { + objectKeys.append(ossObjectSummary.getKey() + " ,"); + } + objectKeys.append(" ]"); + LOG.info(String.format( + "object with prefix [%s] details: %s", object, + objectKeys.toString())); + throw DataXException + .asDataXException( + OssWriterErrorCode.ILLEGAL_VALUE, + String.format("The [bucket] you configured: %s contains objects with the name prefix of %s.", bucket, object)); + } } } @Override public void post() { + if(useHdfsWriterProxy){ + this.hdfsWriterJob.post(); + return; + } + if (this.writeSingleObject) { + try { + /**1. merge and upload the last block*/ + LOG.info("Uploaded part count so far: {}", OssSingleObject.allPartETags.size()); + if (OssSingleObject.getLastBlockBuffer() != null && OssSingleObject.getLastBlockBuffer().length != 0) { + byte[] byteBuffer = OssSingleObject.getLastBlockBuffer(); + LOG.info("post writer single object last merge block size is : {}", byteBuffer.length); + this.ossWriterProxy.uploadOnePartForSingleObject(byteBuffer, OssSingleObject.uploadId, + OssSingleObject.allPartETags, this.object, this::getHeaderBytes); + } + + if (OssSingleObject.allPartETags.size() == 0) { + LOG.warn("allPartETags size is 0, there is no uploaded part to complete, " + + "skip complete multipart upload!"); + this.ossWriterProxy.abortMultipartUpload(this.object,OssSingleObject.uploadId); + return; + } + + /**2. 
complete the multipart upload */ + LOG.info("begin complete multi part upload, bucket:{}, object:{}, uploadId:{}, uploaded part count:{}", + this.bucket, this.object, OssSingleObject.uploadId, OssSingleObject.allPartETags.size()); + orderPartETages(OssSingleObject.allPartETags); + CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, this.object, OssSingleObject.uploadId, OssSingleObject.allPartETags); + CompleteMultipartUploadResult completeMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload(completeMultipartUploadRequest); + LOG.info(String.format("post final object etag is:[%s]", completeMultipartUploadResult.getETag())); + } catch (Exception e) { + LOG.error("osswriter post error: {}", e.getMessage(), e); + throw DataXException.asDataXException(e.getMessage()); + } + } + } + + private byte[] getHeaderBytes() throws IOException { + if (null != this.header && !this.header.isEmpty()) { + // write header to writer + try (StringWriter sw = new StringWriter(); + UnstructuredWriter headerWriter = UnstructuredStorageWriterUtil. + produceUnstructuredWriter(this.fileFormat, this.writerSliceConfig, sw)) { + headerWriter.writeOneRecord(this.header); + return sw.toString().getBytes(this.encoding); + } + } + return new byte[0]; + } + /** + * sort allPartETags in ascending order + * + * @param allPartETags + */ + private void orderPartETages(List allPartETags) { + Collections.sort(allPartETags, new Comparator() { + @Override + public int compare(PartETag o1, PartETag o2) { + // ascending by partNumber + return o1.getPartNumber() - o2.getPartNumber(); + } + }); } @Override public void destroy() { - + if(useHdfsWriterProxy){ + this.hdfsWriterJob.destroy(); + return; + } + try { + // this.ossClient.shutdown(); + } catch (Exception e) { + LOG.warn("shutdown ossClient met an exception:" + e.getMessage(), e); + } } @Override public List split(int mandatoryNumber) { LOG.info("begin do split..."); + if(useHdfsWriterProxy){ + return this.hdfsWriterJob.split(mandatoryNumber); + } + List writerSplitConfigs = new ArrayList(); + + // warn: this may actually be buggy: the datax framework shuffles records, and intra-file splitting cannot really support this requirement + if(this.isPeer2PeerCopyMode()){ + // known risk: if the source oss files abc/123/data.txt and yixiao.txt are peer-to-peer copied over, data.txt + // and yixiao.txt can only land in a single directory + List readerSplitConfigs = this.getReaderPluginSplitConf(); + for (int i = 0; i < readerSplitConfigs.size(); i++) { + Configuration splitedTaskConfig = writerSliceConfig.clone(); + splitedTaskConfig.set(Key.OBJECT, objectDir); + splitedTaskConfig.set(com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.BINARY, + this.isBinaryFile); + writerSplitConfigs.add(splitedTaskConfig); + } + } else { + if (this.writeSingleObject) { + writerSplitConfigs = doSplitForWriteSingleObject(mandatoryNumber); + } else { + writerSplitConfigs = doSplitForWriteMultiObject(mandatoryNumber); + } + } + LOG.info("end do split. 
split size: {}", writerSplitConfigs.size()); + return writerSplitConfigs; + } + + /** + * 针对多个task写单个文件模式,新增split逻辑 + * + * @param mandatoryNumber + * @return + */ + private List doSplitForWriteSingleObject(int mandatoryNumber) { + LOG.info("writeSingleObject is true, begin do split for write single object."); List writerSplitConfigs = new ArrayList(); String object = this.writerSliceConfig.getString(Key.OBJECT); - String bucket = this.writerSliceConfig.getString(Key.BUCKET); + InitiateMultipartUploadRequest uploadRequest = this.ossWriterProxy.getInitiateMultipartUploadRequest( + object); + + InitiateMultipartUploadResult uploadResult; + try { + uploadResult = this.ossWriterProxy.initiateMultipartUpload( + uploadRequest); + } catch (Exception e) { + LOG.error("initiateMultipartUpload error: {}", e.getMessage(), e); + throw DataXException.asDataXException(e.getMessage()); + } + /** + * 如果需要写同一个object,需要保证使用同一个upload Id + * see: https://help.aliyun.com/document_detail/31993.html + */ + String uploadId = uploadResult.getUploadId(); + OssSingleObject.uploadId = uploadId; + LOG.info("writeSingleObject use uploadId: {}", uploadId); + + for (int i = 0; i < mandatoryNumber; i++) { + Configuration splitedTaskConfig = this.writerSliceConfig + .clone(); + splitedTaskConfig.set(Key.OBJECT, object); + splitedTaskConfig.set(Key.UPLOAD_ID, uploadId); + writerSplitConfigs.add(splitedTaskConfig); + } + return writerSplitConfigs; + } + + /** + * osswriter多个task写多个object文件split逻辑,历史已有该逻辑,保持向前兼容性 + * + * @param mandatoryNumber + * @return + */ + private List doSplitForWriteMultiObject(int mandatoryNumber) { + List writerSplitConfigs = new ArrayList(); + String bucket = this.writerSliceConfig.getString(Key.BUCKET); + String object = this.writerSliceConfig.getString(Key.OBJECT); Set allObjects = new HashSet(); try { List ossObjectlisting = this.ossClient @@ -192,10 +535,10 @@ public List split(int mandatoryNumber) { } } catch (OSSException e) { throw DataXException.asDataXException( - OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage()); + OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage(), e); } catch (ClientException e) { throw DataXException.asDataXException( - OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage()); + OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage(), e); } String objectSuffix; @@ -223,9 +566,25 @@ public List split(int mandatoryNumber) { writerSplitConfigs.add(splitedTaskConfig); } - LOG.info("end do split."); return writerSplitConfigs; } + + private boolean isPeer2PeerCopyMode() { + return this.isBinaryFile + || com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.SYNC_MODE_VALUE_COPY + .equalsIgnoreCase(this.syncMode); + } + + private String getObjectDir(String object) { + String dir = null; + if (StringUtils.isBlank(object)) { + dir = ""; + } else { + dir = object.trim(); + dir = dir.endsWith("/") ? 
dir : String.format("%s/", dir); + } + return dir; + } } public static class Task extends Writer.Task { @@ -237,17 +596,48 @@ public static class Task extends Writer.Task { private String object; private String nullFormat; private String encoding; - private char fieldDelimiter; private String dateFormat; private DateFormat dateParse; private String fileFormat; private List header; private Long maxFileSize;// MB private String suffix; + private Boolean encrypt;// 是否在服务器端进行加密存储 + private long blockSizeInByte; + private Boolean isBinaryFile; + private String objectDir; + private String syncMode; + private int parentPathLength; + private String byteEncoding; + private HdfsWriter.Task hdfsWriterTask; + private boolean useHdfsWriterProxy = false; + private boolean writeSingleObject; + private String uploadId; + private OssWriterProxy ossWriterProxy; + private List partition; + private boolean generateEmptyFile; @Override public void init() { this.writerSliceConfig = this.getPluginJobConf(); + this.fileFormat = this.writerSliceConfig + .getString( + com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.FILE_FORMAT_TEXT); + this.useHdfsWriterProxy = HdfsParquetUtil.isUseHdfsWriterProxy(this.fileFormat); + if(useHdfsWriterProxy){ + this.hdfsWriterTask = new HdfsWriter.Task(); + this.hdfsWriterTask.setPeerPluginJobConf(this.getPeerPluginJobConf()); + this.hdfsWriterTask.setPeerPluginName(this.getPeerPluginName()); + this.hdfsWriterTask.setPluginJobConf(this.getPluginJobConf()); + this.hdfsWriterTask.setReaderPluginSplitConf(this.getReaderPluginSplitConf()); + this.hdfsWriterTask.setTaskGroupId(this.getTaskGroupId()); + this.hdfsWriterTask.setTaskId(this.getTaskId()); + this.hdfsWriterTask.setTaskPluginCollector(this.getTaskPluginCollector()); + this.hdfsWriterTask.init(); + return; + } + this.ossClient = OssUtil.initOssClient(this.writerSliceConfig); this.bucket = this.writerSliceConfig.getString(Key.BUCKET); this.object = this.writerSliceConfig.getString(Key.OBJECT); @@ -264,14 +654,6 @@ public void init() { .getString( com.alibaba.datax.plugin.unstructuredstorage.writer.Key.ENCODING, com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.DEFAULT_ENCODING); - this.fieldDelimiter = this.writerSliceConfig - .getChar( - com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FIELD_DELIMITER, - com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.DEFAULT_FIELD_DELIMITER); - this.fileFormat = this.writerSliceConfig - .getString( - com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT, - com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.FILE_FORMAT_TEXT); this.header = this.writerSliceConfig .getList( com.alibaba.datax.plugin.unstructuredstorage.writer.Key.HEADER, @@ -285,67 +667,334 @@ public void init() { com.alibaba.datax.plugin.unstructuredstorage.writer.Key.SUFFIX, com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.DEFAULT_SUFFIX); this.suffix = this.suffix.trim();// warn: need trim + this.encrypt = this.writerSliceConfig.getBool(Key.ENCRYPT, false); + + // 设置每块字符串长度 + this.blockSizeInByte = this.writerSliceConfig.getLong(Key.BLOCK_SIZE_IN_MB, 10L) * 1024 * 1024; + + this.isBinaryFile = this.writerSliceConfig.getBool( + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.BINARY, false); + + this.objectDir = this.getObjectDir(this.object); + this.syncMode = this.writerSliceConfig + .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.SYNC_MODE, ""); + 
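// A hedged sketch of the task JSON this init() reads (key names inferred from the Key constants referenced in this method; all values hypothetical): + // "parameter": { "bucket": "example-bucket", "object": "datax/out/part", "encrypt": false, + // "blockSizeInMB": 16, "writeSingleObject": true, "generateEmptyFile": false } + 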
this.parentPathLength = this.writerSliceConfig.getInt("__parentPathLength", 0); + + this.byteEncoding = this.writerSliceConfig + .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.BYTE_ENCODING); + + this.writeSingleObject = this.writerSliceConfig.getBool(Key.WRITE_SINGLE_OBJECT, false); + this.uploadId = this.writerSliceConfig.getString(Key.UPLOAD_ID); + this.ossWriterProxy = new OssWriterProxy(this.writerSliceConfig, this.ossClient); + this.partition = this.writerSliceConfig.getList(Key.PARTITION, new ArrayList<>(), String.class); + //是否生成空文件开关 + this.generateEmptyFile = this.writerSliceConfig.getBool(Key.GENERATE_EMPTY_FILE,true); } @Override public void startWrite(RecordReceiver lineReceiver) { + if(useHdfsWriterProxy){ + hdfsWriterTask.startWrite(lineReceiver); + return; + } + if (this.isPeer2PeerCopyMode()) { + // 对等拷贝 + this.startWriteBinaryFile(lineReceiver); + } else if (this.writeSingleObject) { + this.startWriteSingleObjectUnstructedStorageFile(lineReceiver); + } else { + this.startWriteUnstructedStorageFile(lineReceiver,generateEmptyFile); + } + } + + /** + * 单object写入 + * + * @param lineReceiver + */ + public void startWriteSingleObjectUnstructedStorageFile(RecordReceiver lineReceiver) { + + try { + Record record; + String currentObject = this.object; + List currentPartETags = new ArrayList(); + + //warn: may be StringBuffer->StringBuilder + StringWriter sw = new StringWriter(); + StringBuffer sb = sw.getBuffer(); + UnstructuredWriter unstructuredWriter = UnstructuredStorageWriterUtil. + produceUnstructuredWriter(this.fileFormat, this.writerSliceConfig, sw); + + while ((record = lineReceiver.getFromReader()) != null) { + //单文件同步暂不支持轮转[目前单文件支持同步约最大100GB大小] + if (OssSingleObject.currentPartNumber.intValue() > Constant.MAX_BLOCK_SIZE) { + throw DataXException.asDataXException(String.format("When writeSingleObject is true, the write size of your single object has exceeded the maximum value of %s MB.", + (Constant.MAX_BLOCK_SIZE * this.blockSizeInByte / 1024 / 1024))); + } + + // write: upload data to current object + UnstructuredStorageWriterUtil.transportOneRecord(record, + this.nullFormat, this.dateParse, + this.getTaskPluginCollector(), unstructuredWriter, this.byteEncoding); + + // 达到 this.blockSizeInByte ,上传文件块 + if (sb.length() >= this.blockSizeInByte) { + LOG.info(String + .format("write to bucket: [%s] object: [%s] with oss uploadId: [%s], currentPartNumber: %s", + this.bucket, currentObject, + this.uploadId, OssSingleObject.currentPartNumber.intValue())); + byte[] byteArray = sw.toString().getBytes(this.encoding); + this.ossWriterProxy.uploadOnePartForSingleObject(byteArray, this.uploadId, currentPartETags, currentObject, this::getHeaderBytes); + sb.setLength(0); + } + } + //将本task所有upload的part加入到allPartETags中 + OssSingleObject.allPartETags.addAll(currentPartETags); + + //将task未写完的最后一个block加入到 OssSingleObject.lastBlockBuffer 中,待job阶段合并上传 + if (sb.length() > 0) { + byte[] lastBlock = sw.toString().getBytes(this.encoding); + LOG.info("begin add last block to buffer, last block size: {}", lastBlock.length); + OssSingleObject.addLastBlockBuffer(lastBlock, this.ossWriterProxy, this.blockSizeInByte, this.object, this::getHeaderBytes); + } + } catch (IOException e) { + // 脏数据UnstructuredStorageWriterUtil.transportOneRecord已经记录,header + // 都是字符串不认为有脏数据 + throw DataXException.asDataXException( + OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); + } catch (Exception e) { + throw DataXException.asDataXException( + OssWriterErrorCode.Write_OBJECT_ERROR, 
e.getMessage(), e); + } + LOG.info("single oss object end do write"); + } + + private byte[] getHeaderBytes() throws IOException { + if (null != this.header && !this.header.isEmpty()) { + // write header to writer + try (StringWriter sw = new StringWriter(); + UnstructuredWriter headerWriter = UnstructuredStorageWriterUtil. + produceUnstructuredWriter(this.fileFormat, this.writerSliceConfig, sw)) { + headerWriter.writeOneRecord(this.header); + return sw.toString().getBytes(this.encoding); + } + } + return new byte[0]; + } + + /** + * 同步音视频等无结构化文件 + * warn: 代码和startWriteUnstructedStorageFile重复程度太高,后续需要继续重构 + */ + private void startWriteBinaryFile(RecordReceiver lineReceiver) { + Record record; + String currentObject = null; + InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest; + InitiateMultipartUploadResult currentInitiateMultipartUploadResult = null; + String lastUploadId = null; + boolean gotData = false; + List currentPartETags = null; + int currentPartNumber = 1; + Map meta; + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + long currentSize = 0; + try { + // warn + boolean needInitMultipartTransform = true; + while ((record = lineReceiver.getFromReader()) != null) { + Column column = record.getColumn(0); + meta = record.getMeta(); + assert meta != null; + gotData = true; + String objectNameTmp = meta + .get(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.META_KEY_FILE_PATH); + String fullObjectNameTmp = String.format("%s%s", this.objectDir, objectNameTmp.substring(this.parentPathLength, objectNameTmp.length())); + + // init: 2 condition begin new multipart upload + if (needInitMultipartTransform || !StringUtils.equals(currentObject, fullObjectNameTmp)) { + // 先将上一个分块上传的request complete掉 + if (null != currentInitiateMultipartUploadResult) { + // 如果还有部分分库数据没有提交,则先提交 + if (currentSize > 0) { + this.ossWriterProxy.uploadOnePart(byteArrayOutputStream.toByteArray(), currentPartNumber, + currentInitiateMultipartUploadResult, currentPartETags, currentObject); + currentPartNumber++; + currentSize = 0; + byteArrayOutputStream.reset(); + } + // TODO 如果当前文件是空文件 + String commitKey = currentInitiateMultipartUploadResult.getKey(); + LOG.info(String.format( + "current object [%s] size %s, complete current multipart upload %s and begin new one", + commitKey, currentPartNumber * this.blockSizeInByte, + currentInitiateMultipartUploadResult.getUploadId())); + CompleteMultipartUploadRequest currentCompleteMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, commitKey, currentInitiateMultipartUploadResult.getUploadId(), + currentPartETags); + CompleteMultipartUploadResult currentCompleteMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload( + currentCompleteMultipartUploadRequest); + lastUploadId = currentInitiateMultipartUploadResult.getUploadId(); + LOG.info(String.format("final object [%s] etag is:[%s]", commitKey, + currentCompleteMultipartUploadResult.getETag())); + } + // 这里发现一个全新的文件需要分块上传 + currentObject = fullObjectNameTmp; + currentInitiateMultipartUploadRequest = this.ossWriterProxy.getInitiateMultipartUploadRequest(currentObject); + currentInitiateMultipartUploadResult = this.ossWriterProxy.initiateMultipartUpload( + currentInitiateMultipartUploadRequest); + currentPartETags = new ArrayList(); + LOG.info(String.format("write to bucket: [%s] object: [%s] with oss uploadId: [%s]", + this.bucket, currentObject, currentInitiateMultipartUploadResult.getUploadId())); + // warn + needInitMultipartTransform = 
false; + currentPartNumber = 1; + } + // write: upload data to current object + byte[] data; + if (column instanceof BytesColumn) { + data = column.asBytes(); + byteArrayOutputStream.write(data); + currentSize += data.length; + } else { + String message = "the type of column must be BytesColumn!"; + throw DataXException.asDataXException(OssWriterErrorCode.Write_OBJECT_ERROR, message); + } + if (currentSize >= this.blockSizeInByte) { + this.ossWriterProxy.uploadOnePart(byteArrayOutputStream.toByteArray(), currentPartNumber, + currentInitiateMultipartUploadResult, currentPartETags, currentObject); + currentPartNumber++; + currentSize = 0; + byteArrayOutputStream.reset(); + } + } + + // TODO binary 模式读取,源头为空文件时是有问题的 + if (!gotData) { + LOG.info("Receive no data from the source."); + currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest(this.bucket, + currentObject); + currentInitiateMultipartUploadResult = this.ossWriterProxy.initiateMultipartUpload( + currentInitiateMultipartUploadRequest); + currentPartETags = new ArrayList(); + } + + // warn: may be some data stall in byteArrayOutputStream + if (byteArrayOutputStream.size() > 0) { + this.ossWriterProxy.uploadOnePart(byteArrayOutputStream.toByteArray(), currentPartNumber, + currentInitiateMultipartUploadResult, currentPartETags, currentObject); + currentPartNumber++; + } + + // 避免重复提交 + if (!StringUtils.equals(lastUploadId, currentInitiateMultipartUploadResult.getUploadId())) { + CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, currentObject, currentInitiateMultipartUploadResult.getUploadId(), + currentPartETags); + CompleteMultipartUploadResult completeMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload( + completeMultipartUploadRequest); + LOG.info(String.format("final object etag is:[%s]", completeMultipartUploadResult.getETag())); + } + } catch (IOException e) { + // 脏数据UnstructuredStorageWriterUtil.transportOneRecord已经记录,header + // 都是字符串不认为有脏数据 + throw DataXException.asDataXException(OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); + } catch (Exception e) { + throw DataXException.asDataXException(OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); + } + LOG.info("end do write"); + } + + /** + * 开始写半结构化文件 + * + * @param lineReceiver + */ + private void startWriteUnstructedStorageFile(RecordReceiver lineReceiver, boolean generateEmptyFile){ // 设置每块字符串长度 - final long partSize = 1024 * 1024 * 10L; - long numberCacul = (this.maxFileSize * 1024 * 1024L) / partSize; + long numberCacul = (this.maxFileSize * 1024 * 1024L) / this.blockSizeInByte; final long maxPartNumber = numberCacul >= 1 ? 
numberCacul : 1; int objectRollingNumber = 0; - //warn: may be StringBuffer->StringBuilder - StringWriter sw = new StringWriter(); - StringBuffer sb = sw.getBuffer(); - UnstructuredWriter unstructuredWriter = TextCsvWriterManager - .produceUnstructuredWriter(this.fileFormat, - this.fieldDelimiter, sw); - Record record = null; - - LOG.info(String.format( - "begin do write, each object maxFileSize: [%s]MB...", - maxPartNumber * 10)); + Record record; String currentObject = this.object; - InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest = null; + if (this.isPeer2PeerCopyMode()) { + currentObject = null; + } else { + // 加上suffix + currentObject = appedSuffixTo(currentObject); + } + InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest; InitiateMultipartUploadResult currentInitiateMultipartUploadResult = null; + String lastUploadId = null; boolean gotData = false; List currentPartETags = null; // to do: // 可以根据currentPartNumber做分块级别的重试,InitiateMultipartUploadRequest多次一个currentPartNumber会覆盖原有 int currentPartNumber = 1; + Map meta; + + //warn: may be StringBuffer->StringBuilder + StringWriter sw = new StringWriter(); + StringBuffer sb = sw.getBuffer(); + UnstructuredWriter unstructuredWriter = UnstructuredStorageWriterUtil. + produceUnstructuredWriter(this.fileFormat, this.writerSliceConfig, sw); + LOG.info(String.format( + "begin do write, each object maxFileSize: [%s]MB...", + maxPartNumber * 10)); try { - // warn + // warn 源头可能是MySQL中,导致没有meta这个第一次初始化标示省不掉 boolean needInitMultipartTransform = true; while ((record = lineReceiver.getFromReader()) != null) { + meta = record.getMeta(); gotData = true; - // init:begin new multipart upload - if (needInitMultipartTransform) { - if (objectRollingNumber == 0) { - if (StringUtils.isBlank(this.suffix)) { - currentObject = this.object; - } else { - currentObject = String.format("%s%s", - this.object, this.suffix); - } - } else { - // currentObject is like(no suffix) - // myfile__9b886b70fbef11e59a3600163e00068c_1 - if (StringUtils.isBlank(this.suffix)) { - currentObject = String.format("%s_%s", - this.object, objectRollingNumber); - } else { - // or with suffix - // myfile__9b886b70fbef11e59a3600163e00068c_1.csv - currentObject = String.format("%s_%s%s", - this.object, objectRollingNumber, - this.suffix); + // init: 2 condition begin new multipart upload 轮转策略(文件名规则)不一致 + // condition: 对等拷贝模式 && Record中的Meta切换文件名 && + // condition: 类log4j日志轮转 && !对等拷贝模式 + boolean realyNeedInitUploadRequest = false; + if (this.isPeer2PeerCopyMode()) { + assert meta != null; + String objectNameTmp = meta + .get(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.META_KEY_FILE_PATH); + String fullObjectNameTmp = String.format("%s%s", this.objectDir, objectNameTmp.substring(this.parentPathLength, objectNameTmp.length())); + if (!StringUtils.equals(currentObject, fullObjectNameTmp)) { + currentObject = fullObjectNameTmp; + realyNeedInitUploadRequest = true; + } + } else { + if (needInitMultipartTransform || currentPartNumber > maxPartNumber) { + currentObject = getCurrentObject(objectRollingNumber, record); + objectRollingNumber++; + realyNeedInitUploadRequest = true; + } + } + + if (realyNeedInitUploadRequest) { + // 先将上一个分块上传的request complete掉 + if (null != currentInitiateMultipartUploadResult) { + if (sb.length() > 0) { + this.uploadOnePart(sw, currentPartNumber, currentInitiateMultipartUploadResult, + currentPartETags, currentObject); + currentPartNumber++; + sb.setLength(0); } + // TODO 如果当前文件是空文件 + String commitKey = 
currentInitiateMultipartUploadResult.getKey(); + LOG.info(String.format( + "current object [%s] size %s, complete current multipart upload %s and begin new one", + commitKey, currentPartNumber * this.blockSizeInByte, + currentInitiateMultipartUploadResult.getUploadId())); + CompleteMultipartUploadRequest currentCompleteMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, commitKey, currentInitiateMultipartUploadResult.getUploadId(), + currentPartETags); + CompleteMultipartUploadResult currentCompleteMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload( + currentCompleteMultipartUploadRequest); + lastUploadId = currentInitiateMultipartUploadResult.getUploadId(); + LOG.info(String.format("final object [%s] etag is:[%s]", commitKey, + currentCompleteMultipartUploadResult.getETag())); } - objectRollingNumber++; - currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest( - this.bucket, currentObject); - currentInitiateMultipartUploadResult = this.ossClient - .initiateMultipartUpload(currentInitiateMultipartUploadRequest); + + currentInitiateMultipartUploadRequest = this.ossWriterProxy.getInitiateMultipartUploadRequest(currentObject); + currentInitiateMultipartUploadResult = this.ossWriterProxy.initiateMultipartUpload(currentInitiateMultipartUploadRequest); currentPartETags = new ArrayList(); LOG.info(String .format("write to bucket: [%s] object: [%s] with oss uploadId: [%s]", @@ -365,43 +1014,22 @@ public void startWrite(RecordReceiver lineReceiver) { // write: upload data to current object UnstructuredStorageWriterUtil.transportOneRecord(record, this.nullFormat, this.dateParse, - this.getTaskPluginCollector(), unstructuredWriter); + this.getTaskPluginCollector(), unstructuredWriter, this.byteEncoding); - if (sb.length() >= partSize) { + if (sb.length() >= this.blockSizeInByte) { this.uploadOnePart(sw, currentPartNumber, currentInitiateMultipartUploadResult, currentPartETags, currentObject); currentPartNumber++; sb.setLength(0); } - - // save: end current multipart upload - if (currentPartNumber > maxPartNumber) { - LOG.info(String - .format("current object [%s] size > %s, complete current multipart upload and begin new one", - currentObject, currentPartNumber - * partSize)); - CompleteMultipartUploadRequest currentCompleteMultipartUploadRequest = new CompleteMultipartUploadRequest( - this.bucket, currentObject, - currentInitiateMultipartUploadResult - .getUploadId(), currentPartETags); - CompleteMultipartUploadResult currentCompleteMultipartUploadResult = this.ossClient - .completeMultipartUpload(currentCompleteMultipartUploadRequest); - LOG.info(String.format( - "final object [%s] etag is:[%s]", - currentObject, - currentCompleteMultipartUploadResult.getETag())); - // warn - needInitMultipartTransform = true; - } } if (!gotData) { LOG.info("Receive no data from the source."); currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest( this.bucket, currentObject); - currentInitiateMultipartUploadResult = this.ossClient - .initiateMultipartUpload(currentInitiateMultipartUploadRequest); + currentInitiateMultipartUploadResult = this.ossWriterProxy.initiateMultipartUpload(currentInitiateMultipartUploadRequest); currentPartETags = new ArrayList(); // each object's header if (null != this.header && !this.header.isEmpty()) { @@ -414,81 +1042,145 @@ public void startWrite(RecordReceiver lineReceiver) { currentInitiateMultipartUploadResult, currentPartETags, currentObject); } - CompleteMultipartUploadRequest 
completeMultipartUploadRequest = new CompleteMultipartUploadRequest( - this.bucket, currentObject, - currentInitiateMultipartUploadResult.getUploadId(), - currentPartETags); - CompleteMultipartUploadResult completeMultipartUploadResult = this.ossClient - .completeMultipartUpload(completeMultipartUploadRequest); - LOG.info(String.format("final object etag is:[%s]", - completeMultipartUploadResult.getETag())); + + // avoid completing the same upload twice + if (!StringUtils.equals(lastUploadId, currentInitiateMultipartUploadResult.getUploadId())) { + CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, currentObject, + currentInitiateMultipartUploadResult.getUploadId(), + currentPartETags); + if (gotData) { + completeUpload(completeMultipartUploadRequest); + } else { + if (generateEmptyFile) { + LOG.info("No data was received, so oss will generate an empty file. " + + "generateEmptyFile is {}; set it to false to avoid this", generateEmptyFile); + completeUpload(completeMultipartUploadRequest); + } else { + LOG.info("generateEmptyFile is false, so datax will not generate an empty file"); + } + } + } } catch (IOException e) { // 脏数据UnstructuredStorageWriterUtil.transportOneRecord已经记录,header // 都是字符串不认为有脏数据 throw DataXException.asDataXException( - OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage()); + OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); } catch (Exception e) { throw DataXException.asDataXException( - OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage()); + OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); } LOG.info("end do write"); } + private void completeUpload(CompleteMultipartUploadRequest completeMultipartUploadRequest) throws Exception { + CompleteMultipartUploadResult completeMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload(completeMultipartUploadRequest); + LOG.info(String.format("final object etag is:[%s]", + completeMultipartUploadResult.getETag())); + } + + + private String getCurrentObject(int objectRollingNumber, Record record) { + String currentObject = this.object; + + if (!this.partition.isEmpty()) { + String partitionValues = getPartitionValues(record); + currentObject = String.format("%s_%s", currentObject, partitionValues); + } + + if (objectRollingNumber > 0) { + currentObject = String.format("%s_%s", currentObject, objectRollingNumber); + } + + currentObject = appedSuffixTo(currentObject); + + return currentObject; + } + + private String getPartitionValues(Record record) { + // config like "partition": "ds,venture" + String partitionValues = ""; + // assume that partition columns are located at the end of the record + for (int i = 0; i < this.partition.size(); i++) { + partitionValues += record.getColumn(record.getColumnNumber() - 1 - i).asString(); + } + return partitionValues; + } + + private String appedSuffixTo(String currentObject) { + StringBuilder sbCurrentObject = new StringBuilder(currentObject); + + if (StringUtils.isNotBlank(this.suffix)) { + if (!this.suffix.startsWith(".")) { + sbCurrentObject.append("."); + } + sbCurrentObject.append(suffix); + } + + return sbCurrentObject.toString(); + } + /** * 对于同一个UploadID,该号码不但唯一标识这一块数据,也标识了这块数据在整个文件内的相对位置。 * 如果你用同一个part号码,上传了新的数据,那么OSS上已有的这个号码的Part数据将被覆盖。 - * + * * @throws Exception * */ private void uploadOnePart( final StringWriter sw, final int partNumber, - final InitiateMultipartUploadResult initiateMultipartUploadResult, + final InitiateMultipartUploadResult currentInitiateMultipartUploadResult, final List partETags, final String 
currentObject) throws Exception { final String encoding = this.encoding; - final String bucket = this.bucket; - final OSSClient ossClient = this.ossClient; - RetryUtil.executeWithRetry(new Callable() { - @Override - public Boolean call() throws Exception { - byte[] byteArray = sw.toString().getBytes(encoding); - InputStream inputStream = new ByteArrayInputStream( - byteArray); - // 创建UploadPartRequest,上传分块 - UploadPartRequest uploadPartRequest = new UploadPartRequest(); - uploadPartRequest.setBucketName(bucket); - uploadPartRequest.setKey(currentObject); - uploadPartRequest.setUploadId(initiateMultipartUploadResult - .getUploadId()); - uploadPartRequest.setInputStream(inputStream); - uploadPartRequest.setPartSize(byteArray.length); - uploadPartRequest.setPartNumber(partNumber); - UploadPartResult uploadPartResult = ossClient - .uploadPart(uploadPartRequest); - partETags.add(uploadPartResult.getPartETag()); - LOG.info(String - .format("upload part [%s] size [%s] Byte has been completed.", - partNumber, byteArray.length)); - IOUtils.closeQuietly(inputStream); - return true; - } - }, 3, 1000L, false); + final byte[] byteArray = sw.toString().getBytes(encoding); + this.ossWriterProxy.uploadOnePart(byteArray, partNumber, currentInitiateMultipartUploadResult, partETags, currentObject); } @Override public void prepare() { - + if(useHdfsWriterProxy){ + hdfsWriterTask.prepare(); + return; + } } @Override public void post() { - + if(useHdfsWriterProxy){ + hdfsWriterTask.post(); + return; + } } @Override public void destroy() { + if(useHdfsWriterProxy){ + hdfsWriterTask.destroy(); + return; + } + try { + // this.ossClient.shutdown(); + } catch (Exception e) { + LOG.warn("shutdown ossclient meet a exception:" + e.getMessage(), e); + } + } + + private boolean isPeer2PeerCopyMode() { + return this.isBinaryFile + || com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.SYNC_MODE_VALUE_COPY + .equalsIgnoreCase(this.syncMode); + } + private String getObjectDir(String object) { + String dir = null; + if (StringUtils.isBlank(object)) { + dir = ""; + } else { + dir = object.trim(); + dir = dir.endsWith("/") ? 
dir : String.format("%s/", dir); + } + return dir; } } } diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriterProxy.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriterProxy.java new file mode 100644 index 0000000000..45516f73d4 --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriterProxy.java @@ -0,0 +1,171 @@ +package com.alibaba.datax.plugin.writer.osswriter; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.RetryUtil; +import com.aliyun.oss.OSSClient; +import com.aliyun.oss.model.*; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.ArrayUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.util.List; +import java.util.concurrent.Callable; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:29 + */ +public class OssWriterProxy { + private static Logger logger = LoggerFactory.getLogger(OssWriterProxy.class); + + private OSSClient ossClient; + private Configuration configuration; + /** + * 是否在服务器端进行加密存储 + */ + private Boolean encrypt; + private String bucket; + + + public OssWriterProxy (Configuration configuration, OSSClient ossClient) { + this.configuration = configuration; + this.ossClient = ossClient; + this.encrypt = configuration.getBool(Key.ENCRYPT, false); + this.bucket = configuration.getString(Key.BUCKET); + } + + public InitiateMultipartUploadRequest getInitiateMultipartUploadRequest(String currentObject){ + InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest; + if( !this.encrypt ) { + currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest( + this.bucket, currentObject); + } else { + // 将数据加密存储在oss + ObjectMetadata objectMetadata = new ObjectMetadata(); + objectMetadata.setHeader("x-oss-server-side-encryption", + ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); + currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest( + this.bucket, currentObject, objectMetadata); + } + return currentInitiateMultipartUploadRequest; + } + + public InitiateMultipartUploadResult initiateMultipartUpload( + final InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest) throws Exception { + final OSSClient ossClient = this.ossClient; + return RetryUtil.executeWithRetry(new Callable() { + @Override + public InitiateMultipartUploadResult call() throws Exception { + return ossClient.initiateMultipartUpload(currentInitiateMultipartUploadRequest); + } + }, 10, 1000L, false); + } + + public CompleteMultipartUploadResult completeMultipartUpload( + final CompleteMultipartUploadRequest currentCompleteMultipartUploadRequest) throws Exception { + + final OSSClient ossClient = this.ossClient; + return RetryUtil.executeWithRetry(new Callable() { + @Override + public CompleteMultipartUploadResult call() throws Exception { + return ossClient.completeMultipartUpload(currentCompleteMultipartUploadRequest); + } + }, 10, 1000L, false); + } + + public void uploadOnePart( + final byte[] byteArray, + final int partNumber, + final InitiateMultipartUploadResult currentInitiateMultipartUploadResult, + final List partETags, + final String currentObject) + throws Exception { + final String bucket = this.bucket; + final OSSClient ossClient = this.ossClient; + RetryUtil.executeWithRetry(new Callable() { + @Override + public Boolean call() throws Exception { + InputStream inputStream = new 
ByteArrayInputStream( + byteArray); + // 创建UploadPartRequest,上传分块 + UploadPartRequest uploadPartRequest = new UploadPartRequest(); + uploadPartRequest.setBucketName(bucket); + uploadPartRequest.setKey(currentObject); + uploadPartRequest.setUploadId(currentInitiateMultipartUploadResult.getUploadId()); + uploadPartRequest.setInputStream(inputStream); + uploadPartRequest.setPartSize(byteArray.length); + uploadPartRequest.setPartNumber(partNumber); + UploadPartResult uploadPartResult = ossClient + .uploadPart(uploadPartRequest); + partETags.add(uploadPartResult.getPartETag()); + logger.info(String + .format("upload part [%s] size [%s] Byte has been completed.", + partNumber, byteArray.length)); + IOUtils.closeQuietly(inputStream); + return true; + } + }, 10, 1000L, false); + } + + public void abortMultipartUpload(final String currentObject, final String uploadId) { + final String bucket = this.bucket; + final OSSClient ossClient = this.ossClient; + try { + RetryUtil.executeWithRetry((Callable) () -> { + AbortMultipartUploadRequest abortMultipartUploadRequest = + new AbortMultipartUploadRequest(bucket, currentObject, uploadId); + ossClient.abortMultipartUpload(abortMultipartUploadRequest); + return null; + }, 5, 1, true); + } catch (Throwable e) { + logger.error(String.format("AbortMultipartUpload failed, msg is %s",e.getMessage()), e); + } + } + + public void uploadOnePartForSingleObject( + final byte[] byteArray, + final String uploadId, + final List partETags, + final String currentObject, + final HeaderProvider headerProvider) + throws Exception { + final String bucket = this.bucket; + final OSSClient ossClient = this.ossClient; + RetryUtil.executeWithRetry(new Callable() { + @Override + public Boolean call() throws Exception { + // 创建UploadPartRequest,上传分块 + UploadPartRequest uploadPartRequest = new UploadPartRequest(); + uploadPartRequest.setPartNumber(OssSingleObject.currentPartNumber.getAndIncrement()); + byte[] data = byteArray; + if (uploadPartRequest.getPartNumber() == 1) { + // write header + byte[] headerBytes = headerProvider.getHeader(); + logger.info("write header to part {}. 
header size: {}", + uploadPartRequest.getPartNumber(), ArrayUtils.getLength(headerBytes)); + data = ArrayUtils.addAll(headerBytes, byteArray); + } + ByteArrayInputStream inputStream = new ByteArrayInputStream(data); + uploadPartRequest.setBucketName(bucket); + uploadPartRequest.setKey(currentObject); + uploadPartRequest.setUploadId(uploadId); + uploadPartRequest.setInputStream(inputStream); + uploadPartRequest.setPartSize(data.length); + UploadPartResult uploadPartResult = ossClient + .uploadPart(uploadPartRequest); + partETags.add(uploadPartResult.getPartETag()); + logger.info("upload part number [{}] size [{}] Byte has been completed, uploadId: {}.", + uploadPartRequest.getPartNumber(), data.length, uploadId); + IOUtils.closeQuietly(inputStream); + return true; + } + }, 10, 1000L, false); + } + + public interface HeaderProvider { + byte[] getHeader() throws Exception; + } +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileProccessor.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileProccessor.java new file mode 100644 index 0000000000..c88a6f109f --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileProccessor.java @@ -0,0 +1,49 @@ +package com.alibaba.datax.plugin.writer.osswriter.parquet; + +import org.apache.hadoop.fs.Path; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; + +import parquet.hadoop.ParquetWriter; +import parquet.hadoop.metadata.CompressionCodecName; +import parquet.schema.MessageType; + +import java.io.IOException; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:23 + */ +public class ParquetFileProccessor extends ParquetWriter { + private Path path; + + public ParquetFileProccessor(Path path, MessageType schema, Configuration taskConfig, + TaskPluginCollector taskPluginCollector) throws IOException { + this(path, schema, false, taskConfig, taskPluginCollector); + this.path = path; + } + + public ParquetFileProccessor(Path path, MessageType schema, boolean enableDictionary, Configuration taskConfig, + TaskPluginCollector taskPluginCollector) throws IOException { + this(path, schema, CompressionCodecName.UNCOMPRESSED, enableDictionary, taskConfig, taskPluginCollector); + this.path = path; + } + + public ParquetFileProccessor(Path path, MessageType schema, CompressionCodecName codecName, + boolean enableDictionary, Configuration taskConfig, TaskPluginCollector taskPluginCollector) + throws IOException { + super(path, new ParquetFileSupport(schema, taskConfig, taskPluginCollector), codecName, DEFAULT_BLOCK_SIZE, + DEFAULT_PAGE_SIZE, enableDictionary, false); + this.path = path; + } + + public byte[] getParquetRawData() { + if (null == this.path) { + return null; + } else { + return null; + } + } +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileSupport.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileSupport.java new file mode 100644 index 0000000000..9daa5a7fb9 --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileSupport.java @@ -0,0 +1,355 @@ +package com.alibaba.datax.plugin.writer.osswriter.parquet; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import 
com.alibaba.datax.plugin.unstructuredstorage.writer.Key; +import com.alibaba.datax.plugin.writer.osswriter.Constant; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import parquet.hadoop.api.WriteSupport; +import parquet.io.api.Binary; +import parquet.io.api.RecordConsumer; +import parquet.schema.*; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.HashMap; +import java.util.List; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:25 + */ +public class ParquetFileSupport extends WriteSupport { + public static final Logger LOGGER = LoggerFactory.getLogger(ParquetFileSupport.class); + private MessageType schema; + private RecordConsumer recordConsumer; + private boolean printStackTrace = true; + + // 不通类型的nullFormat + private String nullFormat; + + private String dateFormat; + private DateFormat dateParse; + private Binary binaryForNull; + private TaskPluginCollector taskPluginCollector; + + public ParquetFileSupport(MessageType schema, com.alibaba.datax.common.util.Configuration taskConfig, TaskPluginCollector taskPluginCollector) { + this.schema = schema; + // 不通类型的nullFormat + this.nullFormat = taskConfig.getString(Key.NULL_FORMAT, Constant.DEFAULT_NULL_FORMAT); + this.binaryForNull = Binary.fromString(this.nullFormat); + + this.dateFormat = taskConfig.getString(Key.DATE_FORMAT, null); + if (StringUtils.isNotBlank(this.dateFormat)) { + this.dateParse = new SimpleDateFormat(dateFormat); + } + + this.taskPluginCollector = taskPluginCollector; + } + + @Override + public WriteContext init(Configuration configuration) { + return new WriteContext(schema, new HashMap()); + } + + @Override + public void prepareForWrite(RecordConsumer recordConsumer) { + this.recordConsumer = recordConsumer; + } + + @Override + public void write(Record values) { + LOGGER.info("Writing parquet data using fields mode(The correct mode.)"); + List types = this.schema.getFields(); + if (values != null && types != null && values.getColumnNumber() == types.size()) { + recordConsumer.startMessage(); + writeFields(types, values); + recordConsumer.endMessage(); + } + } + + private void writeFields(List types, Record values) { + for (int i = 0; i < types.size(); i++) { + Type type = types.get(i); + Column value = values.getColumn(i); + if (value != null) { + try { + if (type.isPrimitive()) { + writePrimitiveType(type, value, i); + } else { + writeGroupType(type, (JSON) JSON.parse(value.asString()), i); + } + } catch (Exception e) { + if (printStackTrace) { + printStackTrace = false; + LOGGER.warn("write to parquet error: {}", e.getMessage(), e); + } + // dirty data + if (null != this.taskPluginCollector) { + // job post 里面的merge taskPluginCollector 为null + this.taskPluginCollector.collectDirtyRecord(values, e, e.getMessage()); + } + } + } + } + } + + private void writeFields(List types, JSONObject values) { + for (int i = 0; i < types.size(); i++) { + Type type = types.get(i); + Object value = values.get(type.getName()); + + if (value != null) { + try { + if (type.isPrimitive()) { + writePrimitiveType(type, value, i); + } else { + writeGroupType(type, (JSON) value, i); + } + } catch (Exception e) { + if (printStackTrace) { + printStackTrace = false; + LOGGER.warn("write to parquet error: {}", e.getMessage(), e); + } + } + } else { + 
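// note: the field is absent from the source JSON here, so fall back to emitting the configured nullFormat value + 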
recordConsumer.addBinary(this.binaryForNull); + } + } + } + + private void writeGroupType(Type type, JSON value, int index) { + GroupType groupType = type.asGroupType(); + OriginalType originalType = groupType.getOriginalType(); + if (originalType != null) { + switch (originalType) { + case MAP: + writeMap(groupType, value, index); + break; + case LIST: + writeList(groupType, value, index); + break; + default: + break; + } + } else { + // struct + writeStruct(groupType, value, index); + } + } + + private void writeMap(GroupType groupType, JSON value, int index) { + if (value == null) { + return; + } + + JSONObject json = (JSONObject) value; + + if (json.isEmpty()) { + return; + } + + recordConsumer.startField(groupType.getName(), index); + + recordConsumer.startGroup(); + + // map + // key_value start + recordConsumer.startField("key_value", 0); + recordConsumer.startGroup(); + + List keyValueFields = groupType.getFields().get(0).asGroupType().getFields(); + Type keyType = keyValueFields.get(0); + Type valueType = keyValueFields.get(1); + for (String key : json.keySet()) { + // key + writePrimitiveType(keyType, key, 0); + + // value + if (valueType.isPrimitive()) { + writePrimitiveType(valueType, json.get(key), 1); + } else { + writeGroupType(valueType, (JSON) json.get(key), 1); + } + } + + recordConsumer.endGroup(); + recordConsumer.endField("key_value", 0); + // key_value end + + recordConsumer.endGroup(); + recordConsumer.endField(groupType.getName(), index); + } + + private void writeList(GroupType groupType, JSON value, int index) { + if (value == null) { + return; + } + + JSONArray json = (JSONArray) value; + + if (json.isEmpty()) { + return; + } + + recordConsumer.startField(groupType.getName(), index); + // list + recordConsumer.startGroup(); + + + // list start + recordConsumer.startField("list", 0); + recordConsumer.startGroup(); + + Type elementType = groupType.getFields().get(0).asGroupType().getFields().get(0); + + if (elementType.isPrimitive()) { + for (Object elementValue : json) { + writePrimitiveType(elementType, elementValue, 0); + } + } else { + for (Object elementValue : json) { + writeGroupType(elementType, (JSON) elementValue, 0); + } + } + + recordConsumer.endGroup(); + recordConsumer.endField("list", 0); + // list end + recordConsumer.endGroup(); + + recordConsumer.endField(groupType.getName(), index); + } + + private void writeStruct(GroupType groupType, JSON value, int index) { + if (value == null) { + return; + } + JSONObject json = (JSONObject) value; + if (json.isEmpty()) { + return; + } + + recordConsumer.startField(groupType.getName(), index); + // struct start + recordConsumer.startGroup(); + + writeFields(groupType.getFields(), json); + recordConsumer.endGroup(); + // struct end + recordConsumer.endField(groupType.getName(), index); + } + + private void writePrimitiveType(Type type, Object value, int index) { + if (value == null) { + return; + } + + recordConsumer.startField(type.getName(), index); + PrimitiveType primitiveType = type.asPrimitiveType(); + + switch (primitiveType.getPrimitiveTypeName()) { + case BOOLEAN: + recordConsumer.addBoolean((Boolean) value); + break; + case FLOAT: + if (value instanceof Float) { + recordConsumer.addFloat(((Float) value).floatValue()); + } else if (value instanceof Double) { + recordConsumer.addFloat(((Double) value).floatValue()); + } else if (value instanceof Long) { + recordConsumer.addFloat(((Long) value).floatValue()); + } else if (value instanceof Integer) { + recordConsumer.addFloat(((Integer) 
value).floatValue()); + } + break; + case DOUBLE: + if (value instanceof Float) { + recordConsumer.addDouble(((Float) value).doubleValue()); + } else if (value instanceof Double) { + recordConsumer.addDouble(((Double) value).doubleValue()); + } else if (value instanceof Long) { + recordConsumer.addDouble(((Long) value).doubleValue()); + } else if (value instanceof Integer) { + recordConsumer.addDouble(((Integer) value).doubleValue()); + } + break; + case INT32: + if (value instanceof Integer) { + recordConsumer.addInteger((Integer) value); + } else if (value instanceof Long) { + recordConsumer.addInteger(((Long) value).intValue()); + } else { + throw new IllegalArgumentException( + String.format("Invalid value: %s(clazz: %s) for field: %s", value, value.getClass(), type.getName()) + ); + } + break; + case INT64: + case INT96: + if (value instanceof Integer) { + recordConsumer.addLong(((Integer) value).longValue()); + } else if (value instanceof Long) { + recordConsumer.addLong(((Long) value).longValue()); + } else { + throw new IllegalArgumentException( + String.format("Invalid value: %s(clazz: %s) for field: %s", value, value.getClass(), type.getName()) + ); + } + break; + case BINARY: + default: + recordConsumer.addBinary(Binary.fromString((String) value)); + break; + } + recordConsumer.endField(type.getName(), index); + } + + private void writePrimitiveType(Type type, Column value, int index) { + if (value == null || value.getRawData() == null) { + return; + } + + recordConsumer.startField(type.getName(), index); + PrimitiveType primitiveType = type.asPrimitiveType(); + switch (primitiveType.getPrimitiveTypeName()) { + case BOOLEAN: + recordConsumer.addBoolean(value.asBoolean()); + break; + case FLOAT: + recordConsumer.addFloat(value.asDouble().floatValue()); + break; + case DOUBLE: + recordConsumer.addDouble(value.asDouble()); + break; + case INT32: + recordConsumer.addInteger(value.asLong().intValue()); + break; + case INT64: + case INT96: + recordConsumer.addLong(value.asLong()); + break; + case BINARY: + String valueAsString2Write = null; + if (Column.Type.DATE == value.getType() && null != this.dateParse) { + valueAsString2Write = dateParse.format(value.asDate()); + } + else { + valueAsString2Write = value.asString(); + } + recordConsumer.addBinary(Binary.fromString(valueAsString2Write)); + break; + default: + recordConsumer.addBinary(Binary.fromString(value.asString())); + break; + } + recordConsumer.endField(type.getName(), index); + } +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HandlerUtil.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HandlerUtil.java new file mode 100644 index 0000000000..488c119ce5 --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HandlerUtil.java @@ -0,0 +1,38 @@ +package com.alibaba.datax.plugin.writer.osswriter.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.core.util.container.CoreConstant; +import com.alibaba.datax.plugin.writer.osswriter.Key; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:35 + */ +public class HandlerUtil { + + private static final Logger LOG = LoggerFactory.getLogger(HandlerUtil.class); + + /** + * transform the job configuration into an ODPS->OSS config + * + * @param jobConfiguration + */ + public static void preHandler(Configuration jobConfiguration) { + LOG.info("================ OssWriter Phase 1 preHandler starting... 
================ "); + Configuration writerOriginPluginConf = jobConfiguration.getConfiguration( + CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER); + Configuration writerOssPluginConf = writerOriginPluginConf.getConfiguration(Key.OSS_CONFIG); + Configuration newWriterPluginConf = Configuration.newDefault(); + jobConfiguration.remove(CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER); + //将postgresqlwriter的pg配置注入到postgresqlConfig中, 供后面的postHandler使用 + writerOriginPluginConf.remove(Key.OSS_CONFIG); + newWriterPluginConf.set(Key.POSTGRESQL_CONFIG, writerOriginPluginConf); + newWriterPluginConf.merge(writerOssPluginConf, true); + //设置writer的名称为osswriter + jobConfiguration.set(CoreConstant.DATAX_JOB_CONTENT_WRITER_NAME, "osswriter"); + jobConfiguration.set(CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER, newWriterPluginConf); + LOG.info("================ OssWriter Phase 1 preHandler end... ================ "); + } +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HdfsParquetUtil.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HdfsParquetUtil.java new file mode 100644 index 0000000000..ccd3aa3543 --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HdfsParquetUtil.java @@ -0,0 +1,145 @@ +package com.alibaba.datax.plugin.writer.osswriter.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.hdfswriter.HdfsWriter; +import com.alibaba.datax.plugin.writer.osswriter.Key; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem; +import org.apache.hadoop.mapred.JobConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:35 + */ +public class HdfsParquetUtil { + + + private static final Logger logger = LoggerFactory.getLogger(HdfsParquetUtil.class); + + public static boolean isUseHdfsWriterProxy( String fileFormat){ + if("orc".equalsIgnoreCase(fileFormat) || "parquet".equalsIgnoreCase(fileFormat)){ + return true; + } + return false; + } + + /** + * 配置writerSliceConfig 适配hdfswriter写oss parquet + * https://help.aliyun.com/knowledge_detail/74344.html + * @param hdfsWriterJob + * @param writerSliceConfig + */ + public static void adaptConfiguration(HdfsWriter.Job hdfsWriterJob, Configuration writerSliceConfig){ + String fileFormat = writerSliceConfig.getString( + com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.FILE_FORMAT_TEXT); + + String bucket = writerSliceConfig.getString(Key.BUCKET); + String fs =String.format("oss://%s",bucket); + writerSliceConfig.set(com.alibaba.datax.plugin.writer.hdfswriter.Key.DEFAULT_FS,fs); + writerSliceConfig.set(com.alibaba.datax.plugin.writer.hdfswriter.Key.FILE_TYPE, + writerSliceConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT)); + + /** + * "writeMode"、 "compress"、"encoding" 、path、fileName 相互一致 + */ + JSONObject hadoopConfig = new JSONObject(); + hadoopConfig.put(Key.FS_OSS_ACCESSID,writerSliceConfig.getString(Key.ACCESSID)); + hadoopConfig.put(Key.FS_OSS_ACCESSKEY,writerSliceConfig.getString(Key.ACCESSKEY)); + 
hadoopConfig.put(Key.FS_OSS_ENDPOINT,writerSliceConfig.getString(Key.ENDPOINT));
+ writerSliceConfig.set(Key.HDOOP_CONFIG,Configuration.from(JSON.toJSONString(hadoopConfig)));
+
+ String object = writerSliceConfig.getString(Key.OBJECT);
+ String path = writerSliceConfig.getString(Key.PATH);
+ String fileName = writerSliceConfig.getString(Key.FILE_NAME);
+
+ if (StringUtils.isNotBlank(object) && (StringUtils.isNotBlank(path) || StringUtils.isNotBlank(fileName))) {
+ logger.warn("You configured both the \"object\" property and the \"path\" or \"fileName\" property, ignoring the object property. "
+ + "It is recommended to remove the \"path\" or \"fileName\" attribute, which has been deprecated.");
+ }
+
+ // backward compatible with DataX jobs that configured PATH: if PATH is already set, it is not parsed from object
+ if (StringUtils.isBlank(path)) {
+ Validate.notBlank(object, "object can't be blank!");
+ writerSliceConfig.set(Key.PATH, getPathAndFileNameFromObject(object.trim()).get(Key.PATH));
+ }
+ // backward compatible with DataX jobs that configured fileName: if fileName is already set, it is not parsed from object
+ if (StringUtils.isBlank(fileName)) {
+ Validate.notBlank(object, "object can't be blank!");
+ writerSliceConfig.set(Key.FILE_NAME, getPathAndFileNameFromObject(object.trim()).get(Key.FILE_NAME));
+ }
+ if (StringUtils.equalsIgnoreCase(fileFormat, "parquet")) {
+ hdfsWriterJob.unitizeParquetConfig(writerSliceConfig);
+ }
+
+ }
+
+
+
+ /**
+ * Parse path and fileName out of object.
+ *
+ * Example 1:
+ * /hello/aaa/bbb/ccc.txt
+ * path: /hello/aaa/bbb
+ * fileName: ccc.txt
+ *
+ * Example 2:
+ * hello/aaa/bbb/ccc.txt
+ * path: /hello/aaa/bbb
+ * fileName: ccc.txt
+ *
+ * Example 3:
+ * ccc.txt
+ * path: /
+ * fileName: ccc.txt
+ *
+ * Example 4:
+ * /ccc.txt
+ * path: /
+ * fileName: ccc.txt
+ *
+ * @param object
+ * @return
+ */
+ public static Map<String, String> getPathAndFileNameFromObject(String object) {
+ Map<String, String> pathAndFileName = new HashMap<>();
+
+ boolean containsSlash = object.contains("/");
+
+ // object contains no "/": path becomes "/" and fileName is the whole object
+ if (!containsSlash) {
+ pathAndFileName.put(Key.PATH, "/");
+ pathAndFileName.put(Key.FILE_NAME, object);
+ return pathAndFileName;
+ }
+
+ if (!object.startsWith("/")) {
+ object = "/" + object;
+ }
+
+ int lastIndex = object.lastIndexOf("/");
+ String path = object.substring(0, lastIndex);
+ String fileName = object.substring(lastIndex + 1);
+
+ path = StringUtils.isNotBlank(path) ? 
path : "/"; + + logger.info("path: {}", path); + logger.info("fileName: {}", fileName); + + pathAndFileName.put(Key.PATH, path); + pathAndFileName.put(Key.FILE_NAME, fileName); + return pathAndFileName; + } +} diff --git a/otsreader/pom.xml b/otsreader/pom.xml index b1e0e735a8..eaac8804da 100644 --- a/otsreader/pom.xml +++ b/otsreader/pom.xml @@ -10,6 +10,17 @@ otsreader + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common diff --git a/otsstreamreader/pom.xml b/otsstreamreader/pom.xml index 84ca2d6a05..cb4a6206d7 100644 --- a/otsstreamreader/pom.xml +++ b/otsstreamreader/pom.xml @@ -13,6 +13,17 @@ 0.0.1 + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common diff --git a/otsstreamreader/src/main/resources/plugin.json b/otsstreamreader/src/main/resources/plugin.json index 9a70a47a46..57071d6f14 100644 --- a/otsstreamreader/src/main/resources/plugin.json +++ b/otsstreamreader/src/main/resources/plugin.json @@ -2,5 +2,5 @@ "name": "otsstreamreader", "class": "com.alibaba.datax.plugin.reader.otsstreamreader.internal.OTSStreamReader", "description": "", - "developer": "zhaofeng.zhou@alibaba-inc.com" + "developer": "alibaba" } diff --git a/otswriter/pom.xml b/otswriter/pom.xml index d40f68b3dd..cb255e1f40 100644 --- a/otswriter/pom.xml +++ b/otswriter/pom.xml @@ -10,6 +10,17 @@ otswriter + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common diff --git a/package.xml b/package.xml index fd9c8b52cb..456f780c86 100755 --- a/package.xml +++ b/package.xml @@ -131,6 +131,13 @@ datax + + tdenginereader/target/datax/ + + **/*.* + + datax + streamreader/target/datax/ @@ -181,7 +188,28 @@ datax - tdenginereader/target/datax/ + gdbreader/target/datax/ + + **/*.* + + datax + + + hbase11xsqlreader/target/datax/ + + **/*.* + + datax + + + hbase20xsqlreader/target/datax/ + + **/*.* + + datax + + + tsdbreader/target/datax/ **/*.* @@ -337,77 +365,77 @@ datax - hbase11xsqlreader/target/datax/ + elasticsearchwriter/target/datax/ **/*.* datax - elasticsearchwriter/target/datax/ + hbase20xsqlwriter/target/datax/ **/*.* datax - hbase20xsqlreader/target/datax/ + tsdbwriter/target/datax/ **/*.* datax - hbase20xsqlwriter/target/datax/ + adbpgwriter/target/datax/ **/*.* datax - tsdbwriter/target/datax/ + cassandrawriter/target/datax/ **/*.* datax - tsdbreader/target/datax/ + clickhousewriter/target/datax/ **/*.* datax - adbpgwriter/target/datax/ + oscarwriter/target/datax/ **/*.* datax - cassandrawriter/target/datax/ + oceanbasev10writer/target/datax/ **/*.* datax - clickhousewriter/target/datax/ + gdbwriter/target/datax/ **/*.* datax - oscarwriter/target/datax/ + kuduwriter/target/datax/ **/*.* datax - oceanbasev10writer/target/datax/ + hologresjdbcwriter/target/datax/ **/*.* diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/FileFormat.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/FileFormat.java new file mode 100644 index 0000000000..b9368e676c --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/FileFormat.java @@ -0,0 +1,97 @@ +package com.alibaba.datax.plugin.unstructuredstorage; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import 
com.alibaba.datax.plugin.unstructuredstorage.reader.Constant;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.Key;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.Arrays;
+
+/**
+ * @Author: guxuan
+ * @Date 2022-05-17 16:04
+ */
+public enum FileFormat {
+ TEXT("text"),
+ CSV("csv"),
+ EXCEL("excel"),
+ BINARY("binary");
+
+ private String fileFormat;
+
+ private boolean isText;
+ private boolean isCsv;
+ private boolean isExcel;
+ private boolean isBinary;
+
+ FileFormat(String fileFormat) {
+ this.fileFormat = fileFormat.toLowerCase();
+ }
+
+ /**
+ * Get the file format; text, csv, excel and binary are currently supported.
+ * @param configuration
+ * @return
+ */
+ public static FileFormat getFileFormatByConfiguration(Configuration configuration) {
+ String fileFormat = configuration.getString(Key.FILE_FORMAT, Constant.DEFAULT_FILE_FORMAT);
+ return FileFormat.getByTypeName(fileFormat);
+ }
+
+ public String getFileFormat() {
+ return this.fileFormat;
+ }
+
+ public static FileFormat getByTypeName(String fileFormat) {
+ for (FileFormat fFormat : values()) {
+ if (fFormat.fileFormat.equalsIgnoreCase(fileFormat)) {
+ return fFormat;
+ }
+ }
+ throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE,
+ String.format("DataX 不支持该 fileFormat 类型:%s, 目前支持的 fileFormat 类型是:%s", fileFormat, Arrays.asList(values())));
+ }
+
+ public boolean equalsIgnoreCase(String fileFormat){
+ return StringUtils.equalsIgnoreCase(fileFormat, this.fileFormat);
+ }
+
+ public boolean isText() {
+ return this.equalsIgnoreCase(Constant.FILE_FORMAT_TEXT);
+ }
+
+ public void setText(boolean text) {
+ isText = text;
+ }
+
+ public boolean isCsv() {
+ return this.equalsIgnoreCase(Constant.FILE_FORMAT_CSV);
+ }
+
+ public void setCsv(boolean csv) {
+ isCsv = csv;
+ }
+
+ public boolean isExcel() {
+ return this.equalsIgnoreCase(Constant.FILE_FORMAT_EXCEL);
+ }
+
+ public void setExcel(boolean excel) {
+ isExcel = excel;
+ }
+
+ public boolean isBinary() {
+ return this.equalsIgnoreCase(Constant.FILE_FORMAT_BINARY);
+ }
+
+ public void setBinary(boolean binary) {
+ isBinary = binary;
+ }
+
+ @Override
+ public String toString(){
+ return this.fileFormat;
+ }
+}
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings.properties
new file mode 100644
index 0000000000..d53d474963
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings.properties
@@ -0,0 +1 @@
+fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat \u7C7B\u578B\u662F:{1}
\ No newline at end of file
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_en_US.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_en_US.properties
new file mode 100644
index 0000000000..d53d474963
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_en_US.properties
@@ -0,0 +1 @@
+fileformaterror.1=DataX does not support this fileFormat type:{0}. Currently supported fileFormat types are:{1}
\ No newline at end of file
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_ja_JP.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_ja_JP.properties
new file mode 100644
index 0000000000..d53d474963
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_ja_JP.properties
@@ -0,0 +1 @@
+fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat \u7C7B\u578B\u662F:{1}
\ No newline at end of file
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_CN.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_CN.properties
new file mode 100644
index 0000000000..d53d474963
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_CN.properties
@@ -0,0 +1 @@
+fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat \u7C7B\u578B\u662F:{1}
\ No newline at end of file
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_HK.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_HK.properties
new file mode 100644
index 0000000000..b92a73ecd7
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_HK.properties
@@ -0,0 +1 @@
+fileformaterror.1=DataX不支持該fileFormat類型:{0},現時支持的fileFormat類型是:{1}
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_TW.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_TW.properties
new file mode 100644
index 0000000000..b92a73ecd7
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_TW.properties
@@ -0,0 +1 @@
+fileformaterror.1=DataX不支持該fileFormat類型:{0},現時支持的fileFormat類型是:{1}
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Constant.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Constant.java
index 7c6bc13956..6be46cb20c 100755
--- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Constant.java
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Constant.java
@@ -10,4 +10,47 @@ public class Constant {
 public static final String DEFAULT_NULL_FORMAT = "\\N";
 public static final Integer DEFAULT_BUFFER_SIZE = 8192;
+
+ public static final String FILE_FORMAT_CSV = "csv";
+
+ public static final String FILE_FORMAT_TEXT = "text";
+
+ public static final String FILE_FORMAT_EXCEL = "excel";
+
+ public static final String FILE_FORMAT_BINARY = "binary";
+
+ public static final String 
DEFAULT_FILE_FORMAT = "csv"; + + public static final Boolean DEFAULE_SKIP_TEXT_EMPTY_RECORDS = true; + + public static final String EXCEL_VERSION_03_OR_EARLIER = "03_OR_EARLIER"; + + public static final String EXCEL_VERSION_07_OR_LATER = "07_OR_LATER"; + + /** + * 文件全限定名 + * */ + public static final String SOURCE_FILE = "sourceFile"; + + /** + * 单纯的文件名 + * */ + public static final String SOURCE_FILE_NAME = "sourceFileName"; + + public static final boolean DEFAULT_OUTPUT_SHEET_NAME = false; + + /** + * TODO 暂时先不考虑整个文件夹同步 + * 在同步音视频等二进制文件的情况下: + * 半结构读插件(txtfilreader, ftpreader, hdfsreader, ossreader)需要将相对文件路径注入 RELATIVE_SOURCE_FILE 属性 + * 目的是半结构化写插件可以统一使用 RELATIVE_SOURCE_FILE 获取到读端插件的所有二进制文件名及其相对路径。 + * 举个栗子: + * 读端插件PATH配置了/home/admin/myapp/ + */ + public static final String RELATIVE_SOURCE_FILE = "relativeSourceFile"; + + /** + * 默认读取二进制文件一次性读取的Byte数目: 1048576 Byte [1MB] + */ + public static final int DEFAULT_BLOCK_SIZE_IN_BYTE = 1048576; } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Key.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Key.java index bb5bf59fee..71e13ad244 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Key.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Key.java @@ -28,4 +28,63 @@ public class Key { public static final String CSV_READER_CONFIG = "csvReaderConfig"; + public static final String MARK_DONE_FILE_NAME = "markDoneFileName"; + + public static final String MARK_DOING_FILE_NAME = "markDoingFileName"; + + // public static final String RETRY_TIME = "retryTime"; + public final static String MAX_RETRY_TIME = "maxRetryTime"; + + public final static String RETRY_INTERVAL = "retryInterval"; + + public static final String TEXT_READER_CONFIG = "textReaderConfig"; + + public static final String SKIP_EMPTY_RECORDS = "skipEmptyRecords"; + + public static final String EXCEL_READER_CONFIG = "excelReaderConfig"; + + public static final String EXCEL_SHEET_NAME = "excelSheetName"; + + public static final String VERSION = "version"; + + public static final String OUTPUT_SHEET_NAME = "outputSheetName"; + + /** + * csv or text or excel + */ + public static final String FILE_FORMAT = "fileFormat"; + + /** + * 是否把一个file当做一个column + */ + public static final String FILE_AS_COLUMN = "fileAsColumn"; + + /** + * 读取二进制文件一次性读取的Byte数目 + */ + public static final String BLOCK_SIZE_IN_BYTE = "blockSizeInByte"; + + /** + * 半结构化标示一个Record来源的绝对文件路径名,可以是ftp文件,oss的object等 + * */ + public static final String META_KEY_FILE_PATH = "filePath"; + + /** + * 多文件切分的工作项,Task通过此配置项表示工作内容, 文件内部切分相关key + */ + public static final String SPLIT_SLICE_CONFIG = "__splitSliceConfig"; + public static final String SPLIT_SLICE_FILE_PATH = "filePath"; + public static final String SPLIT_SLICE_START_POINT = "startPoint"; + public static final String SPLIT_SLICE_END_POINT = "endPoint"; + + /** + * tar.gz压缩包,支持配置 tarFileFilterPattern 参数,来过滤要同步的文件 + * For Example: + * "tarFileFilterPattern" : "*.dat" + * + * 同步的时候,只同步 tar.gz 里面文件名后缀为 .dat 的文件 + */ + public static final String TAR_FILE_FILTER_PATTERN = "tarFileFilterPattern"; + public static final String ENABLE_INNER_SPLIT = "enableInnerSplit"; + } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/UnstructuredStorageReaderUtil.java 
b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/UnstructuredStorageReaderUtil.java
index 423f66db99..645971d0da 100755
--- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/UnstructuredStorageReaderUtil.java
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/UnstructuredStorageReaderUtil.java
@@ -26,10 +26,7 @@
 import java.io.*;
 import java.nio.charset.UnsupportedCharsetException;
 import java.text.DateFormat;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
+import java.util.*;
 public class UnstructuredStorageReaderUtil {
 private static final Logger LOG = LoggerFactory
@@ -695,4 +692,27 @@ public static void setCsvReaderConfig(CsvReader csvReader){
 LOG.info(String.format("CsvReader使用默认值[%s],csvReaderConfig值为[%s]",JSON.toJSONString(csvReader),JSON.toJSONString(UnstructuredStorageReaderUtil.csvReaderConfigMap)));
 }
 }
+
+ public static Map<String, String> buildRecordMeta(String filePath) {
+ Map<String, String> meta = new HashMap<String, String>();
+ // inject the filePath metadata into the record context; the full path is passed through as-is
+ // (a variant that passes only the bare file name:)
+ // File file = new File(filePath);
+ // meta.put(Key.META_KEY_FILE_PATH, file.getName());
+ meta.put(Key.META_KEY_FILE_PATH, filePath);
+ return meta;
+ }
+
+ public static void setSourceFileName(Configuration configuration, List<String> sourceFiles){
+ List<String> sourceFilesName = new ArrayList<String>();
+ File file;
+ for (String sourceFile: sourceFiles){
+ file = new File(sourceFile);
+ sourceFilesName.add(file.getName());
+ }
+ configuration.set(Constant.SOURCE_FILE_NAME, sourceFilesName);
+ }
+
+ public static void setSourceFile(Configuration configuration, List<String> sourceFiles){
+ configuration.set(Constant.SOURCE_FILE, sourceFiles);
+ }
 }
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/BinaryFileReaderUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/BinaryFileReaderUtil.java
new file mode 100644
index 0000000000..a7d846b324
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/BinaryFileReaderUtil.java
@@ -0,0 +1,62 @@
+package com.alibaba.datax.plugin.unstructuredstorage.reader.binaryFileUtil;
+
+import com.alibaba.datax.common.element.BytesColumn;
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.Key;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @Author: guxuan
+ * @Date 2022-05-17 15:59
+ */
+public class BinaryFileReaderUtil {
+ private static final Logger LOG = LoggerFactory.getLogger(BinaryFileReaderUtil.class);
+
+ public static void readFromStream(InputStream inputStream, String filePath, RecordSender recordSender, int blockSizeInByte) {
+ try {
+ Map<String, String> meta = UnstructuredStorageReaderUtil.buildRecordMeta(filePath);
+ byte[] tmp = new byte[blockSizeInByte];
+ int len;
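+ // Worked example (sizes assumed): with blockSizeInByte = 1048576 (1 MB) and a 2.5 MB source
+ // file, the loop below emits two full 1 MB records, and the trailing send after the loop
+ // flushes the remaining 0.5 MB, so the writer side can reassemble the stream byte-for-byte.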
+ ByteUtils byteUtils = new ByteUtils();
+ while ((len = inputStream.read(tmp)) != -1) {
+ /** If len is smaller than blockSizeInByte, this is the last chunk of the file.
+ * The array must be trimmed to the number of bytes actually read, otherwise more
+ * bytes would be written to the destination than the source contains, which can
+ * corrupt files such as pptx or docx.
+ */
+ // warn: this copy could be optimized away; byte[] tmp could be reused directly
+ byte[] readBytesArray = Arrays.copyOf(tmp, len);
+ byteUtils.append(readBytesArray);
+ if (byteUtils.getSize() >= blockSizeInByte) {
+ recordSenderBytesColumn(recordSender, byteUtils.getBuffer(), meta);
+ byteUtils.clear();
+ }
+ }
+ recordSenderBytesColumn(recordSender, byteUtils.getBuffer(), meta);
+ LOG.info("End read.");
+ } catch (IOException e) {
+ throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, e);
+ }
+ }
+
+ private static void recordSenderBytesColumn(RecordSender recordSender, byte[] tmp, Map<String, String> meta){
+ Record record = recordSender.createRecord();
+ Column column = new BytesColumn(tmp);
+ record.addColumn(column);
+ record.setMeta(meta);
+ recordSender.sendToWriter(record);
+ }
+
+
+}
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/ByteUtils.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/ByteUtils.java
new file mode 100644
index 0000000000..14ba9c47ea
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/ByteUtils.java
@@ -0,0 +1,48 @@
+package com.alibaba.datax.plugin.unstructuredstorage.reader.binaryFileUtil;
+
+import java.util.Arrays;
+
+/**
+ * @Author: guxuan
+ * @Date 2022-05-17 16:00
+ */
+public class ByteUtils {
+
+ private int size;
+ private static final int kDefaultBufferSize = 0;
+ private byte[] buffer;
+
+ public byte[] getBuffer() {
+ return buffer;
+ }
+
+ public ByteUtils() {
+ buffer = new byte[0];
+ size = 0;
+ }
+
+ public long getSize() {
+ return size;
+ }
+
+ public void setSize(int size) {
+ this.size = size;
+ }
+
+ public ByteUtils append(byte[] buf) {
+
+ if (buf == null){
+ return this;
+ }
+ // size always equals buffer.length before the grow, so the copy appends at the old end
+ buffer = Arrays.copyOf(buffer, buffer.length + buf.length);
+ System.arraycopy(buf, 0, buffer, size, buf.length);
+ size += buf.length;
+ return this;
+ }
+
+ public void clear()
+ {
+ buffer = new byte[kDefaultBufferSize];
+ size = 0;
+ }
+}
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/StartEndPair.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/StartEndPair.java
new file mode 100644
index 0000000000..aa021c9969
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/StartEndPair.java
@@ -0,0 +1,49 @@
+package com.alibaba.datax.plugin.unstructuredstorage.reader.split;
+
+/**
+ * @Author: guxuan
+ * @Date 2022-05-17 15:50
+ */
+public class StartEndPair {
+ private Long start;
+ private Long end;
+ private String filePath;
+
+ public StartEndPair() {
+ }
+
+ public StartEndPair(Long start, Long end, String filePath) {
+ this.start = start;
+ this.end = end;
+ this.filePath = filePath;
+ }
+
+ public Long getEnd() {
+ return end;
+ }
+
+ public void setEnd(Long end) {
+ this.end = end;
+ }
+
+ public Long getStart() {
+ return start;
+ }
+
+ public void setStart(Long start) {
+ this.start = start;
+ }
+
+ public String getFilePath() {
+ return filePath;
+ }
+
+ public void setFilePath(String filePath) {
+ this.filePath = filePath;
+ }
+
+ @Override
+ public String toString() {
+ return "StartEndPair [start=" + start + ", end=" + end + ", filePath=" + filePath + "]";
+ }
+}
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/UnstructuredSplitUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/UnstructuredSplitUtil.java
new file mode 100644
index 0000000000..8087ed631a
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/UnstructuredSplitUtil.java
@@ -0,0 +1,191 @@
+package com.alibaba.datax.plugin.unstructuredstorage.reader.split;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.RangeSplitUtil;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.Key;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode;
+import com.alibaba.fastjson.JSON;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.tuple.ImmutableTriple;
+import org.apache.commons.lang3.tuple.Triple;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @Author: guxuan
+ * @Date 2022-05-17 15:49
+ */
+public abstract class UnstructuredSplitUtil {
+ private static final Logger LOG = LoggerFactory.getLogger(UnstructuredSplitUtil.class);
+ private boolean needInnerSplit;
+ // each file is split into blocks of 64 MB
+ // warn: this would better be configurable; a user with channel = 2 but 10 files does not
+ // necessarily need intra-file splitting, and a configurable size can avoid overly fragmented tasks
+ private static final Long BLOCK_BYTE_CAPACITY = 64 * FileUtils.ONE_MB;
+
+ public UnstructuredSplitUtil(boolean needInnerSplit) {
+ this.needInnerSplit = needInnerSplit;
+ }
+
+ public List<Configuration> getSplitConfiguration(Configuration originConfiguration, List<String> sourceObjectList,
+ int adviceNumber) {
+
+ List<Configuration> splitConfiguration = new ArrayList<Configuration>();
+ List<StartEndPair> regulateSplitStartEndPairList = new ArrayList<StartEndPair>();
+
+ for (String object : sourceObjectList) {
+ boolean realNeedInnerSplit = false;
+ Long contentTotalLength = -1L;
+ if (this.needInnerSplit) {
+ // avoid unnecessary OSS API calls
+ contentTotalLength = this.getFileTotalLength(object);
+ if (isNeedSplit(contentTotalLength)) {
+ realNeedInnerSplit = true;
+ }
+ }
+ // warn: the read mode allows intra-file splitting and the file size qualifies
+ if (realNeedInnerSplit) {
+ List<StartEndPair> startEndPairList = getSplitStartEndPairList(contentTotalLength, object);
+ List<Triple<Long, Long, InputStream>> startEndInputStreamTripleList = new ArrayList<Triple<Long, Long, InputStream>>();
+ for (int i = 0; i < startEndPairList.size(); i++) {
+ StartEndPair startEndPair = startEndPairList.get(i);
+ InputStream inputStream = this.getFileInputStream(startEndPair);
+ Triple<Long, Long, InputStream> startEndInputStreamTriple = new ImmutableTriple<Long, Long, InputStream>(
+ startEndPair.getStart(), startEndPair.getEnd(), inputStream);
+ startEndInputStreamTripleList.add(startEndInputStreamTriple);
+ }
+ regulateSplitStartEndPairList.addAll(regulateSplitStartEndPair(startEndInputStreamTripleList, object));
+ } else {
+ // if the specified range is invalid (e.g. negative start/end, or beyond the file size), the whole file is downloaded;
+ StartEndPair startEndPair = new StartEndPair(0L, -1L, object);
+ regulateSplitStartEndPairList.add(startEndPair);
+ }
+ }
+
+ // merge task: several files are merged into one task
+ List<List<StartEndPair>> splitResult = RangeSplitUtil.doListSplit(regulateSplitStartEndPairList, adviceNumber);
+ // at here this.objects is not null and not empty
+ for (List<StartEndPair> eachSlice : splitResult) {
+ Configuration splitedConfig = originConfiguration.clone();
+ splitedConfig.set(Key.SPLIT_SLICE_CONFIG, eachSlice);
+ splitConfiguration.add(splitedConfig);
+ LOG.info(String.format("File to be read:%s", JSON.toJSONString(eachSlice)));
+ }
+ return splitConfiguration;
+ }
+
+ /**
+ * Adjust and calibrate the raw split points so that each point lands on the '\n'
+ * that terminates a line.
+ *
+ * @param startEndInputStreamTripleList
+ * the raw split points and their input streams (start, end, inputStream)
+ * @return
+ */
+ private List<StartEndPair> regulateSplitStartEndPair(
+ List<Triple<Long, Long, InputStream>> startEndInputStreamTripleList, String filePath) {
+ List<StartEndPair> regulatedStartEndPairList = new ArrayList<StartEndPair>();
+
+ for (int i = 0; i < startEndInputStreamTripleList.size(); i++) {
+ if (i == 0) {
+ Triple<Long, Long, InputStream> firstBlock = startEndInputStreamTripleList.get(i);
+ StartEndPair startEndPair = new StartEndPair(firstBlock.getLeft(), null, filePath);
+ regulatedStartEndPairList.add(startEndPair);
+ continue;
+ }
+ Triple<Long, Long, InputStream> block = startEndInputStreamTripleList.get(i);
+ long start = block.getLeft();
+ // adjust the split point: move the block boundary onto the line end (the '\n')
+ long offset = getLFIndex(block.getRight());
+ // the calibrated split point
+ long regulatedPoint = start + offset;
+ // move the previous block's end onto the line end
+ regulatedStartEndPairList.get(i - 1).setEnd(regulatedPoint);
+ if (i < startEndInputStreamTripleList.size() - 1) {
+ // adjust this block's start; its end is calibrated in the next iteration
+ regulatedStartEndPairList.add(new StartEndPair(regulatedPoint + 1, null, filePath));
+ } else {
+ // last block: adjust its start and keep its original end (the last byte of the file)
+ regulatedStartEndPairList.add(new StartEndPair(regulatedPoint + 1, block.getMiddle(), filePath));
+ }
+ }
+ return regulatedStartEndPairList;
+ }
+
+ /**
+ * Get the offset of the first '\n' in the input stream; if the stream ends without a
+ * '\n', the number of bytes consumed is returned. Note: this method is never called
+ * for the last block of a file.
+ *
+ * @param inputStream
+ * the input stream
+ * @return
+ */
+ private Long getLFIndex(InputStream inputStream) {
+ Long hasReadByteIndex = -1L;
+ int ch = 0;
+ while (ch != -1) {
+ try {
+ ch = inputStream.read();
+ } catch (IOException e) {
+ throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR,
+ String.format("inputstream read Byte has exception: %s", e.getMessage()), e);
+ }
+ hasReadByteIndex++;
+ if (ch == '\n') {
+ return hasReadByteIndex;
+ }
+ }
+ return hasReadByteIndex;
+ }
+
+ /**
+ * Split a file into blocks of at most BLOCK_BYTE_CAPACITY bytes and return their
+ * [start, end] byte ranges. For example, a 150 MB file with 64 MB blocks yields
+ * ceil(150 / 64) = 3 ranges: [0, 64 MB], [64 MB + 1 B, 128 MB] and
+ * [128 MB + 1 B, fileTotalLength - 1].
+ *
+ * @param fileTotalLength
+ * @return
+ */
+ private List<StartEndPair> getSplitStartEndPairList(Long fileTotalLength, String filePath) {
+ long splitNum = (long) Math.ceil(fileTotalLength * 1.0 / BLOCK_BYTE_CAPACITY);
+ List<StartEndPair> startEndPairList = new ArrayList<StartEndPair>();
+ long start, end;
+ for (int i = 1; i <= splitNum; i++) {
+ if (i == 1) {
+ start = (i - 1) * BLOCK_BYTE_CAPACITY;
+ end = i * BLOCK_BYTE_CAPACITY;
+ } else if (i < splitNum) {
+ start = (i - 1) * BLOCK_BYTE_CAPACITY + 1;
+ end = i * BLOCK_BYTE_CAPACITY;
+ } else {
+ start = (i - 1) * BLOCK_BYTE_CAPACITY + 1;
+ end = fileTotalLength - 1;
+ }
+ StartEndPair startEndPair = new StartEndPair(start, end, filePath);
+ startEndPairList.add(startEndPair);
+ }
+ return startEndPairList;
+ }
+
+ /**
+ * Decide whether a file needs intra-file splitting: it must be larger than
+ * BLOCK_BYTE_CAPACITY (64 MB).
+ *
+ * @param fileTotalLength:
+ * total file length in bytes
+ * @return
+ */
+ private boolean isNeedSplit(Long fileTotalLength) {
+ boolean fileSizeCouldSplit = fileTotalLength > BLOCK_BYTE_CAPACITY ? 
true : false; + return fileSizeCouldSplit && this.needInnerSplit; + } + + public abstract Long getFileTotalLength(String filePath); + + public abstract InputStream getFileInputStream(StartEndPair startEndPair); +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/ColumnTypeUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/ColumnTypeUtil.java new file mode 100644 index 0000000000..8215bc3640 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/ColumnTypeUtil.java @@ -0,0 +1,90 @@ +package com.alibaba.datax.plugin.unstructuredstorage.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:40 + */ +public class ColumnTypeUtil { + + private static final String TYPE_NAME = "decimal"; + private static final String LEFT_BRACKETS = "("; + private static final String RIGHT_BRACKETS = ")"; + private static final String DELIM = ","; + + public static boolean isDecimalType(String typeName){ + return typeName.toLowerCase().startsWith(TYPE_NAME); + } + + public static DecimalInfo getDecimalInfo(String typeName, DecimalInfo defaultInfo){ + if(!isDecimalType(typeName)){ + throw new IllegalArgumentException("Unsupported column type:" + typeName); + } + + if (typeName.contains(LEFT_BRACKETS) && typeName.contains(RIGHT_BRACKETS)){ + int precision = Integer.parseInt(typeName.substring(typeName.indexOf(LEFT_BRACKETS) + 1,typeName.indexOf(DELIM)).trim()); + int scale = Integer.parseInt(typeName.substring(typeName.indexOf(DELIM) + 1,typeName.indexOf(RIGHT_BRACKETS)).trim()); + return new DecimalInfo(precision, scale); + } else { + return defaultInfo; + } + } + + public static class DecimalInfo { + private int precision; + private int scale; + + public DecimalInfo(int precision, int scale) { + this.precision = precision; + this.scale = scale; + } + + public int getPrecision() { + return precision; + } + + public int getScale() { + return scale; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()){ + return false; + + } + DecimalInfo that = (DecimalInfo) o; + return precision == that.precision && scale == that.scale; + } + + @Override + public int hashCode() { + return Objects.hash(precision, scale); + } + } + + public static List getListColumnEntry( + Configuration configuration, final String path) { + List lists = configuration.getList(path, JSONObject.class); + if (lists == null) { + return null; + } + List result = new ArrayList<>(); + for (final JSONObject object : lists) { + result.add(JSON.parseObject(object.toJSONString(), ColumnEntry.class)); + } + return result; + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/HdfsUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/HdfsUtil.java new file mode 100644 index 0000000000..4098ff1d8c --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/HdfsUtil.java @@ -0,0 +1,16 @@ +package com.alibaba.datax.plugin.unstructuredstorage.util; + 
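+/*
+ * Worked example for computeMinBytesForPrecision below, derived directly from its loop: it
+ * returns the smallest n with 2^(8n - 1) >= 10^precision, i.e. the minimum length of a signed
+ * fixed-length byte array that can hold any unscaled decimal of the given precision:
+ * precision 9 -> 4 bytes, precision 18 -> 8 bytes, precision 38 -> 16 bytes.
+ */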
+ +public class HdfsUtil { + private static final double SCALE_TWO = 2.0; + private static final double SCALE_TEN = 10.0; + private static final int BIT_SIZE = 8; + public static int computeMinBytesForPrecision(int precision){ + + int numBytes = 1; + while (Math.pow(SCALE_TWO, BIT_SIZE * numBytes - 1.0) < Math.pow(SCALE_TEN, precision)) { + numBytes += 1; + } + return numBytes; + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Constant.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Constant.java index 93b4baa978..092fbfd7c8 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Constant.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Constant.java @@ -11,9 +11,31 @@ public class Constant { public static final String FILE_FORMAT_CSV = "csv"; public static final String FILE_FORMAT_TEXT = "text"; - - //每个分块10MB,最大10000个分块 - public static final Long MAX_FILE_SIZE = 1024 * 1024 * 10 * 10000L; - + + //每个分块10MB,最大10000个分块, MAX_FILE_SIZE 单位: MB + public static final Long MAX_FILE_SIZE = 10 * 10000L; + public static final String DEFAULT_SUFFIX = ""; + + public static final String TRUNCATE = "truncate"; + public static final String APPEND = "append"; + public static final String NOCONFLICT = "nonConflict"; + + /** + * 在同步音视频等二进制文件的情况下: + * 半结构化写插件可以统一使用 SOURCE_FILE 获取到读端插件的split file路径 + */ + public static final String SOURCE_FILE = "sourceFile"; + + public static final String SOURCE_FILE_NAME = "sourceFileName"; + + /** + * 是否是音视频等无结构化文件 + */ + public static final String BINARY = "binary"; + + /** + * 文件同步模式, 如果是copy表示纯文件拷贝 + * */ + public static final String SYNC_MODE_VALUE_COPY = "copy"; } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/DataXCsvWriter.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/DataXCsvWriter.java new file mode 100644 index 0000000000..85deea2cd9 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/DataXCsvWriter.java @@ -0,0 +1,296 @@ +package com.alibaba.datax.plugin.unstructuredstorage.writer; + +import org.apache.commons.io.IOUtils; + +import java.io.IOException; +import java.io.Writer; + +/** + * @Author: guxuan + * @Date 2022-05-19 10:44 + */ +public class DataXCsvWriter { + private Writer writer; + @SuppressWarnings("unused") + private String fileName; + private boolean firstColumn; + private boolean useCustomRecordDelimiter; + private UserSettings userSettings; + private boolean initialized; + private boolean closed; + public static final int ESCAPE_MODE_DOUBLED = 1; + public static final int ESCAPE_MODE_BACKSLASH = 2; + + public DataXCsvWriter(Writer writer, char delimiter) { + this.writer = null; + this.fileName = null; + this.firstColumn = true; + this.useCustomRecordDelimiter = false; + this.userSettings = new UserSettings(); + this.initialized = false; + this.closed = false; + if(writer == null) { + throw new IllegalArgumentException("Parameter writer can not be null."); + } else { + this.writer = writer; + this.userSettings.Delimiter = delimiter; + this.initialized = true; + } + } + + public char getDelimiter() { + return this.userSettings.Delimiter; + } + + public void setDelimiter(char var1) { + this.userSettings.Delimiter = var1; + 
} + + public char getRecordDelimiter() { + return this.userSettings.RecordDelimiter; + } + + public void setRecordDelimiter(char var1) { + this.useCustomRecordDelimiter = true; + this.userSettings.RecordDelimiter = var1; + } + + public char getTextQualifier() { + return this.userSettings.TextQualifier; + } + + public void setTextQualifier(char var1) { + this.userSettings.TextQualifier = var1; + } + + public boolean getUseTextQualifier() { + return this.userSettings.UseTextQualifier; + } + + public void setUseTextQualifier(boolean var1) { + this.userSettings.UseTextQualifier = var1; + } + + public int getEscapeMode() { + return this.userSettings.EscapeMode; + } + + public void setEscapeMode(int var1) { + this.userSettings.EscapeMode = var1; + } + + public void setComment(char var1) { + this.userSettings.Comment = var1; + } + + public char getComment() { + return this.userSettings.Comment; + } + + public boolean getForceQualifier() { + return this.userSettings.ForceQualifier; + } + + public void setForceQualifier(boolean var1) { + this.userSettings.ForceQualifier = var1; + } + + public void write(String var1, boolean var2) throws IOException { + this.checkClosed(); + if(var1 == null) { + var1 = ""; + } + + if(!this.firstColumn) { + this.writer.write(this.userSettings.Delimiter); + } + + boolean var3 = this.userSettings.ForceQualifier; + if(!var2 && var1.length() > 0) { + var1 = var1.trim(); + } + + if(!var3 && this.userSettings.UseTextQualifier && (var1.indexOf(this.userSettings.TextQualifier) > -1 || var1.indexOf(this.userSettings.Delimiter) > -1 || !this.useCustomRecordDelimiter && (var1.indexOf(10) > -1 || var1.indexOf(13) > -1) || this.useCustomRecordDelimiter && var1.indexOf(this.userSettings.RecordDelimiter) > -1 || this.firstColumn && var1.length() > 0 && var1.charAt(0) == this.userSettings.Comment || this.firstColumn && var1.length() == 0)) { + var3 = true; + } + + if(this.userSettings.UseTextQualifier && !var3 && var1.length() > 0 && var2) { + char var4 = var1.charAt(0); + if(var4 == 32 || var4 == 9) { + var3 = true; + } + + if(!var3 && var1.length() > 1) { + char var5 = var1.charAt(var1.length() - 1); + if(var5 == 32 || var5 == 9) { + var3 = true; + } + } + } + + if(var3) { + this.writer.write(this.userSettings.TextQualifier); + if(this.userSettings.EscapeMode == 2) { + var1 = replace(var1, "\\", "\\\\"); + var1 = replace(var1, "" + this.userSettings.TextQualifier, "\\" + this.userSettings.TextQualifier); + } else { + var1 = replace(var1, "" + this.userSettings.TextQualifier, "" + this.userSettings.TextQualifier + this.userSettings.TextQualifier); + } + } else if(this.userSettings.EscapeMode == 2) { + var1 = replace(var1, "\\", "\\\\"); + var1 = replace(var1, "" + this.userSettings.Delimiter, "\\" + this.userSettings.Delimiter); + if(this.useCustomRecordDelimiter) { + var1 = replace(var1, "" + this.userSettings.RecordDelimiter, "\\" + this.userSettings.RecordDelimiter); + } else { + var1 = replace(var1, "\r", "\\\r"); + var1 = replace(var1, "\n", "\\\n"); + } + + if(this.firstColumn && var1.length() > 0 && var1.charAt(0) == this.userSettings.Comment) { + if(var1.length() > 1) { + var1 = "\\" + this.userSettings.Comment + var1.substring(1); + } else { + var1 = "\\" + this.userSettings.Comment; + } + } + } + + this.writer.write(var1); + if(var3) { + this.writer.write(this.userSettings.TextQualifier); + } + + this.firstColumn = false; + } + + public void write(String var1) throws IOException { + this.write(var1, false); + } + + public void writeComment(String var1) throws IOException 
{ + this.checkClosed(); + this.writer.write(this.userSettings.Comment); + this.writer.write(var1); + if(this.useCustomRecordDelimiter) { + this.writer.write(this.userSettings.RecordDelimiter); + } else { + this.writer.write(IOUtils.LINE_SEPARATOR); + } + + this.firstColumn = true; + } + + public void writeRecord(String[] var1, boolean var2) throws IOException { + if(var1 != null && var1.length > 0) { + for(int var3 = 0; var3 < var1.length; ++var3) { + this.write(var1[var3], var2); + } + + this.endRecord(); + } + + } + + public void writeRecord(String[] var1) throws IOException { + this.writeRecord(var1, false); + } + + public void endRecord() throws IOException { + this.checkClosed(); + if(this.useCustomRecordDelimiter) { + this.writer.write(this.userSettings.RecordDelimiter); + } else { + this.writer.write(IOUtils.LINE_SEPARATOR); + } + + this.firstColumn = true; + } + + public void flush() throws IOException { + this.writer.flush(); + } + + public void close() { + if(!this.closed) { + this.close(true); + this.closed = true; + } + + } + + private void close(boolean var1) { + if(!this.closed) { + try { + if(this.initialized) { + this.writer.close(); + } + } catch (Exception var3) { + ; + } + + this.writer = null; + this.closed = true; + } + + } + + private void checkClosed() throws IOException { + if(this.closed) { + throw new IOException("This instance of the CsvWriter class has already been closed."); + } + } + + @Override + protected void finalize() { + this.close(false); + } + + public static String replace(String var0, String var1, String var2) { + int var3 = var1.length(); + int var4 = var0.indexOf(var1); + if(var4 <= -1) { + return var0; + } else { + StringBuffer var5 = new StringBuffer(); + + int var6; + for(var6 = 0; var4 != -1; var4 = var0.indexOf(var1, var6)) { + var5.append(var0.substring(var6, var4)); + var5.append(var2); + var6 = var4 + var3; + } + + var5.append(var0.substring(var6)); + return var5.toString(); + } + } + + private class UserSettings { + public char TextQualifier = 34; + public boolean UseTextQualifier = true; + public char Delimiter = 44; + public char RecordDelimiter = 0; + public char Comment = 35; + public int EscapeMode = 1; + public boolean ForceQualifier = false; + + public UserSettings() { + } + } + + @SuppressWarnings("unused") + private class Letters { + public static final char LF = '\n'; + public static final char CR = '\r'; + public static final char QUOTE = '\"'; + public static final char COMMA = ','; + public static final char SPACE = ' '; + public static final char TAB = '\t'; + public static final char POUND = '#'; + public static final char BACKSLASH = '\\'; + public static final char NULL = '\u0000'; + + private Letters() { + } + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Key.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Key.java index 2e7fe079f3..125957f189 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Key.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Key.java @@ -1,6 +1,7 @@ package com.alibaba.datax.plugin.unstructuredstorage.writer; public class Key { + public static final String PATH = "path"; // must have public static final String FILE_NAME = "fileName"; @@ -10,6 +11,11 @@ public class Key { // not must , not default , public static final String FIELD_DELIMITER = 
"fieldDelimiter"; + // not must , default os's line delimiter + public static final String LINE_DELIMITER = "lineDelimiter"; + + public static final String CSV_WRITER_CONFIG = "csvWriterConfig"; + // not must, default UTF-8 public static final String ENCODING = "encoding"; @@ -35,4 +41,32 @@ public class Key { // writer file type suffix, like .txt .csv public static final String SUFFIX = "suffix"; + + public static final String MARK_DONE_FILE_NAME = "markDoneFileName"; + + public static final String MARK_DOING_FILE_NAME = "markDoingFileName"; + + // public static final String RETRY_TIME = "retryTime"; + + public final static String MAX_RETRY_TIME = "maxRetryTime"; + + /** + * 半结构化标示一个Record来源的绝对文件路径名,可以是ftp文件,oss的object等 + * */ + public static final String META_KEY_FILE_PATH = "filePath"; + + /** + * 多文件切分的工作项,Task通过此配置项表示工作内容, 文件内部切分相关key + */ + public static final String SPLIT_SLICE_CONFIG = "__splitSliceConfig"; + public static final String SPLIT_SLICE_FILE_PATH = "filePath"; + public static final String SPLIT_SLICE_START_POINT = "startPoint"; + public static final String SPLIT_SLICE_END_POINT = "endPoint"; + + /** + * 文件同步模式, 如果是copy表示纯文件拷贝 + * */ + public static final String SYNC_MODE = "syncMode"; + + public static final String BYTE_ENCODING = "byteEncoding"; } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/TextCsvWriterManager.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/TextCsvWriterManager.java index 1ea8275963..167a7a872d 100644 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/TextCsvWriterManager.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/TextCsvWriterManager.java @@ -2,8 +2,13 @@ import java.io.IOException; import java.io.Writer; +import java.util.HashMap; import java.util.List; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.TypeReference; +import org.apache.commons.beanutils.BeanUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -11,15 +16,15 @@ import com.csvreader.CsvWriter; + public class TextCsvWriterManager { - public static UnstructuredWriter produceUnstructuredWriter( - String fileFormat, char fieldDelimiter, Writer writer) { - // warn: false means plain text(old way), true means strict csv format - if (Constant.FILE_FORMAT_TEXT.equals(fileFormat)) { - return new TextWriterImpl(writer, fieldDelimiter); - } else { - return new CsvWriterImpl(writer, fieldDelimiter); - } + + public static UnstructuredWriter produceTextWriter( Writer writer, String fieldDelimiter, Configuration config) { + return new TextWriterImpl(writer, fieldDelimiter, config); + } + + public static UnstructuredWriter produceCsvWriter( Writer writer, char fieldDelimiter, Configuration config) { + return new CsvWriterImpl(writer, fieldDelimiter, config); } } @@ -28,15 +33,40 @@ class CsvWriterImpl implements UnstructuredWriter { .getLogger(CsvWriterImpl.class); // csv 严格符合csv语法, 有标准的转义等处理 private char fieldDelimiter; - private CsvWriter csvWriter; + private String lineDelimiter; + private DataXCsvWriter csvWriter; - public CsvWriterImpl(Writer writer, char fieldDelimiter) { + public CsvWriterImpl(Writer writer, char fieldDelimiter, Configuration config) { this.fieldDelimiter = fieldDelimiter; - this.csvWriter = new 
CsvWriter(writer, this.fieldDelimiter); + this.lineDelimiter = config.getString(Key.LINE_DELIMITER, IOUtils.LINE_SEPARATOR); + this.csvWriter = new DataXCsvWriter(writer, this.fieldDelimiter); this.csvWriter.setTextQualifier('"'); this.csvWriter.setUseTextQualifier(true); // warn: in linux is \n , in windows is \r\n - this.csvWriter.setRecordDelimiter(IOUtils.LINE_SEPARATOR.charAt(0)); + this.csvWriter.setRecordDelimiter(this.lineDelimiter.charAt(0)); + + String csvWriterConfig = config.getString(Key.CSV_WRITER_CONFIG); + if (StringUtils.isNotBlank(csvWriterConfig)) { + try { + HashMap csvWriterConfigMap = JSON.parseObject(csvWriterConfig, + new TypeReference>() { + }); + if (!csvWriterConfigMap.isEmpty()) { + // this.csvWriter.setComment(var1); + // this.csvWriter.setDelimiter(var1); + // this.csvWriter.setEscapeMode(var1); + // this.csvWriter.setForceQualifier(var1); + // this.csvWriter.setRecordDelimiter(var1); + // this.csvWriter.setTextQualifier(var1); + // this.csvWriter.setUseTextQualifier(var1); + BeanUtils.populate(this.csvWriter, csvWriterConfigMap); + LOG.info(String.format("csvwriterConfig is set successfully. After setting, csvwriter:%s", JSON.toJSONString(this.csvWriter))); + } + } catch (Exception e) { + LOG.warn(String.format("invalid csvWriterConfig config: %s, DataX will ignore it.", csvWriterConfig), + e); + } + } } @Override @@ -44,8 +74,7 @@ public void writeOneRecord(List splitedRows) throws IOException { if (splitedRows.isEmpty()) { LOG.info("Found one record line which is empty."); } - this.csvWriter.writeRecord((String[]) splitedRows - .toArray(new String[0])); + this.csvWriter.writeRecord(splitedRows.toArray(new String[0])); } @Override @@ -64,12 +93,14 @@ class TextWriterImpl implements UnstructuredWriter { private static final Logger LOG = LoggerFactory .getLogger(TextWriterImpl.class); // text StringUtils的join方式, 简单的字符串拼接 - private char fieldDelimiter; + private String fieldDelimiter; private Writer textWriter; + private String lineDelimiter; - public TextWriterImpl(Writer writer, char fieldDelimiter) { + public TextWriterImpl(Writer writer, String fieldDelimiter, Configuration config) { this.fieldDelimiter = fieldDelimiter; this.textWriter = writer; + this.lineDelimiter = config.getString(Key.LINE_DELIMITER, IOUtils.LINE_SEPARATOR); } @Override @@ -79,7 +110,7 @@ public void writeOneRecord(List splitedRows) throws IOException { } this.textWriter.write(String.format("%s%s", StringUtils.join(splitedRows, this.fieldDelimiter), - IOUtils.LINE_SEPARATOR)); + this.lineDelimiter)); } @Override diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterErrorCode.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterErrorCode.java index 0f780ebdd1..b83cfa1c74 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterErrorCode.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterErrorCode.java @@ -8,7 +8,8 @@ public enum UnstructuredStorageWriterErrorCode implements ErrorCode { Write_FILE_WITH_CHARSET_ERROR("UnstructuredStorageWriter-01", "您配置的编码未能正常写入."), Write_FILE_IO_ERROR("UnstructuredStorageWriter-02", "您配置的文件在写入时出现IO异常."), RUNTIME_EXCEPTION("UnstructuredStorageWriter-03", "出现运行时异常, 请联系我们"), - REQUIRED_VALUE("UnstructuredStorageWriter-04", 
"您缺失了必须填写的参数值."),; + REQUIRED_VALUE("UnstructuredStorageWriter-04", "您缺失了必须填写的参数值."), + Write_ERROR("UnstructuredStorageWriter-05", "errorcode.write_error"),; private final String code; private final String description; diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java index b1927ce79b..e9040662ab 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java @@ -1,10 +1,6 @@ package com.alibaba.datax.plugin.unstructuredstorage.writer; -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.UnsupportedEncodingException; +import java.io.*; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -13,6 +9,8 @@ import java.util.Set; import java.util.UUID; +import com.alibaba.datax.common.element.BytesColumn; +import org.apache.commons.codec.binary.Base64; import org.apache.commons.compress.compressors.CompressorOutputStream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; @@ -53,10 +51,7 @@ public static void validateParameter(Configuration writerConfiguration) { if (!supportedWriteModes.contains(writeMode)) { throw DataXException .asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format( - "仅支持 truncate, append, nonConflict 三种模式, 不支持您配置的 writeMode 模式 : [%s]", - writeMode)); + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, writeMode); } writerConfiguration.set(Key.WRITE_MODE, writeMode); @@ -64,8 +59,6 @@ public static void validateParameter(Configuration writerConfiguration) { String encoding = writerConfiguration.getString(Key.ENCODING); if (StringUtils.isBlank(encoding)) { // like " ", null - LOG.warn(String.format("您的encoding配置为空, 将使用默认值[%s]", - Constant.DEFAULT_ENCODING)); writerConfiguration.set(Key.ENCODING, Constant.DEFAULT_ENCODING); } else { try { @@ -74,8 +67,7 @@ public static void validateParameter(Configuration writerConfiguration) { Charsets.toCharset(encoding); } catch (Exception e) { throw DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format("不支持您配置的编码格式:[%s]", encoding), e); + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, e); } } @@ -86,45 +78,40 @@ public static void validateParameter(Configuration writerConfiguration) { } else { Set supportedCompress = Sets.newHashSet("gzip", "bzip2"); if (!supportedCompress.contains(compress.toLowerCase().trim())) { - String message = String.format( - "仅支持 [%s] 文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", - StringUtils.join(supportedCompress, ","), compress); throw DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format(message, compress)); + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, String.format("unsupported commpress format %s ", compress)); } } + // fileFormat check + String fileFormat = writerConfiguration.getString(Key.FILE_FORMAT); + if (StringUtils.isBlank(fileFormat)) { + fileFormat = Constant.FILE_FORMAT_TEXT; + 
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java
index b1927ce79b..e9040662ab 100755
--- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java
@@ -1,10 +1,6 @@
 package com.alibaba.datax.plugin.unstructuredstorage.writer;

-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.UnsupportedEncodingException;
+import java.io.*;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
@@ -13,6 +9,8 @@
 import java.util.Set;
 import java.util.UUID;

+import com.alibaba.datax.common.element.BytesColumn;
+import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.compress.compressors.CompressorOutputStream;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
@@ -53,10 +51,7 @@ public static void validateParameter(Configuration writerConfiguration) {
         if (!supportedWriteModes.contains(writeMode)) {
             throw DataXException
                     .asDataXException(
-                            UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE,
-                            String.format(
-                                    "仅支持 truncate, append, nonConflict 三种模式, 不支持您配置的 writeMode 模式 : [%s]",
-                                    writeMode));
+                            UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, writeMode);
         }
         writerConfiguration.set(Key.WRITE_MODE, writeMode);

@@ -64,8 +59,6 @@ public static void validateParameter(Configuration writerConfiguration) {
         String encoding = writerConfiguration.getString(Key.ENCODING);
         if (StringUtils.isBlank(encoding)) {
             // like " ", null
-            LOG.warn(String.format("您的encoding配置为空, 将使用默认值[%s]",
-                    Constant.DEFAULT_ENCODING));
             writerConfiguration.set(Key.ENCODING, Constant.DEFAULT_ENCODING);
         } else {
             try {
@@ -74,8 +67,7 @@ public static void validateParameter(Configuration writerConfiguration) {
                 Charsets.toCharset(encoding);
             } catch (Exception e) {
                 throw DataXException.asDataXException(
-                        UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE,
-                        String.format("不支持您配置的编码格式:[%s]", encoding), e);
+                        UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, e);
             }
         }

@@ -86,45 +78,40 @@ public static void validateParameter(Configuration writerConfiguration) {
         } else {
             Set<String> supportedCompress = Sets.newHashSet("gzip", "bzip2");
             if (!supportedCompress.contains(compress.toLowerCase().trim())) {
-                String message = String.format(
-                        "仅支持 [%s] 文件压缩格式 , 不支持您配置的文件压缩格式: [%s]",
-                        StringUtils.join(supportedCompress, ","), compress);
                 throw DataXException.asDataXException(
-                        UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE,
-                        String.format(message, compress));
+                        UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, String.format("unsupported compress format %s ", compress));
             }
         }

+        // fileFormat check
+        String fileFormat = writerConfiguration.getString(Key.FILE_FORMAT);
+        if (StringUtils.isBlank(fileFormat)) {
+            fileFormat = Constant.FILE_FORMAT_TEXT;
+            writerConfiguration.set(Key.FILE_FORMAT, fileFormat);
+        }
+        if (!Constant.FILE_FORMAT_CSV.equals(fileFormat)
+                && !Constant.FILE_FORMAT_TEXT.equals(fileFormat)) {
+            throw DataXException.asDataXException(
+                    UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, String.format("unsupported fileFormat %s ", fileFormat));
+        }
+
         // fieldDelimiter check
-        String delimiterInStr = writerConfiguration
-                .getString(Key.FIELD_DELIMITER);
-        // warn: if have, length must be one
-        if (null != delimiterInStr && 1 != delimiterInStr.length()) {
+        String delimiterInStr = writerConfiguration.getString(Key.FIELD_DELIMITER);
+
+        if (StringUtils.equalsIgnoreCase(fileFormat, Constant.FILE_FORMAT_CSV) &&
+                null != delimiterInStr && 1 != delimiterInStr.length()) {
             throw DataXException.asDataXException(
                     UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE,
-                    String.format("仅仅支持单字符切分, 您配置的切分为 : [%s]", delimiterInStr));
+                    String.format("unsupported delimiterInStr %s ", delimiterInStr));
         }
         if (null == delimiterInStr) {
-            LOG.warn(String.format("您没有配置列分隔符, 使用默认值[%s]",
-                    Constant.DEFAULT_FIELD_DELIMITER));
-            writerConfiguration.set(Key.FIELD_DELIMITER,
-                    Constant.DEFAULT_FIELD_DELIMITER);
-        }
-
-        // fileFormat check
-        String fileFormat = writerConfiguration.getString(Key.FILE_FORMAT,
-                Constant.FILE_FORMAT_TEXT);
-        if (!Constant.FILE_FORMAT_CSV.equals(fileFormat)
-                && !Constant.FILE_FORMAT_TEXT.equals(fileFormat)) {
-            throw DataXException.asDataXException(
-                    UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, String
-                            .format("您配置的fileFormat [%s]错误, 支持csv, text两种.",
-                                    fileFormat));
+            delimiterInStr = String.valueOf(Constant.DEFAULT_FIELD_DELIMITER);
+            writerConfiguration.set(Key.FIELD_DELIMITER, delimiterInStr);
         }
     }

     public static List<Configuration> split(Configuration writerSliceConfig,
-            Set<String> originAllFileExists, int mandatoryNumber) {
+                                            Set<String> originAllFileExists, int mandatoryNumber) {
         LOG.info("begin do split...");
         Set<String> allFileExists = new HashSet<String>();
         allFileExists.addAll(originAllFileExists);
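For reference, an illustrative parameter block that passes the reworked validateParameter() checks above. Configuration.from(String) is DataX's own JSON loader; the key names follow the writer parameter block of a DataX job json, and every value here is an example only.

// Sketch only: exercising validateParameter() with a minimal configuration.
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.unstructuredstorage.writer.UnstructuredStorageWriterUtil;

public class ValidateParameterDemo {
    public static void main(String[] args) {
        Configuration conf = Configuration.from(
                "{\"writeMode\":\"truncate\","      // truncate | append | nonConflict
                        + "\"encoding\":\"UTF-8\","  // must be a resolvable charset
                        + "\"compress\":\"gzip\","   // gzip | bzip2, or omitted
                        + "\"fileFormat\":\"csv\","  // csv | text, defaults to text
                        + "\"fieldDelimiter\":\",\"}"); // single char when fileFormat=csv
        UnstructuredStorageWriterUtil.validateParameter(conf);
        // defaults are filled back into the configuration when keys are omitted
        System.out.println(conf.toJSON());
    }
}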
@@ -153,19 +140,19 @@ public static List<Configuration> split(Configuration writerSliceConfig,
     }

     public static String buildFilePath(String path, String fileName,
-            String suffix) {
+                                       String suffix) {
         boolean isEndWithSeparator = false;
         switch (IOUtils.DIR_SEPARATOR) {
-        case IOUtils.DIR_SEPARATOR_UNIX:
-            isEndWithSeparator = path.endsWith(String
-                    .valueOf(IOUtils.DIR_SEPARATOR));
-            break;
-        case IOUtils.DIR_SEPARATOR_WINDOWS:
-            isEndWithSeparator = path.endsWith(String
-                    .valueOf(IOUtils.DIR_SEPARATOR_WINDOWS));
-            break;
-        default:
-            break;
+            case IOUtils.DIR_SEPARATOR_UNIX:
+                isEndWithSeparator = path.endsWith(String
+                        .valueOf(IOUtils.DIR_SEPARATOR));
+                break;
+            case IOUtils.DIR_SEPARATOR_WINDOWS:
+                isEndWithSeparator = path.endsWith(String
+                        .valueOf(IOUtils.DIR_SEPARATOR_WINDOWS));
+                break;
+            default:
+                break;
         }
         if (!isEndWithSeparator) {
             path = path + IOUtils.DIR_SEPARATOR;
@@ -179,14 +166,12 @@ public static String buildFilePath(String path, String fileName,
     }

     public static void writeToStream(RecordReceiver lineReceiver,
-            OutputStream outputStream, Configuration config, String context,
-            TaskPluginCollector taskPluginCollector) {
+                                     OutputStream outputStream, Configuration config, String context,
+                                     TaskPluginCollector taskPluginCollector) {
         String encoding = config.getString(Key.ENCODING,
                 Constant.DEFAULT_ENCODING);
         // handle blank encoding
         if (StringUtils.isBlank(encoding)) {
-            LOG.warn(String.format("您配置的encoding为[%s], 使用默认值[%s]", encoding,
-                    Constant.DEFAULT_ENCODING));
             encoding = Constant.DEFAULT_ENCODING;
         }
         String compress = config.getString(Key.COMPRESS);
@@ -212,10 +197,7 @@ public static void writeToStream(RecordReceiver lineReceiver,
             } else {
                 throw DataXException
                         .asDataXException(
-                                UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE,
-                                String.format(
-                                        "仅支持 gzip, bzip2 文件压缩格式 , 不支持您配置的文件压缩格式: [%s]",
-                                        compress));
+                                UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, compress);
             }
         }
         UnstructuredStorageWriterUtil.doWriteToStream(lineReceiver, writer,
@@ -223,24 +205,21 @@ public static void writeToStream(RecordReceiver lineReceiver,
         } catch (UnsupportedEncodingException uee) {
             throw DataXException
                     .asDataXException(
-                            UnstructuredStorageWriterErrorCode.Write_FILE_WITH_CHARSET_ERROR,
-                            String.format("不支持的编码格式 : [%s]", encoding), uee);
+                            UnstructuredStorageWriterErrorCode.Write_FILE_WITH_CHARSET_ERROR, uee);
         } catch (NullPointerException e) {
             throw DataXException.asDataXException(
-                    UnstructuredStorageWriterErrorCode.RUNTIME_EXCEPTION,
-                    "运行时错误, 请联系我们", e);
+                    UnstructuredStorageWriterErrorCode.RUNTIME_EXCEPTION, e);
         } catch (IOException e) {
             throw DataXException.asDataXException(
-                    UnstructuredStorageWriterErrorCode.Write_FILE_IO_ERROR,
-                    String.format("流写入错误 : [%s]", context), e);
+                    UnstructuredStorageWriterErrorCode.Write_FILE_IO_ERROR, e);
         } finally {
             IOUtils.closeQuietly(writer);
         }
     }

     private static void doWriteToStream(RecordReceiver lineReceiver,
-            BufferedWriter writer, String contex, Configuration config,
-            TaskPluginCollector taskPluginCollector) throws IOException {
+                                        BufferedWriter writer, String contex, Configuration config,
+                                        TaskPluginCollector taskPluginCollector) throws IOException {

         String nullFormat = config.getString(Key.NULL_FORMAT);

@@ -252,26 +231,9 @@ private static void doWriteToStream(RecordReceiver lineReceiver,
         }

         // warn: default false
-        String fileFormat = config.getString(Key.FILE_FORMAT,
-                Constant.FILE_FORMAT_TEXT);
-
-        String delimiterInStr = config.getString(Key.FIELD_DELIMITER);
-        if (null != delimiterInStr && 1 != delimiterInStr.length()) {
-            throw DataXException.asDataXException(
-                    UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE,
-                    String.format("仅仅支持单字符切分, 您配置的切分为 : [%s]", delimiterInStr));
-        }
-        if (null == delimiterInStr) {
-            LOG.warn(String.format("您没有配置列分隔符, 使用默认值[%s]",
-                    Constant.DEFAULT_FIELD_DELIMITER));
-        }
-
-        // warn: fieldDelimiter could not be '' for no fieldDelimiter
-        char fieldDelimiter = config.getChar(Key.FIELD_DELIMITER,
-                Constant.DEFAULT_FIELD_DELIMITER);
+        String fileFormat = config.getString(Key.FILE_FORMAT, Constant.FILE_FORMAT_TEXT);

-        UnstructuredWriter unstructuredWriter = TextCsvWriterManager
-                .produceUnstructuredWriter(fileFormat, fieldDelimiter, writer);
+        UnstructuredWriter unstructuredWriter = produceUnstructuredWriter(fileFormat, config, writer);

         List<String> headers = config.getList(Key.HEADER, String.class);
         if (null != headers && !headers.isEmpty()) {
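For illustration, a sketch of how the two supported compress values map onto Commons Compress stream wrappers around the raw file OutputStream, as writeToStream() does above. File name, content, and charset are placeholders; commons-compress is assumed on the classpath.

// Sketch only: wrapping the output stream per the "compress" setting.
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;

public class CompressDemo {
    public static void main(String[] args) throws Exception {
        String compress = "gzip"; // or "bzip2"; anything else is rejected as ILLEGAL_VALUE
        OutputStream out = new FileOutputStream("demo.txt.gz");
        if ("gzip".equalsIgnoreCase(compress)) {
            out = new GzipCompressorOutputStream(out);
        } else if ("bzip2".equalsIgnoreCase(compress)) {
            out = new BZip2CompressorOutputStream(out);
        }
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
        writer.write("hello,datax");
        writer.close(); // flushes and finishes the compressor stream
    }
}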
@@ -279,22 +241,38 @@ private static void doWriteToStream(RecordReceiver lineReceiver,
         }

         Record record = null;
+        String byteEncoding = config.getString(Key.BYTE_ENCODING);
         while ((record = lineReceiver.getFromReader()) != null) {
             UnstructuredStorageWriterUtil.transportOneRecord(record,
                     nullFormat, dateParse, taskPluginCollector,
-                    unstructuredWriter);
+                    unstructuredWriter, byteEncoding);
         }

         // warn:由调用方控制流的关闭
         // IOUtils.closeQuietly(unstructuredWriter);
     }

+    public static UnstructuredWriter produceUnstructuredWriter(String fileFormat, Configuration config, Writer writer) {
+        UnstructuredWriter unstructuredWriter = null;
+        if (StringUtils.equalsIgnoreCase(fileFormat, Constant.FILE_FORMAT_CSV)) {
+
+            Character fieldDelimiter = config.getChar(Key.FIELD_DELIMITER, Constant.DEFAULT_FIELD_DELIMITER);
+            unstructuredWriter = TextCsvWriterManager.produceCsvWriter(writer, fieldDelimiter, config);
+        } else if (StringUtils.equalsIgnoreCase(fileFormat, Constant.FILE_FORMAT_TEXT)) {
+
+            String fieldDelimiter = config.getString(Key.FIELD_DELIMITER, String.valueOf(Constant.DEFAULT_FIELD_DELIMITER));
+            unstructuredWriter = TextCsvWriterManager.produceTextWriter(writer, fieldDelimiter, config);
+        }
+
+        return unstructuredWriter;
+    }
+
     /**
      * 异常表示脏数据
      * */
     public static void transportOneRecord(Record record, String nullFormat,
-            DateFormat dateParse, TaskPluginCollector taskPluginCollector,
-            UnstructuredWriter unstructuredWriter) {
+                                          DateFormat dateParse, TaskPluginCollector taskPluginCollector,
+                                          UnstructuredWriter unstructuredWriter, String byteEncoding) {
         // warn: default is null
         if (null == nullFormat) {
             nullFormat = "null";
@@ -309,7 +287,15 @@ public static void transportOneRecord(Record record, String nullFormat,
                 if (null != column.getRawData()) {
                     boolean isDateColumn = column instanceof DateColumn;
                     if (!isDateColumn) {
-                        splitedRows.add(column.asString());
+                        if (column instanceof BytesColumn) {
+                            if ("base64".equalsIgnoreCase(byteEncoding)) {
+                                splitedRows.add(Base64.encodeBase64String(column.asBytes()));
+                            } else {
+                                splitedRows.add(column.asString());
+                            }
+                        } else {
+                            splitedRows.add(column.asString());
+                        }
                     } else {
                         if (null != dateParse) {
                             splitedRows.add(dateParse.format(column
@@ -325,9 +311,18 @@ public static void transportOneRecord(Record record, String nullFormat,
                 }
             }
             unstructuredWriter.writeOneRecord(splitedRows);
-        } catch (Exception e) {
+        } catch (IllegalArgumentException e) {
             // warn: dirty data
             taskPluginCollector.collectDirtyRecord(record, e);
+        } catch (DataXException e) {
+            // warn: dirty data
+            taskPluginCollector.collectDirtyRecord(record, e);
+        } catch (Exception e) {
+            // throw exception, it is not dirty data,
+            // may be network unreachable or another problem
+            throw DataXException.asDataXException(
+                    UnstructuredStorageWriterErrorCode.Write_ERROR, e.getMessage(), e);
         }
     }
+
 }
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterErrorCode.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterErrorCode.java
new file mode 100755
index 0000000000..77e51026cc
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterErrorCode.java
@@ -0,0 +1,33 @@
+package com.alibaba.datax.plugin.unstructuredstorage.writer.binaryFileUtil;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+
+public enum BinaryFileWriterErrorCode implements ErrorCode {
+    ILLEGAL_VALUE("UnstructuredStorageWriter-00", "errorcode.illegal_value"),
+    REPEATED_FILE_NAME("UnstructuredStorageWriter-01", "errorcode.repeated_file_name"),
+    REQUIRED_VALUE("UnstructuredStorageWriter-02", "errorcode.required_value"),;
+
+    private final String code;
+    private final String description;
+
+    private BinaryFileWriterErrorCode(String code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    @Override
+    public String getCode() {
+        return this.code;
+    }
+
+    @Override
+    public String getDescription() {
+        return this.description;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("Code:[%s], Description:[%s].", this.code,
+                this.description);
+    }
+}
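For illustration, a self-contained sketch of what the byteEncoding=base64 branch in transportOneRecord() above does to a BytesColumn cell before it is written out as text. The payload and the charset used for the non-base64 fallback are examples only; commons-codec is assumed on the classpath.

// Sketch only: base64 vs. plain string rendering of a bytes cell.
import org.apache.commons.codec.binary.Base64;

import java.nio.charset.StandardCharsets;

public class ByteEncodingDemo {
    public static void main(String[] args) {
        // a bytes payload that is not safe to print as-is
        byte[] raw = "binary\u0000payload".getBytes(StandardCharsets.UTF_8);

        String byteEncoding = "base64"; // the new byteEncoding setting
        String cell = "base64".equalsIgnoreCase(byteEncoding)
                ? Base64.encodeBase64String(raw)           // printable, csv/text safe
                : new String(raw, StandardCharsets.UTF_8); // old behaviour: asString()
        System.out.println(cell);
    }
}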
diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterUtil.java
new file mode 100644
index 0000000000..e2c9ad164f
--- /dev/null
+++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterUtil.java
@@ -0,0 +1,126 @@
+package com.alibaba.datax.plugin.unstructuredstorage.writer.binaryFileUtil;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode;
+import com.alibaba.datax.plugin.unstructuredstorage.writer.Key;
+import com.alibaba.datax.plugin.unstructuredstorage.writer.UnstructuredStorageWriterErrorCode;
+import com.google.common.collect.Sets;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.*;
+
+/**
+ * @Author: guxuan
+ * @Date 2022-05-17 17:01
+ */
+public class BinaryFileWriterUtil {
+
+    private static final Logger LOG = LoggerFactory.getLogger(BinaryFileWriterUtil.class);
+
+
+    /**
+     * 从RecordReceiver获取源文件Bytes数组, 写到目的端
+     *
+     * @param outputStream: 写文件流
+     * @param recordReceiver: RecordReceiver
+     */
+    public static void writeFileFromRecordReceiver(OutputStream outputStream, RecordReceiver recordReceiver) {
+        try {
+            Record record;
+            while ((record = recordReceiver.getFromReader()) != null) {
+                Column column = record.getColumn(0);
+                outputStream.write(column.asBytes());
+            }
+            outputStream.flush();
+            LOG.info("End write!!!");
+        } catch (IOException e) {
+            throw DataXException.asDataXException(UnstructuredStorageWriterErrorCode.Write_FILE_IO_ERROR, e);
+        }
+    }
+
+    /**
+     * 校验同步二进制文件的参数
+     *
+     * @param writerConfiguration: writer的配置
+     */
+    public static void validateParameter(Configuration writerConfiguration) {
+        // writeMode check
+        String writeMode = writerConfiguration.getNecessaryValue(
+                Key.WRITE_MODE,
+                UnstructuredStorageWriterErrorCode.REQUIRED_VALUE);
+        writeMode = writeMode.trim();
+        Set<String> supportedWriteModes = Sets.newHashSet(TRUNCATE, NOCONFLICT);
+        if (!supportedWriteModes.contains(writeMode)) {
+            throw DataXException
+                    .asDataXException(
+                            BinaryFileWriterErrorCode.ILLEGAL_VALUE,
+                            String.format("Synchronous binary format file, only supports truncate and nonConflict modes, does not support the writeMode mode you configured: %s", writeMode));
+        }
+        writerConfiguration.set(Key.WRITE_MODE, writeMode);
+    }
+
+    /**
+     * 校验文件名是否有重复的,如果有重复的文件名则抛出异常
+     * @param fileNameList
+     */
+    public static void checkFileNameIfRepeatedThrowException(List<String> fileNameList) {
+        Set<String> sourceFileNameSet = new HashSet<String>();
+        for (String fileName : fileNameList) {
+            if (!sourceFileNameSet.contains(fileName)) {
+                sourceFileNameSet.add(fileName);
+            } else {
+                throw DataXException.asDataXException(BinaryFileWriterErrorCode.REPEATED_FILE_NAME,
+                        String.format("Source File Name [%s] is repeated!", fileName));
+            }
+        }
+    }
+
+    /**
+     *
+     * @param readerSplitConfigs
+     * @param writerSliceConfig
+     * @return 切分后的结果
+     */
+    public static List<Configuration> split(List<Configuration> readerSplitConfigs, Configuration writerSliceConfig) {
+        List<Configuration> writerSplitConfigs = new ArrayList<Configuration>();
+
+        for (Configuration readerSliceConfig : readerSplitConfigs) {
+            Configuration splitedTaskConfig = writerSliceConfig.clone();
+            String fileName = getFileName(readerSliceConfig.getString(SOURCE_FILE));
+            splitedTaskConfig
+                    .set(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_NAME, fileName);
+            splitedTaskConfig.
+                    set(com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.BINARY, true);
+            writerSplitConfigs.add(splitedTaskConfig);
+        }
+        LOG.info("end do split.");
+        return writerSplitConfigs;
+    }
+
+    /**
+     * 根据文件路径获取到文件名, filePath必定包含了文件名
+     *
+     * @param filePath: 文件路径
+     */
+    public static String getFileName(String filePath) {
+        if (StringUtils.isBlank(filePath)) {
+            return null;
+        }
+        File file = new File(filePath);
+        return file.getName();
+    }
+}
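For context, a minimal sketch of the binary pass-through convention this utility relies on: the reader side packs each chunk of raw file bytes into a BytesColumn, and writeFileFromRecordReceiver() streams column 0 of every record back out verbatim. datax-common is assumed on the classpath; the byte values are examples only.

// Sketch only: the writer-side half of the binary pass-through.
import com.alibaba.datax.common.element.BytesColumn;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

public class BinaryPassThroughDemo {
    public static void main(String[] args) throws IOException {
        // reader side: raw file bytes travel as a single BytesColumn per record
        byte[] chunk = new byte[]{0x44, 0x41, 0x54, 0x41, 0x58}; // "DATAX"
        BytesColumn column = new BytesColumn(chunk);

        // writer side: the column's bytes are appended verbatim to the target file
        ByteArrayOutputStream target = new ByteArrayOutputStream();
        target.write(column.asBytes());
        target.flush();
        System.out.println(target.size() + " bytes written");
    }
}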
diff --git a/pom.xml b/pom.xml
index 5b00bb3ac2..1d298eb634 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,7 +22,7 @@
         <commons-lang3-version>3.3.2</commons-lang3-version>
         <commons-configuration-version>1.10</commons-configuration-version>
         <commons-cli-version>1.2</commons-cli-version>
-        <fastjson-version>1.1.46.sec10</fastjson-version>
+        <fastjson-version>1.2.49</fastjson-version>
         <guava-version>16.0.1</guava-version>
         <diamond.version>3.7.2.1-SNAPSHOT</diamond.version>
@@ -53,62 +53,71 @@
         <module>postgresqlreader</module>
         <module>kingbaseesreader</module>
         <module>oraclereader</module>
+        <module>cassandrareader</module>
+        <module>oceanbasev10reader</module>
+        <module>rdbmsreader</module>
+        <module>odpsreader</module>
         <module>otsreader</module>
         <module>otsstreamreader</module>
-        <module>txtfilereader</module>
-        <module>hdfsreader</module>
-        <module>streamreader</module>
+        <module>hbase11xreader</module>
+        <module>hbase094xreader</module>
+        <module>hbase11xsqlreader</module>
+        <module>hbase20xsqlreader</module>
+        <module>ossreader</module>
+        <module>hdfsreader</module>
         <module>ftpreader</module>
+        <module>txtfilereader</module>
+        <module>streamreader</module>
+
         <module>mongodbreader</module>
-        <module>rdbmsreader</module>
-        <module>hbase11xreader</module>
-        <module>hbase094xreader</module>
+        <module>tdenginereader</module>
+        <module>gdbreader</module>
        <module>tsdbreader</module>
        <module>opentsdbreader</module>
-        <module>cassandrareader</module>
-        <module>gdbreader</module>
-        <module>oceanbasev10reader</module>
+
        <module>mysqlwriter</module>
-        <module>tdenginewriter</module>
        <module>drdswriter</module>
-        <module>odpswriter</module>
-        <module>txtfilewriter</module>
-        <module>ftpwriter</module>
-        <module>hdfswriter</module>
-        <module>streamwriter</module>
-        <module>otswriter</module>
        <module>oraclewriter</module>
        <module>sqlserverwriter</module>
        <module>postgresqlwriter</module>
        <module>kingbaseeswriter</module>
-        <module>osswriter</module>
-        <module>mongodbwriter</module>
        <module>adswriter</module>
-        <module>ocswriter</module>
+        <module>oceanbasev10writer</module>
+        <module>cassandrawriter</module>
+        <module>clickhousewriter</module>
+        <module>adbpgwriter</module>
+        <module>hologresjdbcwriter</module>
        <module>rdbmswriter</module>
+
+
+        <module>odpswriter</module>
+        <module>osswriter</module>
+        <module>otswriter</module>
        <module>hbase11xwriter</module>
        <module>hbase094xwriter</module>
        <module>hbase11xsqlwriter</module>
-        <module>hbase11xsqlreader</module>
+        <module>hbase20xsqlwriter</module>
+        <module>kuduwriter</module>
+        <module>ftpwriter</module>
+        <module>hdfswriter</module>
+        <module>txtfilewriter</module>
+        <module>streamwriter</module>
+
+        <module>elasticsearchwriter</module>
+        <module>mongodbwriter</module>
+        <module>tdenginewriter</module>
+        <module>ocswriter</module>
        <module>tsdbwriter</module>
-        <module>adbpgwriter</module>
        <module>gdbwriter</module>
-        <module>cassandrawriter</module>
-        <module>clickhousewriter</module>
        <module>oscarwriter</module>
-        <module>oceanbasev10writer</module>
+
        <module>plugin-rdbms-util</module>
        <module>plugin-unstructured-storage-util</module>
-        <module>hbase20xsqlreader</module>
-        <module>hbase20xsqlwriter</module>
-        <module>kuduwriter</module>
-        <module>tdenginereader</module>
@@ -210,6 +219,17 @@
                 <enabled>true</enabled>
             </snapshots>
         </repository>
+        <repository>
+            <id>spring</id>
+            <name>spring</name>
+            <url>https://maven.aliyun.com/repository/spring</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
     </repositories>
diff --git a/postgresqlreader/pom.xml b/postgresqlreader/pom.xml
index e7f9032547..410d10a227 100755
--- a/postgresqlreader/pom.xml
+++ b/postgresqlreader/pom.xml
@@ -45,7 +45,7 @@
         <dependency>
             <groupId>org.postgresql</groupId>
             <artifactId>postgresql</artifactId>
-            <version>9.3-1102-jdbc4</version>
+            <version>42.3.3</version>
         </dependency>
diff --git a/postgresqlwriter/pom.xml b/postgresqlwriter/pom.xml
index c3240e04f5..1c9edaf63c 100755
--- a/postgresqlwriter/pom.xml
+++ b/postgresqlwriter/pom.xml
@@ -43,7 +43,7 @@
         <dependency>
             <groupId>org.postgresql</groupId>
             <artifactId>postgresql</artifactId>
-            <version>9.3-1102-jdbc4</version>
+            <version>42.3.3</version>
         </dependency>
diff --git a/tsdbwriter/pom.xml b/tsdbwriter/pom.xml
index fd4cc6f578..6f2bac5244 100644
--- a/tsdbwriter/pom.xml
+++ b/tsdbwriter/pom.xml
@@ -95,7 +95,7 @@
         <dependency>
             <groupId>com.aliyun</groupId>
             <artifactId>hitsdb-client</artifactId>
-            <version>0.4.0-SNAPSHOT</version>
+            <version>0.3.7</version>
         </dependency>
diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java
index 85a32a0762..433527daba 100755
--- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java
+++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java
@@ -5,6 +5,7 @@
 import com.alibaba.datax.common.plugin.RecordReceiver;
 import com.alibaba.datax.common.spi.Writer;
 import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.ConfigurationUtil;
 import com.alibaba.datax.common.util.RetryUtil;
 import com.alibaba.datax.plugin.writer.conn.TSDBConnection;
 import com.aliyun.hitsdb.client.TSDB;
@@ -133,10 +134,11 @@ public void init() {
             if (StringUtils.isBlank(database)) {
                 LOG.info("The parameter [" + Key.DATABASE + "] is blank.");
             } else {
-                tsdb.useDatabase(database);
+                LOG.warn("The parameter [{}] : {} is ignored.", Key.DATABASE, database);
+                // tsdb.useDatabase(database);
             }

-            LOG.info("Tsdb config:" + originalConfig.toJSON());
+            LOG.info("Tsdb config: {}", ConfigurationUtil.filterSensitive(originalConfig).toJSON());
         }
     }
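On the logging change above: ConfigurationUtil.filterSensitive() lives in datax-common and its implementation is not part of this patch. The hypothetical maskSensitive() below only illustrates the idea of redacting credential-like keys before a configuration is logged; the key patterns and values are assumptions, not the real behaviour.

// Sketch only: redacting credential-like entries before logging a config.
import java.util.LinkedHashMap;
import java.util.Map;

public class FilterSensitiveDemo {

    // hypothetical equivalent of ConfigurationUtil.filterSensitive()
    static Map<String, Object> maskSensitive(Map<String, Object> config) {
        Map<String, Object> safe = new LinkedHashMap<String, Object>(config);
        for (Map.Entry<String, Object> entry : safe.entrySet()) {
            String key = entry.getKey().toLowerCase();
            if (key.contains("password") || key.contains("accesskey") || key.contains("token")) {
                entry.setValue("*****"); // redact, keep the key visible
            }
        }
        return safe;
    }

    public static void main(String[] args) {
        Map<String, Object> config = new LinkedHashMap<String, Object>();
        config.put("endpoint", "http://localhost:8242");
        config.put("username", "root");
        config.put("password", "secret");
        // prints {endpoint=http://localhost:8242, username=root, password=*****}
        System.out.println(maskSensitive(config));
    }
}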