From 5026d7900d603ca8741ffa6cb8e6385924d5681e Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 28 Sep 2022 16:39:56 +0800 Subject: [PATCH 01/32] support aop fetch appIds --- .../mysql/dolphinscheduler_env.sh | 9 ++ .../postgresql/dolphinscheduler_env.sh | 9 ++ CONTRIBUTING.md | 0 LICENSE | 0 NOTICE | 0 README.md | 0 README_zh_CN.md | 0 .../kubernetes/dolphinscheduler/values.yaml | 4 + docs/docs/en/architecture/configuration.md | 11 ++ docs/docs/zh/architecture/configuration.md | 11 ++ dolphinscheduler-aop/pom.xml | 135 ++++++++++++++++++ .../main/assembly/dolphinscheduler-aop.xml | 32 +++++ .../aop/YarnClientAspect.java | 100 +++++++++++++ .../src/main/resources/META-INF/aop.xml | 26 ++++ .../docker/file-manage/common.properties | 8 +- .../dolphinscheduler/common/Constants.java | 6 + .../common/utils/CommonUtils.java | 30 ++-- .../common/utils/FileUtils.java | 25 ++-- .../src/main/resources/common.properties | 11 +- .../docker/file-manage/common.properties | 7 +- .../server/log/LoggerRequestProcessor.java | 8 +- .../builder/TaskExecutionContextBuilder.java | 21 +++ .../master/runner/task/BaseTaskProcessor.java | 1 + .../command/log/GetAppIdRequestCommand.java | 2 + .../server/utils/ProcessUtils.java | 2 +- .../service/log/LogClient.java | 14 +- .../dolphinscheduler/spi/utils/Constants.java | 10 +- .../task/api/AbstractCommandExecutor.java | 4 +- .../plugin/task/api/AbstractYarnTask.java | 5 +- .../plugin/task/api/TaskExecutionContext.java | 5 + .../plugin/task/api/utils/LogUtils.java | 32 ++++- .../plugin/task/api/utils/LogUtilsTest.java | 12 +- .../test/resources/{appId.txt => appId.log} | 0 .../src/test/resources/appInfo.log | 1 + .../main/assembly/dolphinscheduler-tools.xml | 7 + .../worker/processor/TaskKillProcessor.java | 16 ++- .../runner/WorkerTaskExecuteRunnable.java | 5 +- .../utils/TaskExecutionCheckerUtils.java | 16 +-- lombok.config | 0 mvnw.cmd | 0 pom.xml | 1 + script/env/dolphinscheduler_env.sh | 9 ++ 42 files changed, 533 insertions(+), 62 
deletions(-) mode change 100644 => 100755 CONTRIBUTING.md mode change 100644 => 100755 LICENSE mode change 100644 => 100755 NOTICE mode change 100644 => 100755 README.md mode change 100644 => 100755 README_zh_CN.md create mode 100644 dolphinscheduler-aop/pom.xml create mode 100644 dolphinscheduler-aop/src/main/assembly/dolphinscheduler-aop.xml create mode 100644 dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java create mode 100644 dolphinscheduler-aop/src/main/resources/META-INF/aop.xml rename dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/{appId.txt => appId.log} (100%) create mode 100644 dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appInfo.log mode change 100644 => 100755 lombok.config mode change 100644 => 100755 mvnw.cmd mode change 100644 => 100755 pom.xml diff --git a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh index bb2d54e6936d..373fb94496c0 100755 --- a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh @@ -44,3 +44,12 @@ export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH + +# applicationId auto collection related configuration +export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH +export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASSPATH +export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $HADOOP_CLIENT_OPTS" +export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $SPARK_SUBMIT_OPTS" +export
FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $FLINK_ENV_JAVA_OPTS" +# print detailed info of yarn application +export PARA_NAME_ASPECTJ_DEBUG=false diff --git a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh index 085676f82f5f..3d68b05b1b62 100644 --- a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh @@ -44,3 +44,12 @@ export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH + +# applicationId auto collection related configuration +export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH +export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASSPATH +export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $HADOOP_CLIENT_OPTS" +export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $SPARK_SUBMIT_OPTS" +export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $FLINK_ENV_JAVA_OPTS" +# print detailed info of yarn application +export PARA_NAME_ASPECTJ_DEBUG=false diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/NOTICE b/NOTICE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/README_zh_CN.md b/README_zh_CN.md old mode 100644 new mode 100755 diff --git a/deploy/kubernetes/dolphinscheduler/values.yaml b/deploy/kubernetes/dolphinscheduler/values.yaml index 1929a153a20f..3d0d826cb2ab 100644 ---
a/deploy/kubernetes/dolphinscheduler/values.yaml +++ b/deploy/kubernetes/dolphinscheduler/values.yaml @@ -149,6 +149,10 @@ conf: alert.rpc.port: 50052 # Url endpoint for zeppelin RESTful API zeppelin.rest.url: http://localhost:8080 + # way to collect applicationId: log, aop + appId.collect: log + # appIds info log path + appId.file.path: appInfo.log common: ## Configmap diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index 45d73412777a..607d2f03215c 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -352,8 +352,19 @@ export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python} export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive} export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} +export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel} +export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH + +# applicationId auto collection related configuration +export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH +export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASSPATH +export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $HADOOP_CLIENT_OPTS" +export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $SPARK_SUBMIT_OPTS" +export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $FLINK_ENV_JAVA_OPTS" +# print detailed info of yarn application +export PARA_NAME_ASPECTJ_DEBUG=false ``` ### Log related configuration diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 500f08c9c801..af6bae2bb834 100644 --- a/docs/docs/zh/architecture/configuration.md +++
b/docs/docs/zh/architecture/configuration.md @@ -344,8 +344,19 @@ export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python} export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive} export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} +export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel} +export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH + +# applicationId auto collection related configuration +export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH +export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASSPATH +export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $HADOOP_CLIENT_OPTS" +export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $SPARK_SUBMIT_OPTS" +export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar $FLINK_ENV_JAVA_OPTS" +# print detailed info of yarn application +export PARA_NAME_ASPECTJ_DEBUG=false ``` ## 日志相关配置 diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml new file mode 100644 index 000000000000..318d1d54e89b --- /dev/null +++ b/dolphinscheduler-aop/pom.xml @@ -0,0 +1,135 @@ + + + 4.0.0 + + org.apache.dolphinscheduler + dolphinscheduler + dev-SNAPSHOT + + dolphinscheduler-aop + jar + ${project.artifactId} + aop 4 YarnClient to get application id when submitting jars using 'yarn jar mainClass args' + + + UTF-8 + 1.8 + 1.8 + 1.9.7 + 3.2.0 + + + + + + org.aspectj + aspectjweaver + ${aspectj.version} + runtime + + + + org.aspectj + aspectjrt + ${aspectj.version} + + + + org.apache.hadoop + hadoop-yarn-client + ${yarn.version} + + + + + org.apache.hadoop + hadoop-yarn-common + ${yarn.version} + + + + org.apache.hadoop + hadoop-common + ${yarn.version} + + + + + junit +
junit + ${junit.version} + test + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + *.yaml + *.xml + + + + + maven-assembly-plugin + + + dolphinscheduler-aop + + single + + package + + aop + + src/main/assembly/dolphinscheduler-aop.xml + + false + + + + + + org.codehaus.mojo + aspectj-maven-plugin + + 1.8 + 1.8 + 1.8 + true + true + ignore + UTF-8 + + + + + compile + test-compile + + + + + + + + + + docker + + + + org.codehaus.mojo + exec-maven-plugin + + + + + + diff --git a/dolphinscheduler-aop/src/main/assembly/dolphinscheduler-aop.xml b/dolphinscheduler-aop/src/main/assembly/dolphinscheduler-aop.xml new file mode 100644 index 000000000000..779d5508ac58 --- /dev/null +++ b/dolphinscheduler-aop/src/main/assembly/dolphinscheduler-aop.xml @@ -0,0 +1,32 @@ + + + + dolphinscheduler-aop + + dir + + false + aop + + + libs + + + \ No newline at end of file diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java new file mode 100644 index 000000000000..3a9a615d5ff9 --- /dev/null +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.aop; + +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.Collections; + +import org.aspectj.lang.annotation.AfterReturning; +import org.aspectj.lang.annotation.Aspect; + +@Aspect +public class YarnClientAspect { + + /** + * flag to indicate whether print debug logs + */ + private static final String PARA_NAME_ASPECTJ_DEBUG = "PARA_NAME_ASPECTJ_DEBUG"; + + /** + * The current application report when application submitted successfully + */ + private ApplicationReport currentApplicationReport = null; + + private String appInfoFilePath; + private boolean debug; + + public YarnClientAspect() { + appInfoFilePath = System.getProperty("user.dir") + "/appInfo.log"; + debug = Boolean.parseBoolean(System.getenv(PARA_NAME_ASPECTJ_DEBUG)); + } + + /** + * Trigger submitApplication when invoking YarnClientImpl.submitApplication + * + * @param appContext application context when invoking YarnClientImpl.submitApplication + * @param submittedAppId the submitted application id returned by YarnClientImpl.submitApplication + * @throws Throwable exceptions + */ + @AfterReturning(pointcut = "execution(ApplicationId org.apache.hadoop.yarn.client.api.impl.YarnClientImpl." 
+ + "submitApplication(ApplicationSubmissionContext)) && args(appContext)", + returning = "submittedAppId", argNames = "appContext,submittedAppId") + public void registerApplicationInfo(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) { + if (appInfoFilePath != null) { + try { + Files.write(Paths.get(appInfoFilePath), + Collections.singletonList(submittedAppId.toString()), + StandardOpenOption.CREATE, + StandardOpenOption.WRITE, + StandardOpenOption.APPEND); + } catch (IOException ioException) { + System.out.println( + "YarnClientAspect[registerAppInfo]: can't output current application information, because " + + ioException.getMessage()); + } + } + if (debug) { + System.out.println("YarnClientAspect[submitApplication]: current application context " + appContext); + System.out.println("YarnClientAspect[submitApplication]: submitted application id " + submittedAppId); + System.out.println( + "YarnClientAspect[submitApplication]: current application report " + currentApplicationReport); + } + } + + /** + * Trigger getAppReport only when invoking getApplicationReport within submitApplication + * This method will invoke many times, however, the last ApplicationReport instance assigned to currentApplicationReport + * + * @param appReport current application report when invoking getApplicationReport within submitApplication + * @param appId current application id, which is the parameter of getApplicationReport + * @throws Throwable exceptions + */ + @AfterReturning(pointcut = "cflow(execution(ApplicationId org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.submitApplication(ApplicationSubmissionContext))) " + + + "&& !within(YarnClientAspect) && execution(ApplicationReport org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(ApplicationId)) && args(appId)", returning = "appReport", argNames = "appReport,appId") + public void registerApplicationReport(ApplicationReport appReport, ApplicationId appId) { + currentApplicationReport =
appReport; + } +} diff --git a/dolphinscheduler-aop/src/main/resources/META-INF/aop.xml b/dolphinscheduler-aop/src/main/resources/META-INF/aop.xml new file mode 100644 index 000000000000..9cfa776853fe --- /dev/null +++ b/dolphinscheduler-aop/src/main/resources/META-INF/aop.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties index abac3ad39188..26dc23492c9c 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties @@ -66,4 +66,8 @@ aws.secret.access.key=secretKey123 aws.region=us-east-1 aws.endpoint=http://s3:9000 # Task resource limit state -task.resource.limit.state=false \ No newline at end of file +task.resource.limit.state=false +# way to collect applicationId: log(original regex match), aop +appId.collect: log +# appIds info log path +appId.file.path = appInfo.log \ No newline at end of file diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java index 042b28a980d5..ad7335384744 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java 
+++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java @@ -156,6 +156,12 @@ private Constants() { public static final String ALIBABA_CLOUD_OSS_BUCKET_NAME = "resource.alibaba.cloud.oss.bucket.name"; public static final String ALIBABA_CLOUD_OSS_END_POINT = "resource.alibaba.cloud.oss.endpoint"; + /** + * fetch applicationId way + */ + public static final String APPID_COLLECT = "appId.collect"; + public static final String APPID_FILE_PATH = "appId.file.path"; + /** * comma , */ diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java index acda412aa38f..d9636a9b4f57 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java @@ -36,6 +36,7 @@ * common utils */ public class CommonUtils { + private static final Logger logger = LoggerFactory.getLogger(CommonUtils.class); private static final Base64 BASE64 = new Base64(); @@ -85,7 +86,8 @@ public static boolean isSudoEnable() { public static boolean getKerberosStartupState() { String resUploadStartupType = PropertyUtils.getUpperCaseString(Constants.RESOURCE_STORAGE_TYPE); ResUploadType resUploadType = ResUploadType.valueOf(resUploadStartupType); - Boolean kerberosStartupState = PropertyUtils.getBoolean(Constants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE, false); + Boolean kerberosStartupState = + PropertyUtils.getBoolean(Constants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE, false); return resUploadType == ResUploadType.HDFS && kerberosStartupState; } @@ -110,7 +112,8 @@ public static boolean loadKerberosConf(Configuration configuration) throws IOExc * @param loginUserKeytabPath loginUserKeytabPath * @throws IOException errors */ - public static void loadKerberosConf(String 
javaSecurityKrb5Conf, String loginUserKeytabUsername, String loginUserKeytabPath) throws IOException { + public static void loadKerberosConf(String javaSecurityKrb5Conf, String loginUserKeytabUsername, + String loginUserKeytabPath) throws IOException { loadKerberosConf(javaSecurityKrb5Conf, loginUserKeytabUsername, loginUserKeytabPath, new Configuration()); } @@ -124,13 +127,18 @@ public static void loadKerberosConf(String javaSecurityKrb5Conf, String loginUse * @return load kerberos config return true * @throws IOException errors */ - public static boolean loadKerberosConf(String javaSecurityKrb5Conf, String loginUserKeytabUsername, String loginUserKeytabPath, Configuration configuration) throws IOException { + public static boolean loadKerberosConf(String javaSecurityKrb5Conf, String loginUserKeytabUsername, + String loginUserKeytabPath, Configuration configuration) throws IOException { if (CommonUtils.getKerberosStartupState()) { - System.setProperty(Constants.JAVA_SECURITY_KRB5_CONF, StringUtils.defaultIfBlank(javaSecurityKrb5Conf, PropertyUtils.getString(Constants.JAVA_SECURITY_KRB5_CONF_PATH))); + System.setProperty(Constants.JAVA_SECURITY_KRB5_CONF, StringUtils.defaultIfBlank(javaSecurityKrb5Conf, + PropertyUtils.getString(Constants.JAVA_SECURITY_KRB5_CONF_PATH))); configuration.set(Constants.HADOOP_SECURITY_AUTHENTICATION, Constants.KERBEROS); UserGroupInformation.setConfiguration(configuration); - UserGroupInformation.loginUserFromKeytab(StringUtils.defaultIfBlank(loginUserKeytabUsername, PropertyUtils.getString(Constants.LOGIN_USER_KEY_TAB_USERNAME)), - StringUtils.defaultIfBlank(loginUserKeytabPath, PropertyUtils.getString(Constants.LOGIN_USER_KEY_TAB_PATH))); + UserGroupInformation.loginUserFromKeytab( + StringUtils.defaultIfBlank(loginUserKeytabUsername, + PropertyUtils.getString(Constants.LOGIN_USER_KEY_TAB_USERNAME)), + StringUtils.defaultIfBlank(loginUserKeytabPath, + PropertyUtils.getString(Constants.LOGIN_USER_KEY_TAB_PATH))); return true; } 
return false; @@ -143,14 +151,15 @@ public static String encodePassword(String password) { if (StringUtils.isEmpty(password)) { return StringUtils.EMPTY; } - //if encryption is not turned on, return directly + // if encryption is not turned on, return directly boolean encryptionEnable = PropertyUtils.getBoolean(Constants.DATASOURCE_ENCRYPTION_ENABLE, false); if (!encryptionEnable) { return password; } // Using Base64 + salt to process password - String salt = PropertyUtils.getString(Constants.DATASOURCE_ENCRYPTION_SALT, Constants.DATASOURCE_ENCRYPTION_SALT_DEFAULT); + String salt = PropertyUtils.getString(Constants.DATASOURCE_ENCRYPTION_SALT, + Constants.DATASOURCE_ENCRYPTION_SALT_DEFAULT); String passwordWithSalt = salt + new String(BASE64.encode(password.getBytes(StandardCharsets.UTF_8))); return new String(BASE64.encode(passwordWithSalt.getBytes(StandardCharsets.UTF_8))); } @@ -163,14 +172,15 @@ public static String decodePassword(String password) { return StringUtils.EMPTY; } - //if encryption is not turned on, return directly + // if encryption is not turned on, return directly boolean encryptionEnable = PropertyUtils.getBoolean(Constants.DATASOURCE_ENCRYPTION_ENABLE, false); if (!encryptionEnable) { return password; } // Using Base64 + salt to process password - String salt = PropertyUtils.getString(Constants.DATASOURCE_ENCRYPTION_SALT, Constants.DATASOURCE_ENCRYPTION_SALT_DEFAULT); + String salt = PropertyUtils.getString(Constants.DATASOURCE_ENCRYPTION_SALT, + Constants.DATASOURCE_ENCRYPTION_SALT_DEFAULT); String passwordWithSalt = new String(BASE64.decode(password), StandardCharsets.UTF_8); if (!passwordWithSalt.startsWith(salt)) { logger.warn("There is a password and salt mismatch: {} ", password); diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java index 23e4b74b7573..4c352d85236c 100644 --- 
a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java @@ -17,12 +17,7 @@ package org.apache.dolphinscheduler.common.utils; -import static org.apache.dolphinscheduler.common.Constants.DATA_BASEDIR_PATH; -import static org.apache.dolphinscheduler.common.Constants.FOLDER_SEPARATOR; -import static org.apache.dolphinscheduler.common.Constants.RESOURCE_VIEW_SUFFIXES; -import static org.apache.dolphinscheduler.common.Constants.RESOURCE_VIEW_SUFFIXES_DEFAULT_VALUE; -import static org.apache.dolphinscheduler.common.Constants.UTF_8; -import static org.apache.dolphinscheduler.common.Constants.YYYYMMDDHHMMSS; +import static org.apache.dolphinscheduler.common.Constants.*; import org.apache.commons.io.IOUtils; @@ -46,6 +41,8 @@ public class FileUtils { public static final String DATA_BASEDIR = PropertyUtils.getString(DATA_BASEDIR_PATH, "/tmp/dolphinscheduler"); + public static final String APPINFO_PATH = PropertyUtils.getString(APPID_FILE_PATH, "appInfo.log"); + private FileUtils() { throw new UnsupportedOperationException("Construct FileUtils"); } @@ -105,6 +102,16 @@ public static String getProcessExecDir(long projectCode, long processDefineCode, return fileName; } + /** + * absolute path of appInfo file + * + * @param execPath directory of process execution + * @return + */ + public static String getAppInfoPath(String execPath) { + return String.format("%s/%s", execPath, APPINFO_PATH); + } + /** * @return get suffixes for resource files that support online viewing */ @@ -119,7 +126,7 @@ public static String getResourceViewSuffixes() { * @throws IOException errors */ public static void createWorkDirIfAbsent(String execLocalPath) throws IOException { - //if work dir exists, first delete + // if work dir exists, first delete File execLocalPathFile = new File(execLocalPath); if (execLocalPathFile.exists()) { @@ -134,7 +141,7 @@ public 
static void createWorkDirIfAbsent(String execLocalPath) throws IOExceptio } } - //create work dir + // create work dir org.apache.commons.io.FileUtils.forceMkdir(execLocalPathFile); String mkdirLog = "create dir success " + execLocalPath; logger.info(mkdirLog); @@ -230,7 +237,7 @@ public static String readFile2Str(InputStream inputStream) { * @param filename String type of filename * @return whether file path could be traversal or not */ - public static boolean directoryTraversal(String filename){ + public static boolean directoryTraversal(String filename) { if (filename.contains(FOLDER_SEPARATOR)) { return true; } diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 8a266ab9d13f..a26d14e4526f 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -121,4 +121,9 @@ task.resource.limit.state=false # mlflow task plugin preset repository 
ml.mlflow.preset_repository=https://github.com/apache/dolphinscheduler-mlflow # mlflow task plugin preset repository version -ml.mlflow.preset_repository_version="main" \ No newline at end of file +ml.mlflow.preset_repository_version="main" + +# way to collect applicationId: log(original regex match), aop +appId.collect: log +# appIds info log path +appId.file.path = appInfo.log \ No newline at end of file diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties index c8a3c32433c2..3fd8f5d8c072 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties @@ -88,4 +88,9 @@ resource.aws.s3.bucket.name=dolphinscheduler resource.aws.s3.endpoint=http://s3:9000 # Task resource limit state -task.resource.limit.state=false \ No newline at end of file +task.resource.limit.state=false + +# way to collect applicationId: log(original regex match), aop +appId.collect: log +# appIds info log path +appId.file.path = appInfo.log \ No newline at end of file diff --git a/dolphinscheduler-log-server/src/main/java/org/apache/dolphinscheduler/server/log/LoggerRequestProcessor.java b/dolphinscheduler-log-server/src/main/java/org/apache/dolphinscheduler/server/log/LoggerRequestProcessor.java index 0d04ed859661..e40b3c019d71 100644 --- a/dolphinscheduler-log-server/src/main/java/org/apache/dolphinscheduler/server/log/LoggerRequestProcessor.java +++ b/dolphinscheduler-log-server/src/main/java/org/apache/dolphinscheduler/server/log/LoggerRequestProcessor.java @@ -17,8 +17,11 @@ package org.apache.dolphinscheduler.server.log; +import static org.apache.dolphinscheduler.common.Constants.*; + import org.apache.dolphinscheduler.common.utils.JSONUtils; import 
org.apache.dolphinscheduler.common.utils.LoggerUtils; +import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.task.api.utils.LogUtils; import org.apache.dolphinscheduler.remote.command.Command; import org.apache.dolphinscheduler.remote.command.CommandType; @@ -162,11 +165,12 @@ public void process(Channel channel, Command command) { case GET_APP_ID_REQUEST: GetAppIdRequestCommand getAppIdRequestCommand = JSONUtils.parseObject(command.getBody(), GetAppIdRequestCommand.class); + String appInfoPath = getAppIdRequestCommand.getAppInfoPath(); String logPath = getAppIdRequestCommand.getLogPath(); - if (!checkPathSecurity(logPath)) { + if (!checkPathSecurity(appInfoPath) || !checkPathSecurity(logPath)) { throw new IllegalArgumentException("Illegal path"); } - List appIds = LogUtils.getAppIdsFromLogFile(logPath); + List appIds = LogUtils.getAppIds(logPath, appInfoPath, PropertyUtils.getString(APPID_COLLECT, "log")); channel.writeAndFlush( new GetAppIdResponseCommand(appIds).convert2Command(command.getOpaque())); break; diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java index 5dfd708f56f0..bf3271bbe96d 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java @@ -20,6 +20,7 @@ import static org.apache.dolphinscheduler.common.Constants.SEC_2_MINUTES_TIME_UNIT; import org.apache.dolphinscheduler.common.enums.TimeoutFlag; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.dao.entity.ProcessDefinition; import org.apache.dolphinscheduler.dao.entity.ProcessInstance; import 
org.apache.dolphinscheduler.dao.entity.TaskDefinition; @@ -124,6 +125,23 @@ public TaskExecutionContextBuilder buildProcessDefinitionRelatedInfo(ProcessDefi return this; } + /** + * build execPath related info + * + * @return TaskExecutionContextBuilder + */ + public TaskExecutionContextBuilder buildExecPathRelatedInfo() { + String execPath = FileUtils.getProcessExecDir( + taskExecutionContext.getProjectCode(), + taskExecutionContext.getProcessDefineCode(), + taskExecutionContext.getProcessDefineVersion(), + taskExecutionContext.getProcessInstanceId(), + taskExecutionContext.getTaskInstanceId()); + taskExecutionContext.setExecutePath(execPath); + taskExecutionContext.setAppInfoPath(FileUtils.getAppInfoPath(execPath)); + return this; + } + public TaskExecutionContextBuilder buildDataQualityTaskExecutionContext(DataQualityTaskExecutionContext dataQualityTaskExecutionContext) { taskExecutionContext.setDataQualityTaskExecutionContext(dataQualityTaskExecutionContext); return this; @@ -133,6 +151,7 @@ public TaskExecutionContextBuilder buildResourceParametersInfo(ResourceParameter taskExecutionContext.setResourceParametersHelper(parametersHelper); return this; } + /** * build k8sTask related info * @@ -147,6 +166,7 @@ public TaskExecutionContextBuilder buildK8sTaskRelatedInfo(K8sTaskExecutionConte /** * build global and local params + * * @param propertyMap * @return */ @@ -157,6 +177,7 @@ public TaskExecutionContextBuilder buildParamInfo(Map property /** * build business params + * * @param businessParamsMap * @return */ diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java index a67ee628ea8c..a7b62bcbc75d 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java +++ 
b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java @@ -339,6 +339,7 @@ protected TaskExecutionContext getTaskExecutionContext(TaskInstance taskInstance .buildTaskDefinitionRelatedInfo(taskInstance.getTaskDefine()) .buildProcessInstanceRelatedInfo(taskInstance.getProcessInstance()) .buildProcessDefinitionRelatedInfo(taskInstance.getProcessDefine()) + .buildExecPathRelatedInfo() .buildResourceParametersInfo(resources) .buildDataQualityTaskExecutionContext(dataQualityTaskExecutionContext) .buildK8sTaskRelatedInfo(k8sTaskExecutionContext) diff --git a/dolphinscheduler-remote/src/main/java/org/apache/dolphinscheduler/remote/command/log/GetAppIdRequestCommand.java b/dolphinscheduler-remote/src/main/java/org/apache/dolphinscheduler/remote/command/log/GetAppIdRequestCommand.java index 26412b8283b1..ae0c99908b09 100644 --- a/dolphinscheduler-remote/src/main/java/org/apache/dolphinscheduler/remote/command/log/GetAppIdRequestCommand.java +++ b/dolphinscheduler-remote/src/main/java/org/apache/dolphinscheduler/remote/command/log/GetAppIdRequestCommand.java @@ -33,6 +33,8 @@ public class GetAppIdRequestCommand implements Serializable { private String logPath; + private String appInfoPath; + public Command convert2Command() { Command command = new Command(); command.setType(CommandType.GET_APP_ID_REQUEST); diff --git a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/ProcessUtils.java b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/ProcessUtils.java index a8361fa4ed9e..e5d4df653afc 100644 --- a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/ProcessUtils.java +++ b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/utils/ProcessUtils.java @@ -195,7 +195,7 @@ public static String getPidsStr(int processId) throws Exception { try { Thread.sleep(Constants.SLEEP_TIME_MILLIS); Host host = 
Host.of(taskExecutionContext.getHost()); - List appIds = logClient.getAppIds(host.getIp(), host.getPort(), taskExecutionContext.getLogPath()); + List appIds = logClient.getAppIds(host.getIp(), host.getPort(), taskExecutionContext.getLogPath(), taskExecutionContext.getAppInfoPath()); if (CollectionUtils.isNotEmpty(appIds)) { if (StringUtils.isEmpty(taskExecutionContext.getExecutePath())) { taskExecutionContext diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java index 611bb49b67f4..d385ed06386f 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java @@ -17,9 +17,11 @@ package org.apache.dolphinscheduler.service.log; +import static org.apache.dolphinscheduler.common.Constants.APPID_COLLECT; import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.LoggerUtils; import org.apache.dolphinscheduler.common.utils.NetUtils; +import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.task.api.utils.LogUtils; import org.apache.dolphinscheduler.remote.NettyRemotingClient; import org.apache.dolphinscheduler.remote.command.Command; @@ -203,15 +205,15 @@ public Boolean removeTaskLog(String host, int port, String path) { } } - public @Nullable List getAppIds(@NonNull String host, int port, - @NonNull String taskLogFilePath) throws RemotingException, InterruptedException { - logger.info("Begin to get appIds from worker: {}:{} taskLogPath: {}", host, port, taskLogFilePath); + public @Nullable List getAppIds(@NonNull String host, int port, @NonNull String taskLogFilePath, + @NonNull String taskAppInfoPath) throws RemotingException, InterruptedException { + logger.info("Begin to get appIds from 
worker: {}:{} taskAppInfoPath: {}", host, port, taskAppInfoPath); final Host workerAddress = new Host(host, port); List appIds = null; if (NetUtils.getHost().equals(host)) { - appIds = LogUtils.getAppIdsFromLogFile(taskLogFilePath); + appIds = LogUtils.getAppIds(taskLogFilePath, taskAppInfoPath, PropertyUtils.getString(APPID_COLLECT, "log")); } else { - final Command command = new GetAppIdRequestCommand(taskLogFilePath).convert2Command(); + final Command command = new GetAppIdRequestCommand(taskLogFilePath, taskAppInfoPath).convert2Command(); Command response = this.client.sendSync(workerAddress, command, LOG_REQUEST_TIMEOUT); if (response != null) { GetAppIdResponseCommand responseCommand = @@ -219,7 +221,7 @@ public Boolean removeTaskLog(String host, int port, String path) { appIds = responseCommand.getAppIds(); } } - logger.info("Get appIds: {} from worker: {}:{} taskLogPath: {}", appIds, host, port, taskLogFilePath); + logger.info("Get appIds: {} from worker: {}:{} taskLogPath: {}, taskAppInfoPath: {}", appIds, host, port, taskLogFilePath, taskAppInfoPath); return appIds; } diff --git a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java index 259bb453229a..ae12dbf7b483 100644 --- a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java +++ b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java @@ -44,7 +44,6 @@ private Constants() { /**plugin param emit string **/ public static final String STRING_PLUGIN_PARAM_EMIT = "emit"; - /** string true */ public static final String STRING_TRUE = "true"; /** string false */ @@ -164,7 +163,6 @@ private Constants() { public static final String COM_REDSHIFT_JDBC_DRIVER = "com.amazon.redshift.jdbc42.Driver"; public static final String COM_ATHENA_JDBC_DRIVER = "com.simba.athena.jdbc.Driver"; - /** * validation Query */ @@ -204,6 
+202,12 @@ private Constants() { public static final String KERBEROS_KEY_TAB_USERNAME = "loginUserKeytabUsername"; public static final String KERBEROS_KEY_TAB_PATH = "loginUserKeytabPath"; + /** + * fetch applicationId way + */ + public static final String APPID_COLLECT = "appId.collect"; + public static final String APPID_FILE_PATH = "appId.file.path"; + /** * DOUBLE_SLASH // */ @@ -224,7 +228,6 @@ private Constants() { */ public static final String COLON = ":"; - /** * AT SIGN @ */ @@ -235,7 +238,6 @@ private Constants() { */ public static final String SEMICOLON = ";"; - /** * EQUAL_SIGN = */ diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractCommandExecutor.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractCommandExecutor.java index 49e224e95d2b..2ef12c8e43af 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractCommandExecutor.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractCommandExecutor.java @@ -112,7 +112,7 @@ private void buildProcess(String commandFile) throws IOException { // setting up user to run commands List command = new LinkedList<>(); - //init process builder + // init process builder ProcessBuilder processBuilder = new ProcessBuilder(); // setting up a working directory processBuilder.directory(new File(taskRequest.getExecutePath())); @@ -190,7 +190,7 @@ public TaskResponse run(String execCommand) throws IOException, InterruptedExcep // create command file if not exists createCommandFileIfNotExists(execCommand, commandFilePath); - //build process + // build process buildProcess(commandFilePath); // parse process output diff --git 
a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java index 5e3ec8fab29e..35e735b4857e 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java @@ -20,10 +20,13 @@ import org.apache.dolphinscheduler.plugin.task.api.model.ResourceInfo; import org.apache.dolphinscheduler.plugin.task.api.model.TaskResponse; import org.apache.dolphinscheduler.plugin.task.api.utils.LogUtils; +import org.apache.dolphinscheduler.spi.utils.PropertyUtils; import java.util.List; import java.util.regex.Pattern; +import static org.apache.dolphinscheduler.spi.utils.Constants.APPID_COLLECT; + /** * abstract yarn task */ @@ -107,7 +110,7 @@ public void cancelApplication() throws TaskException { */ @Override public List getApplicationIds() throws TaskException { - return LogUtils.getAppIdsFromLogFile(taskRequest.getLogPath(), logger); + return LogUtils.getAppIds(taskRequest.getLogPath(), taskRequest.getAppInfoPath(), PropertyUtils.getString(APPID_COLLECT, "log")); } /** diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskExecutionContext.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskExecutionContext.java index ad6d6b50d12e..8c741014f96a 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskExecutionContext.java +++ 
b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskExecutionContext.java @@ -82,6 +82,11 @@ public class TaskExecutionContext implements Serializable { */ private String logPath; + /** + * applicationId path + */ + private String appInfoPath; + /** * task json */ diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java index c3833c071a7f..c2fc070b8d28 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java @@ -44,10 +44,38 @@ public class LogUtils { private static final Pattern APPLICATION_REGEX = Pattern.compile(TaskConstants.YARN_APPLICATION_REGEX); - public List getAppIdsFromLogFile(@NonNull String logPath) { - return getAppIdsFromLogFile(logPath, log); + public List getAppIds(@NonNull String logPath, @NonNull String appInfoPath, String fetchWay) { + switch (fetchWay) { + case "aop": + log.info("Start finding appId in {}, fetch way: {} ", appInfoPath); + return getAppIdsFromAppInfoFile(appInfoPath, log); + case "log": + log.info("Start finding appId in {}, fetch way: {} ", logPath); + return getAppIdsFromLogFile(logPath, log); + default: + log.info("Match No Way!!"); + return null; + } } + public List getAppIdsFromAppInfoFile(@NonNull String appInfoPath, Logger logger) { + File appInfoFile = new File(appInfoPath); + if (!appInfoFile.exists() || !appInfoFile.isFile()) { + return Collections.emptyList(); + } + List appIds = new ArrayList<>(); + try (Stream stream = Files.lines(Paths.get(appInfoPath))) { + stream.forEach(line -> { + appIds.add(line); + 
}); + return new ArrayList<>(appIds); + } catch (IOException e) { + logger.error("Get appId from appInfo file error, appInfoPath: {}", appInfoPath, e); + return Collections.emptyList(); + } + } + + public List getAppIdsFromLogFile(@NonNull String logPath, Logger logger) { File logFile = new File(logPath); if (!logFile.exists() || !logFile.isFile()) { diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java index d72a9737bfb5..edf499c3ad42 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java @@ -26,12 +26,20 @@ public class LogUtilsTest { - private static final String APP_ID_FILE = LogUtilsTest.class.getResource("/appId.txt") + private static final String APP_ID_FILE = LogUtilsTest.class.getResource("/appId.log") + .getFile(); + private static final String APP_INFO_FILE = LogUtilsTest.class.getResource("/appInfo.log") .getFile(); @Test public void getAppIdsFromLogFile() { - List appIds = LogUtils.getAppIdsFromLogFile(APP_ID_FILE); + List appIds = LogUtils.getAppIds(APP_ID_FILE, APP_INFO_FILE, "log"); + Assert.assertEquals(Lists.newArrayList("application_1548381669007_1234"), appIds); + } + + @Test + public void getAppIdsFromAppInfoFile() { + List appIds = LogUtils.getAppIds(APP_ID_FILE, APP_INFO_FILE, "log"); Assert.assertEquals(Lists.newArrayList("application_1548381669007_1234"), appIds); } } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appId.txt b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appId.log similarity index 100% rename 
from dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appId.txt rename to dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appId.log diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appInfo.log b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appInfo.log new file mode 100644 index 000000000000..a008253d9eb1 --- /dev/null +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appInfo.log @@ -0,0 +1 @@ +application_1548381669007_1234 \ No newline at end of file diff --git a/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml b/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml index 49a95d25d0ce..7ff8fec886a9 100644 --- a/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml +++ b/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml @@ -52,6 +52,13 @@ conf + + ${basedir}/../dolphinscheduler-aop/target/aop/libs + + dolphinscheduler-aop*.jar + + libs/aop + diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java index ed351c3b86a7..050ad0f6406e 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java @@ -158,9 +158,10 @@ private Pair> doKill(TaskExecutionContext taskExecutionCon // find log and kill yarn job Pair> yarnResult = killYarnJob(Host.of(taskExecutionContext.getHost()), - taskExecutionContext.getLogPath(), - taskExecutionContext.getExecutePath(), - taskExecutionContext.getTenantCode()); + taskExecutionContext.getLogPath(), + taskExecutionContext.getAppInfoPath(), + 
taskExecutionContext.getExecutePath(), + taskExecutionContext.getTenantCode()); return Pair.of(processFlag && yarnResult.getLeft(), yarnResult.getRight()); } @@ -223,16 +224,17 @@ protected boolean killProcess(String tenantCode, Integer processId) { */ private Pair> killYarnJob(@NonNull Host host, String logPath, + String appInfoPath, String executePath, String tenantCode) { - if (logPath == null || executePath == null || tenantCode == null) { - logger.error("Kill yarn job error, the input params is illegal, host: {}, logPath: {}, executePath: {}, tenantCode: {}", - host, logPath, executePath, tenantCode); + if (logPath == null || appInfoPath == null || executePath == null || tenantCode == null) { + logger.error("Kill yarn job error, the input params is illegal, host: {}, logPath: {}, appInfoPath: {}, executePath: {}, tenantCode: {}", + host, logPath, appInfoPath, executePath, tenantCode); return Pair.of(false, Collections.emptyList()); } try { logger.info("Get appIds from worker {}:{} taskLogPath: {}", host.getIp(), host.getPort(), logPath); - List appIds = logClient.getAppIds(host.getIp(), host.getPort(), logPath); + List appIds = logClient.getAppIds(host.getIp(), host.getPort(), logPath, appInfoPath); if (CollectionUtils.isEmpty(appIds)) { logger.info("The appId is empty"); return Pair.of(true, Collections.emptyList()); diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java index 9cb1a1e4f426..a6b56b56b7e5 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java @@ -26,6 +26,7 @@ import org.apache.dolphinscheduler.common.utils.CommonUtils; import 
org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.LoggerUtils; +import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.task.api.AbstractTask; import org.apache.dolphinscheduler.plugin.task.api.TaskCallBack; import org.apache.dolphinscheduler.plugin.task.api.TaskChannel; @@ -54,6 +55,7 @@ import java.util.Date; import java.util.List; +import static org.apache.dolphinscheduler.common.Constants.APPID_COLLECT; import static org.apache.dolphinscheduler.common.Constants.SINGLE_SLASH; public abstract class WorkerTaskExecuteRunnable implements Runnable { @@ -123,7 +125,7 @@ public void cancelTask() { if (task != null) { try { task.cancel(); - List appIds = LogUtils.getAppIdsFromLogFile(taskExecutionContext.getLogPath()); + List appIds = LogUtils.getAppIds(taskExecutionContext.getLogPath(), taskExecutionContext.getExecutePath(), PropertyUtils.getString(APPID_COLLECT, "log")); if (CollectionUtils.isNotEmpty(appIds)) { ProcessUtils.cancelApplication(appIds, logger, taskExecutionContext.getTenantCode(), taskExecutionContext.getExecutePath()); } @@ -242,7 +244,6 @@ protected void sendTaskResult() { } protected void clearTaskExecPathIfNeeded() { - String execLocalPath = taskExecutionContext.getExecutePath(); if (!CommonUtils.isDevelopMode()) { logger.info("The current execute mode isn't develop mode, will clear the task execute file: {}", execLocalPath); diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionCheckerUtils.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionCheckerUtils.java index 56d948164205..08e285062923 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionCheckerUtils.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionCheckerUtils.java 
@@ -75,14 +75,14 @@ public static void checkTenantExist(WorkerConfig workerConfig, TaskExecutionCont public static void createProcessLocalPathIfAbsent(TaskExecutionContext taskExecutionContext) throws TaskException { try { // local execute path - String execLocalPath = FileUtils.getProcessExecDir( - taskExecutionContext.getProjectCode(), - taskExecutionContext.getProcessDefineCode(), - taskExecutionContext.getProcessDefineVersion(), - taskExecutionContext.getProcessInstanceId(), - taskExecutionContext.getTaskInstanceId()); - taskExecutionContext.setExecutePath(execLocalPath); - FileUtils.createWorkDirIfAbsent(execLocalPath); + // String execLocalPath = FileUtils.getProcessExecDir( + // taskExecutionContext.getProjectCode(), + // taskExecutionContext.getProcessDefineCode(), + // taskExecutionContext.getProcessDefineVersion(), + // taskExecutionContext.getProcessInstanceId(), + // taskExecutionContext.getTaskInstanbu iceId()); + // taskExecutionContext.setExecutePath(execLocalPath); + FileUtils.createWorkDirIfAbsent(taskExecutionContext.getExecutePath()); } catch (Throwable ex) { throw new TaskException("Cannot create process execute dir", ex); } diff --git a/lombok.config b/lombok.config old mode 100644 new mode 100755 diff --git a/mvnw.cmd b/mvnw.cmd old mode 100644 new mode 100755 diff --git a/pom.xml b/pom.xml old mode 100644 new mode 100755 index ef784156f71c..e675c516c41b --- a/pom.xml +++ b/pom.xml @@ -41,6 +41,7 @@ dolphinscheduler-server dolphinscheduler-common dolphinscheduler-api + dolphinscheduler-aop dolphinscheduler-dao dolphinscheduler-dist dolphinscheduler-remote diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh index c7e3878ac7a4..231c5710c6e5 100755 --- a/script/env/dolphinscheduler_env.sh +++ b/script/env/dolphinscheduler_env.sh @@ -33,3 +33,12 @@ export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel} export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export 
PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH + +# applicationId auto collection related configuration +export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH +export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASS_PATH +export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS +export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS +export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS +# print detailed info of yarn application +export PARA_NAME_ASPECTJ_DEBUG=false \ No newline at end of file From e8ef5a6363868ffd674219150e81188484f811d6 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 28 Sep 2022 19:50:44 +0800 Subject: [PATCH 02/32] [Improvement][Task] Improved way to collect yarn job's appIds import aop way to collect yarn job's applicationId add new environment configuration for each type of yarn tasks to support aop add user property `appId.collect` for user to decide how to collect applicationId This closes https://github.com/apache/dolphinscheduler/issues/11262 --- dolphinscheduler-aop/pom.xml | 37 ------------------- .../main/assembly/dolphinscheduler-aop.xml | 32 ---------------- .../main/assembly/dolphinscheduler-tools.xml | 2 +- 3 files changed, 1 insertion(+), 70 deletions(-) delete mode 100644 dolphinscheduler-aop/src/main/assembly/dolphinscheduler-aop.xml diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 318d1d54e89b..94259253e87f 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -54,47 +54,10 @@ ${yarn.version} - - - junit - junit - ${junit.version} - test - - - - org.apache.maven.plugins - maven-jar-plugin - - - *.yaml - *.xml 
- - - - - maven-assembly-plugin - - - dolphinscheduler-aop - - single - - package - - aop - - src/main/assembly/dolphinscheduler-aop.xml - - false - - - - org.codehaus.mojo aspectj-maven-plugin diff --git a/dolphinscheduler-aop/src/main/assembly/dolphinscheduler-aop.xml b/dolphinscheduler-aop/src/main/assembly/dolphinscheduler-aop.xml deleted file mode 100644 index 779d5508ac58..000000000000 --- a/dolphinscheduler-aop/src/main/assembly/dolphinscheduler-aop.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - dolphinscheduler-aop - - dir - - false - aop - - - libs - - - \ No newline at end of file diff --git a/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml b/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml index 7ff8fec886a9..de6a73acc86c 100644 --- a/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml +++ b/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml @@ -53,7 +53,7 @@ conf - ${basedir}/../dolphinscheduler-aop/target/aop/libs + ${basedir}/../dolphinscheduler-aop/target/ dolphinscheduler-aop*.jar From 4d2355f90f4b27842dab9c8bbf651ee49f4bef37 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 28 Sep 2022 21:08:29 +0800 Subject: [PATCH 03/32] Update common.properties --- dolphinscheduler-common/src/main/resources/common.properties | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index a26d14e4526f..ca8a75819370 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -51,7 +51,7 @@ resource.alibaba.cloud.oss.endpoint=https://oss-cn-hangzhou.aliyuncs.com # if resource.storage.type=HDFS, the user must have the permission to create directories under the HDFS root path resource.hdfs.root.user=hdfs # if resource.storage.type=S3, the value like: s3a://dolphinscheduler; if 
resource.storage.type=HDFS and namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir -resource.hdfs.fs.defaultFS=hdfs://analysis-5:9000 +resource.hdfs.fs.defaultFS=hdfs://mycluster:8020 # whether to startup kerberos hadoop.security.authentication.startup.state=false @@ -126,4 +126,4 @@ ml.mlflow.preset_repository_version="main" # way to collect applicationId: log(original regex match), aop appId.collect: log # appIds info log path -appId.file.path = appInfo.log \ No newline at end of file +appId.file.path = appInfo.log From 21bee4b03b8aa83d839f5fea21811b1d396315be Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 28 Sep 2022 21:09:06 +0800 Subject: [PATCH 04/32] Update common.properties --- dolphinscheduler-common/src/main/resources/common.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index ca8a75819370..a223d810ad53 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -16,7 +16,7 @@ # # user data local directory path, please make sure the directory exists and have read write permissions -data.basedir.path=/home/wangwr/tmp/dolphinscheduler +data.basedir.path=/tmp/dolphinscheduler # resource view suffixs #resource.view.suffixs=txt,log,sh,bat,conf,cfg,py,java,sql,xml,hql,properties,json,yml,yaml,ini,js From 059a4149391981e5b5c844bc2a69b1bf19724be9 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 28 Sep 2022 21:34:02 +0800 Subject: [PATCH 05/32] Update configuration docs --- docs/docs/en/architecture/configuration.md | 2 ++ docs/docs/zh/architecture/configuration.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index 607d2f03215c..fad420ce4d8d 100644 --- 
a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -224,6 +224,8 @@ The default configuration is as follows: |sudo.enable | true | whether to enable sudo| |alert.rpc.port | 50052 | the RPC port of Alert Server| |zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin| +|appId.collect | log | way to collect applicationId, if use aop, alter the configuration from log to aop| +|appId.file.path | appInfo.log | if use aop way,the relative log path to store applicationId (suggest not to change, need to re-package aop jar file)| ### Api-server related configuration diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index af6bae2bb834..526ad3222267 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -221,6 +221,8 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn相关的配置 |sudo.enable | true | 是否开启sudo| |alert.rpc.port | 50052 | Alert Server的RPC端口| |zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址| +|appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop| +|appId.file.path | appInfo.log | 采用aop方式,存取applicationId的日志文件相对路径,不建议修改,否则需要重新打包aop jar包| ## Api-server相关配置 From 43824ee663d95c3e7f6b5c81186798471e7b33bf Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 29 Sep 2022 16:58:32 +0800 Subject: [PATCH 06/32] add license --- dolphinscheduler-aop/pom.xml | 16 ++++++++++++++++ .../src/test/resources/appInfo.log | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 94259253e87f..0fc965b6ff9e 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -1,4 +1,20 @@ + 4.0.0 diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appInfo.log b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appInfo.log index a008253d9eb1..cf1114efda98 
100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appInfo.log +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appInfo.log @@ -1 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + application_1548381669007_1234 \ No newline at end of file From caf5451a1042fe627e60656c653918ddc372d7ce Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Mon, 10 Oct 2022 18:03:55 +0800 Subject: [PATCH 07/32] update user properties --- .../mysql/dolphinscheduler_env.sh | 6 +- .../postgresql/dolphinscheduler_env.sh | 6 +- .../kubernetes/dolphinscheduler/values.yaml | 2 + docs/docs/en/architecture/configuration.md | 9 +-- docs/docs/en/guide/resource/configuration.md | 9 +++ docs/docs/zh/architecture/configuration.md | 9 +-- docs/docs/zh/guide/resource/configuration.md | 9 +++ dolphinscheduler-aop/pom.xml | 66 ++++++++++++++----- .../aop/YarnClientAspect.java | 17 ++--- .../docker/file-manage/common.properties | 4 +- .../dolphinscheduler/common/Constants.java | 3 + .../common/utils/FileUtils.java | 2 +- .../src/main/resources/common.properties | 2 + .../docker/file-manage/common.properties | 4 +- .../server/log/LoggerRequestProcessor.java | 5 +- .../service/log/LogClient.java | 5 +- 
.../dolphinscheduler/spi/utils/Constants.java | 7 +- .../plugin/task/api/AbstractYarnTask.java | 8 ++- dolphinscheduler-tools/pom.xml | 6 +- .../main/assembly/dolphinscheduler-tools.xml | 7 -- .../runner/WorkerTaskExecuteRunnable.java | 59 ++++++++++++----- pom.xml | 5 ++ script/env/dolphinscheduler_env.sh | 8 +-- 23 files changed, 172 insertions(+), 86 deletions(-) diff --git a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh index 373fb94496c0..29d194b30d1b 100755 --- a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh @@ -46,10 +46,8 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH -export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASS_PATH +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS -# print detailed info of yarn application -export PARA_NAME_ASPECTJ_DEBUG=false diff --git a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh index 3d68b05b1b62..ce5b7155ab98 100644 --- a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh +++ 
b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh @@ -46,10 +46,8 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH -export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASS_PATH +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS -# print detailed info of yarn application -export PARA_NAME_ASPECTJ_DEBUG=false diff --git a/deploy/kubernetes/dolphinscheduler/values.yaml b/deploy/kubernetes/dolphinscheduler/values.yaml index 3d0d826cb2ab..4fd02fee9c8b 100644 --- a/deploy/kubernetes/dolphinscheduler/values.yaml +++ b/deploy/kubernetes/dolphinscheduler/values.yaml @@ -153,6 +153,8 @@ conf: appId.collect: log # appIds info log path appId.file.path = appInfo.log + # whether to print aop debug info + aop.debug = false common: ## Configmap diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index fad420ce4d8d..93a39202cc02 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -226,6 +226,7 @@ The default configuration is as follows: |zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin| |appId.collect | log | way to collect applicationId, if use aop, 
alter the configuration from log to aop| |appId.file.path | appInfo.log | if use aop way,the relative log path to store applicationId (suggest not to change, need to re-package aop jar file)| +|aop.debug | false | whether to print aop debug info ### Api-server related configuration @@ -354,19 +355,15 @@ export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python} export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive} export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} -export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel} -export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH -export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASS_PATH +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS -# print detailed info of yarn application -export PARA_NAME_ASPECTJ_DEBUG=false ``` ### Log related configuration diff --git a/docs/docs/en/guide/resource/configuration.md b/docs/docs/en/guide/resource/configuration.md index 4d3fc7aaa405..5eeb307c146a 100644 --- a/docs/docs/en/guide/resource/configuration.md +++ b/docs/docs/en/guide/resource/configuration.md @@ -141,6 +141,15 @@ conda.path=/opt/anaconda3/etc/profile.d/conda.sh # Task resource limit state 
task.resource.limit.state=false + +# way to collect applicationId: log(original regex match), aop +appId.collect: log + +# appIds info log path +appId.file.path = appInfo.log + +# whether to print aop debug info +aop.debug = false ``` > **Note:** diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 526ad3222267..b586c88e5f7a 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -223,6 +223,7 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn相关的配置 |zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址| |appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop| |appId.file.path | appInfo.log | 采用aop方式,存取applicationId的日志文件相对路径,不建议修改,否则需要重新打包aop jar包| +|aop.debug | false | 是否输出aop debug信息 ## Api-server相关配置 @@ -346,19 +347,15 @@ export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python} export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive} export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} -export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel} -export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH -export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASS_PATH +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS export 
FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS -# print detailed info of yarn application -export PARA_NAME_ASPECTJ_DEBUG=false ``` ## 日志相关配置 diff --git a/docs/docs/zh/guide/resource/configuration.md b/docs/docs/zh/guide/resource/configuration.md index 78c453b40b25..89143d1d45fb 100644 --- a/docs/docs/zh/guide/resource/configuration.md +++ b/docs/docs/zh/guide/resource/configuration.md @@ -142,6 +142,15 @@ development.state=false # rpc port alert.rpc.port=50052 + +# way to collect applicationId: log(original regex match), aop +appId.collect: log + +# appIds info log path +appId.file.path = appInfo.log + +# whether to print aop debug info +aop.debug = false ``` > **注意**: diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 0fc965b6ff9e..5865feab1b56 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -37,39 +37,41 @@ - + + org.apache.dolphinscheduler + dolphinscheduler-common + org.aspectj aspectjweaver ${aspectj.version} runtime - org.aspectj aspectjrt ${aspectj.version} - org.apache.hadoop hadoop-yarn-client ${yarn.version} - - - - - org.apache.hadoop - hadoop-yarn-common - ${yarn.version} - - - - org.apache.hadoop - hadoop-common - ${yarn.version} - + + + + + + + + + + + + + + + @@ -77,6 +79,7 @@ org.codehaus.mojo aspectj-maven-plugin + 1.11 1.8 1.8 @@ -95,6 +98,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java index 3a9a615d5ff9..74e4cdd65ed1 100644 --- a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -17,6 +17,10 @@ package org.apache.dolphinscheduler.aop; +import static 
org.apache.dolphinscheduler.common.Constants.*; + +import org.apache.dolphinscheduler.common.utils.PropertyUtils; + import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; @@ -33,10 +37,7 @@ @Aspect public class YarnClientAspect { - /** - * flag to indicate whether print debug logs - */ - private static final String PARA_NAME_ASPECTJ_DEBUG = "PARA_NAME_ASPECTJ_DEBUG"; + // public static final Logger logger = LoggerFactory.getLogger(YarnClientAspect.class); /** * The current application report when application submitted successfully @@ -47,8 +48,9 @@ public class YarnClientAspect { private boolean debug; public YarnClientAspect() { - appInfoFilePath = System.getProperty("user.dir") + "/appInfo.log"; - debug = Boolean.parseBoolean(System.getenv(PARA_NAME_ASPECTJ_DEBUG)); + appInfoFilePath = String.format("%s/%s", System.getProperty("user.dir"), + PropertyUtils.getString(APPID_FILE_PATH, DEFAULT_APPID_FILE_PATH)); + debug = Boolean.parseBoolean(PropertyUtils.getString(AOP_DEBUG, "false")); } /** @@ -59,8 +61,7 @@ public YarnClientAspect() { * @throws Throwable exceptions */ @AfterReturning(pointcut = "execution(ApplicationId org.apache.hadoop.yarn.client.api.impl.YarnClientImpl." 
+ - "submitApplication(ApplicationSubmissionContext)) && args(appContext)", - returning = "submittedAppId", argNames = "appContext,submittedAppId") + "submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext,submittedAppId") public void registerApplicationInfo(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) { if (appInfoFilePath != null) { try { diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties index 26dc23492c9c..337e25ac78fd 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties @@ -70,4 +70,6 @@ task.resource.limit.state=false # way to collect applicationId: log(original regex match), aop appId.collect: log # appIds info log path -appId.file.path = appInfo.log \ No newline at end of file +appId.file.path = appInfo.log +# whether to print aop debug info +aop.debug = false \ No newline at end of file diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java index ad7335384744..3ecced9bf471 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java @@ -161,6 +161,9 @@ private Constants() { */ public static final String APPID_COLLECT = "appId.collect"; public static final String APPID_FILE_PATH = "appId.file.path"; + public static final String AOP_DEBUG = "aop.debug"; + public static final String DEFAULT_COLLECT_WAY = "log"; + public static 
final String DEFAULT_APPID_FILE_PATH = "appInfo.log"; /** * comma , diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java index 4c352d85236c..16636885e779 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java @@ -41,7 +41,7 @@ public class FileUtils { public static final String DATA_BASEDIR = PropertyUtils.getString(DATA_BASEDIR_PATH, "/tmp/dolphinscheduler"); - public static final String APPINFO_PATH = PropertyUtils.getString(APPID_FILE_PATH, "appInfo.log"); + public static final String APPINFO_PATH = PropertyUtils.getString(APPID_FILE_PATH, DEFAULT_APPID_FILE_PATH); private FileUtils() { throw new UnsupportedOperationException("Construct FileUtils"); diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index a223d810ad53..17683c7429b1 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -127,3 +127,5 @@ ml.mlflow.preset_repository_version="main" appId.collect: log # appIds info log path appId.file.path = appInfo.log +# whether to print aop debug info +aop.debug = false \ No newline at end of file diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties index 3fd8f5d8c072..c2897d94c9ba 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties @@ -93,4 +93,6 @@ 
task.resource.limit.state=false # way to collect applicationId: log(original regex match), aop appId.collect: log # appIds info log path -appId.file.path = appInfo.log \ No newline at end of file +appId.file.path = appInfo.log +# whether to print aop debug info +aop.debug = false \ No newline at end of file diff --git a/dolphinscheduler-log-server/src/main/java/org/apache/dolphinscheduler/server/log/LoggerRequestProcessor.java b/dolphinscheduler-log-server/src/main/java/org/apache/dolphinscheduler/server/log/LoggerRequestProcessor.java index e40b3c019d71..465c6c08e49c 100644 --- a/dolphinscheduler-log-server/src/main/java/org/apache/dolphinscheduler/server/log/LoggerRequestProcessor.java +++ b/dolphinscheduler-log-server/src/main/java/org/apache/dolphinscheduler/server/log/LoggerRequestProcessor.java @@ -17,7 +17,8 @@ package org.apache.dolphinscheduler.server.log; -import static org.apache.dolphinscheduler.common.Constants.*; +import static org.apache.dolphinscheduler.common.Constants.APPID_COLLECT; +import static org.apache.dolphinscheduler.common.Constants.DEFAULT_COLLECT_WAY; import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.LoggerUtils; @@ -170,7 +171,7 @@ public void process(Channel channel, Command command) { if (!checkPathSecurity(appInfoPath) || !checkPathSecurity(logPath)) { throw new IllegalArgumentException("Illegal path"); } - List appIds = LogUtils.getAppIds(logPath, appInfoPath, PropertyUtils.getString(APPID_COLLECT, "log")); + List appIds = LogUtils.getAppIds(logPath, appInfoPath, PropertyUtils.getString(APPID_COLLECT, DEFAULT_COLLECT_WAY)); channel.writeAndFlush( new GetAppIdResponseCommand(appIds).convert2Command(command.getOpaque())); break; diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java index 0744d65b5bf7..4d9326b71f0c 100644 --- 
a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java @@ -18,6 +18,8 @@ package org.apache.dolphinscheduler.service.log; import static org.apache.dolphinscheduler.common.Constants.APPID_COLLECT; +import static org.apache.dolphinscheduler.common.Constants.DEFAULT_COLLECT_WAY; + import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.LoggerUtils; import org.apache.dolphinscheduler.common.utils.NetUtils; @@ -209,7 +211,8 @@ public Boolean removeTaskLog(String host, int port, String path) { final Host workerAddress = new Host(host, port); List appIds = null; if (NetUtils.getHost().equals(host)) { - appIds = LogUtils.getAppIds(taskLogFilePath, taskAppInfoPath, PropertyUtils.getString(APPID_COLLECT, "log")); + appIds = LogUtils.getAppIds(taskLogFilePath, taskAppInfoPath, + PropertyUtils.getString(APPID_COLLECT, DEFAULT_COLLECT_WAY)); } else { final Command command = new GetAppIdRequestCommand(taskLogFilePath, taskAppInfoPath).convert2Command(); Command response = this.client.sendSync(workerAddress, command, LOG_REQUEST_TIMEOUT); diff --git a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java index ae12dbf7b483..b04596760290 100644 --- a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java +++ b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java @@ -21,6 +21,7 @@ * constants */ public class Constants { + private Constants() { throw new IllegalStateException("Constants class"); } @@ -121,7 +122,8 @@ private Constants() { /** * hadoop.security.authentication */ - public static final String HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE = "hadoop.security.authentication.startup.state"; + 
public static final String HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE = + "hadoop.security.authentication.startup.state"; /** * loginUserFromKeytab user @@ -207,6 +209,9 @@ private Constants() { */ public static final String APPID_COLLECT = "appId.collect"; public static final String APPID_FILE_PATH = "appId.file.path"; + public static final String AOP_DEBUG = "aop.debug"; + public static final String DEFAULT_COLLECT_WAY = "log"; + public static final String DEFAULT_APPID_FILE_PATH = "appInfo.log"; /** * DOUBLE_SLASH // diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java index 35e735b4857e..10acc36e0e76 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java @@ -17,6 +17,9 @@ package org.apache.dolphinscheduler.plugin.task.api; +import static org.apache.dolphinscheduler.spi.utils.Constants.APPID_COLLECT; +import static org.apache.dolphinscheduler.spi.utils.Constants.DEFAULT_COLLECT_WAY; + import org.apache.dolphinscheduler.plugin.task.api.model.ResourceInfo; import org.apache.dolphinscheduler.plugin.task.api.model.TaskResponse; import org.apache.dolphinscheduler.plugin.task.api.utils.LogUtils; @@ -25,8 +28,6 @@ import java.util.List; import java.util.regex.Pattern; -import static org.apache.dolphinscheduler.spi.utils.Constants.APPID_COLLECT; - /** * abstract yarn task */ @@ -110,7 +111,8 @@ public void cancelApplication() throws TaskException { */ @Override public List getApplicationIds() throws TaskException { - return LogUtils.getAppIds(taskRequest.getLogPath(), taskRequest.getAppInfoPath(), 
PropertyUtils.getString(APPID_COLLECT, "log")); + return LogUtils.getAppIds(taskRequest.getLogPath(), taskRequest.getAppInfoPath(), + PropertyUtils.getString(APPID_COLLECT, DEFAULT_COLLECT_WAY)); } /** diff --git a/dolphinscheduler-tools/pom.xml b/dolphinscheduler-tools/pom.xml index 4212bcf4bfd9..022ee5f4e58b 100644 --- a/dolphinscheduler-tools/pom.xml +++ b/dolphinscheduler-tools/pom.xml @@ -35,6 +35,10 @@ org.apache.dolphinscheduler dolphinscheduler-dao + + org.apache.dolphinscheduler + dolphinscheduler-aop + @@ -65,10 +69,10 @@ dolphinscheduler-tools - package single + package tools diff --git a/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml b/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml index de6a73acc86c..49a95d25d0ce 100644 --- a/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml +++ b/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml @@ -52,13 +52,6 @@ conf - - ${basedir}/../dolphinscheduler-aop/target/ - - dolphinscheduler-aop*.jar - - libs/aop - diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java index a6b56b56b7e5..c8aa5dee8e65 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java @@ -54,10 +54,12 @@ import java.nio.file.NoSuchFileException; import java.util.Date; import java.util.List; - import static org.apache.dolphinscheduler.common.Constants.APPID_COLLECT; +import static org.apache.dolphinscheduler.common.Constants.DEFAULT_COLLECT_WAY; import static org.apache.dolphinscheduler.common.Constants.SINGLE_SLASH; + + public abstract class WorkerTaskExecuteRunnable implements Runnable { 
protected final Logger logger = LoggerFactory.getLogger(String.format(TaskConstants.TASK_LOG_LOGGER_NAME_FORMAT, WorkerTaskExecuteRunnable.class)); @@ -117,7 +119,9 @@ protected void afterThrowing(Throwable throwable) throws TaskException { taskExecutionContext.setCurrentExecutionStatus(TaskExecutionStatus.FAILURE); taskExecutionContext.setEndTime(new Date()); workerMessageSender.sendMessageWithRetry(taskExecutionContext, masterAddress, CommandType.TASK_EXECUTE_RESULT); - logger.info("Get a exception when execute the task, will send the task execute result to master, the current task execute result is {}", TaskExecutionStatus.FAILURE); + logger.info( + "Get a exception when execute the task, will send the task execute result to master, the current task execute result is {}", + TaskExecutionStatus.FAILURE); } public void cancelTask() { @@ -125,12 +129,17 @@ public void cancelTask() { if (task != null) { try { task.cancel(); - List appIds = LogUtils.getAppIds(taskExecutionContext.getLogPath(), taskExecutionContext.getExecutePath(), PropertyUtils.getString(APPID_COLLECT, "log")); + List appIds = + LogUtils.getAppIds(taskExecutionContext.getLogPath(), taskExecutionContext.getExecutePath(), + PropertyUtils.getString(APPID_COLLECT, DEFAULT_COLLECT_WAY)); if (CollectionUtils.isNotEmpty(appIds)) { - ProcessUtils.cancelApplication(appIds, logger, taskExecutionContext.getTenantCode(), taskExecutionContext.getExecutePath()); + ProcessUtils.cancelApplication(appIds, logger, taskExecutionContext.getTenantCode(), + taskExecutionContext.getExecutePath()); } } catch (Exception e) { - logger.error("Task execute failed and cancel the application failed, this will not affect the taskInstance status, but you need to check manual", e); + logger.error( + "Task execute failed and cancel the application failed, this will not affect the taskInstance status, but you need to check manual", + e); } } } @@ -141,7 +150,8 @@ public void run() { // set the thread name to make sure the log be 
written to the task log file Thread.currentThread().setName(taskExecutionContext.getTaskLogName()); - LoggerUtils.setWorkflowAndTaskInstanceIDMDC(taskExecutionContext.getProcessInstanceId(), taskExecutionContext.getTaskInstanceId()); + LoggerUtils.setWorkflowAndTaskInstanceIDMDC(taskExecutionContext.getProcessInstanceId(), + taskExecutionContext.getTaskInstanceId()); logger.info("Begin to pulling task"); initializeTask(); @@ -150,14 +160,17 @@ public void run() { taskExecutionContext.setCurrentExecutionStatus(TaskExecutionStatus.SUCCESS); taskExecutionContext.setEndTime(new Date()); TaskExecutionContextCacheManager.removeByTaskInstanceId(taskExecutionContext.getTaskInstanceId()); - workerMessageSender.sendMessageWithRetry(taskExecutionContext, masterAddress, CommandType.TASK_EXECUTE_RESULT); - logger.info("The current execute mode is dry run, will stop the subsequent process and set the taskInstance status to success"); + workerMessageSender.sendMessageWithRetry(taskExecutionContext, masterAddress, + CommandType.TASK_EXECUTE_RESULT); + logger.info( + "The current execute mode is dry run, will stop the subsequent process and set the taskInstance status to success"); return; } beforeExecute(); - TaskCallBack taskCallBack = TaskCallbackImpl.builder().workerMessageSender(workerMessageSender).masterAddress(masterAddress).build(); + TaskCallBack taskCallBack = TaskCallbackImpl.builder().workerMessageSender(workerMessageSender) + .masterAddress(masterAddress).build(); executeTask(taskCallBack); afterExecute(); @@ -181,7 +194,8 @@ protected void initializeTask() { taskExecutionContext.setEnvFile(systemEnvPath); logger.info("Set task envFile: {}", systemEnvPath); - String taskAppId = String.format("%s_%s", taskExecutionContext.getProcessInstanceId(), taskExecutionContext.getTaskInstanceId()); + String taskAppId = String.format("%s_%s", taskExecutionContext.getProcessInstanceId(), + taskExecutionContext.getTaskInstanceId()); taskExecutionContext.setTaskAppId(taskAppId); 
logger.info("Set task appId: {}", taskAppId); @@ -204,11 +218,13 @@ protected void beforeExecute() { TaskChannel taskChannel = taskPluginManager.getTaskChannelMap().get(taskExecutionContext.getTaskType()); if (null == taskChannel) { - throw new TaskPluginException(String.format("%s task plugin not found, please check config file.", taskExecutionContext.getTaskType())); + throw new TaskPluginException(String.format("%s task plugin not found, please check config file.", + taskExecutionContext.getTaskType())); } task = taskChannel.createTask(taskExecutionContext); if (task == null) { - throw new TaskPluginException(String.format("%s task is null, please check the task plugin is correct", taskExecutionContext.getTaskType())); + throw new TaskPluginException(String.format("%s task is null, please check the task plugin is correct", + taskExecutionContext.getTaskType())); } logger.info("Task plugin: {} create success", taskExecutionContext.getTaskType()); @@ -227,8 +243,10 @@ protected void sendAlertIfNeeded() { logger.info("The current task need to send alert, begin to send alert"); TaskExecutionStatus status = task.getExitStatus(); TaskAlertInfo taskAlertInfo = task.getTaskAlertInfo(); - int strategy = status == TaskExecutionStatus.SUCCESS ? WarningType.SUCCESS.getCode() : WarningType.FAILURE.getCode(); - alertClientService.sendAlert(taskAlertInfo.getAlertGroupId(), taskAlertInfo.getTitle(), taskAlertInfo.getContent(), strategy); + int strategy = + status == TaskExecutionStatus.SUCCESS ? 
WarningType.SUCCESS.getCode() : WarningType.FAILURE.getCode(); + alertClientService.sendAlert(taskAlertInfo.getAlertGroupId(), taskAlertInfo.getTitle(), + taskAlertInfo.getContent(), strategy); logger.info("Success send alert"); } @@ -240,13 +258,15 @@ protected void sendTaskResult() { taskExecutionContext.setVarPool(JSONUtils.toJsonString(task.getParameters().getVarPool())); workerMessageSender.sendMessageWithRetry(taskExecutionContext, masterAddress, CommandType.TASK_EXECUTE_RESULT); - logger.info("Send task execute result to master, the current task status: {}", taskExecutionContext.getCurrentExecutionStatus()); + logger.info("Send task execute result to master, the current task status: {}", + taskExecutionContext.getCurrentExecutionStatus()); } protected void clearTaskExecPathIfNeeded() { String execLocalPath = taskExecutionContext.getExecutePath(); if (!CommonUtils.isDevelopMode()) { - logger.info("The current execute mode isn't develop mode, will clear the task execute file: {}", execLocalPath); + logger.info("The current execute mode isn't develop mode, will clear the task execute file: {}", + execLocalPath); // get exec dir if (Strings.isNullOrEmpty(execLocalPath)) { logger.warn("The task execute file is {} no need to clear", taskExecutionContext.getTaskName()); @@ -265,11 +285,14 @@ protected void clearTaskExecPathIfNeeded() { if (e instanceof NoSuchFileException) { // this is expected } else { - logger.error("Delete task execute file: {} failed, this will not affect the task status, but you need to clear this manually", execLocalPath, e); + logger.error( + "Delete task execute file: {} failed, this will not affect the task status, but you need to clear this manually", + execLocalPath, e); } } } else { - logger.info("The current execute mode is develop mode, will not clear the task execute file: {}", execLocalPath); + logger.info("The current execute mode is develop mode, will not clear the task execute file: {}", + execLocalPath); } } diff --git a/pom.xml 
b/pom.xml index e675c516c41b..d77e9c53f371 100755 --- a/pom.xml +++ b/pom.xml @@ -125,6 +125,11 @@ dolphinscheduler-standalone-server ${project.version} + + org.apache.dolphinscheduler + dolphinscheduler-aop + ${project.version} + org.apache.dolphinscheduler dolphinscheduler-common diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh index 231c5710c6e5..c97dadcd3f9d 100755 --- a/script/env/dolphinscheduler_env.sh +++ b/script/env/dolphinscheduler_env.sh @@ -35,10 +35,8 @@ export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$HADOOP_CLASSPATH -export SPARK_DIST_CLASSPATH=${DOLPHINSCHEDULER_HOME}/tools/libs/aop/*:$SPARK_DIST_CLASS_PATH +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS -export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS -# print detailed info of yarn application -export PARA_NAME_ASPECTJ_DEBUG=false \ No newline at end of file +export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS \ No newline at end of file From b263c7c8d7ee96e6e6d1a4c6f36125aa3aba3f65 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Mon, 10 Oct 2022 20:12:05 +0800 Subject: [PATCH 08/32] remove redundant dependencies --- dolphinscheduler-aop/pom.xml | 44 
------------------------------------ 1 file changed, 44 deletions(-) diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 5865feab1b56..8a8702d93993 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -57,21 +57,6 @@ hadoop-yarn-client ${yarn.version} - - - - - - - - - - - - - - - @@ -98,35 +83,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From 088afb3caa2c30345563a90959d5a8bebc0758eb Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Mon, 10 Oct 2022 20:28:05 +0800 Subject: [PATCH 09/32] update architecture doc --- docs/docs/en/architecture/configuration.md | 2 +- docs/docs/zh/architecture/configuration.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index 93a39202cc02..a69571f0cd4b 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -224,7 +224,7 @@ The default configuration is as follows: |sudo.enable | true | whether to enable sudo| |alert.rpc.port | 50052 | the RPC port of Alert Server| |zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin| -|appId.collect | log | way to collect applicationId, if use aop, alter the configuration from log to aop| +|appId.collect | log | way to collect applicationId, if use aop, alter the configuration from log to aop. 
Note: Aop way will failure if override applicationId collection-related environment configuration in dolphinscheduler_env.sh.| |appId.file.path | appInfo.log | if use aop way,the relative log path to store applicationId (suggest not to change, need to re-package aop jar file)| |aop.debug | false | whether to print aop debug info diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index b586c88e5f7a..8b396312ee52 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -181,7 +181,7 @@ DolphinScheduler同样可以通过`bin/env/dolphinscheduler_env.sh`进行Zookeep ## common.properties [hadoop、s3、yarn配置] -common.properties配置文件目前主要是配置hadoop/s3/yarn相关的配置,配置文件位置: +common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId收集相关的配置,配置文件位置: |服务名称| 配置文件 | |--|--| |Master Server | `master-server/conf/common.properties`| @@ -221,7 +221,7 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn相关的配置 |sudo.enable | true | 是否开启sudo| |alert.rpc.port | 50052 | Alert Server的RPC端口| |zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址| -|appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop| +|appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,注意:如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效| |appId.file.path | appInfo.log | 采用aop方式,存取applicationId的日志文件相对路径,不建议修改,否则需要重新打包aop jar包| |aop.debug | false | 是否输出aop debug信息 From db6575660328f68f9fd648251d81a4ad2c0d736d Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Sun, 16 Oct 2022 17:25:37 +0800 Subject: [PATCH 10/32] update log statement --- .../java/org/apache/dolphinscheduler/service/log/LogClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java index 70ff7d2bf66b..622c72b3af49 100644 --- 
a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java @@ -207,7 +207,7 @@ public Boolean removeTaskLog(String host, int port, String path) { public @Nullable List getAppIds(@NonNull String host, int port, @NonNull String taskLogFilePath, @NonNull String taskAppInfoPath) throws RemotingException, InterruptedException { - logger.info("Begin to get appIds from worker: {}:{} taskAppInfoPath: {}", host, port, taskAppInfoPath); + logger.info("Begin to get appIds from worker: {}:{} taskLogPath: {}, taskAppInfoPath: {}", host, port, taskLogFilePath, taskAppInfoPath); final Host workerAddress = new Host(host, port); List appIds = null; if (NetUtils.getHost().equals(host)) { From a26883d131328299b525b24f4cc28564b7cfd229 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Mon, 17 Oct 2022 15:30:14 +0800 Subject: [PATCH 11/32] fix some bugs --- .../cluster-test/mysql/dolphinscheduler_env.sh | 2 +- .../postgresql/dolphinscheduler_env.sh | 2 +- deploy/kubernetes/dolphinscheduler/values.yaml | 6 ------ docs/docs/en/architecture/configuration.md | 4 +--- docs/docs/en/guide/resource/configuration.md | 6 ------ docs/docs/zh/architecture/configuration.md | 4 +--- docs/docs/zh/guide/resource/configuration.md | 6 ------ dolphinscheduler-aop/pom.xml | 18 ------------------ .../dolphinscheduler/aop/YarnClientAspect.java | 9 ++------- .../docker/file-manage/common.properties | 6 +----- .../dolphinscheduler/common/Constants.java | 3 --- .../common/utils/FileUtils.java | 10 +++++----- .../src/main/resources/common.properties | 6 +----- .../docker/file-manage/common.properties | 6 +----- .../dolphinscheduler/spi/utils/Constants.java | 3 --- .../plugin/task/api/utils/LogUtils.java | 3 +-- .../plugin/task/api/utils/LogUtilsTest.java | 2 +- .../test/resources/{appId.log => appId.txt} | 0 lombok.config | 0 mvnw.cmd | 0 script/env/dolphinscheduler_env.sh | 2 +- 
21 files changed, 17 insertions(+), 81 deletions(-) rename dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/{appId.log => appId.txt} (100%) mode change 100755 => 100644 lombok.config mode change 100755 => 100644 mvnw.cmd diff --git a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh index 29d194b30d1b..6ed641755a61 100755 --- a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh @@ -46,7 +46,7 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS diff --git a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh index ce5b7155ab98..0974ef58ea7e 100644 --- a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh @@ -46,7 +46,7 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=`hadoop 
classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS diff --git a/deploy/kubernetes/dolphinscheduler/values.yaml b/deploy/kubernetes/dolphinscheduler/values.yaml index 6a0c866e1564..1f2235179fda 100644 --- a/deploy/kubernetes/dolphinscheduler/values.yaml +++ b/deploy/kubernetes/dolphinscheduler/values.yaml @@ -176,12 +176,6 @@ conf: # way to collect applicationId: log, aop appId.collect: log - # appIds info log path - appId.file.path = appInfo.log - - # whether to print aop debug info - aop.debug = false - common: ## Configmap configmap: diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index d45668ea5dd1..a47b3a5d6e1a 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -225,8 +225,6 @@ The default configuration is as follows: |alert.rpc.port | 50052 | the RPC port of Alert Server| |zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin| |appId.collect | log | way to collect applicationId, if use aop, alter the configuration from log to aop. 
Note: Aop way will failure if override applicationId collection-related environment configuration in dolphinscheduler_env.sh.| -|appId.file.path | appInfo.log | if use aop way,the relative log path to store applicationId (suggest not to change, need to re-package aop jar file)| -|aop.debug | false | whether to print aop debug info ### Api-server related configuration @@ -358,7 +356,7 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS diff --git a/docs/docs/en/guide/resource/configuration.md b/docs/docs/en/guide/resource/configuration.md index 5eeb307c146a..eb2d32f91a25 100644 --- a/docs/docs/en/guide/resource/configuration.md +++ b/docs/docs/en/guide/resource/configuration.md @@ -144,12 +144,6 @@ task.resource.limit.state=false # way to collect applicationId: log(original regex match), aop appId.collect: log - -# appIds info log path -appId.file.path = appInfo.log - -# whether to print aop debug info -aop.debug = false ``` > **Note:** diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 934f2a7ac46e..16c4833b560e 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -222,8 +222,6 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId |alert.rpc.port | 50052 | Alert 
Server的RPC端口| |zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址| |appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,注意:如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效| -|appId.file.path | appInfo.log | 采用aop方式,存取applicationId的日志文件相对路径,不建议修改,否则需要重新打包aop jar包| -|aop.debug | false | 是否输出aop debug信息 ## Api-server相关配置 @@ -350,7 +348,7 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS diff --git a/docs/docs/zh/guide/resource/configuration.md b/docs/docs/zh/guide/resource/configuration.md index 89143d1d45fb..f87384b9274c 100644 --- a/docs/docs/zh/guide/resource/configuration.md +++ b/docs/docs/zh/guide/resource/configuration.md @@ -145,12 +145,6 @@ alert.rpc.port=50052 # way to collect applicationId: log(original regex match), aop appId.collect: log - -# appIds info log path -appId.file.path = appInfo.log - -# whether to print aop debug info -aop.debug = false ``` > **注意**: diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 8a8702d93993..ff0671df565d 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -37,10 +37,6 @@ - - org.apache.dolphinscheduler - dolphinscheduler-common - org.aspectj aspectjweaver @@ -85,18 +81,4 @@ - - - - docker - - - - org.codehaus.mojo - exec-maven-plugin - - - - - 
diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java index 74e4cdd65ed1..e3ec96177895 100644 --- a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -17,10 +17,6 @@ package org.apache.dolphinscheduler.aop; -import static org.apache.dolphinscheduler.common.Constants.*; - -import org.apache.dolphinscheduler.common.utils.PropertyUtils; - import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; @@ -48,9 +44,8 @@ public class YarnClientAspect { private boolean debug; public YarnClientAspect() { - appInfoFilePath = String.format("%s/%s", System.getProperty("user.dir"), - PropertyUtils.getString(APPID_FILE_PATH, DEFAULT_APPID_FILE_PATH)); - debug = Boolean.parseBoolean(PropertyUtils.getString(AOP_DEBUG, "false")); + appInfoFilePath = String.format("%s/%s", System.getProperty("user.dir"), "appInfo.log"); + debug = true; } /** diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties index 345233d1c25c..93ee6571736e 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties @@ -68,8 +68,4 @@ aws.endpoint=http://s3:9000 # Task resource limit state task.resource.limit.state=false # way to collect applicationId: log(original regex match), aop -appId.collect: log -# appIds info log path -appId.file.path = 
appInfo.log -# whether to print aop debug info -aop.debug = false \ No newline at end of file +appId.collect: log \ No newline at end of file diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java index 66b0198f4586..e74d3b73f289 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java @@ -156,10 +156,7 @@ private Constants() { * fetch applicationId way */ public static final String APPID_COLLECT = "appId.collect"; - public static final String APPID_FILE_PATH = "appId.file.path"; - public static final String AOP_DEBUG = "aop.debug"; public static final String DEFAULT_COLLECT_WAY = "log"; - public static final String DEFAULT_APPID_FILE_PATH = "appInfo.log"; /** * comma , diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java index 08bcf2955ae6..61da9e5f7243 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java @@ -23,8 +23,6 @@ import static org.apache.dolphinscheduler.common.Constants.RESOURCE_VIEW_SUFFIXES_DEFAULT_VALUE; import static org.apache.dolphinscheduler.common.Constants.UTF_8; import static org.apache.dolphinscheduler.common.Constants.YYYYMMDDHHMMSS; -import static org.apache.dolphinscheduler.common.Constants.APPID_FILE_PATH; -import static org.apache.dolphinscheduler.common.Constants.DEFAULT_APPID_FILE_PATH; import org.apache.commons.io.IOUtils; @@ -48,7 +46,7 @@ public class FileUtils { public static final String DATA_BASEDIR = PropertyUtils.getString(DATA_BASEDIR_PATH, 
"/tmp/dolphinscheduler"); - public static final String APPINFO_PATH = PropertyUtils.getString(APPID_FILE_PATH, DEFAULT_APPID_FILE_PATH); + public static final String APPINFO_PATH = "appInfo.log"; private FileUtils() { throw new UnsupportedOperationException("Construct FileUtils"); @@ -61,7 +59,8 @@ private FileUtils() { * @return download file name */ public static String getDownloadFilename(String filename) { - String fileName = String.format("%s/download/%s/%s", DATA_BASEDIR, DateUtils.getCurrentTime(YYYYMMDDHHMMSS), filename); + String fileName = + String.format("%s/download/%s/%s", DATA_BASEDIR, DateUtils.getCurrentTime(YYYYMMDDHHMMSS), filename); File file = new File(fileName); if (!file.getParentFile().exists()) { @@ -98,7 +97,8 @@ public static String getUploadFilename(String tenantCode, String filename) { * @param taskInstanceId task instance id * @return directory of process execution */ - public static String getProcessExecDir(long projectCode, long processDefineCode, int processDefineVersion, int processInstanceId, int taskInstanceId) { + public static String getProcessExecDir(long projectCode, long processDefineCode, int processDefineVersion, + int processInstanceId, int taskInstanceId) { String fileName = String.format("%s/exec/process/%d/%s/%d/%d", DATA_BASEDIR, projectCode, processDefineCode + "_" + processDefineVersion, processInstanceId, taskInstanceId); File file = new File(fileName); diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 17683c7429b1..4a962043f05d 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -124,8 +124,4 @@ ml.mlflow.preset_repository=https://github.com/apache/dolphinscheduler-mlflow ml.mlflow.preset_repository_version="main" # way to collect applicationId: log(original regex match), aop -appId.collect: log -# appIds info log path 
-appId.file.path = appInfo.log -# whether to print aop debug info -aop.debug = false \ No newline at end of file +appId.collect: log \ No newline at end of file diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties index c2897d94c9ba..a35bf58fd0d4 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties @@ -91,8 +91,4 @@ resource.aws.s3.endpoint=http://s3:9000 task.resource.limit.state=false # way to collect applicationId: log(original regex match), aop -appId.collect: log -# appIds info log path -appId.file.path = appInfo.log -# whether to print aop debug info -aop.debug = false \ No newline at end of file +appId.collect: log \ No newline at end of file diff --git a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java index b04596760290..8527a6d9cf2f 100644 --- a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java +++ b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java @@ -208,10 +208,7 @@ private Constants() { * fetch applicationId way */ public static final String APPID_COLLECT = "appId.collect"; - public static final String APPID_FILE_PATH = "appId.file.path"; - public static final String AOP_DEBUG = "aop.debug"; public static final String DEFAULT_COLLECT_WAY = "log"; - public static final String DEFAULT_APPID_FILE_PATH = "appInfo.log"; /** * DOUBLE_SLASH // diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java 
b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java index 338ed7529eb9..eaec86d3911d 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java @@ -72,7 +72,6 @@ public List getAppIdsFromAppInfoFile(@NonNull String appInfoPath, Logger } } - public List getAppIdsFromLogFile(@NonNull String logPath, Logger logger) { File logFile = new File(logPath); if (!logFile.exists() || !logFile.isFile()) { @@ -94,7 +93,7 @@ public List getAppIdsFromLogFile(@NonNull String logPath, Logger logger) }); return new ArrayList<>(appIds); } catch (IOException e) { - logger.error("Get appId from log file erro, logPath: {}", logPath, e); + logger.error("Get appId from log file error, logPath: {}", logPath, e); return Collections.emptyList(); } } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java index 6690a822f36c..f9d62c07787d 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java @@ -26,7 +26,7 @@ public class LogUtilsTest { - private static final String APP_ID_FILE = LogUtilsTest.class.getResource("/appId.log") + private static final String APP_ID_FILE = LogUtilsTest.class.getResource("/appId.txt") .getFile(); private static final String APP_INFO_FILE = LogUtilsTest.class.getResource("/appInfo.log") .getFile(); diff 
--git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appId.log b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appId.txt similarity index 100% rename from dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appId.log rename to dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/appId.txt diff --git a/lombok.config b/lombok.config old mode 100755 new mode 100644 diff --git a/mvnw.cmd b/mvnw.cmd old mode 100755 new mode 100644 diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh index c97dadcd3f9d..7d8eb14b9b64 100755 --- a/script/env/dolphinscheduler_env.sh +++ b/script/env/dolphinscheduler_env.sh @@ -35,7 +35,7 @@ export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH # applicationId auto collection related configuration -export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/conf:${DOLPHINSCHEDULER_HOME}/tools/libs/* +export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS From 094e6e07dfa008de719737e8f4a6bdfdb7da42d7 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 19 Oct 2022 20:52:07 +0800 Subject: [PATCH 12/32] Update pom.xml --- dolphinscheduler-aop/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index ff0671df565d..0a858bb7e96b 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -60,7 +60,6 @@ org.codehaus.mojo 
aspectj-maven-plugin - 1.11 1.8 1.8 From 62087d856b04bcf23f1d87fa817551649e2017ff Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 19 Oct 2022 21:06:26 +0800 Subject: [PATCH 13/32] Update pom.xml --- dolphinscheduler-tools/pom.xml | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/dolphinscheduler-tools/pom.xml b/dolphinscheduler-tools/pom.xml index b94429dd28b3..f8e053f4aeee 100644 --- a/dolphinscheduler-tools/pom.xml +++ b/dolphinscheduler-tools/pom.xml @@ -29,17 +29,6 @@ dolphinscheduler-tools - - - org.apache.dolphinscheduler - dolphinscheduler-dao - - - org.apache.dolphinscheduler - dolphinscheduler-aop - - - @@ -57,6 +46,10 @@ org.apache.dolphinscheduler dolphinscheduler-dao + + org.apache.dolphinscheduler + dolphinscheduler-aop + From cec9f60cf535c3a44de8098a2b0b368c76118404 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 20 Oct 2022 20:00:24 +0800 Subject: [PATCH 14/32] update aop code --- .../org/apache/dolphinscheduler/aop/YarnClientAspect.java | 2 -- .../master/builder/TaskExecutionContextBuilder.java | 2 +- .../apache/dolphinscheduler/service/log/LogClient.java | 6 ++++-- .../service/log/LoggerRequestProcessor.java | 3 ++- .../dolphinscheduler/service/utils/ProcessUtils.java | 3 ++- .../server/worker/processor/TaskKillProcessor.java | 7 ++++--- .../server/worker/runner/WorkerTaskExecuteRunnable.java | 8 +++----- 7 files changed, 16 insertions(+), 15 deletions(-) diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java index e3ec96177895..2b99a92e222d 100644 --- a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -33,8 +33,6 @@ @Aspect public class YarnClientAspect { - // public static final Logger logger = 
LoggerFactory.getLogger(YarnClientAspect.class); - /** * The current application report when application submitted successfully */ diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java index f84c8c23c19f..7ac9024eb3ee 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java @@ -20,8 +20,8 @@ import static org.apache.dolphinscheduler.common.Constants.SEC_2_MINUTES_TIME_UNIT; import org.apache.dolphinscheduler.common.enums.TimeoutFlag; -import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.DateUtils; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.dao.entity.ProcessDefinition; import org.apache.dolphinscheduler.dao.entity.ProcessInstance; import org.apache.dolphinscheduler.dao.entity.TaskDefinition; diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java index 622c72b3af49..4b02d4888169 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LogClient.java @@ -207,7 +207,8 @@ public Boolean removeTaskLog(String host, int port, String path) { public @Nullable List getAppIds(@NonNull String host, int port, @NonNull String taskLogFilePath, @NonNull String taskAppInfoPath) throws RemotingException, InterruptedException { - logger.info("Begin to get appIds from worker: {}:{} taskLogPath: {}, 
taskAppInfoPath: {}", host, port, taskLogFilePath, taskAppInfoPath); + logger.info("Begin to get appIds from worker: {}:{} taskLogPath: {}, taskAppInfoPath: {}", host, port, + taskLogFilePath, taskAppInfoPath); final Host workerAddress = new Host(host, port); List appIds = null; if (NetUtils.getHost().equals(host)) { @@ -222,7 +223,8 @@ public Boolean removeTaskLog(String host, int port, String path) { appIds = responseCommand.getAppIds(); } } - logger.info("Get appIds: {} from worker: {}:{} taskLogPath: {}, taskAppInfoPath: {}", appIds, host, port, taskLogFilePath, taskAppInfoPath); + logger.info("Get appIds: {} from worker: {}:{} taskLogPath: {}, taskAppInfoPath: {}", appIds, host, port, + taskLogFilePath, taskAppInfoPath); return appIds; } diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LoggerRequestProcessor.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LoggerRequestProcessor.java index 86b035c5378e..3c99b34f0fd5 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LoggerRequestProcessor.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LoggerRequestProcessor.java @@ -171,7 +171,8 @@ public void process(Channel channel, Command command) { if (!checkPathSecurity(appInfoPath) || !checkPathSecurity(logPath)) { throw new IllegalArgumentException("Illegal path"); } - List appIds = LogUtils.getAppIds(logPath, appInfoPath, PropertyUtils.getString(APPID_COLLECT, DEFAULT_COLLECT_WAY)); + List appIds = LogUtils.getAppIds(logPath, appInfoPath, + PropertyUtils.getString(APPID_COLLECT, DEFAULT_COLLECT_WAY)); channel.writeAndFlush( new GetAppIdResponseCommand(appIds).convert2Command(command.getOpaque())); break; diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/utils/ProcessUtils.java 
b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/utils/ProcessUtils.java index 2c76a59d6c36..8ead5104c662 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/utils/ProcessUtils.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/utils/ProcessUtils.java @@ -200,7 +200,8 @@ public static String getPidsStr(int processId) throws Exception { try { Thread.sleep(Constants.SLEEP_TIME_MILLIS); Host host = Host.of(taskExecutionContext.getHost()); - List appIds = logClient.getAppIds(host.getIp(), host.getPort(), taskExecutionContext.getLogPath(), taskExecutionContext.getAppInfoPath()); + List appIds = logClient.getAppIds(host.getIp(), host.getPort(), taskExecutionContext.getLogPath(), + taskExecutionContext.getAppInfoPath()); if (CollectionUtils.isNotEmpty(appIds)) { if (StringUtils.isEmpty(taskExecutionContext.getExecutePath())) { taskExecutionContext diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java index 39a5bdcce136..60a4c35f5f30 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java @@ -231,8 +231,9 @@ private Pair> killYarnJob(@NonNull Host host, String executePath, String tenantCode) { if (logPath == null || appInfoPath == null || executePath == null || tenantCode == null) { - logger.error("Kill yarn job error, the input params is illegal, host: {}, logPath: {}, appInfoPath: {}, executePath: {}, tenantCode: {}", - host, logPath, appInfoPath, executePath, tenantCode); + logger.error( + "Kill yarn job error, the input params is illegal, host: {}, logPath: {}, appInfoPath: {}, executePath: {}, 
tenantCode: {}", + host, logPath, appInfoPath, executePath, tenantCode); return Pair.of(false, Collections.emptyList()); } try { @@ -255,4 +256,4 @@ private Pair> killYarnJob(@NonNull Host host, return Pair.of(false, Collections.emptyList()); } -} \ No newline at end of file +} diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java index ac03d1875c34..5910da78509e 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerTaskExecuteRunnable.java @@ -17,11 +17,11 @@ package org.apache.dolphinscheduler.server.worker.runner; -import static org.apache.dolphinscheduler.common.Constants.SINGLE_SLASH; - -import static org.apache.dolphinscheduler.common.Constants.DRY_RUN_FLAG_YES; import static org.apache.dolphinscheduler.common.Constants.APPID_COLLECT; import static org.apache.dolphinscheduler.common.Constants.DEFAULT_COLLECT_WAY; +import static org.apache.dolphinscheduler.common.Constants.DRY_RUN_FLAG_YES; +import static org.apache.dolphinscheduler.common.Constants.SINGLE_SLASH; + import org.apache.dolphinscheduler.common.enums.WarningType; import org.apache.dolphinscheduler.common.utils.DateUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils; @@ -63,8 +63,6 @@ import com.baomidou.mybatisplus.core.toolkit.CollectionUtils; import com.google.common.base.Strings; - - public abstract class WorkerTaskExecuteRunnable implements Runnable { protected final Logger logger = LoggerFactory From da447a23f6ef286740df1b1d607320a368a8e9e5 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Fri, 21 Oct 2022 09:49:29 +0800 Subject: [PATCH 15/32] add ut's license header --- .../dolphinscheduler/aop/YarnClientAspect.java 
| 4 ++-- .../YarnClientAspectMocTest.java | 17 +++++++++++++++++ .../poc/YarnClientAspectMoc.java | 17 +++++++++++++++++ .../dolphinscheduler/poc/YarnClientMoc.java | 17 +++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java index 2b99a92e222d..494845d56c8a 100644 --- a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -38,7 +38,7 @@ public class YarnClientAspect { */ private ApplicationReport currentApplicationReport = null; - private String appInfoFilePath; + private final String appInfoFilePath; private boolean debug; public YarnClientAspect() { @@ -64,7 +64,7 @@ public void registerApplicationInfo(ApplicationSubmissionContext appContext, App StandardOpenOption.WRITE, StandardOpenOption.APPEND); } catch (IOException ioException) { - System.out.println( + System.err.println( "YarnClientAspect[registerAppInfo]: can't output current application information, because " + ioException.getMessage()); } diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java index ab8a8d7a2cfd..6a823f06cc6e 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.dolphinscheduler; import org.apache.dolphinscheduler.poc.YarnClientMoc; diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java index 90453ed60c7e..394c934cc339 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.dolphinscheduler.poc; import org.apache.hadoop.yarn.api.records.ApplicationId; diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java index 75cec224b62e..6d906820b4bd 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.dolphinscheduler.poc; import org.apache.hadoop.yarn.api.records.ApplicationId; From 5f282b44e3ac0d489d6cf71dd156d121fad48c73 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Fri, 21 Oct 2022 15:12:35 +0800 Subject: [PATCH 16/32] add aspectjrt license --- dolphinscheduler-dist/release-docs/LICENSE | 3 +- .../licenses/LICENSE-aspectjweaver.txt | 299 ++++++++++++++---- tools/dependencies/known-dependencies.txt | 1 + 3 files changed, 248 insertions(+), 55 deletions(-) diff --git a/dolphinscheduler-dist/release-docs/LICENSE b/dolphinscheduler-dist/release-docs/LICENSE index 2ad573a24115..6a0730af60e6 100644 --- a/dolphinscheduler-dist/release-docs/LICENSE +++ b/dolphinscheduler-dist/release-docs/LICENSE @@ -563,7 +563,8 @@ EPL licenses The following components are provided under the EPL License. See project link for details. The text of each license is also included at licenses/LICENSE-[project].txt. - aspectjweaver 1.9.7:https://mvnrepository.com/artifact/org.aspectj/aspectjweaver/1.9.7, EPL 1.0 + aspectjweaver 1.9.7:https://mvnrepository.com/artifact/org.aspectj/aspectjweaver/1.9.7, EPL 2.0 + aspectjrt 1.9.7:https://mvnrepository.com/artifact/org.aspectj/aspectjrt/1.9.7, EPL 2.0 logback-classic 1.2.11: https://mvnrepository.com/artifact/ch.qos.logback/logback-classic/1.2.11, EPL 1.0 and LGPL 2.1 logback-core 1.2.11: https://mvnrepository.com/artifact/ch.qos.logback/logback-core/1.2.11, EPL 1.0 and LGPL 2.1 h2-2.1.210 https://github.com/h2database/h2database/blob/master/LICENSE.txt, MPL 2.0 or EPL 1.0 diff --git a/dolphinscheduler-dist/release-docs/licenses/LICENSE-aspectjweaver.txt b/dolphinscheduler-dist/release-docs/licenses/LICENSE-aspectjweaver.txt index 3fa00836fa41..e55f34467e25 100644 --- a/dolphinscheduler-dist/release-docs/licenses/LICENSE-aspectjweaver.txt +++ b/dolphinscheduler-dist/release-docs/licenses/LICENSE-aspectjweaver.txt @@ -1,86 +1,277 @@ -Eclipse Public License - v 1.0 -THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS 
OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. +Eclipse Public License - v 2.0 + + THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE + PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION + OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 1. DEFINITIONS "Contribution" means: -a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and - -b) in the case of each subsequent Contributor: - -i) changes to the Program, and - -ii) additions to the Program; - -where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program. - -"Contributor" means any person or entity that distributes the Program. - -"Licensed Patents" mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program. - -"Program" means the Contributions distributed in accordance with this Agreement. - -"Recipient" means anyone who receives the Program under this Agreement, including all Contributors. + a) in the case of the initial Contributor, the initial content + Distributed under this Agreement, and + + b) in the case of each subsequent Contributor: + i) changes to the Program, and + ii) additions to the Program; + where such changes and/or additions to the Program originate from + and are Distributed by that particular Contributor. 
A Contribution + "originates" from a Contributor if it was added to the Program by + such Contributor itself or anyone acting on such Contributor's behalf. + Contributions do not include changes or additions to the Program that + are not Modified Works. + +"Contributor" means any person or entity that Distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which +are necessarily infringed by the use or sale of its Contribution alone +or when combined with the Program. + +"Program" means the Contributions Distributed in accordance with this +Agreement. + +"Recipient" means anyone who receives the Program under this Agreement +or any Secondary License (as applicable), including Contributors. + +"Derivative Works" shall mean any work, whether in Source Code or other +form, that is based on (or derived from) the Program and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. + +"Modified Works" shall mean any work in Source Code or other form that +results from an addition to, deletion from, or modification of the +contents of the Program, including, for purposes of clarity any new file +in Source Code form that contains any contents of the Program. Modified +Works shall not include works that contain only declarations, +interfaces, types, classes, structures, or files of the Program solely +in each case in order to link to, bind by name, or subclass the Program +or Modified Works thereof. + +"Distribute" means the acts of a) distributing or b) making available +in any manner that enables the transfer of a copy. + +"Source Code" means the form of a Program preferred for making +modifications, including but not limited to software source code, +documentation source, and configuration files. 
+ +"Secondary License" means either the GNU General Public License, +Version 2.0, or any later versions of that license, including any +exceptions or additional permissions as identified by the initial +Contributor. 2. GRANT OF RIGHTS -a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form. - -b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder. - -c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. 
For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program. - -d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement. + a) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare Derivative Works of, publicly display, + publicly perform, Distribute and sublicense the Contribution of such + Contributor, if any, and such Derivative Works. + + b) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, + if any, in Source Code or other form. This patent license shall + apply to the combination of the Contribution and the Program if, at + the time the Contribution is added by the Contributor, such addition + of the Contribution causes such combination to be covered by the + Licensed Patents. The patent license shall not apply to any other + combinations which include the Contribution. No hardware per se is + licensed hereunder. + + c) Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. + Each Contributor disclaims any liability to Recipient for claims + brought by any other entity based on infringement of intellectual + property rights or otherwise. 
As a condition to exercising the + rights and licenses granted hereunder, each Recipient hereby + assumes sole responsibility to secure any other intellectual + property rights needed, if any. For example, if a third party + patent license is required to allow Recipient to Distribute the + Program, it is Recipient's responsibility to acquire that license + before distributing the Program. + + d) Each Contributor represents that to its knowledge it has + sufficient copyright rights in its Contribution, if any, to grant + the copyright license set forth in this Agreement. + + e) Notwithstanding the terms of any Secondary License, no + Contributor makes additional grants to any Recipient (other than + those set forth in this Agreement) as a result of such Recipient's + receipt of the Program under the terms of a Secondary License + (if permitted under the terms of Section 3). 3. REQUIREMENTS -A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that: - -a) it complies with the terms and conditions of this Agreement; and +3.1 If a Contributor Distributes the Program in any form, then: -b) its license agreement: + a) the Program must also be made available as Source Code, in + accordance with section 3.2, and the Contributor must accompany + the Program with a statement that the Source Code for the Program + is available under this Agreement, and informs Recipients how to + obtain it in a reasonable manner on or through a medium customarily + used for software exchange; and -i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose; + b) the Contributor may Distribute the Program under a license + different than this Agreement, provided that such license: + i) effectively disclaims on behalf of all other 
Contributors all + warranties and conditions, express and implied, including + warranties or conditions of title and non-infringement, and + implied warranties or conditions of merchantability and fitness + for a particular purpose; -ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits; + ii) effectively excludes on behalf of all other Contributors all + liability for damages, including direct, indirect, special, + incidental and consequential damages, such as lost profits; -iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and + iii) does not attempt to limit or alter the recipients' rights + in the Source Code under section 3.2; and -iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange. + iv) requires any subsequent distribution of the Program by any + party to be under a license that satisfies the requirements + of this section 3. -When the Program is made available in source code form: +3.2 When the Program is Distributed as Source Code: -a) it must be made available under this Agreement; and + a) it must be made available under this Agreement, or if the + Program (i) is combined with other material in a separate file or + files made available under a Secondary License, and (ii) the initial + Contributor attached to the Source Code the notice described in + Exhibit A of this Agreement, then the Program may be made available + under the terms of such Secondary Licenses, and -b) a copy of this Agreement must be included with each copy of the Program. + b) a copy of this Agreement must be included with each copy of + the Program. 
-Contributors may not remove or alter any copyright notices contained within the Program. - -Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution. +3.3 Contributors may not remove or alter any copyright, patent, +trademark, attribution notices, disclaimers of warranty, or limitations +of liability ("notices") contained within the Program from any copy of +the Program which they Distribute, provided that Contributors may add +their own appropriate notices. 4. COMMERCIAL DISTRIBUTION -Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. 
The Indemnified Contributor may participate in any such claim at its own expense. - -For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages. +Commercial distributors of software may accept certain responsibilities +with respect to end users, business partners and the like. While this +license is intended to facilitate the commercial use of the Program, +the Contributor who includes the Program in a commercial product +offering should do so in a manner which does not create potential +liability for other Contributors. Therefore, if a Contributor includes +the Program in a commercial product offering, such Contributor +("Commercial Contributor") hereby agrees to defend and indemnify every +other Contributor ("Indemnified Contributor") against any losses, +damages and costs (collectively "Losses") arising from claims, lawsuits +and other legal actions brought by a third party against the Indemnified +Contributor to the extent caused by the acts or omissions of such +Commercial Contributor in connection with its distribution of the Program +in a commercial product offering. The obligations in this section do not +apply to any claims or Losses relating to any actual or alleged +intellectual property infringement. 
In order to qualify, an Indemnified +Contributor must: a) promptly notify the Commercial Contributor in +writing of such claim, and b) allow the Commercial Contributor to control, +and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may +participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial +product offering, Product X. That Contributor is then a Commercial +Contributor. If that Commercial Contributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Contributor's responsibility +alone. Under this section, the Commercial Contributor would have to +defend claims against the other Contributors related to those performance +claims and warranties, and if a court requires any other Contributor to +pay any damages as a result, the Commercial Contributor must pay +those damages. 5. NO WARRANTY -EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement , including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations. 
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" +BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF +TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR +PURPOSE. Each Recipient is solely responsible for determining the +appropriateness of using and distributing the Program and assumes all +risks associated with its exercise of rights under this Agreement, +including but not limited to the risks and costs of program errors, +compliance with applicable laws, damage to or loss of data, programs +or equipment, and unavailability or interruption of operations. 6. DISCLAIMER OF LIABILITY -EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS +SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE +EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 7. 
GENERAL -If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. - -If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed. - -All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive. - -Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. 
The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved. - -This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation. \ No newline at end of file +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the +Program itself (excluding combinations of the Program with other software +or hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. 
If all Recipient's +rights under this Agreement terminate, Recipient agrees to cease use +and distribution of the Program as soon as reasonably practicable. +However, Recipient's obligations under this Agreement and any licenses +granted by Recipient relating to the Program shall continue and survive. + +Everyone is permitted to copy and distribute copies of this Agreement, +but in order to avoid inconsistency the Agreement is copyrighted and +may only be modified in the following manner. The Agreement Steward +reserves the right to publish new versions (including revisions) of +this Agreement from time to time. No one other than the Agreement +Steward has the right to modify this Agreement. The Eclipse Foundation +is the initial Agreement Steward. The Eclipse Foundation may assign the +responsibility to serve as the Agreement Steward to a suitable separate +entity. Each new version of the Agreement will be given a distinguishing +version number. The Program (including Contributions) may always be +Distributed subject to the version of the Agreement under which it was +received. In addition, after a new version of the Agreement is published, +Contributor may elect to Distribute the Program (including its +Contributions) under the new version. + +Except as expressly stated in Sections 2(a) and 2(b) above, Recipient +receives no rights or licenses to the intellectual property of any +Contributor under this Agreement, whether expressly, by implication, +estoppel or otherwise. All rights in the Program not expressly granted +under this Agreement are reserved. Nothing in this Agreement is intended +to be enforceable by any entity that is not a Contributor or Recipient. +No third-party beneficiary rights are created under this Agreement. + +Exhibit A - Form of Secondary Licenses Notice + +"This Source Code may also be made available under the following +Secondary Licenses when the conditions for such availability set forth +in the Eclipse Public License, v. 
2.0 are satisfied: {name license(s), +version(s), and exceptions or additional permissions here}." + + Simply including a copy of this Agreement, including this Exhibit A + is not sufficient to license the Source Code under Secondary Licenses. + + If it is not possible or desirable to put the notice in a particular + file, then You may include the notice in a location (such as a LICENSE + file in a relevant directory) where a recipient would be likely to + look for such a notice. + + You may add additional accurate notices of copyright ownership. \ No newline at end of file diff --git a/tools/dependencies/known-dependencies.txt b/tools/dependencies/known-dependencies.txt index 9a6c44d58453..f77ab9aafa6b 100644 --- a/tools/dependencies/known-dependencies.txt +++ b/tools/dependencies/known-dependencies.txt @@ -11,6 +11,7 @@ annotations-13.0.jar apache-client-2.17.282.jar asm-9.1.jar aspectjweaver-1.9.7.jar +aspectjrt-1.9.7.jar auth-2.17.282.jar audience-annotations-0.12.0.jar automaton-1.11-8.jar From f59dc7e6cd499affda1aff0d08c23d95e648b9b4 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Fri, 21 Oct 2022 15:17:02 +0800 Subject: [PATCH 17/32] alter aop dependency version --- dolphinscheduler-aop/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 0a858bb7e96b..22fb26054e4d 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -33,7 +33,7 @@ 1.8 1.8 1.9.7 - 3.2.0 + 3.2.4 From 7ff47f75ef3033db454e93cc858039e4531b772d Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Mon, 24 Oct 2022 20:47:34 +0800 Subject: [PATCH 18/32] remove unused file --- dolphinscheduler-aop/pom.xml | 18 ++ .../poc/YarnClientAspectMoc.java | 2 +- .../service/log/LoggerRequestProcessor.java | 4 +- .../dolphinscheduler/spi/utils/Constants.java | 255 ------------------ .../plugin/task/api/utils/LogUtilsTest.java | 2 + 5 files changed, 23 insertions(+), 258 deletions(-) delete mode 100644 
dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 6b09980d38a1..59ef8a4f64dd 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -37,21 +37,39 @@ + org.aspectj aspectjweaver ${aspectj.version} runtime + org.aspectj aspectjrt ${aspectj.version} + + org.apache.hadoop hadoop-yarn-client ${yarn.version} + + + + + org.apache.hadoop + hadoop-yarn-common + ${yarn.version} + + + + org.apache.hadoop + hadoop-common + ${yarn.version} + org.apache.hadoop diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java index 394c934cc339..9205a7c00783 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java @@ -28,7 +28,7 @@ public class YarnClientAspectMoc { private ApplicationId privateId = null; - @AfterReturning(pointcut = "execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext,submittedAppId") + @AfterReturning(pointcut = "execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext") public void submitApplication(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) { System.out.println("YarnClientAspectMoc[submitApplication]: app context " + appContext + ", submittedAppId " + submittedAppId + " privateId " + privateId); diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LoggerRequestProcessor.java 
b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LoggerRequestProcessor.java index 3c99b34f0fd5..26a9401e9c30 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LoggerRequestProcessor.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/log/LoggerRequestProcessor.java @@ -17,8 +17,8 @@ package org.apache.dolphinscheduler.service.log; -import static org.apache.dolphinscheduler.common.Constants.APPID_COLLECT; -import static org.apache.dolphinscheduler.common.Constants.DEFAULT_COLLECT_WAY; +import static org.apache.dolphinscheduler.common.constants.Constants.APPID_COLLECT; +import static org.apache.dolphinscheduler.common.constants.Constants.DEFAULT_COLLECT_WAY; import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; diff --git a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java deleted file mode 100644 index 8527a6d9cf2f..000000000000 --- a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/utils/Constants.java +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.dolphinscheduler.spi.utils; - -/** - * constants - */ -public class Constants { - - private Constants() { - throw new IllegalStateException("Constants class"); - } - - /** alert plugin param field string **/ - public static final String STRING_PLUGIN_PARAM_FIELD = "field"; - /** alert plugin param name string **/ - public static final String STRING_PLUGIN_PARAM_NAME = "name"; - /** alert plugin param props string **/ - public static final String STRING_PLUGIN_PARAM_PROPS = "props"; - /** alert plugin param type string **/ - public static final String STRING_PLUGIN_PARAM_TYPE = "type"; - /** alert plugin param title string **/ - public static final String STRING_PLUGIN_PARAM_TITLE = "title"; - /** alert plugin param value string **/ - public static final String STRING_PLUGIN_PARAM_VALUE = "value"; - /** alert plugin param validate string **/ - public static final String STRING_PLUGIN_PARAM_VALIDATE = "validate"; - /** alert plugin param options string **/ - public static final String STRING_PLUGIN_PARAM_OPTIONS = "options"; - /**plugin param emit string **/ - public static final String STRING_PLUGIN_PARAM_EMIT = "emit"; - - /** string true */ - public static final String STRING_TRUE = "true"; - /** string false */ - public static final String STRING_FALSE = "false"; - /** string yes */ - public static final String STRING_YES = "YES"; - /** string no */ - public static final String STRING_NO = "NO"; - - /** - * common properties path - */ - public static final String COMMON_PROPERTIES_PATH = "/common.properties"; - - /** - * date format of yyyy-MM-dd HH:mm:ss - */ - public static final String YYYY_MM_DD_HH_MM_SS = "yyyy-MM-dd HH:mm:ss"; - - /** - * date format of yyyyMMddHHmmss - */ - public static final String YYYYMMDDHHMMSS = "yyyyMMddHHmmss"; - - /** - * date format of yyyyMMddHHmmssSSS - */ - public static final String 
YYYYMMDDHHMMSSSSS = "yyyyMMddHHmmssSSS"; - - /** - * double brackets left - */ - public static final String DOUBLE_BRACKETS_LEFT = "{{"; - - /** - * double brackets left - */ - public static final String DOUBLE_BRACKETS_RIGHT = "}}"; - - /** - * double brackets left - */ - public static final String DOUBLE_BRACKETS_LEFT_SPACE = "{ {"; - - /** - * double brackets left - */ - public static final String DOUBLE_BRACKETS_RIGHT_SPACE = "} }"; - - public static final String SMALL = "small"; - - public static final String CHANGE = "change"; - - public static final String SPRING_DATASOURCE_MIN_IDLE = "spring.datasource.minIdle"; - - public static final String SPRING_DATASOURCE_MAX_ACTIVE = "spring.datasource.maxActive"; - - public static final String SPRING_DATASOURCE_TEST_ON_BORROW = "spring.datasource.testOnBorrow"; - - /** - * java.security.krb5.conf - */ - public static final String JAVA_SECURITY_KRB5_CONF = "java.security.krb5.conf"; - - /** - * java.security.krb5.conf.path - */ - public static final String JAVA_SECURITY_KRB5_CONF_PATH = "java.security.krb5.conf.path"; - - /** - * hadoop.security.authentication - */ - public static final String HADOOP_SECURITY_AUTHENTICATION = "hadoop.security.authentication"; - - /** - * hadoop.security.authentication - */ - public static final String HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE = - "hadoop.security.authentication.startup.state"; - - /** - * loginUserFromKeytab user - */ - public static final String LOGIN_USER_KEY_TAB_USERNAME = "login.user.keytab.username"; - - /** - * loginUserFromKeytab path - */ - public static final String LOGIN_USER_KEY_TAB_PATH = "login.user.keytab.path"; - - /** - * resource storage type - */ - public static final String RESOURCE_STORAGE_TYPE = "resource.storage.type"; - - /** - * kerberos - */ - public static final String KERBEROS = "kerberos"; - - /** - * support hive datasource in one session - */ - public static final String SUPPORT_HIVE_ONE_SESSION = "support.hive.oneSession"; - - /** - * 
driver - */ - public static final String ORG_POSTGRESQL_DRIVER = "org.postgresql.Driver"; - public static final String COM_MYSQL_CJ_JDBC_DRIVER = "com.mysql.cj.jdbc.Driver"; - public static final String COM_MYSQL_JDBC_DRIVER = "com.mysql.jdbc.Driver"; - public static final String ORG_APACHE_HIVE_JDBC_HIVE_DRIVER = "org.apache.hive.jdbc.HiveDriver"; - public static final String COM_CLICKHOUSE_JDBC_DRIVER = "ru.yandex.clickhouse.ClickHouseDriver"; - public static final String COM_ORACLE_JDBC_DRIVER = "oracle.jdbc.OracleDriver"; - public static final String COM_SQLSERVER_JDBC_DRIVER = "com.microsoft.sqlserver.jdbc.SQLServerDriver"; - public static final String COM_DB2_JDBC_DRIVER = "com.ibm.db2.jcc.DB2Driver"; - public static final String COM_PRESTO_JDBC_DRIVER = "com.facebook.presto.jdbc.PrestoDriver"; - public static final String COM_REDSHIFT_JDBC_DRIVER = "com.amazon.redshift.jdbc42.Driver"; - public static final String COM_ATHENA_JDBC_DRIVER = "com.simba.athena.jdbc.Driver"; - - /** - * validation Query - */ - public static final String POSTGRESQL_VALIDATION_QUERY = "select version()"; - public static final String MYSQL_VALIDATION_QUERY = "select 1"; - public static final String HIVE_VALIDATION_QUERY = "select 1"; - public static final String CLICKHOUSE_VALIDATION_QUERY = "select 1"; - public static final String ORACLE_VALIDATION_QUERY = "select 1 from dual"; - public static final String SQLSERVER_VALIDATION_QUERY = "select 1"; - public static final String DB2_VALIDATION_QUERY = "select 1 from sysibm.sysdummy1"; - public static final String PRESTO_VALIDATION_QUERY = "select 1"; - public static final String REDHIFT_VALIDATION_QUERY = "select 1"; - public static final String ATHENA_VALIDATION_QUERY = "select 1"; - - /** - * jdbc url - */ - public static final String JDBC_MYSQL = "jdbc:mysql://"; - public static final String JDBC_POSTGRESQL = "jdbc:postgresql://"; - public static final String JDBC_HIVE_2 = "jdbc:hive2://"; - public static final String JDBC_CLICKHOUSE 
= "jdbc:clickhouse://"; - public static final String JDBC_ORACLE_SID = "jdbc:oracle:thin:@"; - public static final String JDBC_ORACLE_SERVICE_NAME = "jdbc:oracle:thin:@//"; - public static final String JDBC_SQLSERVER = "jdbc:sqlserver://"; - public static final String JDBC_DB2 = "jdbc:db2://"; - public static final String JDBC_PRESTO = "jdbc:presto://"; - public static final String JDBC_REDSHIFT = "jdbc:redshift://"; - public static final String JDBC_ATHENA = "jdbc:awsathena://"; - - public static final String ADDRESS = "address"; - public static final String DATABASE = "database"; - public static final String JDBC_URL = "jdbcUrl"; - public static final String PRINCIPAL = "principal"; - public static final String OTHER = "other"; - public static final String ORACLE_DB_CONNECT_TYPE = "connectType"; - public static final String KERBEROS_KRB5_CONF_PATH = "javaSecurityKrb5Conf"; - public static final String KERBEROS_KEY_TAB_USERNAME = "loginUserKeytabUsername"; - public static final String KERBEROS_KEY_TAB_PATH = "loginUserKeytabPath"; - - /** - * fetch applicationId way - */ - public static final String APPID_COLLECT = "appId.collect"; - public static final String DEFAULT_COLLECT_WAY = "log"; - - /** - * DOUBLE_SLASH // - */ - public static final String DOUBLE_SLASH = "//"; - - /** - * SLASH / - */ - public static final String SLASH = "/"; - - /** - * comma , - */ - public static final String COMMA = ","; - - /** - * COLON : - */ - public static final String COLON = ":"; - - /** - * AT SIGN @ - */ - public static final String AT_SIGN = "@"; - - /** - * SEMICOLON ; - */ - public static final String SEMICOLON = ";"; - - /** - * EQUAL_SIGN = - */ - public static final String EQUAL_SIGN = "="; - - /** - * datasource encryption salt - */ - public static final String DATASOURCE_ENCRYPTION_SALT_DEFAULT = "!@#$%^&*"; - public static final String DATASOURCE_ENCRYPTION_ENABLE = "datasource.encryption.enable"; - public static final String DATASOURCE_ENCRYPTION_SALT = 
"datasource.encryption.salt"; - -} diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java index f9d62c07787d..c8675a0a64c4 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtilsTest.java @@ -18,6 +18,7 @@ package org.apache.dolphinscheduler.plugin.task.api.utils; import java.util.List; +import java.util.stream.Collectors; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -40,6 +41,7 @@ public void getAppIdsFromLogFile() { @Test public void getAppIdsFromAppInfoFile() { List appIds = LogUtils.getAppIds(APP_ID_FILE, APP_INFO_FILE, "aop"); + appIds = appIds.stream().filter(a -> a.contains("application")).collect(Collectors.toList()); Assertions.assertEquals(Lists.newArrayList("application_1548381669007_1234"), appIds); } } From 94dfe46a3cf8581393dba326cc29e024582a2bc9 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Mon, 24 Oct 2022 22:00:20 +0800 Subject: [PATCH 19/32] rename import package --- .../dolphinscheduler/plugin/task/api/AbstractYarnTask.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java index 236673a9ba09..8ef09258255b 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java +++ 
b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractYarnTask.java @@ -17,8 +17,8 @@ package org.apache.dolphinscheduler.plugin.task.api; -import static org.apache.dolphinscheduler.spi.utils.Constants.APPID_COLLECT; -import static org.apache.dolphinscheduler.spi.utils.Constants.DEFAULT_COLLECT_WAY; +import static org.apache.dolphinscheduler.common.constants.Constants.APPID_COLLECT; +import static org.apache.dolphinscheduler.common.constants.Constants.DEFAULT_COLLECT_WAY; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.task.api.model.ResourceInfo; From 224adcbc571abb73120d39ea08a9e0815e533e6b Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 26 Oct 2022 14:16:51 +0800 Subject: [PATCH 20/32] exclude redundant dependencies --- dolphinscheduler-aop/pom.xml | 28 +++------------ dolphinscheduler-tools/pom.xml | 62 ++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 59ef8a4f64dd..c6f427cd4e4a 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -33,53 +33,33 @@ 1.8 1.8 1.9.7 - 3.2.4 + 3.2.4 - org.aspectj aspectjweaver ${aspectj.version} runtime - + org.aspectj aspectjrt ${aspectj.version} - org.apache.hadoop hadoop-yarn-client - ${yarn.version} - + ${hadoop.version} - - org.apache.hadoop - hadoop-yarn-common - ${yarn.version} - - - - org.apache.hadoop - hadoop-common - ${yarn.version} - - - - org.apache.hadoop - hadoop-yarn-common - ${yarn.version} - org.apache.hadoop hadoop-common - ${yarn.version} + ${hadoop.version} diff --git a/dolphinscheduler-tools/pom.xml b/dolphinscheduler-tools/pom.xml index f8e053f4aeee..d9494c0b0953 100644 --- a/dolphinscheduler-tools/pom.xml +++ b/dolphinscheduler-tools/pom.xml @@ -49,6 +49,68 @@ org.apache.dolphinscheduler dolphinscheduler-aop + + + aopalliance + aopalliance + 
+ + com.google.inject + guice + + + com.google.inject.extensions + guice-servlet + + + javax.inject + javax.inject + + + com.sun.xml.bind + jaxb-impl + + + com.sun.jersey.contribs + jersey-guice + + + com.sun.jersey + jersey-client + + + javax.ws.rs + jsr311-api + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-jaxrs + + + org.codehaus.jackson + jackson-xc + + From f45d52710d8993d21d08f158d79ac01b24bd1e66 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Wed, 26 Oct 2022 21:25:52 +0800 Subject: [PATCH 21/32] use logger to output debug info --- docs/docs/en/architecture/configuration.md | 2 +- docs/docs/zh/architecture/configuration.md | 2 +- .../dolphinscheduler/aop/YarnClientAspect.java | 15 +++++++++------ .../dolphinscheduler/YarnClientAspectMocTest.java | 9 ++++++++- .../dolphinscheduler/poc/YarnClientAspectMoc.java | 8 ++++++-- .../dolphinscheduler/poc/YarnClientMoc.java | 7 ++++++- 6 files changed, 31 insertions(+), 12 deletions(-) diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index a47b3a5d6e1a..08653455c7f8 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -224,7 +224,7 @@ The default configuration is as follows: |sudo.enable | true | whether to enable sudo| |alert.rpc.port | 50052 | the RPC port of Alert Server| |zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin| -|appId.collect | log | way to collect applicationId, if use aop, alter the configuration from log to aop. Note: Aop way will failure if override applicationId collection-related environment configuration in dolphinscheduler_env.sh.| +|appId.collect | log | way to collect applicationId, if use aop, alter the configuration from log to aop. 
Note: Aop way doesn't support submitting yarn job on remote host by client mode like Beeline, and will failure if override applicationId collection-related environment configuration in dolphinscheduler_env.sh, and .| ### Api-server related configuration diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 16c4833b560e..89e6760b3856 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -221,7 +221,7 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId |sudo.enable | true | 是否开启sudo| |alert.rpc.port | 50052 | Alert Server的RPC端口| |zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址| -|appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,注意:如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效| +|appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效| ## Api-server相关配置 diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java index 494845d56c8a..8a930df1e296 100644 --- a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -29,6 +29,8 @@ import org.aspectj.lang.annotation.AfterReturning; import org.aspectj.lang.annotation.Aspect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @Aspect public class YarnClientAspect { @@ -41,6 +43,8 @@ public class YarnClientAspect { private final String appInfoFilePath; private boolean debug; + protected final Logger logger = LoggerFactory.getLogger(getClass()); + public YarnClientAspect() { appInfoFilePath = String.format("%s/%s", System.getProperty("user.dir"), "appInfo.log"); debug = true; @@ 
-64,15 +68,14 @@ public void registerApplicationInfo(ApplicationSubmissionContext appContext, App StandardOpenOption.WRITE, StandardOpenOption.APPEND); } catch (IOException ioException) { - System.err.println( - "YarnClientAspect[registerAppInfo]: can't output current application information, because " - + ioException.getMessage()); + logger.error("YarnClientAspect[registerAppInfo]: can't output current application information, because " + + ioException.getMessage()); } } if (debug) { - System.out.println("YarnClientAspect[submitApplication]: current application context " + appContext); - System.out.println("YarnClientAspect[submitApplication]: submitted application id " + submittedAppId); - System.out.println( + logger.info("YarnClientAspect[submitApplication]: current application context " + appContext); + logger.info("YarnClientAspect[submitApplication]: submitted application id " + submittedAppId); + logger.info( "YarnClientAspect[submitApplication]: current application report " + currentApplicationReport); } } diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java index 6a823f06cc6e..d1519269c031 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java @@ -32,20 +32,27 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class YarnClientAspectMocTest { + protected final Logger logger = LoggerFactory.getLogger(getClass()); + private final PrintStream standardOut = System.out; ByteArrayOutputStream stdoutStream = new ByteArrayOutputStream(); + @BeforeEach public void beforeEveryTest() { System.setOut(new PrintStream(stdoutStream)); } + @AfterEach 
public void afterEveryTest() throws IOException { System.setOut(standardOut); stdoutStream.close(); } + @Test public void testMoc() { YarnClientMoc moc = new YarnClientMoc(); @@ -64,7 +71,7 @@ public void testMoc() { Assertions.assertTrue(stdoutContent.contains("YarnClientAspectMoc[createAppId]:"), "trigger YarnClientAspectMoc.createAppId failed"); } catch (YarnException | IOException e) { - Assertions.fail("test YarnClientAspectMoc failed: " + e.getMessage()); + logger.error("test YarnClientAspectMoc failed: " + e.getMessage()); e.printStackTrace(); } } diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java index 9205a7c00783..77e1f279f141 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java @@ -22,15 +22,19 @@ import org.aspectj.lang.annotation.AfterReturning; import org.aspectj.lang.annotation.Aspect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @Aspect public class YarnClientAspectMoc { + protected final Logger logger = LoggerFactory.getLogger(getClass()); + private ApplicationId privateId = null; @AfterReturning(pointcut = "execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext") public void submitApplication(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) { - System.out.println("YarnClientAspectMoc[submitApplication]: app context " + appContext + ", submittedAppId " + logger.info("YarnClientAspectMoc[submitApplication]: app context " + appContext + ", submittedAppId " + submittedAppId + " privateId " + privateId); } @@ -39,6 +43,6 @@ public void submitApplication(ApplicationSubmissionContext 
appContext, Applicati "&& !within(CfowAspect) && execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.createAppId())", returning = "submittedAppId") public void createAppId(ApplicationId submittedAppId) { privateId = submittedAppId; - System.out.println("YarnClientAspectMoc[createAppId]: created submittedAppId " + submittedAppId); + logger.info("YarnClientAspectMoc[createAppId]: created submittedAppId " + submittedAppId); } } diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java index 6d906820b4bd..58815d231817 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java @@ -24,13 +24,18 @@ import java.io.IOException; import java.util.Random; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class YarnClientMoc { + protected final Logger logger = LoggerFactory.getLogger(getClass()); + private Random random = new Random(); public ApplicationId createAppId() { ApplicationId created = ApplicationId.newInstance(System.currentTimeMillis(), random.nextInt()); - System.out.println("created id " + created.getId()); + logger.info("created id " + created.getId()); return created; } From 0ee40c86bac3d150559e564842af9e64fa75e1a5 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 27 Oct 2022 10:03:21 +0800 Subject: [PATCH 22/32] add logger configuration --- .../src/main/resources/log4j.properties | 22 +++++++++++++++++++ .../src/test/resources/log4j.properties | 21 ++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 dolphinscheduler-aop/src/main/resources/log4j.properties create mode 100644 dolphinscheduler-aop/src/test/resources/log4j.properties diff --git a/dolphinscheduler-aop/src/main/resources/log4j.properties 
b/dolphinscheduler-aop/src/main/resources/log4j.properties new file mode 100644 index 000000000000..c7527c03b428 --- /dev/null +++ b/dolphinscheduler-aop/src/main/resources/log4j.properties @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +log4j.rootLogger=INFO, stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss.SSS Z} %-5p [%c] - %m%n \ No newline at end of file diff --git a/dolphinscheduler-aop/src/test/resources/log4j.properties b/dolphinscheduler-aop/src/test/resources/log4j.properties new file mode 100644 index 000000000000..3e09eb53f217 --- /dev/null +++ b/dolphinscheduler-aop/src/test/resources/log4j.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +log4j.rootLogger=INFO, stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout From 3355270390baa81eb1486a02529d32241b1bab10 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 27 Oct 2022 13:14:17 +0800 Subject: [PATCH 23/32] Update dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java Co-authored-by: caishunfeng --- .../java/org/apache/dolphinscheduler/aop/YarnClientAspect.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java index 8a930df1e296..6da2574b2795 100644 --- a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -72,12 +72,10 @@ public void registerApplicationInfo(ApplicationSubmissionContext appContext, App + ioException.getMessage()); } } - if (debug) { logger.info("YarnClientAspect[submitApplication]: current application context " + appContext); logger.info("YarnClientAspect[submitApplication]: submitted application id " + submittedAppId); logger.info( "YarnClientAspect[submitApplication]: current application report " + currentApplicationReport); - } } /** From d4ad0377721d614e7aad4361ea0290111f4276d4 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 27 Oct 2022 13:15:21 +0800 
Subject: [PATCH 24/32] Update dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java Co-authored-by: caishunfeng --- .../java/org/apache/dolphinscheduler/aop/YarnClientAspect.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java index 6da2574b2795..b269604aaa7a 100644 --- a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -47,7 +47,7 @@ public class YarnClientAspect { public YarnClientAspect() { appInfoFilePath = String.format("%s/%s", System.getProperty("user.dir"), "appInfo.log"); - debug = true; + } /** From a5865a72f575bac6ea84e914872485b04ef99e02 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 27 Oct 2022 13:16:03 +0800 Subject: [PATCH 25/32] Update dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java Co-authored-by: caishunfeng --- .../org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java index 77e1f279f141..7977d8bc930b 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java @@ -34,8 +34,7 @@ public class YarnClientAspectMoc { @AfterReturning(pointcut = "execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext") public 
void submitApplication(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) { - logger.info("YarnClientAspectMoc[submitApplication]: app context " + appContext + ", submittedAppId " - + submittedAppId + " privateId " + privateId); + logger.info("YarnClientAspectMoc[submitApplication]: app context: {}, submittedAppId: {}, privateId: {}", appContext, submittedAppId, privateId); } @AfterReturning(pointcut = "cflow(execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext))) " From 3ebe4e995684a4372e0870b7666e122709a71566 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 27 Oct 2022 13:16:17 +0800 Subject: [PATCH 26/32] Update dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java Co-authored-by: caishunfeng --- .../java/org/apache/dolphinscheduler/poc/YarnClientMoc.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java index 58815d231817..d701cb0c8cce 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientMoc.java @@ -35,7 +35,7 @@ public class YarnClientMoc { public ApplicationId createAppId() { ApplicationId created = ApplicationId.newInstance(System.currentTimeMillis(), random.nextInt()); - logger.info("created id " + created.getId()); + logger.info("created id {}", created.getId()); return created; } From 67818705b27b448b7e5b703cdcf4ade0a4630f75 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 27 Oct 2022 19:06:14 +0800 Subject: [PATCH 27/32] update log format & delete buildExecPathRelatedInfo --- .../mysql/dolphinscheduler_env.sh | 2 +- .../postgresql/dolphinscheduler_env.sh | 2 +- docs/docs/en/architecture/configuration.md | 2 +- 
docs/docs/zh/architecture/configuration.md | 2 +- dolphinscheduler-aop/pom.xml | 31 ++++++++++++------- .../aop/YarnClientAspect.java | 15 +++++---- .../YarnClientAspectMocTest.java | 3 +- .../poc/YarnClientAspectMoc.java | 5 +-- dolphinscheduler-bom/pom.xml | 20 +++++++++++- .../builder/TaskExecutionContextBuilder.java | 19 ------------ .../master/runner/task/BaseTaskProcessor.java | 1 - .../utils/TaskExecutionCheckerUtils.java | 10 ++++++ script/env/dolphinscheduler_env.sh | 2 +- 13 files changed, 64 insertions(+), 50 deletions(-) diff --git a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh index 79a45dd7cf1b..d7c35a5e0e5f 100755 --- a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh @@ -45,7 +45,7 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH -# applicationId auto collection related configuration +# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS diff --git a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh index e3fe21fb452a..0c451835e8a0 100644 --- a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh @@ -45,7 +45,7 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export 
PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH -# applicationId auto collection related configuration +# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index 08653455c7f8..29d008f34bbb 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -355,7 +355,7 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH -# applicationId auto collection related configuration +# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 89e6760b3856..90f5118455ed 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -347,7 +347,7 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH -# applicationId auto collection related configuration +# applicationId auto collection 
related configuration, the following configurations are unnecessary if setting appId.collect=log export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index c6f427cd4e4a..88e038c4e490 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -28,38 +28,45 @@ ${project.artifactId} aop 4 YarnClient to get application id when submitting jars using 'yarn jar mainClass args' - - UTF-8 - 1.8 - 1.8 - 1.9.7 - 3.2.4 - + + + + + + + + + + + + org.apache.dolphinscheduler + dolphinscheduler-bom + ${project.version} + pom + import + + + org.aspectj aspectjweaver - ${aspectj.version} - runtime org.aspectj aspectjrt - ${aspectj.version} org.apache.hadoop hadoop-yarn-client - ${hadoop.version} org.apache.hadoop hadoop-common - ${hadoop.version} diff --git a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java index b269604aaa7a..01c36ced3aab 100644 --- a/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java +++ b/dolphinscheduler-aop/src/main/java/org/apache/dolphinscheduler/aop/YarnClientAspect.java @@ -41,13 +41,11 @@ public class YarnClientAspect { private ApplicationReport currentApplicationReport = null; private final String appInfoFilePath; - private boolean debug; protected final Logger logger = LoggerFactory.getLogger(getClass()); public YarnClientAspect() { appInfoFilePath = String.format("%s/%s", System.getProperty("user.dir"), "appInfo.log"); - } /** @@ -68,14 +66,15 @@ public void registerApplicationInfo(ApplicationSubmissionContext appContext, App StandardOpenOption.WRITE, StandardOpenOption.APPEND); } 
catch (IOException ioException) { - logger.error("YarnClientAspect[registerAppInfo]: can't output current application information, because " - + ioException.getMessage()); + logger.error( + "YarnClientAspect[registerAppInfo]: can't output current application information, because {}", + ioException.getMessage()); } } - logger.info("YarnClientAspect[submitApplication]: current application context " + appContext); - logger.info("YarnClientAspect[submitApplication]: submitted application id " + submittedAppId); - logger.info( - "YarnClientAspect[submitApplication]: current application report " + currentApplicationReport); + logger.info("YarnClientAspect[submitApplication]: current application context {}", appContext); + logger.info("YarnClientAspect[submitApplication]: submitted application id {}", submittedAppId); + logger.info( + "YarnClientAspect[submitApplication]: current application report {}", currentApplicationReport); } /** diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java index d1519269c031..765f42628e69 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java @@ -71,8 +71,7 @@ public void testMoc() { Assertions.assertTrue(stdoutContent.contains("YarnClientAspectMoc[createAppId]:"), "trigger YarnClientAspectMoc.createAppId failed"); } catch (YarnException | IOException e) { - logger.error("test YarnClientAspectMoc failed: " + e.getMessage()); - e.printStackTrace(); + logger.error("test YarnClientAspectMoc failed", e); } } } diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java index 7977d8bc930b..96b155e7d159 100644 --- 
a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/poc/YarnClientAspectMoc.java @@ -34,7 +34,8 @@ public class YarnClientAspectMoc { @AfterReturning(pointcut = "execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext)) && args(appContext)", returning = "submittedAppId", argNames = "appContext") public void submitApplication(ApplicationSubmissionContext appContext, ApplicationId submittedAppId) { - logger.info("YarnClientAspectMoc[submitApplication]: app context: {}, submittedAppId: {}, privateId: {}", appContext, submittedAppId, privateId); + logger.info("YarnClientAspectMoc[submitApplication]: app context: {}, submittedAppId: {}, privateId: {}", + appContext, submittedAppId, privateId); } @AfterReturning(pointcut = "cflow(execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.submitApplication(ApplicationSubmissionContext))) " @@ -42,6 +43,6 @@ public void submitApplication(ApplicationSubmissionContext appContext, Applicati "&& !within(CfowAspect) && execution(ApplicationId org.apache.dolphinscheduler.poc.YarnClientMoc.createAppId())", returning = "submittedAppId") public void createAppId(ApplicationId submittedAppId) { privateId = submittedAppId; - logger.info("YarnClientAspectMoc[createAppId]: created submittedAppId " + submittedAppId); + logger.info("YarnClientAspectMoc[createAppId]: created submittedAppId {}", submittedAppId); } } diff --git a/dolphinscheduler-bom/pom.xml b/dolphinscheduler-bom/pom.xml index 337312e0dd9a..915bd09c9b31 100644 --- a/dolphinscheduler-bom/pom.xml +++ b/dolphinscheduler-bom/pom.xml @@ -98,10 +98,24 @@ 4.1.1 2.17.282 1.6.9 + 1.9.7 + + + org.aspectj + aspectjweaver + ${aspectj.version} + runtime + + + org.aspectj + aspectjrt + ${aspectj.version} + + io.netty @@ -461,7 +475,11 @@ hadoop-yarn-common ${hadoop.version} - + + org.apache.hadoop + 
hadoop-yarn-client + ${hadoop.version} + org.apache.htrace htrace-core4 diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java index d0504003dfaa..684cb10f9108 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/builder/TaskExecutionContextBuilder.java @@ -21,7 +21,6 @@ import org.apache.dolphinscheduler.common.enums.TimeoutFlag; import org.apache.dolphinscheduler.common.utils.DateUtils; -import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.dao.entity.ProcessDefinition; import org.apache.dolphinscheduler.dao.entity.ProcessInstance; import org.apache.dolphinscheduler.dao.entity.TaskDefinition; @@ -127,24 +126,6 @@ public TaskExecutionContextBuilder buildProcessDefinitionRelatedInfo(ProcessDefi return this; } - /** - * build execPath related info - * - * @return TaskExecutionContextBuilder - */ - public TaskExecutionContextBuilder buildExecPathRelatedInfo() { - String execPath = FileUtils.getProcessExecDir( - taskExecutionContext.getTenantCode(), - taskExecutionContext.getProjectCode(), - taskExecutionContext.getProcessDefineCode(), - taskExecutionContext.getProcessDefineVersion(), - taskExecutionContext.getProcessInstanceId(), - taskExecutionContext.getTaskInstanceId()); - taskExecutionContext.setExecutePath(execPath); - taskExecutionContext.setAppInfoPath(FileUtils.getAppInfoPath(execPath)); - return this; - } - public TaskExecutionContextBuilder buildDataQualityTaskExecutionContext(DataQualityTaskExecutionContext dataQualityTaskExecutionContext) { taskExecutionContext.setDataQualityTaskExecutionContext(dataQualityTaskExecutionContext); return this; diff --git 
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java index b9a566b391cf..029ae7471a17 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/BaseTaskProcessor.java @@ -340,7 +340,6 @@ protected TaskExecutionContext getTaskExecutionContext(TaskInstance taskInstance .buildTaskDefinitionRelatedInfo(taskInstance.getTaskDefine()) .buildProcessInstanceRelatedInfo(taskInstance.getProcessInstance()) .buildProcessDefinitionRelatedInfo(taskInstance.getProcessDefine()) - .buildExecPathRelatedInfo() .buildResourceParametersInfo(resources) .buildDataQualityTaskExecutionContext(dataQualityTaskExecutionContext) .buildK8sTaskRelatedInfo(k8sTaskExecutionContext) diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionCheckerUtils.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionCheckerUtils.java index 4f9cc1b18541..765b5b990207 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionCheckerUtils.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionCheckerUtils.java @@ -18,6 +18,7 @@ package org.apache.dolphinscheduler.server.worker.utils; import org.apache.dolphinscheduler.common.exception.StorageOperateNoConfiguredException; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.OSUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.task.api.TaskException; @@ -79,6 +80,15 @@ public static void 
checkTenantExist(WorkerConfig workerConfig, TaskExecutionCont public static void createProcessLocalPathIfAbsent(TaskExecutionContext taskExecutionContext) throws TaskException { try { // local execute path + String execLocalPath = FileUtils.getProcessExecDir( + taskExecutionContext.getTenantCode(), + taskExecutionContext.getProjectCode(), + taskExecutionContext.getProcessDefineCode(), + taskExecutionContext.getProcessDefineVersion(), + taskExecutionContext.getProcessInstanceId(), + taskExecutionContext.getTaskInstanceId()); + taskExecutionContext.setExecutePath(execLocalPath); + taskExecutionContext.setAppInfoPath(FileUtils.getAppInfoPath(execLocalPath)); createDirectoryWithOwner(Paths.get(taskExecutionContext.getExecutePath()), taskExecutionContext.getTenantCode()); } catch (Throwable ex) { diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh index 20f5bedecf9e..ad68c2d9c65c 100755 --- a/script/env/dolphinscheduler_env.sh +++ b/script/env/dolphinscheduler_env.sh @@ -34,7 +34,7 @@ export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH -# applicationId auto collection related configuration +# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS From 9b70033720257e402b0bac174cb4a48dae51d0f5 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Thu, 27 Oct 2022 20:58:21 +0800 Subject: [PATCH 28/32] delete dependency scope label --- dolphinscheduler-bom/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dolphinscheduler-bom/pom.xml 
b/dolphinscheduler-bom/pom.xml index 915bd09c9b31..668a9b0ada8b 100644 --- a/dolphinscheduler-bom/pom.xml +++ b/dolphinscheduler-bom/pom.xml @@ -108,7 +108,6 @@ org.aspectj aspectjweaver ${aspectj.version} - runtime org.aspectj From f2282e4728e88ed6fbc28e37517ad56f2eea9056 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Fri, 28 Oct 2022 09:49:21 +0800 Subject: [PATCH 29/32] improve code quality --- dolphinscheduler-aop/pom.xml | 8 -------- .../apache/dolphinscheduler/YarnClientAspectMocTest.java | 4 ++-- .../dolphinscheduler/plugin/task/api/utils/LogUtils.java | 1 + 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/dolphinscheduler-aop/pom.xml b/dolphinscheduler-aop/pom.xml index 88e038c4e490..4f3b0a594b81 100644 --- a/dolphinscheduler-aop/pom.xml +++ b/dolphinscheduler-aop/pom.xml @@ -28,14 +28,6 @@ ${project.artifactId} aop 4 YarnClient to get application id when submitting jars using 'yarn jar mainClass args' - - - - - - - - diff --git a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java index 765f42628e69..165046e2829b 100644 --- a/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java +++ b/dolphinscheduler-aop/src/test/java/org/apache/dolphinscheduler/YarnClientAspectMocTest.java @@ -35,7 +35,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class YarnClientAspectMocTest { +class YarnClientAspectMocTest { protected final Logger logger = LoggerFactory.getLogger(getClass()); @@ -54,7 +54,7 @@ public void afterEveryTest() throws IOException { } @Test - public void testMoc() { + void testMoc() { YarnClientMoc moc = new YarnClientMoc(); try { ApplicationSubmissionContext appContext = ApplicationSubmissionContext.newInstance( diff --git 
a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java index eaec86d3911d..8c33acd4eaea 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java @@ -62,6 +62,7 @@ public List getAppIdsFromAppInfoFile(@NonNull String appInfoPath, Logger } List appIds = new ArrayList<>(); try (Stream stream = Files.lines(Paths.get(appInfoPath))) { + stream.forEach(appIds::add); stream.forEach(line -> { appIds.add(line); }); From 5c6f72b8d6ee71863bc996cb3ad059abfb10d313 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Fri, 28 Oct 2022 10:27:57 +0800 Subject: [PATCH 30/32] remove useless code --- .../dolphinscheduler/plugin/task/api/utils/LogUtils.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java index 8c33acd4eaea..4954c4e7cce6 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java @@ -63,9 +63,6 @@ public List getAppIdsFromAppInfoFile(@NonNull String appInfoPath, Logger List appIds = new ArrayList<>(); try (Stream stream = Files.lines(Paths.get(appInfoPath))) { stream.forEach(appIds::add); - stream.forEach(line -> { - appIds.add(line); - }); return new 
ArrayList<>(appIds); } catch (IOException e) { logger.error("Get appId from appInfo file error, appInfoPath: {}", appInfoPath, e); From 7e0e22646b81a249d89759714dd4b23909a630ee Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Fri, 28 Oct 2022 14:50:37 +0800 Subject: [PATCH 31/32] replace switch statement --- .../plugin/task/api/utils/LogUtils.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java index 4954c4e7cce6..36251fe9e631 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/LogUtils.java @@ -19,6 +19,8 @@ import org.apache.dolphinscheduler.plugin.task.api.TaskConstants; +import org.apache.commons.lang3.StringUtils; + import java.io.File; import java.io.IOException; import java.nio.file.Files; @@ -45,13 +47,12 @@ public class LogUtils { private static final Pattern APPLICATION_REGEX = Pattern.compile(TaskConstants.YARN_APPLICATION_REGEX); public List getAppIds(@NonNull String logPath, @NonNull String appInfoPath, String fetchWay) { - switch (fetchWay) { - case "aop": - log.info("Start finding appId in {}, fetch way: {} ", appInfoPath); - return getAppIdsFromAppInfoFile(appInfoPath, log); - default: - log.info("Start finding appId in {}, fetch way: {} ", logPath); - return getAppIdsFromLogFile(logPath, log); + if (!StringUtils.isEmpty(fetchWay) && fetchWay.equals("aop")) { + log.info("Start finding appId in {}, fetch way: {} ", appInfoPath); + return getAppIdsFromAppInfoFile(appInfoPath, log); + } else { + log.info("Start finding appId in {}, fetch way: 
{} ", logPath); + return getAppIdsFromLogFile(logPath, log); } } From 9ca85c3899717c6a041d636ecb0d7ea576992863 Mon Sep 17 00:00:00 2001 From: Aaron Wang Date: Mon, 31 Oct 2022 12:58:10 +0800 Subject: [PATCH 32/32] add docs --- .../cluster-test/mysql/dolphinscheduler_env.sh | 10 +++++----- .../cluster-test/postgresql/dolphinscheduler_env.sh | 10 +++++----- docs/docs/en/architecture/configuration.md | 2 +- docs/docs/zh/architecture/configuration.md | 2 +- script/env/dolphinscheduler_env.sh | 10 +++++----- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh index d7c35a5e0e5f..3727538d5e12 100755 --- a/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh @@ -46,8 +46,8 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log -export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* -export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH -export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS -export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS -export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS +#export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* +#export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH +#export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS +#export 
SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS +#export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS diff --git a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh index 0c451835e8a0..63437283b600 100644 --- a/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh @@ -46,8 +46,8 @@ export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log -export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* -export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH -export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS -export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS -export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS +#export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* +#export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH +#export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS +#export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS +#export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS diff --git a/docs/docs/en/architecture/configuration.md 
b/docs/docs/en/architecture/configuration.md index 29d008f34bbb..4e38ec05e3eb 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -224,7 +224,7 @@ The default configuration is as follows: |sudo.enable | true | whether to enable sudo| |alert.rpc.port | 50052 | the RPC port of Alert Server| |zeppelin.rest.url | http://localhost:8080 | the RESTful API url of zeppelin| -|appId.collect | log | way to collect applicationId, if use aop, alter the configuration from log to aop. Note: Aop way doesn't support submitting yarn job on remote host by client mode like Beeline, and will failure if override applicationId collection-related environment configuration in dolphinscheduler_env.sh, and .| +|appId.collect | log | way to collect applicationId. To use aop, change this value from log to aop and uncomment the applicationId auto collection related configuration in `bin/env/dolphinscheduler_env.sh`. Note: the aop way doesn't support submitting yarn jobs on a remote host in client mode (e.g. via Beeline), and it will fail if the user environment overrides the applicationId collection-related environment configuration in dolphinscheduler_env.sh.| ### Api-server related configuration diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 90f5118455ed..6efc7354241d 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -221,7 +221,7 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId |sudo.enable | true | 是否开启sudo| |alert.rpc.port | 50052 | Alert Server的RPC端口| |zeppelin.rest.url | http://localhost:8080 | zeppelin RESTful API 接口地址| -|appId.collect | log | 收集applicationId方式, 如果用aop方法,将配置log替换为aop,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效| +|appId.collect | log | 收集applicationId方式, 
如果用aop方法,将配置log替换为aop,并将`bin/env/dolphinscheduler_env.sh`自动收集applicationId相关环境变量配置的注释取消掉,注意:aop不支持远程主机提交yarn作业的方式比如Beeline客户端提交,且如果用户环境覆盖了dolphinscheduler_env.sh收集applicationId相关环境变量配置,aop方法会失效| ## Api-server相关配置 diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh index ad68c2d9c65c..4c4405437c1a 100755 --- a/script/env/dolphinscheduler_env.sh +++ b/script/env/dolphinscheduler_env.sh @@ -35,8 +35,8 @@ export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun} export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log -export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* -export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH -export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS -export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS -export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS \ No newline at end of file +#export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* +#export SPARK_DIST_CLASSPATH=$HADOOP_CLASSPATH:$SPARK_DIST_CLASS_PATH +#export HADOOP_CLIENT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$HADOOP_CLIENT_OPTS +#export SPARK_SUBMIT_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$SPARK_SUBMIT_OPTS +#export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspectjweaver-1.9.7.jar":$FLINK_ENV_JAVA_OPTS \ No newline at end of file