diff --git a/kernel/README.md b/kernel/README.md
new file mode 100644
index 00000000000..220d4e26512
--- /dev/null
+++ b/kernel/README.md
@@ -0,0 +1,3 @@
+# Delta Kernel
+
+[TODO] - For now, refer to [delta-io/delta#1783](https://github.com/delta-io/delta/issues/1783) for further information.
diff --git a/kernel/build.sbt b/kernel/build.sbt
new file mode 100644
index 00000000000..67d7956195c
--- /dev/null
+++ b/kernel/build.sbt
@@ -0,0 +1,110 @@
+/*
+ * Copyright (2021) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+val scala212 = "2.12.15"
+scalaVersion := scala212
+
+lazy val commonSettings = Seq(
+  organization := "io.delta",
+  scalaVersion := scala212,
+  fork := true,
+  scalacOptions ++= Seq("-target:jvm-1.8", "-Ywarn-unused:imports"),
+  javacOptions ++= Seq("-source", "1.8"),
+  // -target cannot be passed as a parameter to javadoc. See https://github.com/sbt/sbt/issues/355
+  Compile / compile / javacOptions ++= Seq("-target", "1.8", "-Xlint:unchecked"),
+  // Configurations to speed up tests and reduce memory footprint
+  Test / javaOptions += "-Xmx1024m",
+)
+
+// TODO javastyle checkstyle tests
+// TODO unidoc/javadoc settings
+
+lazy val kernelApi = (project in file("kernel-api"))
+  .settings(
+    name := "delta-kernel-api",
+    commonSettings,
+    scalaStyleSettings,
+    releaseSettings,
+    libraryDependencies ++= Seq()
+  )
+
+val hadoopVersion = "3.3.1"
+val deltaStorageVersion = "2.2.0"
+val scalaTestVersion = "3.2.15"
+val deltaSparkVersion = deltaStorageVersion
+val sparkVersion = "3.3.2"
+
+lazy val kernelDefault = (project in file("kernel-default"))
+  .dependsOn(kernelApi)
+  .settings(
+    name := "delta-kernel-default",
+    commonSettings,
+    scalaStyleSettings,
+    releaseSettings,
+    libraryDependencies ++= Seq(
+      "org.apache.hadoop" % "hadoop-client-api" % hadoopVersion, // Configuration, Path
+      "io.delta" % "delta-storage" % deltaStorageVersion, // LogStore
+      "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5", // ObjectMapper
+      "org.apache.parquet" % "parquet-hadoop" % "1.12.3",
+
+      "org.scalatest" %% "scalatest" % scalaTestVersion % "test",
+      "io.delta" %% "delta-core" % deltaSparkVersion % "test",
+      "org.apache.spark" %% "spark-sql" % sparkVersion % "test", // SparkSession
+      "org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests",
+      "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests",
+      "org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests",
+      "junit" % "junit" % "4.11" % "test",
+      "com.novocode" % "junit-interface" % "0.11" % "test"
+    )
+  )
+
+/*
+ ***********************
+ * ScalaStyle settings *
+ ***********************
+ */
+ThisBuild / scalastyleConfig := baseDirectory.value / "scalastyle-config.xml"
+
+// Not used since Scala is test-only
+lazy val compileScalastyle = taskKey[Unit]("compileScalastyle")
+lazy val testScalastyle = taskKey[Unit]("testScalastyle")
+
+lazy val scalaStyleSettings = Seq(
+  compileScalastyle := (Compile / scalastyle).toTask("").value,
+  Compile / compile := ((Compile / compile) dependsOn compileScalastyle).value,
+  testScalastyle := (Test / scalastyle).toTask("").value,
+  Test / test := ((Test / test) dependsOn testScalastyle).value
+)
+
+/*
+ ********************
+ * Release settings *
+ ********************
+ */
+
+// Don't release the root project
+publishArtifact := false
+publish / skip := true
+
+lazy val releaseSettings = Seq(
+  // Java only release settings
+  crossPaths := false, // drop off Scala suffix from artifact names
+  autoScalaLibrary := false, // exclude scala-library from dependencies in generated pom.xml
+
+  // Other release settings
+  publishArtifact := true,
+  Test / publishArtifact := false
+)
diff --git a/kernel/build/sbt b/kernel/build/sbt
new file mode 100755
index 00000000000..044a2929bde
--- /dev/null
+++ b/kernel/build/sbt
@@ -0,0 +1,183 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# This file contains code from the Apache Spark project (original license above).
+# It contains modifications, which are licensed as follows:
+#
+
+#
+# Copyright (2021) The Delta Lake Project Authors.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
+# that we can run Hive to generate the golden answer. This is not required for normal development
+# or testing.
+if [ -n "$HIVE_HOME" ]; then
+  for i in "$HIVE_HOME"/lib/*
+  do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i"
+  done
+  export HADOOP_CLASSPATH
+fi
+
+realpath () {
+(
+  TARGET_FILE="$1"
+
+  cd "$(dirname "$TARGET_FILE")"
+  TARGET_FILE="$(basename "$TARGET_FILE")"
+
+  COUNT=0
+  while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ]
+  do
+    TARGET_FILE="$(readlink "$TARGET_FILE")"
+    cd $(dirname "$TARGET_FILE")
+    TARGET_FILE="$(basename $TARGET_FILE)"
+    COUNT=$(($COUNT + 1))
+  done
+
+  echo "$(pwd -P)/"$TARGET_FILE""
+)
+}
+
+if [[ "$JENKINS_URL" != "" ]]; then
+  # Make Jenkins use Google Mirror first as Maven Central may ban us
+  SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories"
+  export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG"
+fi
+
+. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash
+
+
+declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
+declare -r sbt_opts_file=".sbtopts"
+declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"
+
+usage() {
+ cat <<EOM
+Usage: $script_name [options]
+
+  -h | -help         print this message
+  -v | -verbose      this runner is chattier
+  -d | -debug        set sbt log level to debug
+  -no-colors         disable ANSI color codes
+  -sbt-dir   <path>  path to global settings/plugins directory (default: ~/.sbt)
+  -sbt-boot  <path>  path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
+  -ivy       <path>  path to local Ivy repository (default: ~/.ivy2)
+  -mem    <integer>  set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem))
+  -no-share          use all local caches; no sharing
+  -no-global         uses global caches, but does not use global ~/.sbt directory.
+  -jvm-debug <port>  Turn on JVM debugging, open at the given port.
+  -batch             Disable interactive mode
+
+  # sbt version (default: from project/build.properties if present, else latest release)
+  -sbt-version  <version>   use the specified version of sbt
+  -sbt-jar      <path>      use the specified jar as the sbt launcher
+  -sbt-rc                   use an RC version of sbt
+  -sbt-snapshot             use a snapshot version of sbt
+
+  # java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
+  -java-home <path>         alternate JAVA_HOME
+
+  # jvm options and output control
+  JAVA_OPTS          environment variable, if unset uses "$java_opts"
+  SBT_OPTS           environment variable, if unset uses "$default_sbt_opts"
+  .sbtopts           if this file exists in the current directory, it is
+                     prepended to the runner args
+  /etc/sbt/sbtopts   if this file exists, it is prepended to the runner args
+  -Dkey=val          pass -Dkey=val directly to the java runtime
+  -J-X               pass option -X directly to the java runtime
+                     (-J is stripped)
+  -S-X               add -X to sbt's scalacOptions (-S is stripped)
+  -PmavenProfiles    Enable a maven profile for the build.
+
+In the case of duplicated or conflicting options, the order above
+shows precedence: JAVA_OPTS lowest, command line options highest.
+EOM
+}
+
+process_my_args () {
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+     -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
+     -no-share) addJava "$noshare_opts" && shift ;;
+     -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
+     -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
+     -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
+     -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
+     -batch) exec </dev/null && shift ;;
+
+     *) addResidual "$1" && shift ;;
+    esac
+  done
+
+  # Now, ensure sbt version is used.
+  [[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"
+}
+
+loadConfigFile() {
+  cat "$1" | sed '/^\#/d;s/\r$//'
+}
+
+# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner
+[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
+[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"
+
+exit_status=127
+saved_stty=""
+
+restoreSttySettings() {
+  stty $saved_stty
+  saved_stty=""
+}
+
+onExit() {
+  if [[ "$saved_stty" != "" ]]; then
+    restoreSttySettings
+  fi
+  exit $exit_status
+}
+
+saveSttySettings() {
+  saved_stty=$(stty -g 2>/dev/null)
+  if [[ ! $? ]]; then
+    saved_stty=""
+  fi
+}
+
+saveSttySettings
+trap onExit INT
+
+run "$@"
+
+exit_status=$?
+onExit
diff --git a/kernel/build/sbt-config/repositories b/kernel/build/sbt-config/repositories
new file mode 100644
index 00000000000..ca4160b22d0
--- /dev/null
+++ b/kernel/build/sbt-config/repositories
@@ -0,0 +1,12 @@
+[repositories]
+  local
+  local-preloaded-ivy: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext]
+  local-preloaded: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}
+  gcs-maven-central-mirror: https://maven-central.storage-download.googleapis.com/repos/central/data/
+  maven-central
+  typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly
+  sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly
+  sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
+  bintray-typesafe-sbt-plugin-releases: https://dl.bintray.com/typesafe/sbt-plugins/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
+  bintray-spark-packages: https://dl.bintray.com/spark-packages/maven/
+  typesafe-releases: https://repo.typesafe.com/typesafe/releases/
diff --git a/kernel/build/sbt-launch-lib.bash b/kernel/build/sbt-launch-lib.bash
new file mode 100755
index 00000000000..0d58bb7269c
--- /dev/null
+++ b/kernel/build/sbt-launch-lib.bash
@@ -0,0 +1,197 @@
+#!/usr/bin/env bash
+#
+
+# A library to simplify using the SBT launcher from other packages.
+# Note: This should be used by tools like giter8/conscript etc.
+
+# TODO - Should we merge the main SBT script with this library?
+
+if test -z "$HOME"; then
+  declare -r script_dir="$(dirname "$script_path")"
+else
+  declare -r script_dir="$HOME/.sbt"
+fi
+
+declare -a residual_args
+declare -a java_args
+declare -a scalac_args
+declare -a sbt_commands
+declare -a maven_profiles
+
+if test -x "$JAVA_HOME/bin/java"; then
+  echo -e "Using $JAVA_HOME as default JAVA_HOME."
+  echo "Note, this will be overridden by -java-home if it is set."
+  declare java_cmd="$JAVA_HOME/bin/java"
+else
+  declare java_cmd=java
+fi
+
+echoerr () {
+  echo 1>&2 "$@"
+}
+vlog () {
+  [[ $verbose || $debug ]] && echoerr "$@"
+}
+dlog () {
+  [[ $debug ]] && echoerr "$@"
+}
+
+acquire_sbt_jar () {
+  SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
+
+  # Download sbt from mirror URL if the environment variable is provided
+  if [[ "${SBT_VERSION}" == "0.13.18" ]] && [[ -n "${SBT_MIRROR_JAR_URL}" ]]; then
+    URL1="${SBT_MIRROR_JAR_URL}"
+  elif [[ "${SBT_VERSION}" == "1.5.5" ]] && [[ -n "${SBT_1_5_5_MIRROR_JAR_URL}" ]]; then
+    URL1="${SBT_1_5_5_MIRROR_JAR_URL}"
+  else
+    URL1=${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar
+  fi
+
+  JAR=build/sbt-launch-${SBT_VERSION}.jar
+  sbt_jar=$JAR
+
+  if [[ ! -f "$sbt_jar" ]]; then
+    # Download sbt launch jar if it hasn't been downloaded yet
+    if [ ! -f "${JAR}" ]; then
+      # Download
+      printf 'Attempting to fetch sbt from %s\n' "${URL1}"
+      JAR_DL="${JAR}.part"
+      if [ $(command -v curl) ]; then
+        curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\
+          mv "${JAR_DL}" "${JAR}"
+      elif [ $(command -v wget) ]; then
+        wget --quiet ${URL1} -O "${JAR_DL}" &&\
+          mv "${JAR_DL}" "${JAR}"
+      else
+        printf "You do not have curl or wget installed, please install sbt manually from https://www.scala-sbt.org/\n"
+        exit -1
+      fi
+    fi
+    if [ ! -f "${JAR}" ]; then
+      # We failed to download
+      printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from https://www.scala-sbt.org/\n"
+      exit -1
+    fi
+    printf "Launching sbt from ${JAR}\n"
+  fi
+}
+
+execRunner () {
+  # print the arguments one to a line, quoting any containing spaces
+  [[ $verbose || $debug ]] && echo "# Executing command line:" && {
+    for arg; do
+      if printf "%s\n" "$arg" | grep -q ' '; then
+        printf "\"%s\"\n" "$arg"
+      else
+        printf "%s\n" "$arg"
+      fi
+    done
+    echo ""
+  }
+
+  "$@"
+}
+
+addJava () {
+  dlog "[addJava] arg = '$1'"
+  java_args=( "${java_args[@]}" "$1" )
+}
+
+enableProfile () {
+  dlog "[enableProfile] arg = '$1'"
+  maven_profiles=( "${maven_profiles[@]}" "$1" )
+  export SBT_MAVEN_PROFILES="${maven_profiles[@]}"
+}
+
+addSbt () {
+  dlog "[addSbt] arg = '$1'"
+  sbt_commands=( "${sbt_commands[@]}" "$1" )
+}
+addResidual () {
+  dlog "[residual] arg = '$1'"
+  residual_args=( "${residual_args[@]}" "$1" )
+}
+addDebugger () {
+  addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1"
+}
+
+# a ham-fisted attempt to move some memory settings in concert
+# so they need not be dicked around with individually.
+get_mem_opts () {
+  local mem=${1:-1000}
+  local perm=$(( $mem / 4 ))
+  (( $perm > 256 )) || perm=256
+  (( $perm < 4096 )) || perm=4096
+  local codecache=$(( $perm / 2 ))
+
+  echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m"
+}
+
+require_arg () {
+  local type="$1"
+  local opt="$2"
+  local arg="$3"
+  if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then
+    echo "$opt requires <$type> argument" 1>&2
+    exit 1
+  fi
+}
+
+is_function_defined() {
+  declare -f "$1" > /dev/null
+}
+
+process_args () {
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+     -h|-help) usage; exit 1 ;;
+     -v|-verbose) verbose=1 && shift ;;
+     -d|-debug) debug=1 && shift ;;
+
+     -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
+     -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;;
+     -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;;
+     -batch) exec </dev/null && shift ;;
+
+     -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
+     -sbt-version) require_arg version "$1" "$2" && sbt_version="$2" && shift 2 ;;
+     -java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && export JAVA_HOME="$2" && shift 2 ;;
+
+     -D*) addJava "$1" && shift ;;
+     -J*) addJava "${1:2}" && shift ;;
+     -P*) enableProfile "$1" && shift ;;
+     *) addResidual "$1" && shift ;;
+    esac
+  done
+
+  is_function_defined process_my_args && {
+    myargs=("${residual_args[@]}")
+    residual_args=()
+    process_my_args "${myargs[@]}"
+  }
+}
+
+run() {
+  # no jar? download it.
+  [[ -f "$sbt_jar" ]] || acquire_sbt_jar || {
+    # still no jar? uh-oh.
+    echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"
+    exit 1
+  }
+
+  # process the combined args, then process in sbt style
+  process_args "$@"
+
+  # run sbt
+  execRunner "$java_cmd" \
+    $(get_mem_opts $sbt_mem) \
+    ${SBT_OPTS:-$default_sbt_opts} \
+    ${java_args[@]} \
+    -jar "$sbt_jar" \
+    "${sbt_commands[@]}" \
+    "${residual_args[@]}"
+}
diff --git a/kernel/scalastyle-config.xml b/kernel/scalastyle-config.xml
new file mode 100644
--- /dev/null
+++ b/kernel/scalastyle-config.xml
+<scalastyle>
+  <name>Scalastyle standard configuration</name>
+
+  <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
+
+  <check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
+    <parameters>
+      <parameter name="maxLineLength"><![CDATA[100]]></parameter>
+      <parameter name="tabSize"><![CDATA[2]]></parameter>
+      <parameter name="ignoreImports">true</parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
+    <parameters>
+      <parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+    </parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
+    <parameters>
+      <parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
+    </parameters>
+  </check>
+
+  <check customId="funsuite" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">^FunSuite[A-Za-z]*$</parameter></parameters>
+    <customMessage>Tests must extend org.apache.spark.SparkFunSuite instead.</customMessage>
+  </check>
+
+  <check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">^println$</parameter></parameters>
+  </check>
+
+  <check customId="hadoopconfiguration" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">spark(.sqlContext)?.sparkContext.hadoopConfiguration</parameter></parameters>
+  </check>
+
+  <check customId="deltahadoopconfiguration" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">sessionState.newHadoopConf</parameter></parameters>
+  </check>
+
+  <check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
+  </check>
+
+  <check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
+  </check>
+
+  <check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
+  </check>
+
+  <check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Class\.forName</parameter></parameters>
+  </check>
+
+  <check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Await\.result</parameter></parameters>
+  </check>
+
+  <check customId="awaitready" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">Await\.ready</parameter></parameters>
+  </check>
+
+  <check customId="caselocale" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">(\.toUpperCase|\.toLowerCase)(?!(\(|\(Locale.ROOT\)))</parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">typed[lL]it</parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">spark(Session)?.implicits._</parameter></parameters>
+  </check>
+
+  <check customId="throwerror" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">throw new \w+Error\(</parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">count\("</parameter></parameters>
+  </check>
+
+  <check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">JavaConversions</parameter></parameters>
+    <customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
+    scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
+  </check>
+
+  <check customId="commonslang2" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
+    <customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
+    of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
+  </check>
+
+  <check customId="extractopt" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
+    <parameters><parameter name="regex">extractOpt</parameter></parameters>
+    <customMessage>Use jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter
+    is slower.</customMessage>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
+    <parameters>
+      <parameter name="tokens">COMMA</parameter>
+    </parameters>
+  </check>
+
+  <check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">\)\{</parameter></parameters>
+  </check>
+
+  <check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1  [*]</parameter></parameters>
+    <customMessage>Use Javadoc style indentation for multiline comments</customMessage>
+  </check>
+
+  <check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
+    <customMessage>Omit braces in case clauses.</customMessage>
+  </check>
+
+  <!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
+
+  <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
+    <parameters><parameter name="maxFileLength">800</parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
+    <parameters><parameter name="maxTypes">30</parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
+    <parameters><parameter name="maximum">10</parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
+    <parameters><parameter name="maxLength">50</parameter></parameters>
+  </check>
+
+  <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
+    <parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
+  </check>
+
+</scalastyle>
diff --git a/kernel/version.sbt b/kernel/version.sbt
new file mode 100644
index 00000000000..e91862e625a
--- /dev/null
+++ b/kernel/version.sbt
@@ -0,0 +1 @@
+ThisBuild / version := "0.1.0-SNAPSHOT"
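
Usage sketch (an assumption, not spelled out in the diff): the build defines no custom entry points, so the stock sbt workflow should apply. From the kernel/ directory, build/sbt reads project/build.properties (not shown in this diff) to pick an sbt version, downloads the matching sbt-launch jar on first use, and then delegates to sbt. kernelApi and kernelDefault are the project IDs defined in build.sbt, and the scalastyle hooks in scalaStyleSettings assume the scalastyle sbt plugin is declared under the (not shown) project/ directory.

    # fetch the sbt launcher if needed, then compile and test both modules
    build/sbt compile test

    # target a single module, e.g. only the API project
    build/sbt kernelApi/compile kernelApi/test

Because scalaStyleSettings makes Compile / compile depend on compileScalastyle and Test / test depend on testScalastyle, style checks run automatically with the commands above rather than needing a separate invocation.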