Skip to content

Commit

Permalink
initial kernel structure
Browse files Browse the repository at this point in the history
  • Loading branch information
allisonport-db committed May 23, 2023
1 parent f052bbb commit 7f5f138
Show file tree
Hide file tree
Showing 9 changed files with 1,136 additions and 0 deletions.
3 changes: 3 additions & 0 deletions kernel/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Delta Kernel

[TODO] - For now refer to [delta-io/delta#1783](https://github.com/delta-io/delta/issues/1783) for further information.
201 changes: 201 additions & 0 deletions kernel/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
/*
* Copyright (2021) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import ReleaseTransformations._

// The one Scala version referenced by this build; commonSettings reuses it for the sub-projects.
val scala212 = "2.12.15"
// Bare top-level setting (outside any project's .settings block).
scalaVersion := scala212

// Settings shared by kernelApi and kernelDefault.
lazy val commonSettings = Seq(
  organization := "io.delta",
  scalaVersion := scala212,
  fork := true,

  // Compiler flags: Java 8 bytecode for scalac, Java 8 source level for javac.
  scalacOptions ++= Seq("-target:jvm-1.8", "-Ywarn-unused:imports"),
  javacOptions ++= Seq("-source", "1.8"),
  // -target cannot be passed as a parameter to javadoc, so it is scoped to the compile task only.
  // See https://github.com/sbt/sbt/issues/355
  Compile / compile / javacOptions ++= Seq("-target", "1.8", "-Xlint:unchecked"),

  // Configurations to speed up tests and reduce memory footprint
  Test / javaOptions += "-Xmx1024m"
)

// TODO javastyle checkstyle tests
// TODO unidoc/javadoc settings

// The API module, built from the kernel-api directory. It declares no library dependencies yet.
lazy val kernelApi = project
  .in(file("kernel-api"))
  .settings(
    name := "delta-kernel-api",
    commonSettings,
    releaseSettings,
    scalaStyleSettings,
    libraryDependencies ++= Seq()
  )

// Dependency versions referenced by kernelDefault's libraryDependencies.
val hadoopVersion = "3.3.1"
val deltaStorageVersion = "2.2.0"
val scalaTestVersion = "3.2.15"
// delta-core (test scope) is pinned to the same version as delta-storage.
val deltaSparkVersion = deltaStorageVersion
val sparkVersion = "3.3.2"

// The default implementation module, built from the kernel-default directory on top of kernelApi.
lazy val kernelDefault = project
  .in(file("kernel-default"))
  .dependsOn(kernelApi)
  .settings(
    name := "delta-kernel-default",
    commonSettings,
    releaseSettings,
    scalaStyleSettings,
    libraryDependencies ++= Seq(
      // Compile-scope dependencies
      "org.apache.hadoop" % "hadoop-client-api" % hadoopVersion, // Configuration, Path
      "io.delta" % "delta-storage" % deltaStorageVersion, // LogStore
      "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5", // ObjectMapper
      "org.apache.parquet" % "parquet-hadoop" % "1.12.3",

      // Test-scope dependencies; the "tests" classifier pulls in Spark's test jars
      "org.scalatest" %% "scalatest" % scalaTestVersion % Test,
      "io.delta" %% "delta-core" % deltaSparkVersion % Test,
      "org.apache.spark" %% "spark-sql" % sparkVersion % Test, // SparkSession
      "org.apache.spark" %% "spark-sql" % sparkVersion % Test classifier "tests",
      "org.apache.spark" %% "spark-core" % sparkVersion % Test classifier "tests",
      "org.apache.spark" %% "spark-catalyst" % sparkVersion % Test classifier "tests",
      "junit" % "junit" % "4.11" % Test,
      "com.novocode" % "junit-interface" % "0.11" % Test
    )
  )

/*
***********************
* ScalaStyle settings *
***********************
*/
// Build-wide location of the scalastyle rules file, expressed relative to `baseDirectory`.
ThisBuild / scalastyleConfig := baseDirectory.value / "scalastyle-config.xml"

// Not used since scala is test-only
// NOTE(review): both keys are nevertheless bound and wired into compile/test by
// scalaStyleSettings below - confirm whether the remark above is still accurate.
lazy val compileScalastyle = taskKey[Unit]("compileScalastyle")
lazy val testScalastyle = taskKey[Unit]("testScalastyle")

// Runs scalastyle ahead of compilation (main sources) and ahead of the tests (test sources).
lazy val scalaStyleSettings = Seq(
  // Main sources: bind the input task with no arguments, then make `compile` depend on it.
  compileScalastyle := (Compile / scalastyle).toTask("").value,
  Compile / compile := (Compile / compile).dependsOn(compileScalastyle).value,

  // Test sources: same wiring for `test`.
  testScalastyle := (Test / scalastyle).toTask("").value,
  Test / test := (Test / test).dependsOn(testScalastyle).value
)

/*
********************
* Release settings *
********************
*/

// It looks like some of the release settings need to be set for the root project as well.
// Bare settings below are not part of any sub-project's .settings block.
publishArtifact := false // Don't release the root project
publish / skip := true
// A repository is still configured here even though publishing is skipped above.
publishTo := Some("snapshots" at "https://oss.sonatype.org/content/repositories/snapshots")
releaseCrossBuild := false
// Ordered release steps: verify, test, set/commit/tag the release version, publish signed
// artifacts, then set and commit the next development version.
releaseProcess := Seq[ReleaseStep](
checkSnapshotDependencies,
inquireVersions,
runTest,
setReleaseVersion,
commitReleaseVersion,
tagRelease,
// The leading "+" is sbt's cross-build command prefix.
releaseStepCommandAndRemaining("+publishSigned"),
setNextVersion,
commitNextVersion
)

/**
 * Publishing tweaks for modules whose sources are Java only: no Scala suffix in the artifact
 * name, a single published jar, and no scala-library entry in the pom.
 */
lazy val javaOnlyReleaseSettings = Seq(
  // drop off Scala suffix from artifact names
  crossPaths := false,

  // we publish jars for each scalaVersion in crossScalaVersions. however, we only need to publish
  // one java jar. thus, only do so when the current scala version == default scala version
  publishArtifact := scalaBinaryVersion.value == "2.12",

  // exclude scala-library from dependencies in generated pom.xml
  autoScalaLibrary := false
)

// Publishing and signing settings applied to each released sub-project.
// javaOnlyReleaseSettings is appended at the end, so its `publishArtifact` setting comes after
// (and therefore takes precedence over) the `publishArtifact := true` given here.
lazy val releaseSettings = Seq(
publishMavenStyle := true,
publishArtifact := true,
Test / publishArtifact := false,
releasePublishArtifactsAction := PgpKeys.publishSigned.value,
releaseCrossBuild := true,
// Passphrase is read from the environment; the setting is None when PGP_PASSPHRASE is not set.
pgpPassphrase := sys.env.get("PGP_PASSPHRASE").map(_.toArray),
sonatypeProfileName := "io.delta", // sonatype account domain name prefix / group ID
// Sonatype credentials come from the environment and fall back to empty strings when unset.
credentials += Credentials(
"Sonatype Nexus Repository Manager",
"oss.sonatype.org",
sys.env.getOrElse("SONATYPE_USERNAME", ""),
sys.env.getOrElse("SONATYPE_PASSWORD", "")
),
// Snapshot versions go to the snapshots repository, everything else to the staging endpoint.
publishTo := {
val nexus = "https://oss.sonatype.org/"
if (isSnapshot.value) {
Some("snapshots" at nexus + "content/repositories/snapshots")
} else {
Some("releases" at nexus + "service/local/staging/deploy/maven2")
}
},
licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0")),
// NOTE(review): the project URL and SCM coordinates below point at delta-io/connectors -
// confirm that this is the intended repository for the kernel artifacts.
pomExtra :=
<url>https://github.com/delta-io/connectors</url>
<scm>
<url>git@github.com:delta-io/connectors.git</url>
<connection>scm:git:git@github.com:delta-io/connectors.git</connection>
</scm>,
) ++ javaOnlyReleaseSettings

/*
********************
* MIMA settings *
********************
*/
/**
 * Computes the version that immediately precedes `currentVersion`.
 *
 * The version is split on "." and the first run of digits of each of the first three components
 * is used, so suffixes such as "-SNAPSHOT" are ignored ("1.2.0-SNAPSHOT" is read as 1.2.0).
 *
 *  - x.0.0 (x >= 1): the last minor release of the previous major line, looked up in
 *    `majorToLastMinorVersions`
 *  - x.y.0 (y >= 1): x.(y-1).0
 *  - x.y.z (z >= 1): x.y.(z-1), including patch releases of a ".0" minor (1.0.1 -> 1.0.0)
 *
 * @param currentVersion version string with at least three dot-separated components
 * @return the previous version, formatted as "major.minor.patch"
 * @throws Exception if the version has fewer than three components, a component contains no
 *                   digits, or the previous major line has no configured last minor version
 */
def getPrevVersion(currentVersion: String): String = {
  // Parses the first run of digits found in a single version component.
  def firstInt(component: String): Int =
    """\d+""".r.findFirstIn(component).map(java.lang.Integer.parseInt).getOrElse {
      throw new Exception(
        s"Could not extract version number from $component in $currentVersion")
    }

  val (major, minor, patch): (Int, Int, Int) =
    currentVersion.split("\\.").toList match {
      case majorStr :: minorStr :: patchStr :: _ =>
        (firstInt(majorStr), firstInt(minorStr), firstInt(patchStr))
      case _ =>
        throw new Exception(s"Could not find previous version for $currentVersion.")
    }

  val majorToLastMinorVersions: Map[Int, Int] = Map(
    // TODO add mapping when required
    // e.g. 0 -> 8
  )

  if (minor == 0 && patch == 0) { // 1.0.0
    // Only the first release of a major line has to look back into the previous major line.
    val prevMinor = majorToLastMinorVersions.getOrElse(major - 1, {
      throw new Exception(s"Last minor version of ${major - 1}.x.x not configured.")
    })
    s"${major - 1}.$prevMinor.0" // 1.0.0 -> 0.8.0
  } else if (patch == 0) {
    s"$major.${minor - 1}.0" // 1.1.0 -> 1.0.0
  } else {
    s"$major.$minor.${patch - 1}" // 1.1.1 -> 1.1.0, 1.0.1 -> 1.0.0
  }
}

// Binary-compatibility (MiMa) wiring: the tests trigger mimaReportBinaryIssues first.
// There are no previous artifacts to compare against yet.
lazy val mimaSettings = Seq(
  Test / test := (Test / test).dependsOn(mimaReportBinaryIssues).value,
  mimaPreviousArtifacts := Set.empty // TODO update this after first release and activate
  // mimaBinaryIssueFilters ++= MimaExcludes.ignoredABIProblems // TODO update when added
)
183 changes: 183 additions & 0 deletions kernel/build/sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# This file contains code from the Apache Spark project (original license above).
# It contains modifications, which are licensed as follows:
#

#
# Copyright (2021) The Delta Lake Project Authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
# that we can run Hive to generate the golden answer. This is not required for normal development
# or testing.
# With HIVE_HOME set, append every entry of $HIVE_HOME/lib to HADOOP_CLASSPATH and export it.
if [[ -n "${HIVE_HOME}" ]]; then
  for i in "${HIVE_HOME}"/lib/*; do
    HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${i}"
  done
  export HADOOP_CLASSPATH
fi

# Print the physical (symlink-free) absolute path of "$1".
#
# Follows at most 100 levels of symlinks on the final path component, changing into each link's
# directory so that relative link targets resolve correctly. Everything runs in a subshell, so
# the caller's working directory is left untouched. All expansions are quoted so that paths and
# link targets containing whitespace are handled.
realpath () {
(
  TARGET_FILE="$1"

  cd "$(dirname "$TARGET_FILE")"
  TARGET_FILE="$(basename "$TARGET_FILE")"

  COUNT=0
  while [ -L "$TARGET_FILE" ] && [ "$COUNT" -lt 100 ]
  do
    TARGET_FILE="$(readlink "$TARGET_FILE")"
    cd "$(dirname "$TARGET_FILE")"
    TARGET_FILE="$(basename "$TARGET_FILE")"
    COUNT=$((COUNT + 1))
  done

  echo "$(pwd -P)/$TARGET_FILE"
)
}

# When JENKINS_URL is non-empty, point sbt at the repositories file shipped next to this script.
if [[ -n "$JENKINS_URL" ]]; then
  # Make Jenkins use Google Mirror first as Maven Central may ban us
  SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories"
  export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG"
fi

# Source sbt-launch-lib.bash from the directory that holds this script (symlinks resolved).
. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash


# Read-only constants.
# JVM system properties applied by the -no-share option (see process_my_args).
declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
# Options file looked up relative to the current directory.
declare -r sbt_opts_file=".sbtopts"
# Options file at a fixed absolute path.
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"

# Print the help text on stdout.
# The here-document delimiter is unquoted, so the $variables and $(...) command substitutions in
# the text are expanded each time the function runs (this includes running `java -version`).
# NOTE(review): script_name, sbt_mem, get_mem_opts, java_opts and default_sbt_opts are not
# defined in this file - presumably they come from sbt-launch-lib.bash; confirm.
usage() {
cat <<EOM
Usage: $script_name [options]
-h | -help print this message
-v | -verbose this runner is chattier
-d | -debug set sbt log level to debug
-no-colors disable ANSI color codes
-sbt-create start sbt even if current directory contains no sbt project
-sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt)
-sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
-ivy <path> path to local Ivy repository (default: ~/.ivy2)
-mem <integer> set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem))
-no-share use all local caches; no sharing
-no-global uses global caches, but does not use global ~/.sbt directory.
-jvm-debug <port> Turn on JVM debugging, open at the given port.
-batch Disable interactive mode
# sbt version (default: from project/build.properties if present, else latest release)
-sbt-version <version> use the specified version of sbt
-sbt-jar <path> use the specified jar as the sbt launcher
-sbt-rc use an RC version of sbt
-sbt-snapshot use a snapshot version of sbt
# java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
-java-home <path> alternate JAVA_HOME
# jvm options and output control
JAVA_OPTS environment variable, if unset uses "$java_opts"
SBT_OPTS environment variable, if unset uses "$default_sbt_opts"
.sbtopts if this file exists in the current directory, it is
prepended to the runner args
/etc/sbt/sbtopts if this file exists, it is prepended to the runner args
-Dkey=val pass -Dkey=val directly to the java runtime
-J-X pass option -X directly to the java runtime
(-J is stripped)
-S-X add -X to sbt's scalacOptions (-S is stripped)
-PmavenProfiles Enable a maven profile for the build.
In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}

# Consume the runner-specific options from the argument list.
#
# Recognised options are turned into JVM system properties through addJava (or, for
# -sbt-create, into the sbt_create flag); every other argument is handed to addResidual
# unchanged. Once all arguments are consumed, a non-empty $sbt_version is forwarded as
# -Dsbt.version.
#
# addJava, addResidual and require_arg are not defined in this file (presumably they come from
# sbt-launch-lib.bash).
process_my_args () {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
      -no-share)
        # noshare_opts holds several space-separated -D flags. Hand them to addJava one at a
        # time (deliberately unquoted, so the value is word-split) so that each flag reaches
        # the JVM as its own argument instead of one argument with embedded spaces.
        local opt
        for opt in $noshare_opts; do
          addJava "$opt"
        done
        shift ;;
      -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
      -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
      -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
      -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
      # Detach stdin so that nothing can be read interactively.
      -batch) exec </dev/null && shift ;;

      -sbt-create) sbt_create=true && shift ;;

      *) addResidual "$1" && shift ;;
    esac
  done

  # Now, ensure sbt version is used.
  [[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"
}

# Print the contents of the options file "$1", dropping every line that starts with '#'.
# sed reads the file itself, so a file that cannot be opened yields a non-zero exit status
# rather than being hidden behind a pipeline.
loadConfigFile() {
  sed '/^\#/d' "$1"
}

# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner
# The command substitutions are intentionally unquoted: each whitespace-separated word of an
# options file becomes its own positional argument.
# The per-directory file is processed second, so its options end up in front of those taken
# from the /etc file, which in turn precede the original command-line arguments.
[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"

# Status that onExit exits with; overwritten with the status of `run` once it returns.
exit_status=127
# Terminal settings captured by saveSttySettings; empty means there is nothing to restore.
saved_stty=""

# Hand the captured terminal settings back to stty, then clear the capture so that a second
# call has nothing left to restore.
restoreSttySettings () {
  stty $saved_stty
  saved_stty=
}

# Final cleanup: restore the terminal if settings were captured, then terminate the script
# with the recorded exit status.
onExit () {
  [[ -z "$saved_stty" ]] || restoreSttySettings
  exit $exit_status
}

# Capture the current terminal settings (`stty -g`) into saved_stty.
# If stty fails - for instance because stdin is not a terminal - saved_stty is reset to the
# empty string so that onExit does not try to restore anything.
saveSttySettings() {
  # Test the command's own exit status; `[[ ! $? ]]` only checks whether the string "$?" is
  # empty, which it never is.
  if ! saved_stty=$(stty -g 2>/dev/null); then
    saved_stty=""
  fi
}

# Capture the terminal state before anything is launched.
saveSttySettings
# On SIGINT, go through onExit so the terminal is restored before the script exits.
trap onExit INT

# `run` is not defined in this file - presumably it is provided by sbt-launch-lib.bash.
run "$@"

# Record the status of `run` so that onExit exits with it.
exit_status=$?
onExit
Loading

0 comments on commit 7f5f138

Please sign in to comment.