-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Create initial structure for Delta Kernel development #1785
Closed
Closed
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Delta Kernel | ||
|
||
[TODO] - For now refer to [delta-io/delta#1783](https://github.com/delta-io/delta/issues/1783) for further information.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
// Single source of truth for the Scala version; referenced by commonSettings below.
val scala212 = "2.12.15"
scalaVersion := scala212
|
||
// Settings shared by every kernel module.
lazy val commonSettings = Seq(
  organization := "io.delta",
  scalaVersion := scala212,
  // Fork a separate JVM so the Test / javaOptions below actually apply.
  fork := true,
  scalacOptions += "-target:jvm-1.8",
  scalacOptions += "-Ywarn-unused:imports",
  javacOptions ++= Seq("-source", "1.8"),
  // -target cannot be passed as a parameter to javadoc, so scope it to
  // compilation only. See https://github.com/sbt/sbt/issues/355
  Compile / compile / javacOptions ++= Seq("-target", "1.8", "-Xlint:unchecked"),
  // Cap the test JVM heap to speed up tests and reduce memory footprint.
  Test / javaOptions += "-Xmx1024m"
)
|
||
// TODO javastyle checkstyle tests | ||
// TODO unidoc/javadoc settings | ||
|
||
// The kernel API module. Note: it declares no library dependencies at all
// (the original `libraryDependencies ++= Seq()` was a no-op and is removed).
lazy val kernelApi = (project in file("kernel-api"))
  .settings(
    name := "delta-kernel-api",
    commonSettings,
    scalaStyleSettings,
    releaseSettings
  )
|
||
// Pinned versions for the default kernel implementation's dependencies.
val hadoopVersion = "3.3.1"
val deltaStorageVersion = "2.2.0"
val scalaTestVersion = "3.2.15"
// delta-core (used only in tests) is pinned to the same version as delta-storage.
val deltaSparkVersion = deltaStorageVersion
val sparkVersion = "3.3.2"
|
||
// Default implementation of the kernel API, built on Hadoop, delta-storage,
// Jackson and Parquet; Spark and delta-core appear only in the test scope.
lazy val kernelDefault = (project in file("kernel-default"))
  .dependsOn(kernelApi)
  .settings(
    name := "delta-kernel-default",
    commonSettings,
    scalaStyleSettings,
    releaseSettings,
    // Compile-scope dependencies.
    libraryDependencies ++= Seq(
      "org.apache.hadoop" % "hadoop-client-api" % hadoopVersion, // Configuration, Path
      "io.delta" % "delta-storage" % deltaStorageVersion, // LogStore
      "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5", // ObjectMapper
      "org.apache.parquet" % "parquet-hadoop" % "1.12.3"
    ),
    // Test-scope dependencies, including Spark test jars.
    libraryDependencies ++= Seq(
      "org.scalatest" %% "scalatest" % scalaTestVersion % "test",
      "io.delta" %% "delta-core" % deltaSparkVersion % "test",
      "org.apache.spark" %% "spark-sql" % sparkVersion % "test", // SparkSession
      "org.apache.spark" %% "spark-sql" % sparkVersion % "test" classifier "tests",
      "org.apache.spark" %% "spark-core" % sparkVersion % "test" classifier "tests",
      "org.apache.spark" %% "spark-catalyst" % sparkVersion % "test" classifier "tests",
      "junit" % "junit" % "4.11" % "test",
      "com.novocode" % "junit-interface" % "0.11" % "test"
    )
  )
|
||
/* | ||
*********************** | ||
* ScalaStyle settings * | ||
*********************** | ||
*/ | ||
// One shared ScalaStyle configuration at the repository root for all modules.
ThisBuild / scalastyleConfig := baseDirectory.value / "scalastyle-config.xml"

// compileScalastyle is not exercised in practice since Scala sources are test-only,
// but both tasks are declared so scalaStyleSettings can wire them uniformly.
lazy val compileScalastyle = taskKey[Unit]("compileScalastyle")
lazy val testScalastyle = taskKey[Unit]("testScalastyle")
|
||
// Wire ScalaStyle into the normal build lifecycle: `compile` first runs the
// Compile-scope style check, and `test` first runs the Test-scope style check.
lazy val scalaStyleSettings = Seq(
  compileScalastyle := (Compile / scalastyle).toTask("").value,
  Compile / compile := ((Compile / compile) dependsOn compileScalastyle).value,
  testScalastyle := (Test / scalastyle).toTask("").value,
  Test / test := ((Test / test) dependsOn testScalastyle).value
)
|
||
/* | ||
******************** | ||
* Release settings * | ||
******************** | ||
*/ | ||
|
||
// Don't release the root project; only the sub-modules publish artifacts.
publishArtifact := false
publish / skip := true
|
||
// Release/publish settings shared by the publishable modules.
lazy val releaseSettings = Seq(
  // Java only release settings
  crossPaths := false, // drop off Scala suffix from artifact names
  autoScalaLibrary := false, // exclude scala-library from dependencies in generated pom.xml

  // Other release settings
  publishArtifact := true,
  Test / publishArtifact := false
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
#!/usr/bin/env bash | ||
|
||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one or more | ||
# contributor license agreements. See the NOTICE file distributed with | ||
# this work for additional information regarding copyright ownership. | ||
# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
# (the "License"); you may not use this file except in compliance with | ||
# the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
# | ||
# This file contains code from the Apache Spark project (original license above). | ||
# It contains modifications, which are licensed as follows: | ||
# | ||
|
||
# | ||
# Copyright (2021) The Delta Lake Project Authors. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
|
||
# When creating new tests for Spark SQL Hive, HADOOP_CLASSPATH must contain the
# Hive jars so Hive can be run to generate the golden answers. Not required for
# normal development or testing.
if [ -n "$HIVE_HOME" ]; then
  for jar in "$HIVE_HOME"/lib/*; do
    HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$jar"
  done
  export HADOOP_CLASSPATH
fi
|
||
# Portable realpath(1) replacement: resolves "$1" to an absolute physical path,
# following up to 100 levels of symlinks. Runs in a subshell so the caller's
# working directory is left untouched.
realpath () {
(
  TARGET_FILE="$1"

  cd "$(dirname "$TARGET_FILE")"
  TARGET_FILE="$(basename "$TARGET_FILE")"

  # COUNT caps the symlink-follow depth to guard against cycles.
  COUNT=0
  while [ -L "$TARGET_FILE" ] && [ "$COUNT" -lt 100 ]
  do
    TARGET_FILE="$(readlink "$TARGET_FILE")"
    # Quote every expansion so paths containing spaces or glob characters
    # survive (the original left dirname/basename arguments unquoted).
    cd "$(dirname "$TARGET_FILE")"
    TARGET_FILE="$(basename "$TARGET_FILE")"
    COUNT=$((COUNT + 1))
  done

  # pwd -P yields the physical directory (symlinks resolved).
  echo "$(pwd -P)/$TARGET_FILE"
)
}
|
||
# On Jenkins, force sbt to resolve through the repository list in
# sbt-config/repositories (Google mirror first), since Maven Central may
# throttle or ban CI traffic.
if [[ "$JENKINS_URL" != "" ]]; then
  # Make Jenkins use Google Mirror first as Maven Central may ban us
  SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories"
  export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG"
fi

# Source the shared launcher helpers (run, addJava, addResidual, require_arg, ...)
# that live next to this script.
. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash


# Read-only constants: JVM properties for fully project-local caches, and the
# optional option files whose contents get prepended to this runner's args.
declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r sbt_opts_file=".sbtopts"
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"
|
||
# Print the launcher's help text. Several values shown are computed at call
# time ($script_name, $sbt_mem, get_mem_opts, the java -version probe), so this
# must only be called after sbt-launch-lib.bash has been sourced.
usage() {
 cat <<EOM
Usage: $script_name [options]

 -h | -help print this message
 -v | -verbose this runner is chattier
 -d | -debug set sbt log level to debug
 -no-colors disable ANSI color codes
 -sbt-create start sbt even if current directory contains no sbt project
 -sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt)
 -sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
 -ivy <path> path to local Ivy repository (default: ~/.ivy2)
 -mem <integer> set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem))
 -no-share use all local caches; no sharing
 -no-global uses global caches, but does not use global ~/.sbt directory.
 -jvm-debug <port> Turn on JVM debugging, open at the given port.
 -batch Disable interactive mode

 # sbt version (default: from project/build.properties if present, else latest release)
 -sbt-version <version> use the specified version of sbt
 -sbt-jar <path> use the specified jar as the sbt launcher
 -sbt-rc use an RC version of sbt
 -sbt-snapshot use a snapshot version of sbt

 # java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
 -java-home <path> alternate JAVA_HOME

 # jvm options and output control
 JAVA_OPTS environment variable, if unset uses "$java_opts"
 SBT_OPTS environment variable, if unset uses "$default_sbt_opts"
 .sbtopts if this file exists in the current directory, it is
 prepended to the runner args
 /etc/sbt/sbtopts if this file exists, it is prepended to the runner args
 -Dkey=val pass -Dkey=val directly to the java runtime
 -J-X pass option -X directly to the java runtime
 (-J is stripped)
 -S-X add -X to sbt's scalacOptions (-S is stripped)
 -PmavenProfiles Enable a maven profile for the build.

In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}
|
||
# Launcher hook invoked by sbt-launch-lib.bash: consume the sbt-specific flags
# this script understands, forwarding everything else via addResidual.
process_my_args () {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -batch) exec </dev/null && shift ;;
      -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
      -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
      -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
      -no-share) addJava "$noshare_opts" && shift ;;
      -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
      -sbt-create) sbt_create=true && shift ;;
      -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;

      # Anything unrecognized is passed through to the generic runner.
      *) addResidual "$1" && shift ;;
    esac
  done

  # Now, ensure the requested sbt version (if any) is used.
  [[ -n "$sbt_version" ]] && addJava "-Dsbt.version=$sbt_version"
}
|
||
# Print the contents of "$1" with comment lines (those starting with '#')
# removed, so the remaining tokens can be spliced into the argument list.
loadConfigFile() {
  # Read the file with sed directly instead of a useless `cat |` pipeline.
  # '#' needs no backslash in the regex: a backslash before an ordinary
  # character is undefined behavior in POSIX basic regular expressions.
  sed '/^#/d' "$1"
}
|
||
# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner.
# The command substitutions are intentionally unquoted: word splitting must turn
# each token in the file into a separate argument. The local .sbtopts is set
# last, so its tokens end up in front of the /etc/sbt/sbtopts tokens.
[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"
|
||
# Exit status reported by onExit; stays 127 until `run` completes.
exit_status=127
saved_stty=""

# Restore the terminal settings captured by saveSttySettings, then forget them
# so onExit does not restore twice.
restoreSttySettings() {
  # Quote the expansion: `stty -g` emits a single word, and quoting protects it
  # from word splitting and pathname expansion (the original left it unquoted).
  stty "$saved_stty"
  saved_stty=""
}
|
||
# Exit handler (also installed as the INT trap): put the terminal back if we
# captured its settings, then propagate the recorded exit status.
onExit() {
  [[ -n "$saved_stty" ]] && restoreSttySettings
  exit $exit_status
}
|
||
# Capture the current terminal settings so they can be restored on exit or
# interrupt; leaves saved_stty empty when no terminal is attached.
saveSttySettings() {
  # Bug fix: the original checked `[[ ! $? ]]`, which tests the *string* "$?"
  # for emptiness and is therefore always false — the failure branch never ran.
  # Test the exit status of stty directly instead.
  if ! saved_stty=$(stty -g 2>/dev/null); then
    saved_stty=""
  fi
}
|
||
# Capture terminal state, restore it on Ctrl-C, run sbt with the processed
# arguments, then exit through onExit with sbt's status.
saveSttySettings
trap onExit INT

run "$@"

exit_status=$?
onExit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[repositories] | ||
local | ||
local-preloaded-ivy: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext] | ||
local-preloaded: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/} | ||
gcs-maven-central-mirror: https://maven-central.storage-download.googleapis.com/repos/central/data/ | ||
maven-central | ||
typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly | ||
sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly | ||
sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] | ||
bintray-typesafe-sbt-plugin-releases: https://dl.bintray.com/typesafe/sbt-plugins/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] | ||
bintray-spark-packages: https://dl.bintray.com/spark-packages/maven/ | ||
typesafe-releases: https://repo.typesafe.com/typesafe/releases/ |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we need the scala-related stuff here and below?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like 2.12 is the default anyways. But not sure if it's better to be explicit?