Skip to content

Commit

Permalink
log store chooses where checkpoints go (delta-io#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ryan Murray committed Nov 3, 2020
1 parent fe68e6c commit a0faf88
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/main/scala/org/apache/spark/sql/delta/Checkpoints.scala
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ object Checkpoints extends DeltaLogging {
val checkpointSize = spark.sparkContext.longAccumulator("checkpointSize")
val numOfFiles = spark.sparkContext.longAccumulator("numOfFiles")
// Use the string in the closure as Path is not Serializable.
val path = checkpointFileSingular(snapshot.path, snapshot.version).toString
val resolvedPath = deltaLog.store.resolveCheckpointPath(snapshot.path)
val path = checkpointFileSingular(resolvedPath, snapshot.version).toString
val base = snapshot.state
.repartition(1)
.map { action =>
Expand Down
12 changes: 12 additions & 0 deletions src/main/scala/org/apache/spark/sql/delta/storage/LogStore.scala
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,18 @@ trait LogStore {
throw new UnsupportedOperationException()
}

/**
 * Resolves the base path under which checkpoint files are written.
 *
 * Checkpoints normally live alongside the rest of the Delta storage, inside the _delta_log
 * directory, so this default hands `path` back untouched. A LogStore implementation that wants
 * to control where checkpoints are kept can override this method so that the Checkpoints
 * methods use its chosen location. The hook is only required for checkpoints because they are
 * the only metadata file not directly written by LogStore.
 *
 * @param path the location checkpoints would be written to by default
 * @return the location to use for checkpoints; this default returns `path` itself
 */
def resolveCheckpointPath(path: Path): Path = path

/**
* Whether a partial write is visible when writing to `path`.
*
Expand Down

0 comments on commit a0faf88

Please sign in to comment.