Skip to content

Commit

Permalink
[Spark] Adds util function getTotalSizeOfDVFieldsInFile in DeletionVe…
Browse files Browse the repository at this point in the history
…ctorStore

Adds the utility function getTotalSizeOfDVFieldsInFile to DeletionVectorStore, so that the total size of a DV's fields in a file is computed consistently across all usages.

Closes delta-io#2393

GitOrigin-RevId: addfa1a0bb5314bfccb29a29f6cc6f2d38e41650
  • Loading branch information
linzhou-db authored and vkorukanti committed Dec 20, 2023
1 parent ccba005 commit d51ed45
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ trait DeletionVectorStoreUtils {
/**
 * The size, in bytes, of the length field that is stored in front of each serialized DV.
 */
final val DATA_SIZE_LEN = 4

/**
 * Number of bytes that a single DV occupies inside a DV file.
 *
 * On-disk layout of one DV: `[SerializedDV Size][SerializedDV Bytes][DV Checksum]`.
 *
 * @param bitmapDataSize size of the serialized DV bytes
 * @return the size of the length field, plus `bitmapDataSize`, plus the size of the checksum
 */
def getTotalSizeOfDVFieldsInFile(bitmapDataSize: Int): Int = {
  // Fixed framing around the payload: the length prefix and the trailing checksum.
  val framingOverhead = DATA_SIZE_LEN + CHECKSUM_LEN
  framingOverhead + bitmapDataSize
}

// scalastyle:off pathfromuri
/**
 * Convert the given String path to a Hadoop Path, handling special characters properly.
 *
 * The string is parsed as a `java.net.URI` first and the resulting URI is handed to the
 * `Path` constructor, so `path` must be a syntactically valid URI.
 *
 * @param path the path, in URI string form
 * @return the `Path` built from the parsed URI
 * @throws java.net.URISyntaxException if `path` cannot be parsed as a URI
 */
def stringToPath(path: String): Path = {
  val parsedUri = new URI(path)
  new Path(parsedUri)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.io.{DataInputStream, DataOutputStream, File}
import org.apache.spark.sql.delta.{DeltaChecksumException, DeltaConfigs, DeltaLog}
import org.apache.spark.sql.delta.deletionvectors.{RoaringBitmapArray, RoaringBitmapArrayFormat}
import org.apache.spark.sql.delta.sources.DeltaSQLConf
import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore.{CHECKSUM_LEN, DATA_SIZE_LEN}
import org.apache.spark.sql.delta.storage.dv.DeletionVectorStore.{getTotalSizeOfDVFieldsInFile, CHECKSUM_LEN}
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest
import org.apache.spark.sql.delta.util.PathWithFileSystem
import com.google.common.primitives.Ints
Expand Down Expand Up @@ -143,8 +143,7 @@ trait DeletionVectorStoreSuiteBase
assert(dvRange1.length === dvBytes1.length)

// DV2 should be written immediately after the DV1
// DV Format:<SerializedDV Size> <SerializedDV Bytes> <DV Checksum>
val totalDV1Size = DATA_SIZE_LEN + dvBytes1.length + CHECKSUM_LEN
val totalDV1Size = getTotalSizeOfDVFieldsInFile(dvBytes1.length)
assert(dvRange2.offset === 1 + totalDV1Size) // 1byte for file format version
assert(dvRange2.length === dvBytes2.length)

Expand Down

0 comments on commit d51ed45

Please sign in to comment.