From 1020a420cb7ee7c4086d5a02a6f3d0426cd257fb Mon Sep 17 00:00:00 2001 From: patrick-schultz Date: Wed, 27 Mar 2024 13:13:18 -0400 Subject: [PATCH] clarify code --- .../scala/is/hail/rvd/AbstractRVDSpec.scala | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala index de35f108c7e2..5fc7891e1f73 100644 --- a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala +++ b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala @@ -512,19 +512,24 @@ case class IndexedRVDSpec2( } val (nestedContexts, newPartitioner) = if (filterIntervals) { + /* We want to filter to intervals in newPartitioner, while preserving the old partitioning, + * but dropping any partitions we know would be empty. So we construct a map from old + * partitions to the range of overlapping new partitions, dropping any with an empty range. */ val contextsAndBounds = for { - oldPartIdx <- part.rangeBounds.indices - oldInterval = part.rangeBounds(oldPartIdx) + (oldInterval, oldPartIdx) <- part.rangeBounds.zipWithIndex overlapRange = extendedNP.queryInterval(oldInterval) if overlapRange.nonEmpty } yield { val ctxs = overlapRange.map(newPartIdx => makeCtx(oldPartIdx, newPartIdx)) + // the interval spanning all overlapping filter intervals val newInterval = Interval( extendedNP.rangeBounds(overlapRange.head).left, extendedNP.rangeBounds(overlapRange.last).right, ) ( ctxs, + // Shrink oldInterval to the rows filtered to. 
+ // By construction we know oldInterval and newInterval overlap oldInterval.intersect(extendedNP.kord, newInterval).get, ) } @@ -532,11 +537,14 @@ case class IndexedRVDSpec2( (nestedContexts, new RVDPartitioner(part.sm, part.kType, newRangeBounds)) } else { - val nestedContexts = extendedNP.rangeBounds.indices.map { newPartIdx => - val newInterval = extendedNP.rangeBounds(newPartIdx) - val overlapRange = part.queryInterval(newInterval) - overlapRange.map(oldPartIdx => makeCtx(oldPartIdx, newPartIdx)) - } + /* We want to use newPartitioner as the partitioner, dropping any rows not contained in any + * new partition. So we construct a map from new partitions to the range of overlapping old + * partitions. */ + val nestedContexts = + extendedNP.rangeBounds.zipWithIndex.map { case (newInterval, newPartIdx) => + val overlapRange = part.queryInterval(newInterval) + overlapRange.map(oldPartIdx => makeCtx(oldPartIdx, newPartIdx)) + } (nestedContexts, extendedNP) }