-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for segments with mixed granularity (#4)
- Loading branch information
Showing
17 changed files
with
382 additions
and
151 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
41 changes: 41 additions & 0 deletions
41
src/main/scala/bi/deep/segments/MixedGranularitySolver.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package bi.deep.segments | ||
|
||
object MixedGranularitySolver { | ||
trait Overlapping[T] { | ||
|
||
/** Defines if two values overlaps (have common part) */ | ||
def overlaps(left: T, right: T): Boolean | ||
|
||
} | ||
|
||
private def upsert[K, V](map: Map[K, V])(key: K, value: V)(f: (V, V) => V): Map[K, V] = { | ||
map.get(key) match { | ||
case None => map.updated(key, value) | ||
case Some(existing) => map.updated(key, f(existing, value)) | ||
} | ||
} | ||
|
||
private def upsertMany[K, V](map: Map[K, V])(kv: (K, V)*)(f: (V, V) => V): Map[K, V] = { | ||
kv.foldLeft(map) { case (map, (k, v)) => upsert(map)(k, v)(f) } | ||
} | ||
|
||
def solve[T](values: List[T])(implicit overlapping: Overlapping[T], ordering: Ordering[T]): List[T] = { | ||
if (values.length < 2) values | ||
else { | ||
val collisions = values.combinations(2).foldLeft(Map.empty[T, Set[T]]) { case (collisions, left :: right :: Nil) => | ||
if (overlapping.overlaps(left, right)) upsertMany(collisions)( | ||
left -> Set(left, right), | ||
right -> Set(right, left) | ||
)(_ ++ _) | ||
else upsertMany(collisions)( | ||
left -> Set(left), | ||
right -> Set(right) | ||
)(_ ++ _) | ||
} | ||
|
||
val results = collisions.foldLeft(Set.empty[T]) { case (solutions, (_, collisions)) => solutions + collisions.max } | ||
results.toList | ||
} | ||
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package bi.deep.segments | ||
|
||
import bi.deep.utils.Parsing | ||
import org.apache.hadoop.fs.{LocatedFileStatus, Path} | ||
import org.apache.log4j.Logger | ||
import org.joda.time.{Instant, Interval} | ||
|
||
import scala.util.Try | ||
|
||
|
||
case class Segment(path: Path, interval: Interval, version: Instant, partition: Int) | ||
|
||
|
||
object Segment { | ||
implicit private val logger: Logger = Logger.getLogger("SegmentParser") | ||
|
||
private val pattern = """(\d+)""".r | ||
|
||
def apply(segment: SegmentVersion, status: LocatedFileStatus): Option[Segment] = Parsing.withLogger { | ||
val relative = status.getPath.toString.replace(segment.path.toString, "") | ||
for { | ||
partition <- Try(pattern.findFirstIn(relative).get.toInt) | ||
} yield { | ||
new Segment(status.getPath, segment.interval, segment.version, partition) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package bi.deep.segments | ||
|
||
import bi.deep.utils.{Parsing, TimeUtils} | ||
import org.apache.hadoop.fs.{FileStatus, Path} | ||
import org.apache.log4j.Logger | ||
import org.joda.time.Interval | ||
|
||
|
||
/** A path to Segment's Interval */ | ||
case class SegmentInterval(path: Path, interval: Interval) { | ||
def overlapedBy(range: Interval): Boolean = range.overlaps(interval) | ||
} | ||
|
||
object SegmentInterval { | ||
implicit private val logger: Logger = Logger.getLogger("SegmentIntervalParser") | ||
|
||
def apply(status: FileStatus): Option[SegmentInterval] = Parsing.withLogger { | ||
for { | ||
interval <- TimeUtils.parseIntervalFromHadoop(status.getPath.getName) | ||
} yield new SegmentInterval(status.getPath, interval) | ||
} | ||
} |
Oops, something went wrong.