Skip to content

Commit

Permalink
fix(query) Regex equals .* must ignore the label and match series eve…
Browse files Browse the repository at this point in the history
…n without the label (filodb#1639)
  • Loading branch information
amolnayak311 authored Jul 28, 2023
1 parent ea1644b commit 955814e
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -817,12 +817,14 @@ class PartKeyLuceneIndex(ref: DatasetRef,
logger.info(s"Refreshed index searchers to make reads consistent for dataset=$ref shard=$shardNum")
}

//scalastyle:off method.length
private def leafFilter(column: String, filter: Filter): Query = {
filter match {
case EqualsRegex(value) =>
val regex = removeRegexAnchors(value.toString)
if (regex.nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE)
else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value.
if (regex.replaceAll("\\.\\*", "").nonEmpty) new RegexpQuery(new Term(column, regex), RegExp.NONE)
else leafFilter(column, NotEqualsRegex(".+")) // value="" means the label is absent or has an empty value.

case NotEqualsRegex(value) =>
val term = new Term(column, removeRegexAnchors(value.toString))
val allDocs = new MatchAllDocsQuery
Expand Down Expand Up @@ -864,7 +866,7 @@ class PartKeyLuceneIndex(ref: DatasetRef,
case _ => throw new UnsupportedOperationException
}
}

//scalastyle:on method.length
def partIdsFromFilters(columnFilters: Seq[ColumnFilter],
startTime: Long,
endTime: Long): debox.Buffer[Int] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1008,4 +1008,55 @@ class PartKeyLuceneIndexSpec extends AnyFunSpec with Matchers with BeforeAndAfte
// close CardinalityTracker to avoid leaking of resources
cardTracker.close()
}

it("should match records without label when .* is provided on a non existent label") {

val pkrs = partKeyFromRecords(dataset6, records(dataset6, readers.take(10)), Some(partBuilder))
.zipWithIndex.map { case (addr, i) =>
val pk = partKeyOnHeap(dataset6.schema.partKeySchema, ZeroPointer, addr)
keyIndex.addPartKey(pk, i, i, i + 10)()
PartKeyLuceneIndexRecord(pk, i, i + 10)
}
keyIndex.refreshReadersBlocking()


// Query with just the existing Label name
val filter1 = ColumnFilter("Actor2Code", Equals("GOV".utf8))
val result1 = keyIndex.partKeyRecordsFromFilters(Seq(filter1), 0, Long.MaxValue)
val expected1 = Seq(pkrs(7), pkrs(8), pkrs(9))

result1.map(_.partKey.toSeq) shouldEqual expected1.map(_.partKey.toSeq)
result1.map(p => (p.startTime, p.endTime)) shouldEqual expected1.map(p => (p.startTime, p.endTime))

// Query with non existent label name with an empty regex
val filter2 = ColumnFilter("dummy", EqualsRegex(".*".utf8))
val filter3 = ColumnFilter("Actor2Code", Equals("GOV".utf8))
val result2 = keyIndex.partKeyRecordsFromFilters(Seq(filter2, filter3), 0, Long.MaxValue)
val expected2 = Seq(pkrs(7), pkrs(8), pkrs(9))

result2.map(_.partKey.toSeq) shouldEqual expected2.map(_.partKey.toSeq)
result2.map(p => (p.startTime, p.endTime)) shouldEqual expected2.map(p => (p.startTime, p.endTime))

// Query with non existent label name with an regex matching at least 1 character
val filter4 = ColumnFilter("dummy", EqualsRegex(".+".utf8))
val filter5 = ColumnFilter("Actor2Code", Equals("GOV".utf8))
val result3 = keyIndex.partKeyRecordsFromFilters(Seq(filter4, filter5), 0, Long.MaxValue)
result3 shouldEqual Seq()

// Query with non existent label name with an empty regex
val filter6 = ColumnFilter("dummy", EqualsRegex("".utf8))
val filter7 = ColumnFilter("Actor2Code", Equals("GOV".utf8))
val result4 = keyIndex.partKeyRecordsFromFilters(Seq(filter6, filter7), 0, Long.MaxValue)
val expected4 = Seq(pkrs(7), pkrs(8), pkrs(9))
result4.map(_.partKey.toSeq) shouldEqual expected4.map(_.partKey.toSeq)
result4.map(p => (p.startTime, p.endTime)) shouldEqual expected4.map(p => (p.startTime, p.endTime))

// Query with non existent label name with an empty equals
val filter8 = ColumnFilter("dummy", Equals("".utf8))
val filter9 = ColumnFilter("Actor2Code", Equals("GOV".utf8))
val result5 = keyIndex.partKeyRecordsFromFilters(Seq(filter8, filter9), 0, Long.MaxValue)
val expected5 = Seq(pkrs(7), pkrs(8), pkrs(9))
result5.map(_.partKey.toSeq) shouldEqual expected5.map(_.partKey.toSeq)
result5.map(p => (p.startTime, p.endTime)) shouldEqual expected5.map(p => (p.startTime, p.endTime))
}
}

0 comments on commit 955814e

Please sign in to comment.