Skip to content

Commit

Permalink
870 - Improve fees computation query performance (#907)
Browse files Browse the repository at this point in the history
* Remove compilation warnings for test deprecation and trivial correction

* Add a lower bound on fees' timestamps in the computation query

* Fix rebase error

* Define fees averaging in terms of the configured time-window

 * update the tests
 * fix a flaky test

* Basic implementation of fee stats using postgres functions, half-working

 * convert the tests
 * the test for empty results fails, as the db-query is not giving
   correct results

* Compute statistics for fees as part of the db query

* Apply code reviews suggestions
  • Loading branch information
ivanopagano authored Oct 16, 2020
1 parent 5b8509c commit 9f4eb4c
Show file tree
Hide file tree
Showing 12 changed files with 228 additions and 185 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1690,7 +1690,7 @@ class TezosDataOperationsTest
)
}

"should aggregate with COUNT function" in {
"aggregate with COUNT function" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -1725,7 +1725,7 @@ class TezosDataOperationsTest
)
}

"should aggregate with MAX function" in {
"aggregate with MAX function" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -1760,7 +1760,7 @@ class TezosDataOperationsTest
)
}

"should aggregate with MIN function" in {
"aggregate with MIN function" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -1795,7 +1795,7 @@ class TezosDataOperationsTest
)
}

"should aggregate with SUM function" in {
"aggregate with SUM function" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -1830,7 +1830,7 @@ class TezosDataOperationsTest
)
}

"should aggregate with SUM function and order by SUM()" in {
"aggregate with SUM function and order by SUM()" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -1865,7 +1865,7 @@ class TezosDataOperationsTest
)
}

"should order correctly by the field not existing in query)" in {
"order correctly by the field not existing in query)" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -1897,7 +1897,7 @@ class TezosDataOperationsTest
)
}

"should correctly check use between in the timestamps" in {
"correctly check use between in the timestamps" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(2), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -1936,7 +1936,7 @@ class TezosDataOperationsTest
)
}

"should correctly execute BETWEEN operation using numeric comparison instead of lexicographical" in {
"correctly execute BETWEEN operation using numeric comparison instead of lexicographical" in {
val feesTmp = List(
FeesRow(0, 0, 0, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 0, 10, new Timestamp(3), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -1971,7 +1971,7 @@ class TezosDataOperationsTest
result shouldBe List(Map("high" -> Some(2)))
}

"should return correct query when asked for SQL" in {
"return correct query when asked for SQL" in {
val predicates = List(
Predicate(
field = "medium",
Expand Down Expand Up @@ -2022,7 +2022,7 @@ class TezosDataOperationsTest
result shouldBe List(Map("sql" -> Some(expectedQuery)))
}

"should aggregate with multiple aggregations on the same field" in {
"aggregate with multiple aggregations on the same field" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -2058,7 +2058,7 @@ class TezosDataOperationsTest
)
}

"should aggregate with single aggegation when there is only one field" in {
"aggregate with single aggegation when there is only one field" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -2107,7 +2107,7 @@ class TezosDataOperationsTest
)
}

"should aggregate with correct predicate field when aggregation is using predicate" in {
"aggregate with correct predicate field when aggregation is using predicate" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind1", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind2", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -2164,7 +2164,7 @@ class TezosDataOperationsTest
)
}

"should aggregate correctly with multiple aggregation fields with predicate" in {
"aggregate correctly with multiple aggregation fields with predicate" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(0), "kind1", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(1), "kind2", forkId = Fork.mainForkId),
Expand Down Expand Up @@ -2239,14 +2239,14 @@ class TezosDataOperationsTest
)
}

"should aggregate with datePart aggregation" in {
"aggregate with datePart aggregation" in {
val feesTmp = List(
FeesRow(0, 2, 4, new Timestamp(100, 0, 1, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, new Timestamp(100, 0, 2, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, new Timestamp(100, 0, 2, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, new Timestamp(100, 0, 2, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, new Timestamp(100, 0, 3, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, new Timestamp(100, 0, 3, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId)
FeesRow(0, 2, 4, Timestamp.valueOf("2000-01-01 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 8, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, Timestamp.valueOf("2000-01-03 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, Timestamp.valueOf("2000-01-03 00:00:00"), "kind", forkId = Fork.mainForkId)
)

val aggregate = List(
Expand Down Expand Up @@ -2279,14 +2279,14 @@ class TezosDataOperationsTest

}

"should map date with datePart aggregation when it is only type of aggregation" in {
"map date with datePart aggregation when it is only type of aggregation" in {
val feesTmp = List(
FeesRow(0, 1, 4, new Timestamp(100, 0, 1, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 2, 8, new Timestamp(100, 0, 2, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, new Timestamp(100, 0, 2, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 4, new Timestamp(100, 0, 2, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 5, 4, new Timestamp(100, 0, 3, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId),
FeesRow(0, 6, 4, new Timestamp(100, 0, 3, 0, 0, 0, 0), "kind", forkId = Fork.mainForkId)
FeesRow(0, 1, 4, Timestamp.valueOf("2000-01-01 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 2, 8, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 3, 4, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 4, 4, Timestamp.valueOf("2000-01-02 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 5, 4, Timestamp.valueOf("2000-01-03 00:00:00"), "kind", forkId = Fork.mainForkId),
FeesRow(0, 6, 4, Timestamp.valueOf("2000-01-03 00:00:00"), "kind", forkId = Fork.mainForkId)
)

val populateAndTest = for {
Expand Down Expand Up @@ -2317,7 +2317,7 @@ class TezosDataOperationsTest
)
}

"should correctly use query on temporal table" in {
"correctly use query on temporal table" in {

val accountsHistoryRow = AccountsHistoryRow(
accountId = "id",
Expand Down Expand Up @@ -2357,7 +2357,7 @@ class TezosDataOperationsTest

}

"should get the balance of an account at a specific timestamp where there are multiple entities for given account_id" in {
"get the balance of an account at a specific timestamp where there are multiple entities for given account_id" in {

val accountsHistoryRows = List(
AccountsHistoryRow(
Expand Down Expand Up @@ -2414,7 +2414,7 @@ class TezosDataOperationsTest
)
}

"should get the balance of an account at a specific timestamp" in {
"get the balance of an account at a specific timestamp" in {

val accountsHistoryRows = List(
AccountsHistoryRow(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ trait RandomGenerationKit {

//a stable timestamp reference if needed
lazy val testReferenceTimestamp =
new Timestamp(testReferenceDateTime.toEpochSecond)
new Timestamp(testReferenceDateTime.toEpochSecond * 1000L)

//creates pseudo-random strings of given length, based on an existing [[Random]] generator
val alphaNumericGenerator = (random: Random) => random.alphanumeric.take(_: Int).mkString
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,94 @@
package tech.cryptonomic.conseil.common.sql

import com.github.tminglei.slickpg.ExPostgresProfile
import slick.ast._
import slick.lifted.{Query, Rep}
import slick.basic.Capability
import slick.jdbc.JdbcCapabilities
import com.github.tminglei.slickpg.agg.PgAggFuncSupport

/** Custom postgres profile for enabling `insertOrUpdateAll` */
trait CustomProfileExtension extends ExPostgresProfile {
trait CustomProfileExtension extends ExPostgresProfile with PgAggFuncSupport {

// Add back `capabilities.insertOrUpdate` to enable native `upsert` support; for postgres 9.5+
override protected def computeCapabilities: Set[Capability] =
super.computeCapabilities + JdbcCapabilities.insertOrUpdate

/* The following values can be used via imports to add all aggregations supported
* by pg-slick, which covers all postgres defined ones.
* e.g.
* {{{
* import CustomProfileExtension.statisticsAggregations._
* }}}
*
* There is a very strong limitation though: the current implementation
* doesn't support aggregations combined with grouping of other columns.
*
* See the related issue here: https://github.com/tminglei/slick-pg/issues/289
* To overcome this limitation, we introduced an additional extension of
* slick-provided aggregations in the CustomApi object.
* The latter is a totally different work-around and can't be used
* with the pg-slick aggregations, as the two API designs are not compatible.
*/
/** provides access to postgres general aggregations via pg-slick */
lazy val generalAggregations = new GeneralAggFunctions {}

/** provides access to postgres statistics aggregations via pg-slick */
lazy val statisticsAggregations = new StatisticsAggFunctions {}

/** provides access to postgres ordered-set aggregations via pg-slick */
lazy val orderedSetAggregations = new OrderedSetAggFunctions {}

/** provides access to postgres hypothetical-set aggregations via pg-slick */
lazy val hypotheticalSetAggregations = new HypotheticalSetAggFunctions {}

// expose the customized api object so that importing `profile.api._` also brings
// in the extra statistic extensions defined just below
override val api: API = CustomApi

/** Provide additional features from api import */
object CustomApi extends API {

/* The following conversions provide extra statistic operations from postgres
* which are not generally available for any db-type.
* They use the same mechanism used internally by slick, which provides aggregate functions
* only for queries that result in single-column values.
* The column can contain a value or an optional value (i.e. it works for nullable columns too)
*/
implicit def customSingleColumnQueryExtensions[E: BaseTypedType, C[_]](q: Query[Rep[E], _, C]) =
new CustomSingleColumnQueryExtensions[E, E, C](q)
implicit def customSingleOptionColumnQueryExtensions[E: BaseTypedType, C[_]](q: Query[Rep[Option[E]], _, C]) =
new CustomSingleColumnQueryExtensions[E, Option[E], C](q)
}

}

/** Allow using the extensions under an import in scope */
object CustomProfileExtension extends CustomProfileExtension {
import slick.ast.Library._

/** Defines extra functions that we use for statistical computations */
object Library {
/* slick-compatible definitions, based on the internal implementation of slick-provided
* extensions for sql functions
*/
// sql `stddev` — historical alias kept for backward compatibility (see stdDev below)
val StdDevAggregate = new SqlAggregateFunction("stddev")
// sql `stddev_pop` — population standard deviation
val StdDevPopAggregate = new SqlAggregateFunction("stddev_pop")
// sql `stddev_samp` — sample standard deviation
val StdDevSampAggregate = new SqlAggregateFunction("stddev_samp")
}
}

object CustomProfileExtension extends CustomProfileExtension
/** The implementation of extra functions added via implicit conversions of the query type */
// Type parameters: A is the underlying element type; E is the column type as seen by
// the query, i.e. either A or Option[A] (see the two implicit conversions in CustomApi);
// C[_] is the collection type of the query. Results are always Option[A], since the
// sql aggregate may produce no value (e.g. on empty input).
final class CustomSingleColumnQueryExtensions[A, E, C[_]](val q: Query[Rep[E], _, C]) extends AnyVal {
import slick.lifted.FunctionSymbolExtensionMethods._

/** historical alias for [[stdDevSamp]] */
def stdDev(implicit tt: TypedType[Option[A]]) =
CustomProfileExtension.Library.StdDevAggregate.column[Option[A]](q.toNode)

/** population standard deviation of the input values */
def stdDevPop(implicit tt: TypedType[Option[A]]) =
CustomProfileExtension.Library.StdDevPopAggregate.column[Option[A]](q.toNode)

/** sample standard deviation of the input values */
def stdDevSamp(implicit tt: TypedType[Option[A]]) =
CustomProfileExtension.Library.StdDevSampAggregate.column[Option[A]](q.toNode)
}

This file was deleted.

This file was deleted.

4 changes: 2 additions & 2 deletions conseil-lorre/src/main/resources/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ lorre {
bootup-connection-check-timeout: 10 s
#Used to make sure Lorre records average fees every n iterations
fee-update-interval: 20
#Used to select how many fees should be averaged together
number-of-fees-averaged: 1000
#Used to select how many fees should be averaged together, based on a time-window
fees-average-time-window: 1 day
#Docs missing
depth: newest
depth: ${?CONSEIL_LORRE_DEPTH}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ final case class LorreConfiguration(
bootupRetryInterval: FiniteDuration,
bootupConnectionCheckTimeout: FiniteDuration,
feeUpdateInterval: Int,
numberOfFeesAveraged: Int,
feesAverageTimeWindow: FiniteDuration,
depth: Depth,
headHash: Option[String],
chainEvents: List[ChainEvent],
Expand Down
Loading

0 comments on commit 9f4eb4c

Please sign in to comment.