From b647a3ba613162decec5bf1b6fd24638e4fa5b97 Mon Sep 17 00:00:00 2001 From: "Malik, Junaid" Date: Wed, 22 May 2024 10:43:08 +0800 Subject: [PATCH 1/4] #1353 improve in-memory typeahead's performance --- .../vuu/core/table/ColumnValueProvider.scala | 54 +++++++--------- .../table/InMemColumnValueProviderTest.scala | 64 ++++++++++++++----- 2 files changed, 69 insertions(+), 49 deletions(-) diff --git a/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala b/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala index e57db5b6a..c23808d08 100644 --- a/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala +++ b/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala @@ -3,15 +3,8 @@ package org.finos.vuu.core.table import com.typesafe.scalalogging.StrictLogging trait ColumnValueProvider { - - //todo currently only returns first 10 results.. so can't scrolling through values - //could return everything let ui decide how many results to display but there is cost to the json serialisig for large dataset - //todo how to handle nulls - for different data types - //todo should this be returning null or rely on json deserialiser rules? - def getUniqueValues(columnName:String):Array[String] def getUniqueValuesStartingWith(columnName:String, starts: String):Array[String] - } class EmptyColumnValueProvider extends ColumnValueProvider { @@ -28,41 +21,38 @@ object InMemColumnValueProvider { } } } + class InMemColumnValueProvider(dataTable: InMemDataTable) extends ColumnValueProvider with StrictLogging { + private val get10DistinctValues = DistinctValuesGetter(10) override def getUniqueValues(columnName: String): Array[String] = dataTable.columnForName(columnName) match { - case c: Column => - dataTable.primaryKeys.foldLeft(Set[String]())(addUnique(dataTable, c, _, _)).toArray.sorted.take(10) - case null => - logger.error(s"Column $columnName not found in table ${dataTable.name}") - Array() + case c: Column => get10DistinctValues(c) + case null => logger.error(s"Column $columnName not found in table ${dataTable.name}"); Array.empty; } override def getUniqueValuesStartingWith(columnName: String, starts: String): Array[String] = dataTable.columnForName(columnName) match { - case c: Column => - dataTable.primaryKeys.foldLeft(Set[String]())(addUnique(dataTable, c, _, _)).filter(_.startsWith(starts)).toArray.sorted.take(10) - case null => - logger.error(s"Column $columnName not found in table ${dataTable.name}") - Array() + case c: Column => get10DistinctValues(c, _.startsWith(starts)) + case null => logger.error(s"Column $columnName not found in table ${dataTable.name}"); Array.empty; } - private def addUnique(dt: DataTable, c: Column, set: Set[String], key: String): Set[String] = { - val row = dt.pullRow(key) - row.get(c) match { - case null => - Set() - case x: String => - set.+(x) - case x: Long => - set.+(x.toString) - case x: Double => - set.+(x.toString) - case x: Int => - set.+(x.toString) - case x => - set.+(x.toString) + + private case class DistinctValuesGetter(n: Int) { + private type Filter = String => Boolean + + def apply(c: Column, filter: Filter = _ => true): Array[String] = getDistinctValues(c, filter).take(n).toArray + + private def getDistinctValues(c: Column, filter: Filter): Iterator[String] = { + dataTable.primaryKeys + .iterator + .map(dataTable.pullRow(_).get(c)) + .distinct + .flatMap(valueToString) + .filter(filter) } + + private def valueToString(value: Any): Option[String] = Option(value).map(_.toString) } + } \ No newline at end of file diff --git a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala index 9b5f7522c..0f93e13e0 100644 --- a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala +++ b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala @@ -4,10 +4,13 @@ import org.finos.toolbox.jmx.{MetricsProvider, MetricsProviderImpl} import org.finos.toolbox.lifecycle.LifecycleContainer import org.finos.toolbox.time.{Clock, TestFriendlyClock} import org.finos.vuu.api.TableDef +import org.finos.vuu.core.table.InMemColumnValueProviderTest.randomRic import org.finos.vuu.provider.{JoinTableProviderImpl, MockProvider} import org.scalatest.featurespec.AnyFeatureSpec import org.scalatest.matchers.should.Matchers +import java.security.SecureRandom + class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers { implicit val clock: Clock = new TestFriendlyClock(10001L) @@ -16,45 +19,72 @@ class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers { private val pricesDef: TableDef = TableDef( "prices", - "ric", - Columns.fromNames("ric:String", "bid:Double", "ask:Double"), + "id", + Columns.fromNames("id:Long", "ric:String", "bid:Double", "ask:Double"), ) Feature("InMemColumnValueProvider") { Scenario("Get all unique value of a given column") { - - val joinProvider = JoinTableProviderImpl() - val table = new InMemDataTable(pricesDef, joinProvider) + val table = givenTable(pricesDef) val provider = new MockProvider(table) val columnValueProvider = new InMemColumnValueProvider(table) - provider.tick("VOD.L", Map("ric" -> "VOD.L", "bid" -> 220, "ask" -> 223)) - provider.tick("BT.L", Map("ric" -> "BT.L", "bid" -> 500, "ask" -> 550)) - provider.tick("VOD.L", Map("ric" -> "VOD.L", "bid" -> 240, "ask" -> 244)) + provider.tick("1", Map("id" -> "1", "ric" -> "VOD.L", "bid" -> 220, "ask" -> 223)) + provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550)) + provider.tick("3", Map("id" -> "3", "ric" -> "VOD.L", "bid" -> 240, "ask" -> 244)) val uniqueValues = columnValueProvider.getUniqueValues("ric") - uniqueValues shouldBe Array("BT.L", "VOD.L") + uniqueValues.toSet shouldBe Set("BT.L", "VOD.L") } - Scenario("Get all unique value of a given column that starts with specified string") { - - val joinProvider = JoinTableProviderImpl() - val table = new InMemDataTable(pricesDef, joinProvider) + val table = givenTable(pricesDef) val provider = new MockProvider(table) val columnValueProvider = new InMemColumnValueProvider(table) - provider.tick("VOA.L", Map("ric" -> "VOA.L", "bid" -> 220, "ask" -> 223)) - provider.tick("BT.L", Map("ric" -> "BT.L", "bid" -> 500, "ask" -> 550)) - provider.tick("VOV.L", Map("ric" -> "VOV.L", "bid" -> 240, "ask" -> 244)) + provider.tick("1", Map("id" -> "1", "ric" -> "VOA.L", "bid" -> 220, "ask" -> 223)) + provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550)) + provider.tick("3", Map("id" -> "3", "ric" -> "VOV.L", "bid" -> 240, "ask" -> 244)) val uniqueValues = columnValueProvider.getUniqueValuesStartingWith("ric", "VO") - uniqueValues shouldBe Array("VOA.L", "VOV.L") + uniqueValues.toSet shouldBe Set("VOA.L", "VOV.L") + } + + ignore("Performance test with 1 million rows") { + val table = givenTable(pricesDef) + val provider = new MockProvider(table) + val columnValueProvider = new InMemColumnValueProvider(table) + + Range.inclusive(1, 1_000_000).foreach(id => { + provider.tick(id.toString, Map("id" -> id, "ric" -> randomRic, "bid" -> 220, "ask" -> 223)) + }) + + val startTime = System.currentTimeMillis() + val values = columnValueProvider.getUniqueValuesStartingWith("ric", "A") + val endTime = System.currentTimeMillis() + val timeTakenMs = endTime - startTime + + println(s"time-taken: $timeTakenMs | values: ${values.mkString("Array(", ", ", ")")}") + + timeTakenMs should be < 20L } //todo match for start with string should not be case sensitive } + + private def givenTable(tableDef: TableDef): InMemDataTable = new InMemDataTable(tableDef, JoinTableProviderImpl()) +} + +private object InMemColumnValueProviderTest { + private val alphabets = List("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z") + private val secureRandom = SecureRandom.getInstanceStrong + private def randomAlphabet: String = alphabets(secureRandom.nextInt(alphabets.length)) + private def randomRic: String = { + val ricPrefix = Range.inclusive(1, 3).map(_ => randomAlphabet).mkString("") + val ricPostfix = Range.inclusive(1, 2).map(_ => randomAlphabet).mkString("") + s"$ricPrefix.$ricPostfix" + } } From d2af9d9a71b691aa5bda2e51a99005cb9ebf1a31 Mon Sep 17 00:00:00 2001 From: "Malik, Junaid" Date: Wed, 22 May 2024 10:44:56 +0800 Subject: [PATCH 2/4] #1353 remove legacy/unused code and update tests --- docs/rpc/service.md | 4 +- .../ignite/IgniteColumnValueProvider.scala | 14 ----- .../typeahead/TypeAheadRpcHandler.scala | 62 ------------------- .../typeahead/TypeAheadModuleTest.scala | 4 +- 4 files changed, 4 insertions(+), 80 deletions(-) delete mode 100644 example/apache-ignite/src/main/scala/org/finos/vuu/example/ignite/IgniteColumnValueProvider.scala diff --git a/docs/rpc/service.md b/docs/rpc/service.md index 22d4adfed..01d9123ed 100644 --- a/docs/rpc/service.md +++ b/docs/rpc/service.md @@ -26,13 +26,13 @@ object TypeAheadModule extends DefaultModule { def apply()(implicit clock: Clock, lifecycle: LifecycleContainer): ViewServerModule = { ModuleFactory.withNamespace(NAME) - .addRpcHandler(server => new TypeAheadRpcHandlerImpl(server.tableContainer)) + .addRpcHandler(server => new GenericTypeAheadRpcHandler(server.tableContainer)) .asModule() } } ``` -You can see we've defined an RpcHandler called TypeAheadRpcHandlerImpl, which implements the interface: +You can see we've defined an RpcHandler called GenericTypeAheadRpcHandler, which implements the interface: ```scala trait TypeAheadRpcHandler{ diff --git a/example/apache-ignite/src/main/scala/org/finos/vuu/example/ignite/IgniteColumnValueProvider.scala b/example/apache-ignite/src/main/scala/org/finos/vuu/example/ignite/IgniteColumnValueProvider.scala deleted file mode 100644 index bee541ab7..000000000 --- a/example/apache-ignite/src/main/scala/org/finos/vuu/example/ignite/IgniteColumnValueProvider.scala +++ /dev/null @@ -1,14 +0,0 @@ -package org.finos.vuu.example.ignite - -import org.finos.vuu.core.table.ColumnValueProvider - - -class IgniteColumnValueProvider(final val igniteStore: IgniteOrderStore) extends ColumnValueProvider { - - private val MAX_RESULT_COUNT = 10 - override def getUniqueValues(columnName: String): Array[String] = - igniteStore.getDistinct(columnName, MAX_RESULT_COUNT).toArray - - override def getUniqueValuesStartingWith(columnName: String, starts: String): Array[String] = - igniteStore.getDistinct(columnName, starts, MAX_RESULT_COUNT).toArray -} diff --git a/vuu/src/main/scala/org/finos/vuu/core/module/typeahead/TypeAheadRpcHandler.scala b/vuu/src/main/scala/org/finos/vuu/core/module/typeahead/TypeAheadRpcHandler.scala index 0d48bc7c5..cd8271ba5 100644 --- a/vuu/src/main/scala/org/finos/vuu/core/module/typeahead/TypeAheadRpcHandler.scala +++ b/vuu/src/main/scala/org/finos/vuu/core/module/typeahead/TypeAheadRpcHandler.scala @@ -1,70 +1,8 @@ package org.finos.vuu.core.module.typeahead -import com.typesafe.scalalogging.StrictLogging -import org.finos.vuu.core.table.{Column, DataTable, TableContainer} import org.finos.vuu.net.RequestContext -import org.finos.vuu.net.rpc.RpcHandler trait TypeAheadRpcHandler{ def getUniqueFieldValues(tableMap: Map[String, String], column: String, ctx: RequestContext): Array[String] def getUniqueFieldValuesStartingWith(tableMap: Map[String, String], column: String, starts: String, ctx: RequestContext): Array[String] } - - -class TypeAheadRpcHandlerImpl(val tableContainer: TableContainer) extends RpcHandler with StrictLogging with TypeAheadRpcHandler { - - private def addUnique(dt: DataTable, c: Column, set: Set[String], key: String): Set[String] = { - val row = dt.pullRow(key) - row.get(c) match { - case null => - Set() - case x: String => - set.+(x) - case x: Long => - set.+(x.toString) - case x: Double => - set.+(x.toString) - case x: Int => - set.+(x.toString) - case x => - set.+(x.toString) - } - } - - - override def getUniqueFieldValuesStartingWith(tableMap: Map[String, String], column: String, starts: String, ctx: RequestContext): Array[String] = { - val tableName = tableMap("table") - - tableContainer.getTable(tableName) match { - case dataTable: DataTable => - dataTable.columnForName(column) match { - case c: Column => - dataTable.primaryKeys.foldLeft(Set[String]())(addUnique(dataTable, c, _, _)).filter(_.startsWith(starts)).toArray.sorted.take(10) - case null => - logger.error(s"Column ${column} not found in table ${tableName}") - Array() - } - case null => - throw new Exception("Could not find table by name:" + tableName) - } - } - - def getUniqueFieldValues(tableMap: Map[String, String], column: String, ctx: RequestContext): Array[String] = { - - val tableName = tableMap("table") - - tableContainer.getTable(tableName) match { - case dataTable: DataTable => - dataTable.columnForName(column) match { - case c: Column => - dataTable.primaryKeys.foldLeft(Set[String]())(addUnique(dataTable, c, _, _)).toArray.sorted.take(10) - case null => - logger.error(s"Column ${column} not found in table ${tableName}") - Array() - } - case null => - throw new Exception("Could not find table by name:" + tableName) - } - } - -} diff --git a/vuu/src/test/scala/org/finos/vuu/core/module/typeahead/TypeAheadModuleTest.scala b/vuu/src/test/scala/org/finos/vuu/core/module/typeahead/TypeAheadModuleTest.scala index 9cc12c9ae..2664a8f51 100644 --- a/vuu/src/test/scala/org/finos/vuu/core/module/typeahead/TypeAheadModuleTest.scala +++ b/vuu/src/test/scala/org/finos/vuu/core/module/typeahead/TypeAheadModuleTest.scala @@ -20,7 +20,7 @@ class TypeAheadModuleTest extends AnyFeatureSpec with Matchers with GivenWhenThe def callGetUniqueFieldValues(tables: TableContainer, column: String): Array[String] = { - val typeAheadRpc = new TypeAheadRpcHandlerImpl(tables) + val typeAheadRpc = new GenericTypeAheadRpcHandler(tables) val ctx = RequestContext("", ClientSessionId("", ""), null, "") @@ -33,7 +33,7 @@ class TypeAheadModuleTest extends AnyFeatureSpec with Matchers with GivenWhenThe def callGetUniqueFieldValuesStarting(tables: TableContainer, column: String, starts: String): Array[String] = { - val typeAheadRpc = new TypeAheadRpcHandlerImpl(tables) + val typeAheadRpc = new GenericTypeAheadRpcHandler(tables) val ctx = new RequestContext("", ClientSessionId("", ""), null, "") From 89223ff2fffb876b7f2ac77bc947ee88e9720a14 Mon Sep 17 00:00:00 2001 From: "Malik, Junaid" Date: Tue, 21 May 2024 10:15:05 +0800 Subject: [PATCH 3/4] #1353 remove performance test as the setup is expensive - the plan is to add a separate test suite for benchmark tests as they are usually more expensive to run. --- .../table/InMemColumnValueProviderTest.scala | 33 ------------------- 1 file changed, 33 deletions(-) diff --git a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala index 0f93e13e0..0742e03c5 100644 --- a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala +++ b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala @@ -4,13 +4,10 @@ import org.finos.toolbox.jmx.{MetricsProvider, MetricsProviderImpl} import org.finos.toolbox.lifecycle.LifecycleContainer import org.finos.toolbox.time.{Clock, TestFriendlyClock} import org.finos.vuu.api.TableDef -import org.finos.vuu.core.table.InMemColumnValueProviderTest.randomRic import org.finos.vuu.provider.{JoinTableProviderImpl, MockProvider} import org.scalatest.featurespec.AnyFeatureSpec import org.scalatest.matchers.should.Matchers -import java.security.SecureRandom - class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers { implicit val clock: Clock = new TestFriendlyClock(10001L) @@ -53,38 +50,8 @@ class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers { uniqueValues.toSet shouldBe Set("VOA.L", "VOV.L") } - ignore("Performance test with 1 million rows") { - val table = givenTable(pricesDef) - val provider = new MockProvider(table) - val columnValueProvider = new InMemColumnValueProvider(table) - - Range.inclusive(1, 1_000_000).foreach(id => { - provider.tick(id.toString, Map("id" -> id, "ric" -> randomRic, "bid" -> 220, "ask" -> 223)) - }) - - val startTime = System.currentTimeMillis() - val values = columnValueProvider.getUniqueValuesStartingWith("ric", "A") - val endTime = System.currentTimeMillis() - val timeTakenMs = endTime - startTime - - println(s"time-taken: $timeTakenMs | values: ${values.mkString("Array(", ", ", ")")}") - - timeTakenMs should be < 20L - } - //todo match for start with string should not be case sensitive } private def givenTable(tableDef: TableDef): InMemDataTable = new InMemDataTable(tableDef, JoinTableProviderImpl()) } - -private object InMemColumnValueProviderTest { - private val alphabets = List("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z") - private val secureRandom = SecureRandom.getInstanceStrong - private def randomAlphabet: String = alphabets(secureRandom.nextInt(alphabets.length)) - private def randomRic: String = { - val ricPrefix = Range.inclusive(1, 3).map(_ => randomAlphabet).mkString("") - val ricPostfix = Range.inclusive(1, 2).map(_ => randomAlphabet).mkString("") - s"$ricPrefix.$ricPostfix" - } -} From e03db495119ff3a48eaef76350e966b65f01fa0b Mon Sep 17 00:00:00 2001 From: Na Lee Ha Date: Tue, 21 May 2024 16:32:36 +0100 Subject: [PATCH 4/4] #1353 changing assertion for typeahead test to ensure values returned are unique. Adding test for when some of the values are null --- .../table/InMemColumnValueProviderTest.scala | 39 ++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala index 0742e03c5..3e47df80b 100644 --- a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala +++ b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala @@ -5,15 +5,16 @@ import org.finos.toolbox.lifecycle.LifecycleContainer import org.finos.toolbox.time.{Clock, TestFriendlyClock} import org.finos.vuu.api.TableDef import org.finos.vuu.provider.{JoinTableProviderImpl, MockProvider} +import org.scalamock.scalatest.MockFactory +import org.scalatest.Assertions import org.scalatest.featurespec.AnyFeatureSpec import org.scalatest.matchers.should.Matchers -class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers { +class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers with MockFactory { implicit val clock: Clock = new TestFriendlyClock(10001L) implicit val lifecycle: LifecycleContainer = new LifecycleContainer() implicit val metricsProvider: MetricsProvider = new MetricsProviderImpl - private val pricesDef: TableDef = TableDef( "prices", "id", @@ -33,7 +34,35 @@ class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers { val uniqueValues = columnValueProvider.getUniqueValues("ric") - uniqueValues.toSet shouldBe Set("BT.L", "VOD.L") + uniqueValues should contain theSameElementsAs Vector("BT.L", "VOD.L") + } + + Scenario("Get all unique value of a given column filtering out null") { + val table = givenTable(pricesDef) + val provider = new MockProvider(table) + val columnValueProvider = new InMemColumnValueProvider(table) + + provider.tick("1", Map("id" -> "1", "ric" -> "VOD.L", "bid" -> 220, "ask" -> 223)) + provider.tick("2", Map("id" -> "2", "ric" -> null, "bid" -> 500, "ask" -> 550)) + provider.tick("3", Map("id" -> "3", "ric" -> "VOD.L", "bid" -> 240, "ask" -> 244)) + + val uniqueValues = columnValueProvider.getUniqueValues("ric") + + uniqueValues should contain theSameElementsAs Vector("VOD.L") + + } + + Scenario("Get all unique value of a given column returns empty when all values are null") { + val table = givenTable(pricesDef) + val provider = new MockProvider(table) + val columnValueProvider = new InMemColumnValueProvider(table) + + provider.tick("1", Map("id" -> "1", "ric" -> null, "bid" -> 220, "ask" -> 223)) + provider.tick("2", Map("id" -> "2", "ric" -> null, "bid" -> 500, "ask" -> 550)) + + val uniqueValues = columnValueProvider.getUniqueValues("ric") + + uniqueValues shouldBe empty } Scenario("Get all unique value of a given column that starts with specified string") { @@ -44,13 +73,13 @@ class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers { provider.tick("1", Map("id" -> "1", "ric" -> "VOA.L", "bid" -> 220, "ask" -> 223)) provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550)) provider.tick("3", Map("id" -> "3", "ric" -> "VOV.L", "bid" -> 240, "ask" -> 244)) + provider.tick("4", Map("id" -> "4", "ric" -> null, "bid" -> 240, "ask" -> 244)) val uniqueValues = columnValueProvider.getUniqueValuesStartingWith("ric", "VO") - uniqueValues.toSet shouldBe Set("VOA.L", "VOV.L") + uniqueValues should contain theSameElementsAs Vector("VOA.L", "VOV.L") } - //todo match for start with string should not be case sensitive } private def givenTable(tableDef: TableDef): InMemDataTable = new InMemDataTable(tableDef, JoinTableProviderImpl())