From 398d257661e507449ebc27639599588fd03f546c Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Wed, 22 Jan 2025 13:17:30 +0530 Subject: [PATCH 01/11] Add SqlSegmentsMetadataCache --- .../MaterializedViewSupervisorTest.java | 10 + .../DatasourceOptimizerTest.java | 10 + .../indexing/overlord/DruidOverlord.java | 3 + .../actions/SegmentAllocateActionTest.java | 2 +- .../common/actions/TaskActionTestKit.java | 12 +- .../common/task/IngestionTestBase.java | 18 +- .../overlord/TaskLockBoxConcurrencyTest.java | 10 + .../indexing/overlord/TaskLockboxTest.java | 20 +- .../indexing/overlord/TaskQueueScaleTest.java | 10 + .../indexing/overlord/http/OverlordTest.java | 2 + .../SeekableStreamIndexTaskTestBase.java | 10 + ...TestIndexerMetadataStorageCoordinator.java | 16 +- .../druid/timeline/SegmentTimeline.java | 10 + .../guice/SQLMetadataStorageDruidModule.java | 15 + .../IndexerMetadataStorageCoordinator.java | 29 +- .../IndexerSQLMetadataStorageCoordinator.java | 1309 +++++------------ .../druid/metadata/PendingSegmentRecord.java | 7 + .../SegmentsMetadataManagerConfig.java | 27 +- .../metadata/SqlSegmentsMetadataManager.java | 4 +- .../metadata/SqlSegmentsMetadataQuery.java | 316 +++- .../DatasourceSegmentMetadataReader.java | 96 ++ .../DatasourceSegmentMetadataWriter.java | 69 + .../segment/SegmentsMetadataTransaction.java | 53 + .../SqlSegmentsMetadataCachedTransaction.java | 347 +++++ .../SqlSegmentsMetadataTransaction.java | 561 +++++++ ...SqlSegmentsMetadataTransactionFactory.java | 86 ++ .../metadata/segment/cache/BaseCache.java | 70 + .../segment/cache/DatasourceSegmentCache.java | 558 +++++++ .../metadata/segment/cache/SegmentState.java | 45 + .../segment/cache/SegmentsMetadataCache.java | 52 + .../cache/SqlSegmentsMetadataCache.java | 454 ++++++ .../druid/server/http/MetadataResource.java | 8 +- ...exerSQLMetadataStorageCoordinatorTest.java | 245 ++- ...orageCoordinatorSchemaPersistenceTest.java | 16 +- ...qlSegmentsMetadataManagerProviderTest.java | 2 +- ...SegmentsMetadataManagerSchemaPollTest.java | 9 +- .../SqlSegmentsMetadataManagerTest.java | 6 +- .../cache/NoopSegmentsMetadataCache.java | 56 + .../cache/SqlSegmentsMetadataCacheTest.java | 25 + .../duty/KillUnusedSegmentsTest.java | 3 +- .../server/http/MetadataResourceTest.java | 6 +- 41 files changed, 3522 insertions(+), 1085 deletions(-) create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataWriter.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentState.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java create mode 100644 
server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java create mode 100644 server/src/test/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCacheTest.java diff --git a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java index 2b78fce8f7f6..d6f26acab11d 100644 --- a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java +++ b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java @@ -46,6 +46,8 @@ import org.apache.druid.metadata.MetadataSupervisorManager; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.TestHelper; @@ -53,6 +55,7 @@ import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.ChatHandlerProvider; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.server.security.AuthorizerMapper; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; @@ -105,6 +108,13 @@ public void setUp() derbyConnector ); indexerMetadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + objectMapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java b/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java index 2f2717365f19..ba449ce35f4b 100644 --- a/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java +++ b/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java @@ -42,6 +42,8 @@ import org.apache.druid.java.util.http.client.HttpClient; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.query.Query; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.QueryRunnerTestHelper; @@ -56,6 +58,7 @@ import org.apache.druid.segment.realtime.appenderator.SegmentSchemas; import org.apache.druid.server.coordination.DruidServerMetadata; import 
org.apache.druid.server.coordination.ServerType; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.server.initialization.ZkPathsConfig; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.apache.druid.timeline.DataSegment; @@ -112,6 +115,13 @@ public void setUp() throws Exception ); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + jsonMapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), jsonMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java index 547b63a70f07..b253d7a33285 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java @@ -40,6 +40,7 @@ import org.apache.druid.java.util.common.lifecycle.LifecycleStop; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordinator.CoordinatorOverlordServiceConfig; @@ -88,6 +89,7 @@ public DruidOverlord( final OverlordDutyExecutor overlordDutyExecutor, @IndexingService final DruidLeaderSelector overlordLeaderSelector, final SegmentAllocationQueue segmentAllocationQueue, + final SegmentsMetadataCache segmentsMetadataCache, final CompactionScheduler compactionScheduler, final ObjectMapper mapper, final TaskContextEnricher taskContextEnricher @@ -132,6 +134,7 @@ public void becomeLeader() // First add "half leader" services: everything required for APIs except the supervisor manager. // Then, become "half leader" so those APIs light up and supervisor initialization can proceed. 
+ leaderLifecycle.addManagedInstance(segmentsMetadataCache); leaderLifecycle.addManagedInstance(taskRunner); leaderLifecycle.addManagedInstance(taskQueue); leaderLifecycle.addHandler( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java index 3c0b08758f76..c538853daeac 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java @@ -1170,7 +1170,7 @@ public void testSegmentIdMustNotBeReused() // Allocate another id and ensure that it doesn't exist in the druid_segments table final SegmentIdWithShardSpec theId = allocate(task1, DateTimes.nowUtc(), Granularities.NONE, Granularities.ALL, "seq", "3"); - Assert.assertNull(coordinator.retrieveSegmentForId(theId.asSegmentId().toString(), true)); + Assert.assertNull(coordinator.retrieveSegmentForId(theId.getDataSource(), theId.asSegmentId().toString())); lockbox.unlock(task1, Intervals.ETERNITY); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java index 208fec01fe45..4e44af07ab0b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java @@ -37,9 +37,12 @@ import org.apache.druid.metadata.SegmentsMetadataManagerConfig; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaCache; import org.apache.druid.segment.metadata.SegmentSchemaManager; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.easymock.EasyMock; import org.joda.time.Period; @@ -96,6 +99,13 @@ public void before() final ObjectMapper objectMapper = new TestUtils().getTestObjectMapper(); segmentSchemaManager = new SegmentSchemaManager(metadataStorageTablesConfig, objectMapper, testDerbyConnector); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + objectMapper, + metadataStorageTablesConfig, + testDerbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), objectMapper, metadataStorageTablesConfig, testDerbyConnector, @@ -113,7 +123,7 @@ public int getSqlMetadataMaxRetry() segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance()); segmentsMetadataManager = new SqlSegmentsMetadataManager( objectMapper, - Suppliers.ofInstance(new SegmentsMetadataManagerConfig()), + Suppliers.ofInstance(new SegmentsMetadataManagerConfig(null, null)), Suppliers.ofInstance(metadataStorageTablesConfig), testDerbyConnector, segmentSchemaCache, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java 
index 7f32c67c2384..8395929d7e0f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -70,6 +70,8 @@ import org.apache.druid.metadata.SegmentsMetadataManagerConfig; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9Factory; @@ -85,6 +87,7 @@ import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.NoopChatHandlerProvider; import org.apache.druid.server.DruidNode; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.apache.druid.server.security.AuthTestUtils; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -124,8 +127,6 @@ public abstract class IngestionTestBase extends InitializedNullHandlingTest private SegmentsMetadataManager segmentsMetadataManager; private TaskLockbox lockbox; private File baseDir; - private SegmentSchemaManager segmentSchemaManager; - private SegmentSchemaCache segmentSchemaCache; private SupervisorManager supervisorManager; private TestDataSegmentKiller dataSegmentKiller; protected File reportsFile; @@ -142,23 +143,30 @@ public void setUpIngestionTestBase() throws IOException connector.createSegmentSchemasTable(); connector.createSegmentTable(); taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null)); - segmentSchemaManager = new SegmentSchemaManager( + SegmentSchemaManager segmentSchemaManager = new SegmentSchemaManager( derbyConnectorRule.metadataTablesConfigSupplier().get(), objectMapper, derbyConnectorRule.getConnector() ); storageCoordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + objectMapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnectorRule.getConnector(), + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnectorRule.getConnector(), segmentSchemaManager, CentralizedDatasourceSchemaConfig.create() ); - segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance()); + SegmentSchemaCache segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance()); segmentsMetadataManager = new SqlSegmentsMetadataManager( objectMapper, - SegmentsMetadataManagerConfig::new, + () -> new SegmentsMetadataManagerConfig(null, null), derbyConnectorRule.metadataTablesConfigSupplier(), derbyConnectorRule.getConnector(), segmentSchemaCache, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java index 4dc0416cd1f2..bdda827b172d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java @@ -33,8 +33,11 @@ import org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory; import 
org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaManager; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.joda.time.Interval; import org.junit.After; import org.junit.Assert; @@ -80,6 +83,13 @@ public void setup() lockbox = new TaskLockbox( taskStorage, new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + objectMapper, + derby.metadataTablesConfigSupplier().get(), + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), objectMapper, derby.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java index 8f47b78a3bfe..a3e8caa67326 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java @@ -56,10 +56,13 @@ import org.apache.druid.metadata.LockFilterPolicy; import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; import org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec; @@ -90,7 +93,6 @@ public class TaskLockboxTest @Rule public final TestDerbyConnector.DerbyConnectorRule derby = new TestDerbyConnector.DerbyConnectorRule(); - private ObjectMapper objectMapper; private TaskStorage taskStorage; private IndexerMetadataStorageCoordinator metadataStorageCoordinator; private TaskLockbox lockbox; @@ -104,7 +106,7 @@ public class TaskLockboxTest @Before public void setup() { - objectMapper = TestHelper.makeJsonMapper(); + final ObjectMapper objectMapper = TestHelper.makeJsonMapper(); objectMapper.registerSubtypes(NumberedShardSpec.class, HashBasedNumberedShardSpec.class); final TestDerbyConnector derbyConnector = derby.getConnector(); derbyConnector.createTaskTables(); @@ -129,6 +131,13 @@ public void setup() EasyMock.replay(emitter); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + objectMapper, + tablesConfig, + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), objectMapper, tablesConfig, derbyConnector, @@ -463,6 +472,13 @@ public void testSyncWithUnknownTaskTypesFromModuleNotLoaded() ); IndexerMetadataStorageCoordinator loadedMetadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( + new 
SqlSegmentsMetadataTransactionFactory( + loadedMapper, + derby.metadataTablesConfigSupplier().get(), + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), loadedMapper, derby.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java index f67e9fc28614..7fe7c3ea1d26 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java @@ -47,9 +47,12 @@ import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TaskLookup; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaManager; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.joda.time.Duration; import org.joda.time.Period; @@ -103,6 +106,13 @@ public void setUp() final ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); segmentSchemaManager = new SegmentSchemaManager(derbyConnectorRule.metadataTablesConfigSupplier().get(), jsonMapper, derbyConnectorRule.getConnector()); final IndexerSQLMetadataStorageCoordinator storageCoordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + jsonMapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnectorRule.getConnector(), + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), jsonMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnectorRule.getConnector(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java index 8c5e9e924cc0..1bd7ab1f4c66 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java @@ -74,6 +74,7 @@ import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordinator.CoordinatorOverlordServiceConfig; import org.apache.druid.server.metrics.NoopServiceEmitter; @@ -258,6 +259,7 @@ public MockTaskRunner get() EasyMock.createNiceMock(OverlordDutyExecutor.class), new TestDruidLeaderSelector(), EasyMock.createNiceMock(SegmentAllocationQueue.class), + EasyMock.createNiceMock(SegmentsMetadataCache.class), EasyMock.createNiceMock(CompactionScheduler.class), new DefaultObjectMapper(), new NoopTaskContextEnricher() diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java index b166cfd4e88f..3b9dc28f6d51 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java @@ -87,6 +87,8 @@ import org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.query.DirectQueryProcessingPool; import org.apache.druid.query.Druids; import org.apache.druid.query.QueryPlus; @@ -120,6 +122,7 @@ import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DataSegmentServerAnnouncer; import org.apache.druid.server.coordination.ServerType; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.server.security.AuthTestUtils; import org.apache.druid.timeline.DataSegment; import org.apache.druid.utils.CompressionUtils; @@ -587,6 +590,13 @@ protected void makeToolboxFactory(TestUtils testUtils, ServiceEmitter emitter, b ); segmentSchemaManager = new SegmentSchemaManager(derby.metadataTablesConfigSupplier().get(), objectMapper, derbyConnector); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + objectMapper, + derby.metadataTablesConfigSupplier().get(), + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), objectMapper, derby.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index a95d73ce1bb7..44008c0fb8aa 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -254,14 +254,6 @@ public SegmentIdWithShardSpec allocatePendingSegment( ); } - @Override - public List upgradePendingSegmentsOverlappingWith( - Set replaceSegments - ) - { - return Collections.emptyList(); - } - @Override public int deletePendingSegmentsCreatedInInterval(String dataSource, Interval deleteInterval) { @@ -288,7 +280,13 @@ public void updateSegmentMetadata(Set segments) } @Override - public DataSegment retrieveSegmentForId(final String id, boolean includeUnused) + public DataSegment retrieveSegmentForId(String dataSource, String segmentId) + { + return null; + } + + @Override + public DataSegment retrieveUsedSegmentForId(String dataSource, String segmentId) { return null; } diff --git a/processing/src/main/java/org/apache/druid/timeline/SegmentTimeline.java b/processing/src/main/java/org/apache/druid/timeline/SegmentTimeline.java index 83c345eacfd0..9fa7898bbed1 100644 --- a/processing/src/main/java/org/apache/druid/timeline/SegmentTimeline.java +++ b/processing/src/main/java/org/apache/druid/timeline/SegmentTimeline.java @@ -60,6 +60,16 @@ public void addSegments(Iterator segments) ); } + public void add(DataSegment segment) + { + 
add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment)); + } + + public void remove(DataSegment segment) + { + remove(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment)); + } + public boolean isOvershadowed(DataSegment segment) { return isOvershadowed(segment.getInterval(), segment.getVersion(), segment); diff --git a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java index 7894df83af48..35baccce327c 100644 --- a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java +++ b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java @@ -41,6 +41,9 @@ import org.apache.druid.metadata.SegmentsMetadataManagerProvider; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.SqlSegmentsMetadataManagerProvider; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.SqlSegmentsMetadataCache; import org.apache.druid.server.audit.AuditManagerConfig; import org.apache.druid.server.audit.AuditSerdeHelper; import org.apache.druid.server.audit.SQLAuditManager; @@ -72,6 +75,8 @@ public void createBindingChoices(Binder binder, String defaultValue) PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentsMetadataManagerProvider.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManager.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManagerProvider.class), defaultValue); + PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentsMetadataCache.class), defaultValue); + PolyBind.createChoiceWithDefault(binder, prop, Key.get(SqlSegmentsMetadataTransactionFactory.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(IndexerMetadataStorageCoordinator.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageActionHandlerFactory.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageUpdaterJobHandler.class), defaultValue); @@ -103,6 +108,16 @@ public void configure(Binder binder) .to(SQLMetadataRuleManagerProvider.class) .in(LazySingleton.class); + PolyBind.optionBinder(binder, Key.get(SegmentsMetadataCache.class)) + .addBinding(type) + .to(SqlSegmentsMetadataCache.class) + .in(LazySingleton.class); + + PolyBind.optionBinder(binder, Key.get(SqlSegmentsMetadataTransactionFactory.class)) + .addBinding(type) + .to(SqlSegmentsMetadataTransactionFactory.class) + .in(LazySingleton.class); + PolyBind.optionBinder(binder, Key.get(IndexerMetadataStorageCoordinator.class)) .addBinding(type) .to(IndexerSQLMetadataStorageCoordinator.class) diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java index 5e840b07b6d0..eb2ee13e3aaa 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java @@ -350,23 +350,6 @@ SegmentPublishResult commitReplaceSegments( @Nullable SegmentSchemaMapping segmentSchemaMapping ); - /** - * Creates and inserts new IDs 
for the pending segments that overlap with the given - * replace segments being committed. The newly created pending segment IDs: - * • Have the same interval and version as that of an overlapping segment - * committed by the REPLACE task. - * • Cannot be committed but are only used to serve realtime queries against - * those versions.
- * - * @param replaceSegments Segments being committed by a REPLACE task - * @return List of inserted pending segment records - */ - List upgradePendingSegmentsOverlappingWith( - Set replaceSegments - ); - /** * Retrieves data source's metadata from the metadata store. Returns null if there is no metadata. */ @@ -444,16 +427,14 @@ SegmentPublishResult commitMetadataOnly( /** * Retrieve the segment for a given id from the metadata store. Return null if no such segment exists *
- * If {@code includeUnused} is set, the segment {@code id} retrieval should also consider the set of unused segments - * in the metadata store. Unused segments could be deleted by a kill task at any time and might lead to unexpected behaviour. + * The retrieval also considers the set of unused segments in the metadata store. + * Unused segments could be deleted by a kill task at any time and might lead to unexpected behaviour. * This option exists mainly to provide a consistent view of the metadata, for example, in calls from MSQ controller * and worker and would generally not be required. - * - * @param id The segment id to retrieve - * - * @return DataSegment used segment corresponding to given id */ - DataSegment retrieveSegmentForId(String id, boolean includeUnused); + DataSegment retrieveSegmentForId(String dataSource, String segmentId); + + DataSegment retrieveUsedSegmentForId(String dataSource, String segmentId); /** * Delete entries from the upgrade segments table after the corresponding replace task has ended diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 8ecc2647e55d..d609887b11e2 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -48,6 +48,9 @@ import org.apache.druid.java.util.common.lifecycle.LifecycleStart; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; +import org.apache.druid.metadata.segment.SegmentsMetadataTransaction; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; import org.apache.druid.segment.SegmentMetadata; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.SegmentUtils; @@ -60,7 +63,6 @@ import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.TimelineObjectHolder; -import org.apache.druid.timeline.partition.NoneShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.PartialShardSpec; import org.apache.druid.timeline.partition.PartitionChunk; @@ -70,25 +72,16 @@ import org.joda.time.DateTime; import org.joda.time.Interval; import org.joda.time.chrono.ISOChronology; -import org.skife.jdbi.v2.Handle; import org.skife.jdbi.v2.PreparedBatch; -import org.skife.jdbi.v2.PreparedBatchPart; import org.skife.jdbi.v2.Query; import org.skife.jdbi.v2.ResultIterator; -import org.skife.jdbi.v2.StatementContext; import org.skife.jdbi.v2.TransactionCallback; -import org.skife.jdbi.v2.TransactionStatus; -import org.skife.jdbi.v2.Update; import org.skife.jdbi.v2.exceptions.CallbackFailedException; -import org.skife.jdbi.v2.util.ByteArrayMapper; import javax.annotation.Nullable; import javax.validation.constraints.NotNull; import java.io.IOException; -import java.sql.ResultSet; -import java.sql.SQLException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -99,6 +92,7 @@ import java.util.Objects; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; import java.util.stream.Collectors; import 
java.util.stream.IntStream; @@ -119,8 +113,11 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; private final boolean schemaPersistEnabled; + private final SqlSegmentsMetadataTransactionFactory transactionFactory; + @Inject public IndexerSQLMetadataStorageCoordinator( + SqlSegmentsMetadataTransactionFactory transactionFactory, ObjectMapper jsonMapper, MetadataStorageTablesConfig dbTables, SQLMetadataConnector connector, @@ -128,6 +125,7 @@ public IndexerSQLMetadataStorageCoordinator( CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { + this.transactionFactory = transactionFactory; this.jsonMapper = jsonMapper; this.dbTables = dbTables; this.connector = connector; @@ -178,14 +176,14 @@ private Set doRetrieveUsedSegments( final Segments visibility ) { - return connector.retryWithHandle( - handle -> { + return retryDatasourceTransaction( + dataSource, + transaction -> { if (visibility == Segments.ONLY_VISIBLE) { - final SegmentTimeline timeline = - getTimelineForIntervalsWithHandle(handle, dataSource, intervals); + final SegmentTimeline timeline = getTimelineForIntervals(transaction, intervals); return timeline.findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE); } else { - return retrieveAllUsedSegmentsForIntervalsWithHandle(handle, dataSource, intervals); + return asSet(() -> transaction.findUsedSegmentsOverlappingAnyOf(intervals)); } } ); @@ -194,96 +192,16 @@ private Set doRetrieveUsedSegments( @Override public List> retrieveUsedSegmentsAndCreatedDates(String dataSource, List intervals) { - StringBuilder queryBuilder = new StringBuilder( - "SELECT created_date, payload FROM %1$s WHERE dataSource = :dataSource AND used = true" - ); - - final boolean compareIntervalEndpointsAsString = intervals.stream() - .allMatch(Intervals::canCompareEndpointsAsStrings); - final SqlSegmentsMetadataQuery.IntervalMode intervalMode = SqlSegmentsMetadataQuery.IntervalMode.OVERLAPS; - - queryBuilder.append( - SqlSegmentsMetadataQuery.getConditionForIntervalsAndMatchMode( - compareIntervalEndpointsAsString ? 
intervals : Collections.emptyList(), - intervalMode, - connector.getQuoteString() - ) - ); - - final String queryString = StringUtils.format(queryBuilder.toString(), dbTables.getSegmentsTable()); - return connector.retryWithHandle( - handle -> { - Query> query = handle - .createQuery(queryString) - .bind("dataSource", dataSource); - - if (compareIntervalEndpointsAsString) { - SqlSegmentsMetadataQuery.bindIntervalsToQuery(query, intervals); - } - - final List> segmentsWithCreatedDates = query - .map((int index, ResultSet r, StatementContext ctx) -> - new Pair<>( - JacksonUtils.readValue(jsonMapper, r.getBytes("payload"), DataSegment.class), - r.getString("created_date") - ) - ) - .list(); - - if (intervals.isEmpty() || compareIntervalEndpointsAsString) { - return segmentsWithCreatedDates; - } else { - return segmentsWithCreatedDates - .stream() - .filter(pair -> { - for (Interval interval : intervals) { - if (intervalMode.apply(interval, pair.lhs.getInterval())) { - return true; - } - } - return false; - }).collect(Collectors.toList()); - } - } - ); - } - - List retrieveUnusedSegmentIdsForExactIntervalAndVersion( - String dataSource, - Interval interval, - String version - ) - { - final String sql = "SELECT id FROM %1$s" - + " WHERE used = :used" - + " AND dataSource = :dataSource" - + " AND version = :version" - + " AND start = :start AND %2$send%2$s = :end"; - - final List matchingSegments = connector.inReadOnlyTransaction( - (handle, status) -> { - final Query> query = handle - .createQuery(StringUtils.format( - sql, - dbTables.getSegmentsTable(), - connector.getQuoteString() - )) - .setFetchSize(connector.getStreamingFetchSize()) - .bind("used", false) - .bind("dataSource", dataSource) - .bind("version", version) - .bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()); - - try (final ResultIterator iterator = query.map((index, r, ctx) -> r.getString(1)).iterator()) { - return ImmutableList.copyOf(iterator); - } - } + return retryDatasourceTransaction( + dataSource, + transaction -> transaction.findUsedSegmentsPlusOverlappingAnyOf(intervals) + .stream() + .map(s -> Pair.of( + s.getDataSegment(), + s.getCreatedDate() == null ? 
null : s.getCreatedDate().toString() + )) + .collect(Collectors.toList()) ); - - log.debug("Found [%,d] unused segments for datasource[%s] for interval[%s] and version[%s].", - matchingSegments.size(), dataSource, interval, version); - return matchingSegments; } @Override @@ -295,36 +213,31 @@ public List retrieveUnusedSegmentsForInterval( @Nullable DateTime maxUsedStatusLastUpdatedTime ) { - final List matchingSegments = connector.inReadOnlyTransaction( - (handle, status) -> { - try (final CloseableIterator iterator = - SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUnusedSegments( - dataSource, - Collections.singletonList(interval), - versions, - limit, - null, - null, - maxUsedStatusLastUpdatedTime - ) - ) { - return ImmutableList.copyOf(iterator); - } - } + final List matchingSegments = inReadOnlyDatasourceTransaction( + dataSource, + transaction -> transaction.findUnusedSegments( + interval, + versions, + limit, + maxUsedStatusLastUpdatedTime + ) ); - log.info("Found [%,d] unused segments for datasource[%s] in interval[%s] and versions[%s] with maxUsedStatusLastUpdatedTime[%s].", - matchingSegments.size(), dataSource, interval, versions, maxUsedStatusLastUpdatedTime); + log.debug( + "Found [%,d] unused segments for datasource[%s] in interval[%s] and" + + " versions[%s] with maxUsedStatusLastUpdatedTime[%s].", + matchingSegments.size(), dataSource, interval, versions, maxUsedStatusLastUpdatedTime + ); return matchingSegments; } @Override public Set retrieveSegmentsById(String dataSource, Set segmentIds) { - return connector.inReadOnlyTransaction( - (handle, transactionStatus) -> - retrieveSegmentsById(handle, dataSource, segmentIds) + return inReadOnlyDatasourceTransaction( + dataSource, + transaction -> + retrieveSegmentsById(transaction, segmentIds) .stream() .map(DataSegmentPlus::getDataSegment) .collect(Collectors.toSet()) @@ -334,111 +247,33 @@ public Set retrieveSegmentsById(String dataSource, Set segm @Override public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interval) { - final Integer numSegmentsMarkedUnused = connector.retryTransaction( - (handle, status) -> - SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .markSegmentsUnused(dataSource, interval), - 3, - SQLMetadataConnector.DEFAULT_MAX_TRIES + final Integer numSegmentsMarkedUnused = retryDatasourceTransaction( + dataSource, + transaction -> transaction.markSegmentsWithinIntervalAsUnused(interval, DateTimes.nowUtc()) ); - log.info("Marked %,d segments unused for %s for interval %s.", numSegmentsMarkedUnused, dataSource, interval); + log.info( + "Marked [%,d] segments unused for datasource[%s], interval[%s].", + numSegmentsMarkedUnused, dataSource, interval + ); return numSegmentsMarkedUnused; } - /** - * Fetches all the pending segments, whose interval overlaps with the given search interval, from the metadata store. 
- */ - private List getPendingSegmentsForInterval( - final Handle handle, - final String dataSource, - final Interval interval - ) - { - final boolean compareIntervalEndpointsAsStrings = Intervals.canCompareEndpointsAsStrings(interval); - - String sql = "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" - + " FROM " + dbTables.getPendingSegmentsTable() - + " WHERE dataSource = :dataSource"; - if (compareIntervalEndpointsAsStrings) { - sql = sql - + " AND start < :end" - + StringUtils.format(" AND %1$send%1$s > :start", connector.getQuoteString()); - } - - Query> query = handle.createQuery(sql) - .bind("dataSource", dataSource); - if (compareIntervalEndpointsAsStrings) { - query = query.bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()); - } - - - final ResultIterator pendingSegmentIterator = - query.map((index, r, ctx) -> PendingSegmentRecord.fromResultSet(r, jsonMapper)) - .iterator(); - final ImmutableList.Builder pendingSegments = ImmutableList.builder(); - while (pendingSegmentIterator.hasNext()) { - final PendingSegmentRecord pendingSegment = pendingSegmentIterator.next(); - if (compareIntervalEndpointsAsStrings || pendingSegment.getId().getInterval().overlaps(interval)) { - pendingSegments.add(pendingSegment); - } - } - pendingSegmentIterator.close(); - return pendingSegments.build(); - } - - private List getPendingSegmentsForTaskAllocatorId( - final Handle handle, - final String dataSource, - final String taskAllocatorId - ) - { - String sql = "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" - + " FROM " + dbTables.getPendingSegmentsTable() - + " WHERE dataSource = :dataSource AND task_allocator_id = :task_allocator_id"; - - Query> query = handle.createQuery(sql) - .bind("dataSource", dataSource) - .bind("task_allocator_id", taskAllocatorId); - - final ResultIterator pendingSegmentRecords = - query.map((index, r, ctx) -> PendingSegmentRecord.fromResultSet(r, jsonMapper)) - .iterator(); - - final List pendingSegments = new ArrayList<>(); - while (pendingSegmentRecords.hasNext()) { - pendingSegments.add(pendingSegmentRecords.next()); - } - - pendingSegmentRecords.close(); - - return pendingSegments; - } - - private SegmentTimeline getTimelineForIntervalsWithHandle( - final Handle handle, - final String dataSource, + private SegmentTimeline getTimelineForIntervals( + final SegmentsMetadataTransaction transaction, final List intervals ) throws IOException { - try (final CloseableIterator iterator = - SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegments(dataSource, intervals)) { + try (final CloseableIterator iterator + = transaction.findUsedSegmentsOverlappingAnyOf(intervals)) { return SegmentTimeline.forSegments(iterator); } } - private Set retrieveAllUsedSegmentsForIntervalsWithHandle( - final Handle handle, - final String dataSource, - final List intervals - ) throws IOException + private static Set asSet(Supplier> iteratorSupplier) throws IOException { - try (final CloseableIterator iterator = - SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegments(dataSource, intervals)) { - final Set retVal = new HashSet<>(); + try (CloseableIterator iterator = iteratorSupplier.get()) { + final Set retVal = new HashSet<>(); iterator.forEachRemaining(retVal::add); return retVal; } @@ -482,27 +317,18 @@ public SegmentPublishResult commitSegmentsAndMetadata( final String dataSource 
= segments.iterator().next().getDataSource(); - // Find which segments are used (i.e. not overshadowed). - final Set usedSegments = new HashSet<>(); - List> segmentHolders = - SegmentTimeline.forSegments(segments).lookupWithIncompletePartitions(Intervals.ETERNITY); - for (TimelineObjectHolder holder : segmentHolders) { - for (PartitionChunk chunk : holder.getObject()) { - usedSegments.add(chunk.getObject()); - } - } - final AtomicBoolean definitelyNotUpdated = new AtomicBoolean(false); try { - return connector.retryTransaction( - (handle, transactionStatus) -> { + return retryDatasourceTransaction( + dataSource, + transaction -> { // Set definitelyNotUpdated back to false upon retrying. definitelyNotUpdated.set(false); if (startMetadata != null) { final DataStoreMetadataUpdateResult result = updateDataSourceMetadataWithHandle( - handle, + transaction, dataSource, startMetadata, endMetadata @@ -510,7 +336,7 @@ public SegmentPublishResult commitSegmentsAndMetadata( if (result.isFailed()) { // Metadata was definitely not updated. - transactionStatus.setRollbackOnly(); + transaction.setRollbackOnly(); definitelyNotUpdated.set(true); if (result.canRetry()) { @@ -522,16 +348,13 @@ public SegmentPublishResult commitSegmentsAndMetadata( } final Set inserted = - announceHistoricalSegmentBatch( - handle, + insertSegments( + transaction, segments, - usedSegments, segmentSchemaMapping ); return SegmentPublishResult.ok(ImmutableSet.copyOf(inserted)); - }, - 3, - getSqlMetadataMaxRetry() + } ); } catch (CallbackFailedException e) { @@ -551,15 +374,19 @@ public SegmentPublishResult commitReplaceSegments( @Nullable final SegmentSchemaMapping segmentSchemaMapping ) { - verifySegmentsToCommit(replaceSegments); + final String dataSource = verifySegmentsToCommit(replaceSegments); try { - return connector.retryTransaction( - (handle, transactionStatus) -> { + return retryDatasourceTransaction( + dataSource, + transaction -> { final Set segmentsToInsert = new HashSet<>(replaceSegments); - Set upgradedSegments = - createNewIdsOfAppendSegmentsAfterReplace(handle, replaceSegments, locksHeldByReplaceTask); + Set upgradedSegments = createNewIdsOfAppendSegmentsAfterReplace( + transaction, + replaceSegments, + locksHeldByReplaceTask + ); Map upgradeSegmentMetadata = new HashMap<>(); final Map upgradedFromSegmentIdMap = new HashMap<>(); @@ -580,18 +407,16 @@ public SegmentPublishResult commitReplaceSegments( } return SegmentPublishResult.ok( insertSegments( - handle, + transaction, segmentsToInsert, segmentSchemaMapping, upgradeSegmentMetadata, Collections.emptyMap(), upgradedFromSegmentIdMap ), - upgradePendingSegmentsOverlappingWith(segmentsToInsert) + upgradePendingSegmentsOverlappingWith(transaction, segmentsToInsert) ); - }, - 3, - getSqlMetadataMaxRetry() + } ); } catch (CallbackFailedException e) { @@ -657,42 +482,33 @@ public SegmentPublishResult commitMetadataOnly( final AtomicBoolean definitelyNotUpdated = new AtomicBoolean(false); try { - return connector.retryTransaction( - new TransactionCallback<>() - { - @Override - public SegmentPublishResult inTransaction( - final Handle handle, - final TransactionStatus transactionStatus - ) throws Exception - { - // Set definitelyNotUpdated back to false upon retrying. - definitelyNotUpdated.set(false); + return retryDatasourceTransaction( + dataSource, + transaction -> { + // Set definitelyNotUpdated back to false upon retrying. 
+ definitelyNotUpdated.set(false); - final DataStoreMetadataUpdateResult result = updateDataSourceMetadataWithHandle( - handle, - dataSource, - startMetadata, - endMetadata - ); + final DataStoreMetadataUpdateResult result = updateDataSourceMetadataWithHandle( + transaction, + dataSource, + startMetadata, + endMetadata + ); - if (result.isFailed()) { - // Metadata was definitely not updated. - transactionStatus.setRollbackOnly(); - definitelyNotUpdated.set(true); + if (result.isFailed()) { + // Metadata was definitely not updated. + transaction.setRollbackOnly(); + definitelyNotUpdated.set(true); - if (result.canRetry()) { - throw new RetryTransactionException(result.getErrorMsg()); - } else { - throw new RuntimeException(result.getErrorMsg()); - } + if (result.canRetry()) { + throw new RetryTransactionException(result.getErrorMsg()); + } else { + throw new RuntimeException(result.getErrorMsg()); } - - return SegmentPublishResult.ok(ImmutableSet.of()); } - }, - 3, - getSqlMetadataMaxRetry() + + return SegmentPublishResult.ok(ImmutableSet.of()); + } ); } catch (CallbackFailedException e) { @@ -724,9 +540,10 @@ public Map allocatePendingSegments Preconditions.checkNotNull(allocateInterval, "interval"); final Interval interval = allocateInterval.withChronology(ISOChronology.getInstanceUTC()); - return connector.retryWithHandle( - handle -> allocatePendingSegments( - handle, + return retryDatasourceTransaction( + dataSource, + transaction -> allocatePendingSegments( + transaction, dataSource, interval, skipSegmentLineageCheck, @@ -749,11 +566,12 @@ public SegmentIdWithShardSpec allocatePendingSegment( Preconditions.checkNotNull(interval, "interval"); final Interval allocateInterval = interval.withChronology(ISOChronology.getInstanceUTC()); - return connector.retryWithHandle( - handle -> { + return retryDatasourceTransaction( + dataSource, + transaction -> { // Get the time chunk and associated data segments for the given interval, if any final List> existingChunks = - getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)) + getTimelineForIntervals(transaction, ImmutableList.of(interval)) .lookup(interval); if (existingChunks.size() > 1) { // Not possible to expand more than one chunk with a single segment. @@ -766,7 +584,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( if (skipSegmentLineageCheck) { return allocatePendingSegment( - handle, + transaction, dataSource, allocateInterval, createRequest, @@ -774,7 +592,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( ); } else { return allocatePendingSegmentWithSegmentLineageCheck( - handle, + transaction, dataSource, allocateInterval, createRequest, @@ -785,10 +603,23 @@ public SegmentIdWithShardSpec allocatePendingSegment( ); } - @Override - public List upgradePendingSegmentsOverlappingWith( + /** + * Creates and inserts new IDs for the pending segments that overlap with the given + * replace segments being committed. The newly created pending segment IDs: + *
• Have the same interval and version as that of an overlapping segment + * committed by the REPLACE task. + * • Cannot be committed but are only used to serve realtime queries against + * those versions.
+ * + * @param replaceSegments Segments being committed by a REPLACE task + * @return List of inserted pending segment records + */ + private List upgradePendingSegmentsOverlappingWith( + SegmentsMetadataTransaction transaction, Set replaceSegments - ) + ) throws JsonProcessingException { if (replaceSegments.isEmpty()) { return Collections.emptyList(); @@ -805,9 +636,7 @@ public List upgradePendingSegmentsOverlappingWith( } final String datasource = replaceSegments.iterator().next().getDataSource(); - return connector.retryWithHandle( - handle -> upgradePendingSegments(handle, datasource, replaceIntervalToMaxId) - ); + return upgradePendingSegments(transaction, datasource, replaceIntervalToMaxId); } /** @@ -823,10 +652,10 @@ public List upgradePendingSegmentsOverlappingWith( * @return Inserted pending segment records */ private List upgradePendingSegments( - Handle handle, + SegmentsMetadataTransaction transaction, String datasource, Map replaceIntervalToMaxId - ) throws JsonProcessingException + ) { final List upgradedPendingSegments = new ArrayList<>(); @@ -839,7 +668,7 @@ private List upgradePendingSegments( int currentPartitionNumber = maxSegmentId.getShardSpec().getPartitionNum(); final List overlappingPendingSegments - = getPendingSegmentsForInterval(handle, datasource, replaceInterval); + = transaction.findPendingSegmentsOverlapping(replaceInterval); for (PendingSegmentRecord overlappingPendingSegment : overlappingPendingSegments) { final SegmentIdWithShardSpec pendingSegmentId = overlappingPendingSegment.getId(); @@ -869,12 +698,8 @@ private List upgradePendingSegments( // Do not skip lineage check so that the sequence_name_prev_id_sha1 // includes hash of both sequence_name and prev_segment_id - int numInsertedPendingSegments = insertPendingSegmentsIntoMetastore( - handle, - upgradedPendingSegments, - datasource, - false - ); + int numInsertedPendingSegments = + transaction.insertPendingSegments(upgradedPendingSegments, false); log.info( "Inserted total [%d] new versions for [%d] pending segments.", numInsertedPendingSegments, upgradedPendingSegments.size() @@ -912,29 +737,20 @@ private boolean shouldUpgradePendingSegment( @Nullable private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( - final Handle handle, + final SegmentsMetadataTransaction transaction, final String dataSource, final Interval interval, final SegmentCreateRequest createRequest, final List> existingChunks - ) throws IOException + ) { - final String sql = StringUtils.format( - "SELECT payload FROM %s WHERE " - + "dataSource = :dataSource AND " - + "sequence_name = :sequence_name AND " - + "sequence_prev_id = :sequence_prev_id", - dbTables.getPendingSegmentsTable() + final List existingPendingSegmentIds = transaction.findPendingSegmentIds( + createRequest.getSequenceName(), + createRequest.getPreviousSegmentId() ); - final Query> query - = handle.createQuery(sql) - .bind("dataSource", dataSource) - .bind("sequence_name", createRequest.getSequenceName()) - .bind("sequence_prev_id", createRequest.getPreviousSegmentId()); - final String usedSegmentVersion = existingChunks.isEmpty() ? 
null : existingChunks.get(0).getVersion(); - final CheckExistingSegmentIdResult result = findExistingPendingSegment( - query, + final CheckExistingSegmentIdResult result = findPendingSegmentMatchingIntervalAndVersion( + existingPendingSegmentIds, interval, createRequest.getSequenceName(), createRequest.getPreviousSegmentId(), @@ -947,7 +763,7 @@ private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( } final SegmentIdWithShardSpec newIdentifier = createNewPendingSegment( - handle, + transaction, dataSource, interval, createRequest.getPartialShardSpec(), @@ -958,35 +774,14 @@ private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( return null; } - // SELECT -> INSERT can fail due to races; callers must be prepared to retry. - // Avoiding ON DUPLICATE KEY since it's not portable. - // Avoiding try/catch since it may cause inadvertent transaction-splitting. - - // UNIQUE key for the row, ensuring sequences do not fork in two directions. - // Using a single column instead of (sequence_name, sequence_prev_id) as some MySQL storage engines - // have difficulty with large unique keys (see https://github.com/apache/druid/issues/2319) - final String sequenceNamePrevIdSha1 = BaseEncoding.base16().encode( - Hashing.sha1() - .newHasher() - .putBytes(StringUtils.toUtf8(createRequest.getSequenceName())) - .putByte((byte) 0xff) - .putBytes(StringUtils.toUtf8(createRequest.getPreviousSegmentId())) - .putByte((byte) 0xff) - .putBytes(StringUtils.toUtf8(newIdentifier.getVersion())) - .hash() - .asBytes() - ); - - insertPendingSegmentIntoMetastore( - handle, + final PendingSegmentRecord record = new PendingSegmentRecord( newIdentifier, - dataSource, - interval, - createRequest.getPreviousSegmentId(), createRequest.getSequenceName(), - sequenceNamePrevIdSha1, + createRequest.getPreviousSegmentId(), + null, createRequest.getTaskAllocatorId() ); + transaction.insertPendingSegment(record, false); return newIdentifier; } @@ -997,25 +792,26 @@ public SegmentTimeline getSegmentTimelineForAllocation( boolean reduceMetadataIO ) { - return connector.retryWithHandle( - handle -> { + return retryDatasourceTransaction( + dataSource, + transaction -> { if (reduceMetadataIO) { - return SegmentTimeline.forSegments(retrieveUsedSegmentsForAllocation(handle, dataSource, interval)); + return SegmentTimeline.forSegments(retrieveUsedSegmentsForAllocation(transaction, dataSource, interval)); } else { - return getTimelineForIntervalsWithHandle(handle, dataSource, Collections.singletonList(interval)); + return getTimelineForIntervals(transaction, Collections.singletonList(interval)); } } ); } private Map allocatePendingSegments( - final Handle handle, + final SegmentsMetadataTransaction transaction, final String dataSource, final Interval interval, final boolean skipSegmentLineageCheck, final List requests, final boolean reduceMetadataIO - ) throws IOException + ) { // Get the time chunk and associated data segments for the given interval, if any final List> existingChunks @@ -1032,10 +828,10 @@ private Map allocatePendingSegment final Map existingSegmentIds; if (skipSegmentLineageCheck) { existingSegmentIds = - getExistingSegmentIdsSkipLineageCheck(handle, dataSource, interval, existingVersion, requests); + getExistingSegmentIdsSkipLineageCheck(transaction, interval, existingVersion, requests); } else { existingSegmentIds = - getExistingSegmentIdsWithLineageCheck(handle, dataSource, interval, existingVersion, requests); + getExistingSegmentIdsWithLineageCheck(transaction, interval, 
existingVersion, requests); } // For every request see if a segment id already exists @@ -1055,7 +851,7 @@ private Map allocatePendingSegment // For each of the remaining requests, create a new segment final Map createdSegments = createNewSegments( - handle, + transaction, dataSource, interval, skipSegmentLineageCheck, @@ -1070,10 +866,8 @@ private Map allocatePendingSegment // UNIQUE key for the row, ensuring we don't have more than one segment per sequence per interval. // Using a single column instead of (sequence_name, sequence_prev_id) as some MySQL storage engines // have difficulty with large unique keys (see https://github.com/apache/druid/issues/2319) - insertPendingSegmentsIntoMetastore( - handle, + transaction.insertPendingSegments( ImmutableList.copyOf(createdSegments.values()), - dataSource, skipSegmentLineageCheck ); @@ -1085,31 +879,19 @@ private Map allocatePendingSegment @Nullable private SegmentIdWithShardSpec allocatePendingSegment( - final Handle handle, + final SegmentsMetadataTransaction transaction, final String dataSource, final Interval interval, final SegmentCreateRequest createRequest, final List> existingChunks - ) throws IOException + ) { - final String sql = StringUtils.format( - "SELECT payload FROM %s WHERE " - + "dataSource = :dataSource AND " - + "sequence_name = :sequence_name AND " - + "start = :start AND " - + "%2$send%2$s = :end", - dbTables.getPendingSegmentsTable(), - connector.getQuoteString() + final List existingPendingSegmentIds = transaction.findPendingSegmentIdsWithExactInterval( + createRequest.getSequenceName(), + interval ); - final Query> query - = handle.createQuery(sql) - .bind("dataSource", dataSource) - .bind("sequence_name", createRequest.getSequenceName()) - .bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()); - - final CheckExistingSegmentIdResult result = findExistingPendingSegment( - query, + final CheckExistingSegmentIdResult result = findPendingSegmentMatchingIntervalAndVersion( + existingPendingSegmentIds, interval, createRequest.getSequenceName(), null, @@ -1121,7 +903,7 @@ private SegmentIdWithShardSpec allocatePendingSegment( } final SegmentIdWithShardSpec newIdentifier = createNewPendingSegment( - handle, + transaction, dataSource, interval, createRequest.getPartialShardSpec(), @@ -1132,37 +914,15 @@ private SegmentIdWithShardSpec allocatePendingSegment( return null; } - // SELECT -> INSERT can fail due to races; callers must be prepared to retry. - // Avoiding ON DUPLICATE KEY since it's not portable. - // Avoiding try/catch since it may cause inadvertent transaction-splitting. - - // UNIQUE key for the row, ensuring we don't have more than one segment per sequence per interval. 
- // Using a single column instead of (sequence_name, sequence_prev_id) as some MySQL storage engines - // have difficulty with large unique keys (see https://github.com/apache/druid/issues/2319) - final String sequenceNamePrevIdSha1 = BaseEncoding.base16().encode( - Hashing.sha1() - .newHasher() - .putBytes(StringUtils.toUtf8(createRequest.getSequenceName())) - .putByte((byte) 0xff) - .putLong(interval.getStartMillis()) - .putLong(interval.getEndMillis()) - .putByte((byte) 0xff) - .putBytes(StringUtils.toUtf8(newIdentifier.getVersion())) - .hash() - .asBytes() - ); - // always insert empty previous sequence id - insertPendingSegmentIntoMetastore( - handle, + final PendingSegmentRecord record = new PendingSegmentRecord( newIdentifier, - dataSource, - interval, - "", createRequest.getSequenceName(), - sequenceNamePrevIdSha1, + "", + null, createRequest.getTaskAllocatorId() ); + transaction.insertPendingSegment(record, true); log.info( "Created new pending segment[%s] for datasource[%s], interval[%s].", @@ -1176,39 +936,19 @@ private SegmentIdWithShardSpec allocatePendingSegment( * Returns a map from sequenceName to segment id. */ private Map getExistingSegmentIdsSkipLineageCheck( - Handle handle, - String dataSource, + SegmentsMetadataTransaction transaction, Interval interval, String usedSegmentVersion, List requests - ) throws IOException + ) { - final Query> query = handle - .createQuery( - StringUtils.format( - "SELECT sequence_name, payload " - + "FROM %s WHERE " - + "dataSource = :dataSource AND " - + "start = :start AND " - + "%2$send%2$s = :end", - dbTables.getPendingSegmentsTable(), - connector.getQuoteString() - ) - ) - .bind("dataSource", dataSource) - .bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()); - - final ResultIterator dbSegments = query - .map((index, r, ctx) -> PendingSegmentsRecord.fromResultSet(r)) - .iterator(); + final List existingPendingSegments + = transaction.findPendingSegmentsWithExactInterval(interval); // Map from sequenceName to segment id final Map sequenceToSegmentId = new HashMap<>(); - while (dbSegments.hasNext()) { - final PendingSegmentsRecord record = dbSegments.next(); - final SegmentIdWithShardSpec segmentId = - jsonMapper.readValue(record.getPayload(), SegmentIdWithShardSpec.class); + for (PendingSegmentRecord record : existingPendingSegments) { + final SegmentIdWithShardSpec segmentId = record.getId(); // Consider only the pending segments allocated for the latest used segment version if (usedSegmentVersion == null || segmentId.getVersion().equals(usedSegmentVersion)) { @@ -1229,12 +969,11 @@ private Map getExistingSegme * Returns a map from sequenceName to segment id. */ private Map getExistingSegmentIdsWithLineageCheck( - Handle handle, - String dataSource, + SegmentsMetadataTransaction transaction, Interval interval, String usedSegmentVersion, List requests - ) throws IOException + ) { // This cannot be batched because there doesn't seem to be a clean option: // 1. WHERE must have sequence_name and sequence_prev_id but not start or end. @@ -1242,21 +981,14 @@ private Map getExistingSegme // end are used to determine if the found segment is valid or not) // 2. IN filters on sequence_name and sequence_prev_id might perform worse than individual SELECTs? // 3. 
IN filter on sequence_name alone might be a feasible option worth evaluating - final String sql = StringUtils.format( - "SELECT payload FROM %s WHERE " - + "dataSource = :dataSource AND " - + "sequence_name = :sequence_name AND " - + "sequence_prev_id = :sequence_prev_id", - dbTables.getPendingSegmentsTable() - ); - final Map requestToResult = new HashMap<>(); for (SegmentCreateRequest request : requests) { - CheckExistingSegmentIdResult result = findExistingPendingSegment( - handle.createQuery(sql) - .bind("dataSource", dataSource) - .bind("sequence_name", request.getSequenceName()) - .bind("sequence_prev_id", request.getPreviousSegmentId()), + final List existingPendingSegmentIds = transaction.findPendingSegmentIds( + request.getSequenceName(), + request.getPreviousSegmentId() + ); + CheckExistingSegmentIdResult result = findPendingSegmentMatchingIntervalAndVersion( + existingPendingSegmentIds, interval, request.getSequenceName(), request.getPreviousSegmentId(), @@ -1268,23 +1000,19 @@ private Map getExistingSegme return requestToResult; } - private CheckExistingSegmentIdResult findExistingPendingSegment( - final Query> query, + private CheckExistingSegmentIdResult findPendingSegmentMatchingIntervalAndVersion( + final List pendingSegments, final Interval interval, final String sequenceName, final @Nullable String previousSegmentId, final @Nullable String usedSegmentVersion - ) throws IOException + ) { - final List records = query.map(ByteArrayMapper.FIRST).list(); - if (records.isEmpty()) { + if (pendingSegments.isEmpty()) { return new CheckExistingSegmentIdResult(false, null); } - for (byte[] record : records) { - final SegmentIdWithShardSpec pendingSegment - = jsonMapper.readValue(record, SegmentIdWithShardSpec.class); - + for (SegmentIdWithShardSpec pendingSegment : pendingSegments) { // Consider only pending segments matching the expected version if (usedSegmentVersion == null || pendingSegment.getVersion().equals(usedSegmentVersion)) { if (pendingSegment.getInterval().isEqual(interval)) { @@ -1366,24 +1094,6 @@ public int hashCode() } } - private int deletePendingSegmentsById(Handle handle, String datasource, List pendingSegmentIds) - { - if (pendingSegmentIds.isEmpty()) { - return 0; - } - - Update query = handle.createStatement( - StringUtils.format( - "DELETE FROM %s WHERE dataSource = :dataSource %s", - dbTables.getPendingSegmentsTable(), - SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", pendingSegmentIds) - ) - ).bind("dataSource", datasource); - SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", pendingSegmentIds, query); - - return query.execute(); - } - private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( Set appendSegments, Map appendSegmentToReplaceLock, @@ -1393,18 +1103,15 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( @Nullable SegmentSchemaMapping segmentSchemaMapping ) { - verifySegmentsToCommit(appendSegments); + final String dataSource = verifySegmentsToCommit(appendSegments); if ((startMetadata == null && endMetadata != null) || (startMetadata != null && endMetadata == null)) { throw new IllegalArgumentException("start/end metadata pair must be either null or non-null"); } - final String dataSource = appendSegments.iterator().next().getDataSource(); - final List segmentIdsForNewVersions = connector.retryTransaction( - (handle, transactionStatus) - -> getPendingSegmentsForTaskAllocatorId(handle, dataSource, taskAllocatorId), - 0, - SQLMetadataConnector.DEFAULT_MAX_TRIES + 
final List segmentIdsForNewVersions = inReadOnlyDatasourceTransaction( + dataSource, + transaction -> transaction.findPendingSegments(taskAllocatorId) ); // Create entries for all required versions of the append segments @@ -1438,16 +1145,17 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( final AtomicBoolean metadataNotUpdated = new AtomicBoolean(false); try { - return connector.retryTransaction( - (handle, transactionStatus) -> { + return retryDatasourceTransaction( + dataSource, + transaction -> { metadataNotUpdated.set(false); if (startMetadata != null) { final DataStoreMetadataUpdateResult metadataUpdateResult - = updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata); + = updateDataSourceMetadataWithHandle(transaction, dataSource, startMetadata, endMetadata); if (metadataUpdateResult.isFailed()) { - transactionStatus.setRollbackOnly(); + transaction.setRollbackOnly(); metadataNotUpdated.set(true); if (metadataUpdateResult.canRetry()) { throw new RetryTransactionException(metadataUpdateResult.getErrorMsg()); @@ -1457,24 +1165,19 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( } } - insertIntoUpgradeSegmentsTable(handle, appendSegmentToReplaceLock); + insertIntoUpgradeSegmentsTable(transaction, appendSegmentToReplaceLock); // Delete the pending segments to be committed in this transaction in batches of at most 100 - final List> pendingSegmentIdBatches = Lists.partition( + int numDeletedPendingSegments = transaction.deletePendingSegments( allSegmentsToInsert.stream() .map(pendingSegment -> pendingSegment.getId().toString()) - .collect(Collectors.toList()), - 100 + .collect(Collectors.toList()) ); - int numDeletedPendingSegments = 0; - for (List pendingSegmentIdBatch : pendingSegmentIdBatches) { - numDeletedPendingSegments += deletePendingSegmentsById(handle, dataSource, pendingSegmentIdBatch); - } log.info("Deleted [%d] entries from pending segments table upon commit.", numDeletedPendingSegments); return SegmentPublishResult.ok( insertSegments( - handle, + transaction, allSegmentsToInsert, segmentSchemaMapping, Collections.emptyMap(), @@ -1482,9 +1185,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( upgradedFromSegmentIdMap ) ); - }, - 3, - getSqlMetadataMaxRetry() + } ); } catch (CallbackFailedException e) { @@ -1497,91 +1198,8 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( } } - @VisibleForTesting - int insertPendingSegmentsIntoMetastore( - Handle handle, - List pendingSegments, - String dataSource, - boolean skipSegmentLineageCheck - ) throws JsonProcessingException - { - final PreparedBatch insertBatch = handle.prepareBatch( - StringUtils.format( - "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, " - + "sequence_name_prev_id_sha1, payload, task_allocator_id, upgraded_from_segment_id) " - + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " - + ":sequence_name_prev_id_sha1, :payload, :task_allocator_id, :upgraded_from_segment_id)", - dbTables.getPendingSegmentsTable(), - connector.getQuoteString() - )); - - final String now = DateTimes.nowUtc().toString(); - final Set processedSegmentIds = new HashSet<>(); - for (PendingSegmentRecord pendingSegment : pendingSegments) { - final SegmentIdWithShardSpec segmentId = pendingSegment.getId(); - if (processedSegmentIds.contains(segmentId)) { - continue; - } - final Interval interval = 
segmentId.getInterval(); - - insertBatch.add() - .bind("id", segmentId.toString()) - .bind("dataSource", dataSource) - .bind("created_date", now) - .bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()) - .bind("sequence_name", pendingSegment.getSequenceName()) - .bind("sequence_prev_id", pendingSegment.getSequencePrevId()) - .bind( - "sequence_name_prev_id_sha1", - pendingSegment.computeSequenceNamePrevIdSha1(skipSegmentLineageCheck) - ) - .bind("payload", jsonMapper.writeValueAsBytes(segmentId)) - .bind("task_allocator_id", pendingSegment.getTaskAllocatorId()) - .bind("upgraded_from_segment_id", pendingSegment.getUpgradedFromSegmentId()); - - processedSegmentIds.add(segmentId); - } - int[] updated = insertBatch.execute(); - return Arrays.stream(updated).sum(); - } - - private void insertPendingSegmentIntoMetastore( - Handle handle, - SegmentIdWithShardSpec newIdentifier, - String dataSource, - Interval interval, - String previousSegmentId, - String sequenceName, - String sequenceNamePrevIdSha1, - String taskAllocatorId - ) throws JsonProcessingException - { - handle.createStatement( - StringUtils.format( - "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, " - + "sequence_name_prev_id_sha1, payload, task_allocator_id) " - + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " - + ":sequence_name_prev_id_sha1, :payload, :task_allocator_id)", - dbTables.getPendingSegmentsTable(), - connector.getQuoteString() - ) - ) - .bind("id", newIdentifier.toString()) - .bind("dataSource", dataSource) - .bind("created_date", DateTimes.nowUtc().toString()) - .bind("start", interval.getStart().toString()) - .bind("end", interval.getEnd().toString()) - .bind("sequence_name", sequenceName) - .bind("sequence_prev_id", previousSegmentId) - .bind("sequence_name_prev_id_sha1", sequenceNamePrevIdSha1) - .bind("payload", jsonMapper.writeValueAsBytes(newIdentifier)) - .bind("task_allocator_id", taskAllocatorId) - .execute(); - } - private Map createNewSegments( - Handle handle, + SegmentsMetadataTransaction transaction, String dataSource, Interval interval, boolean skipSegmentLineageCheck, @@ -1629,10 +1247,10 @@ private Map createNewSegments( // A pending segment having a higher partitionId must also be considered // to avoid clashes when inserting the pending segment created here. 
final Set pendingSegments = - getPendingSegmentsForInterval(handle, dataSource, interval) - .stream() - .map(PendingSegmentRecord::getId) - .collect(Collectors.toSet()); + transaction.findPendingSegmentsOverlapping(interval) + .stream() + .map(PendingSegmentRecord::getId) + .collect(Collectors.toSet()); final Map createdSegments = new HashMap<>(); final Map uniqueRequestToSegment = new HashMap<>(); @@ -1647,6 +1265,7 @@ private Map createNewSegments( createdSegment = uniqueRequestToSegment.get(uniqueRequest); } else { createdSegment = createNewPendingSegment( + transaction, request, dataSource, interval, @@ -1674,6 +1293,7 @@ private Map createNewSegments( @Nullable private PendingSegmentRecord createNewPendingSegment( + SegmentsMetadataTransaction transaction, SegmentCreateRequest request, String dataSource, Interval interval, @@ -1769,7 +1389,7 @@ private PendingSegmentRecord createNewPendingSegment( ) ); return new PendingSegmentRecord( - getTrueAllocatedId(pendingSegmentId), + getTrueAllocatedId(transaction, pendingSegmentId), request.getSequenceName(), request.getPreviousSegmentId(), null, @@ -1786,7 +1406,7 @@ private PendingSegmentRecord createNewPendingSegment( */ @Nullable private SegmentIdWithShardSpec createNewPendingSegment( - final Handle handle, + final SegmentsMetadataTransaction transaction, final String dataSource, final Interval interval, final PartialShardSpec partialShardSpec, @@ -1826,10 +1446,10 @@ private SegmentIdWithShardSpec createNewPendingSegment( // A pending segment having a higher partitionId must also be considered // to avoid clashes when inserting the pending segment created here. final Set pendings = - getPendingSegmentsForInterval(handle, dataSource, interval) - .stream() - .map(PendingSegmentRecord::getId) - .collect(Collectors.toSet()); + transaction.findPendingSegmentsOverlapping(interval) + .stream() + .map(PendingSegmentRecord::getId) + .collect(Collectors.toSet()); if (committedMaxId != null) { pendings.add(committedMaxId); @@ -1901,7 +1521,7 @@ private SegmentIdWithShardSpec createNewPendingSegment( committedMaxId == null ? 0 : committedMaxId.getShardSpec().getNumCorePartitions() ) ); - return getTrueAllocatedId(allocatedId); + return getTrueAllocatedId(transaction, allocatedId); } } @@ -1912,15 +1532,19 @@ private SegmentIdWithShardSpec createNewPendingSegment( * @param allocatedId The segment allcoted on the basis of used and pending segments * @return a segment id that isn't already used by other unused segments */ - private SegmentIdWithShardSpec getTrueAllocatedId(SegmentIdWithShardSpec allocatedId) + private SegmentIdWithShardSpec getTrueAllocatedId( + SegmentsMetadataTransaction transaction, + SegmentIdWithShardSpec allocatedId + ) { // Check if there is a conflict with an existing entry in the segments table - if (retrieveSegmentForId(allocatedId.asSegmentId().toString(), true) == null) { + if (transaction.findSegment(allocatedId.asSegmentId().toString()) == null) { return allocatedId; } // If yes, try to compute allocated partition num using the max unused segment shard spec SegmentId unusedMaxId = getMaxIdOfUnusedSegment( + transaction, allocatedId.getDataSource(), allocatedId.getInterval(), allocatedId.getVersion() @@ -1952,12 +1576,18 @@ private SegmentIdWithShardSpec getTrueAllocatedId(SegmentIdWithShardSpec allocat * @return null if no unused segment exists for the given parameters. 
*/ @Nullable - private SegmentId getMaxIdOfUnusedSegment(String datasource, Interval interval, String version) + private SegmentId getMaxIdOfUnusedSegment( + SegmentsMetadataTransaction transaction, + String datasource, + Interval interval, + String version + ) { - List unusedSegmentIds = retrieveUnusedSegmentIdsForExactIntervalAndVersion( - datasource, - interval, - version + Set unusedSegmentIds = + transaction.findUnusedSegmentIdsWithExactIntervalAndVersion(interval, version); + log.debug( + "Found [%,d] unused segments for datasource[%s] for interval[%s] and version[%s].", + unusedSegmentIds.size(), datasource, interval, version ); SegmentId unusedMaxId = null; @@ -1979,31 +1609,18 @@ private SegmentId getMaxIdOfUnusedSegment(String datasource, Interval interval, @Override public int deletePendingSegmentsCreatedInInterval(String dataSource, Interval deleteInterval) { - return connector.getDBI().inTransaction( - (handle, status) -> handle - .createStatement( - StringUtils.format( - "DELETE FROM %s WHERE datasource = :dataSource AND created_date >= :start AND created_date < :end", - dbTables.getPendingSegmentsTable() - ) - ) - .bind("dataSource", dataSource) - .bind("start", deleteInterval.getStart().toString()) - .bind("end", deleteInterval.getEnd().toString()) - .execute() + return retryDatasourceTransaction( + dataSource, + transaction -> transaction.deletePendingSegmentsCreatedIn(deleteInterval) ); } @Override public int deletePendingSegments(String dataSource) { - return connector.getDBI().inTransaction( - (handle, status) -> handle - .createStatement( - StringUtils.format("DELETE FROM %s WHERE datasource = :dataSource", dbTables.getPendingSegmentsTable()) - ) - .bind("dataSource", dataSource) - .execute() + return retryDatasourceTransaction( + dataSource, + DatasourceSegmentMetadataWriter::deleteAllPendingSegments ); } @@ -2015,7 +1632,7 @@ private boolean shouldPersistSchema(SegmentSchemaMapping segmentSchemaMapping) } private void persistSchema( - final Handle handle, + final SegmentsMetadataTransaction transaction, final Set segments, final SegmentSchemaMapping segmentSchemaMapping ) throws JsonProcessingException @@ -2033,29 +1650,28 @@ private void persistSchema( String dataSource = segments.stream().iterator().next().getDataSource(); segmentSchemaManager.persistSegmentSchema( - handle, + transaction.getHandle(), dataSource, segmentSchemaMapping.getSchemaVersion(), segmentSchemaMapping.getSchemaFingerprintToPayloadMap() ); } - private Set announceHistoricalSegmentBatch( - final Handle handle, + private Set insertSegments( + final SegmentsMetadataTransaction transaction, final Set segments, - final Set usedSegments, @Nullable final SegmentSchemaMapping segmentSchemaMapping - ) throws IOException + ) throws Exception { final Set toInsertSegments = new HashSet<>(); try { boolean shouldPersistSchema = shouldPersistSchema(segmentSchemaMapping); if (shouldPersistSchema) { - persistSchema(handle, segments, segmentSchemaMapping); + persistSchema(transaction, segments, segmentSchemaMapping); } - Set existedSegments = segmentExistsBatch(handle, segments); + Set existedSegments = transaction.findExistingSegmentIds(segments); log.info("Found these segments already exist in DB: %s", existedSegments); for (DataSegment segment : segments) { @@ -2064,60 +1680,30 @@ private Set announceHistoricalSegmentBatch( } } - // SELECT -> INSERT can fail due to races; callers must be prepared to retry. - // Avoiding ON DUPLICATE KEY since it's not portable. 
- // Avoiding try/catch since it may cause inadvertent transaction-splitting. - final List> partitionedSegments = Lists.partition( - new ArrayList<>(toInsertSegments), - MAX_NUM_SEGMENTS_TO_ANNOUNCE_AT_ONCE - ); + final DateTime createdTime = DateTimes.nowUtc(); + final Set usedSegments = findNonOvershadowedSegments(segments); + + final Set segmentPlusToInsert = toInsertSegments.stream().map(segment -> { + SegmentMetadata segmentMetadata + = shouldPersistSchema + ? segmentSchemaMapping.getSegmentIdToMetadataMap().get(segment.getId().toString()) + : null; + + return new DataSegmentPlus( + segment, + createdTime, + createdTime, + usedSegments.contains(segment), + segmentMetadata == null ? null : segmentMetadata.getSchemaFingerprint(), + segmentMetadata == null ? null : segmentMetadata.getNumRows(), + null + ); + }).collect(Collectors.toSet()); - final String now = DateTimes.nowUtc().toString(); - PreparedBatch preparedBatch = handle.prepareBatch(buildSqlToInsertSegments()); - for (List partition : partitionedSegments) { - for (DataSegment segment : partition) { - String segmentId = segment.getId().toString(); - - PreparedBatchPart preparedBatchPart = preparedBatch.add() - .bind("id", segmentId) - .bind("dataSource", segment.getDataSource()) - .bind("created_date", now) - .bind("start", segment.getInterval().getStart().toString()) - .bind("end", segment.getInterval().getEnd().toString()) - .bind("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec)) - .bind("version", segment.getVersion()) - .bind("used", usedSegments.contains(segment)) - .bind("payload", jsonMapper.writeValueAsBytes(segment)) - .bind("used_status_last_updated", now) - .bind("upgraded_from_segment_id", (String) null); - - if (schemaPersistEnabled) { - Long numRows = null; - String schemaFingerprint = null; - if (shouldPersistSchema && segmentSchemaMapping.getSegmentIdToMetadataMap().containsKey(segmentId)) { - SegmentMetadata segmentMetadata = segmentSchemaMapping.getSegmentIdToMetadataMap().get(segmentId); - numRows = segmentMetadata.getNumRows(); - schemaFingerprint = segmentMetadata.getSchemaFingerprint(); - } - preparedBatchPart - .bind("num_rows", numRows) - .bind("schema_fingerprint", schemaFingerprint); - } - } - final int[] affectedRows = preparedBatch.execute(); - final boolean succeeded = Arrays.stream(affectedRows).allMatch(eachAffectedRows -> eachAffectedRows == 1); - if (succeeded) { - log.infoSegments(partition, "Published segments to DB"); - } else { - final List failedToPublish = IntStream.range(0, partition.size()) - .filter(i -> affectedRows[i] != 1) - .mapToObj(partition::get) - .collect(Collectors.toList()); - throw new ISE( - "Failed to publish segments to DB: %s", - SegmentUtils.commaSeparatedIdentifiers(failedToPublish) - ); - } + if (schemaPersistEnabled) { + transaction.insertSegmentsWithMetadata(segmentPlusToInsert); + } else { + transaction.insertSegments(segmentPlusToInsert); } } catch (Exception e) { @@ -2132,7 +1718,7 @@ private Set announceHistoricalSegmentBatch( * Creates new versions of segments appended while a REPLACE task was in progress. 
*/ private Set createNewIdsOfAppendSegmentsAfterReplace( - final Handle handle, + final SegmentsMetadataTransaction transaction, final Set replaceSegments, final Set locksHeldByReplaceTask ) @@ -2143,8 +1729,6 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( return Collections.emptySet(); } - final String datasource = replaceSegments.iterator().next().getDataSource(); - // For each replace interval, find the number of core partitions and total partitions final Map intervalToNumCorePartitions = new HashMap<>(); final Map intervalToCurrentPartitionNum = new HashMap<>(); @@ -2163,10 +1747,10 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( .map(ReplaceTaskLock::getSupervisorTaskId) .findFirst().orElse(null); final Map upgradeSegmentToLockVersion - = getAppendSegmentsCommittedDuringTask(handle, taskId); + = getAppendSegmentsCommittedDuringTask(transaction, taskId); final List segmentsToUpgrade - = retrieveSegmentsById(handle, datasource, upgradeSegmentToLockVersion.keySet()); + = retrieveSegmentsById(transaction, upgradeSegmentToLockVersion.keySet()); if (segmentsToUpgrade.isEmpty()) { return Collections.emptySet(); @@ -2247,109 +1831,91 @@ private Set createNewIdsOfAppendSegmentsAfterReplace( *
  • The set of segments being committed is non-empty.
  • All segments belong to the same datasource.
  • * + * @return Name of the common data source */ - private void verifySegmentsToCommit(Collection segments) + private String verifySegmentsToCommit(Collection segments) { if (segments.isEmpty()) { - throw new IllegalArgumentException("No segment to commit"); + throw InvalidInput.exception("No segment to commit"); } final String dataSource = segments.iterator().next().getDataSource(); for (DataSegment segment : segments) { if (!dataSource.equals(segment.getDataSource())) { - throw new IllegalArgumentException("Segments to commit must all belong to the same datasource"); + throw InvalidInput.exception("Segments to commit must all belong to the same datasource"); } } + + return dataSource; + } + + private static Set findNonOvershadowedSegments(Set segments) + { + final Set nonOvershadowedSegments = new HashSet<>(); + + List> segmentHolders = + SegmentTimeline.forSegments(segments).lookupWithIncompletePartitions(Intervals.ETERNITY); + for (TimelineObjectHolder holder : segmentHolders) { + for (PartitionChunk chunk : holder.getObject()) { + nonOvershadowedSegments.add(chunk.getObject()); + } + } + + return nonOvershadowedSegments; } /** - * Inserts the given segments into the DB in batches of size - * {@link #MAX_NUM_SEGMENTS_TO_ANNOUNCE_AT_ONCE} and returns the set of - * segments actually inserted. + * Inserts the given segments into the metadata store. *

    * This method avoids inserting segment IDs which already exist in the DB. * Callers of this method might need to retry as INSERT followed by SELECT * might fail due to race conditions. + * + * @return Set of segments inserted */ private Set insertSegments( - Handle handle, + SegmentsMetadataTransaction transaction, Set segments, @Nullable SegmentSchemaMapping segmentSchemaMapping, Map upgradeSegmentMetadata, Map newVersionForAppendToParent, Map upgradedFromSegmentIdMap - ) throws IOException + ) throws Exception { if (shouldPersistSchema(segmentSchemaMapping)) { - persistSchema(handle, segments, segmentSchemaMapping); + persistSchema(transaction, segments, segmentSchemaMapping); } // Do not insert segment IDs which already exist - Set existingSegmentIds = segmentExistsBatch(handle, segments); + Set existingSegmentIds = transaction.findExistingSegmentIds(segments); final Set segmentsToInsert = segments.stream().filter( s -> !existingSegmentIds.contains(s.getId().toString()) ).collect(Collectors.toSet()); - // Insert the segments in batches of manageable size - final List> partitionedSegments = Lists.partition( - new ArrayList<>(segmentsToInsert), - MAX_NUM_SEGMENTS_TO_ANNOUNCE_AT_ONCE - ); - - final String now = DateTimes.nowUtc().toString(); - final PreparedBatch batch = handle.prepareBatch(buildSqlToInsertSegments()); - for (List partition : partitionedSegments) { - for (DataSegment segment : partition) { - PreparedBatchPart preparedBatchPart = - batch.add() - .bind("id", segment.getId().toString()) - .bind("dataSource", segment.getDataSource()) - .bind("created_date", now) - .bind("start", segment.getInterval().getStart().toString()) - .bind("end", segment.getInterval().getEnd().toString()) - .bind("partitioned", (segment.getShardSpec() instanceof NoneShardSpec) ? false : true) - .bind("version", segment.getVersion()) - .bind("used", true) - .bind("payload", jsonMapper.writeValueAsBytes(segment)) - .bind("used_status_last_updated", now) - .bind("upgraded_from_segment_id", upgradedFromSegmentIdMap.get(segment.getId().toString())); - - if (schemaPersistEnabled) { - SegmentMetadata segmentMetadata = - getSegmentMetadataFromSchemaMappingOrUpgradeMetadata( - segment.getId(), - segmentSchemaMapping, - newVersionForAppendToParent, - upgradeSegmentMetadata - ); - Long numRows = null; - String schemaFingerprint = null; - if (segmentMetadata != null) { - numRows = segmentMetadata.getNumRows(); - schemaFingerprint = segmentMetadata.getSchemaFingerprint(); - } - preparedBatchPart - .bind("num_rows", numRows) - .bind("schema_fingerprint", schemaFingerprint); - } - } + final DateTime createdTime = DateTimes.nowUtc(); + final Set segmentPlusToInsert = segmentsToInsert.stream().map(segment -> { + SegmentMetadata segmentMetadata = getSegmentMetadataFromSchemaMappingOrUpgradeMetadata( + segment.getId(), + segmentSchemaMapping, + newVersionForAppendToParent, + upgradeSegmentMetadata + ); - final int[] affectedRows = batch.execute(); + return new DataSegmentPlus( + segment, + createdTime, + createdTime, + true, + segmentMetadata == null ? null : segmentMetadata.getSchemaFingerprint(), + segmentMetadata == null ? 
null : segmentMetadata.getNumRows(), + upgradedFromSegmentIdMap.get(segment.getId().toString()) + ); + }).collect(Collectors.toSet()); - final List failedInserts = new ArrayList<>(); - for (int i = 0; i < partition.size(); ++i) { - if (affectedRows[i] != 1) { - failedInserts.add(partition.get(i)); - } - } - if (failedInserts.isEmpty()) { - log.infoSegments(partition, "Published segments to DB"); - } else { - throw new ISE( - "Failed to publish segments to DB: %s", - SegmentUtils.commaSeparatedIdentifiers(failedInserts) - ); - } + if (schemaPersistEnabled) { + transaction.insertSegmentsWithMetadata(segmentPlusToInsert); + } else { + transaction.insertSegments(segmentPlusToInsert); } return segmentsToInsert; @@ -2395,7 +1961,7 @@ private SegmentMetadata getSegmentMetadataFromSchemaMappingOrUpgradeMetadata( * {@link #MAX_NUM_SEGMENTS_TO_ANNOUNCE_AT_ONCE}. */ private void insertIntoUpgradeSegmentsTable( - Handle handle, + SegmentsMetadataTransaction transaction, Map segmentToReplaceLock ) { @@ -2403,7 +1969,7 @@ private void insertIntoUpgradeSegmentsTable( return; } - final PreparedBatch batch = handle.prepareBatch( + final PreparedBatch batch = transaction.getHandle().prepareBatch( StringUtils.format( "INSERT INTO %1$s (task_id, segment_id, lock_version)" + " VALUES (:task_id, :segment_id, :lock_version)", @@ -2441,45 +2007,19 @@ private void insertIntoUpgradeSegmentsTable( } } - private List retrieveSegmentsById(Handle handle, String datasource, Set segmentIds) + private List retrieveSegmentsById( + SegmentsMetadataTransaction transaction, + Set segmentIds + ) { if (segmentIds.isEmpty()) { return Collections.emptyList(); } if (schemaPersistEnabled) { - return SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveSegmentsWithSchemaById(datasource, segmentIds); - } else { - return SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveSegmentsById(datasource, segmentIds); - } - } - - private String buildSqlToInsertSegments() - { - String insertStatement = - "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s," - + " partitioned, version, used, payload, used_status_last_updated, upgraded_from_segment_id %3$s) " - + "VALUES (:id, :dataSource, :created_date, :start, :end," - + " :partitioned, :version, :used, :payload, :used_status_last_updated, :upgraded_from_segment_id %4$s)"; - - if (schemaPersistEnabled) { - return StringUtils.format( - insertStatement, - dbTables.getSegmentsTable(), - connector.getQuoteString(), - ", schema_fingerprint, num_rows", - ", :schema_fingerprint, :num_rows" - ); + return transaction.findSegmentsWithSchema(segmentIds); } else { - return StringUtils.format( - insertStatement, - dbTables.getSegmentsTable(), - connector.getQuoteString(), - "", - "" - ); + return transaction.findSegments(segmentIds); } } @@ -2491,7 +2031,7 @@ private String buildSqlToInsertSegments() * @return Map from append Segment ID to REPLACE lock version */ private Map getAppendSegmentsCommittedDuringTask( - Handle handle, + SegmentsMetadataTransaction transaction, String taskId ) { @@ -2500,7 +2040,7 @@ private Map getAppendSegmentsCommittedDuringTask( dbTables.getUpgradeSegmentsTable() ); - ResultIterator> resultIterator = handle + ResultIterator> resultIterator = transaction.getHandle() .createQuery(sql) .bind("task_id", taskId) .map( @@ -2516,23 +2056,6 @@ private Map getAppendSegmentsCommittedDuringTask( return segmentIdToLockVersion; } - private Set segmentExistsBatch(final Handle handle, final Set segments) - { 
- Set existedSegments = new HashSet<>(); - - List> segmentsLists = Lists.partition(new ArrayList<>(segments), MAX_NUM_SEGMENTS_TO_ANNOUNCE_AT_ONCE); - for (List segmentList : segmentsLists) { - String segmentIds = segmentList.stream() - .map(segment -> "'" + StringUtils.escapeSql(segment.getId().toString()) + "'") - .collect(Collectors.joining(",")); - List existIds = handle.createQuery(StringUtils.format("SELECT id FROM %s WHERE id in (%s)", dbTables.getSegmentsTable(), segmentIds)) - .mapTo(String.class) - .list(); - existedSegments.addAll(existIds); - } - return existedSegments; - } - /** * Read dataSource metadata. Returns null if there is no metadata. */ @@ -2557,12 +2080,12 @@ private Set segmentExistsBatch(final Handle handle, final Set segmentExistsBatch(final Handle handle, final Set INSERT can fail due to races; callers must be prepared to retry. - final int numRows = handle.createStatement( + final int numRows = transaction.getHandle().createStatement( StringUtils.format( "INSERT INTO %s (dataSource, created_date, commit_metadata_payload, commit_metadata_sha1) " + "VALUES (:dataSource, :created_date, :commit_metadata_payload, :commit_metadata_sha1)", @@ -2683,7 +2206,7 @@ protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( : DataStoreMetadataUpdateResult.retryableFailure("Failed to insert metadata for datasource[%s]", dataSource); } else { // Expecting a particular old metadata; use the SHA1 in a compare-and-swap UPDATE - final int numRows = handle.createStatement( + final int numRows = transaction.getHandle().createStatement( StringUtils.format( "UPDATE %s SET " + "commit_metadata_payload = :new_commit_metadata_payload, " @@ -2754,10 +2277,12 @@ public boolean resetDataSourceMetadata(final String dataSource, final DataSource @Override public void updateSegmentMetadata(final Set segments) { - connector.getDBI().inTransaction( - (handle, transactionStatus) -> { + final String dataSource = verifySegmentsToCommit(segments); + retryDatasourceTransaction( + dataSource, + transaction -> { for (final DataSegment segment : segments) { - updatePayload(handle, segment); + transaction.updateSegmentPayload(segment); } return 0; @@ -2773,45 +2298,16 @@ public void deleteSegments(final Set segments) return; } - final String deleteSql = StringUtils.format("DELETE from %s WHERE id = :id", dbTables.getSegmentsTable()); - final String dataSource = segments.stream().findFirst().map(DataSegment::getDataSource).get(); - - // generate the IDs outside the transaction block - final List ids = segments.stream().map(s -> s.getId().toString()).collect(Collectors.toList()); - - int numDeletedSegments = connector.getDBI().inTransaction((handle, transactionStatus) -> { - final PreparedBatch batch = handle.prepareBatch(deleteSql); - - for (final String id : ids) { - batch.bind("id", id).add(); - } - - int[] deletedRows = batch.execute(); - return Arrays.stream(deletedRows).sum(); - } + final String dataSource = verifySegmentsToCommit(segments); + int numDeletedSegments = retryDatasourceTransaction( + dataSource, + transaction -> transaction.deleteSegments(segments) ); log.debugSegments(segments, "Delete the metadata of segments"); log.info("Deleted [%d] segments from metadata storage for dataSource [%s].", numDeletedSegments, dataSource); } - private void updatePayload(final Handle handle, final DataSegment segment) throws IOException - { - try { - handle - .createStatement( - StringUtils.format("UPDATE %s SET payload = :payload WHERE id = :id", dbTables.getSegmentsTable()) - ) - 
.bind("id", segment.getId().toString()) - .bind("payload", jsonMapper.writeValueAsBytes(segment)) - .execute(); - } - catch (IOException e) { - log.error(e, "Exception inserting into DB"); - throw e; - } - } - @Override public boolean insertDataSourceMetadata(String dataSource, DataSourceMetadata metadata) { @@ -2870,14 +2366,13 @@ public int removeDataSourceMetadataOlderThan(long timestamp, @NotNull Set retrieveUsedSegmentsForAllocation( - final Handle handle, + final SegmentsMetadataTransaction transaction, final String dataSource, final Interval interval ) { final Set overlappingSegmentIds - = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegmentIds(dataSource, interval); + = transaction.findUsedSegmentIdsOverlapping(interval); // Map from version -> interval -> segmentId with the smallest partitionNum Map> versionIntervalToSmallestSegmentId = new HashMap<>(); for (SegmentId segmentId : overlappingSegmentIds) { @@ -2894,7 +2389,7 @@ Set retrieveUsedSegmentsForAllocation( for (Map itvlMap : versionIntervalToSmallestSegmentId.values()) { segmentIdsToRetrieve.addAll(itvlMap.values().stream().map(SegmentId::toString).collect(Collectors.toList())); } - final Set dataSegments = retrieveSegmentsById(dataSource, segmentIdsToRetrieve); + final List dataSegments = transaction.findUsedSegments(segmentIdsToRetrieve); final Set retrievedIds = new HashSet<>(); final Map> versionIntervalToNumCorePartitions = new HashMap<>(); for (DataSegment segment : dataSegments) { @@ -2930,45 +2425,38 @@ Set retrieveUsedSegmentsForAllocation( } @Override - public DataSegment retrieveSegmentForId(final String id, boolean includeUnused) + public DataSegment retrieveSegmentForId(final String dataSource, final String segmentId) { - return connector.retryTransaction( - (handle, status) -> { - if (includeUnused) { - return SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveSegmentForId(id); - } else { - return SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper) - .retrieveUsedSegmentForId(id); - } - }, - 3, - SQLMetadataConnector.DEFAULT_MAX_TRIES + return retryDatasourceTransaction( + dataSource, + transaction -> transaction.findSegment(segmentId) + ); + } + + @Override + public DataSegment retrieveUsedSegmentForId(String dataSource, String segmentId) + { + return retryDatasourceTransaction( + dataSource, + transaction -> transaction.findUsedSegment(segmentId) ); } @Override public int deletePendingSegmentsForTaskAllocatorId(final String datasource, final String taskAllocatorId) { - return connector.getDBI().inTransaction( - (handle, status) -> handle - .createStatement( - StringUtils.format( - "DELETE FROM %s WHERE dataSource = :dataSource AND task_allocator_id = :task_allocator_id", - dbTables.getPendingSegmentsTable() - ) - ) - .bind("dataSource", datasource) - .bind("task_allocator_id", taskAllocatorId) - .execute() + return retryDatasourceTransaction( + datasource, + transaction -> transaction.deletePendingSegments(taskAllocatorId) ); } @Override public List getPendingSegments(String datasource, Interval interval) { - return connector.retryWithHandle( - handle -> getPendingSegmentsForInterval(handle, datasource, interval) + return inReadOnlyDatasourceTransaction( + datasource, + transaction -> transaction.findPendingSegmentsOverlapping(interval) ); } @@ -3072,46 +2560,47 @@ public Map> retrieveUpgradedToSegmentIds( return upgradedToSegmentIds; } - private static class PendingSegmentsRecord + private T 
retryDatasourceTransaction( + String dataSource, + SegmentsMetadataTransaction.Callback callback + ) { - private final String sequenceName; - private final byte[] payload; - - /** - * The columns expected in the result set are: - *

      - *   1. sequence_name
      - *   2. payload
    - */ - static PendingSegmentsRecord fromResultSet(ResultSet resultSet) - { + return connector.retryTransaction( + createTransactionCallback(dataSource, callback), + 3, + getSqlMetadataMaxRetry() + ); + } + + private T inReadOnlyDatasourceTransaction( + String dataSource, + SegmentsMetadataTransaction.Callback callback + ) + { + return connector.inReadOnlyTransaction( + createTransactionCallback(dataSource, callback) + ); + } + + private TransactionCallback createTransactionCallback( + String dataSource, + SegmentsMetadataTransaction.Callback baseCallback + ) + { + return (handle, status) -> { + final SegmentsMetadataTransaction transaction = + transactionFactory.createTransactionForDatasource(dataSource, handle, status); try { - return new PendingSegmentsRecord( - resultSet.getString(1), - resultSet.getBytes(2) - ); + return baseCallback.inTransaction(transaction); } - catch (SQLException e) { - throw new RuntimeException(e); + catch (Exception e) { + transaction.setRollbackOnly(); + throw e; } - } - - PendingSegmentsRecord(String sequenceName, byte[] payload) - { - this.payload = payload; - this.sequenceName = sequenceName; - } - - public byte[] getPayload() - { - return payload; - } - - public String getSequenceName() - { - return sequenceName; - } + finally { + transaction.complete(); + } + }; } public static class DataStoreMetadataUpdateResult diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index f117fe7f28bf..44033b0a394d 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ -111,6 +111,13 @@ public String getTaskAllocatorId() return taskAllocatorId; } + /** + * Computes a hash for this record to serve as UNIQUE key, ensuring we don't + * have more than one segment per sequence per interval. + * A single column is used instead of (sequence_name, sequence_prev_id) as + * some MySQL storage engines have difficulty with large unique keys + * (see #2319) + */ @SuppressWarnings("UnstableApiUsage") public String computeSequenceNamePrevIdSha1(boolean skipSegmentLineageCheck) { diff --git a/server/src/main/java/org/apache/druid/metadata/SegmentsMetadataManagerConfig.java b/server/src/main/java/org/apache/druid/metadata/SegmentsMetadataManagerConfig.java index 89072c64afce..fc65de4af532 100644 --- a/server/src/main/java/org/apache/druid/metadata/SegmentsMetadataManagerConfig.java +++ b/server/src/main/java/org/apache/druid/metadata/SegmentsMetadataManagerConfig.java @@ -19,25 +19,42 @@ package org.apache.druid.metadata; +import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.common.config.Configs; import org.joda.time.Period; /** + * Config that dictates polling and caching of segment metadata on leader + * Coordinator or Overlord services. 
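 * An illustrative sketch, not part of this change: the Jackson-bound properties map to
 * runtime properties under the prefix druid.manager.segments, e.g.
 *   druid.manager.segments.pollDuration=PT1M
 *   druid.manager.segments.useCache=true
 * A test could also construct the config directly, since both constructor arguments are
 * nullable and default to PT1M and false respectively:
 *   SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(Period.minutes(1), true);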
*/ public class SegmentsMetadataManagerConfig { public static final String CONFIG_PREFIX = "druid.manager.segments"; @JsonProperty - private Period pollDuration = new Period("PT1M"); + private final Period pollDuration; - public Period getPollDuration() + @JsonProperty + private final boolean useCache; + + @JsonCreator + public SegmentsMetadataManagerConfig( + @JsonProperty("pollDuration") Period pollDuration, + @JsonProperty("useCache") Boolean useCache + ) { - return pollDuration; + this.pollDuration = Configs.valueOrDefault(pollDuration, Period.minutes(1)); + this.useCache = Configs.valueOrDefault(useCache, false); } - public void setPollDuration(Period pollDuration) + public boolean isUseCache() { - this.pollDuration = pollDuration; + return useCache; + } + + public Period getPollDuration() + { + return pollDuration; } } diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java index 7213cd3481b2..c38f7b87bf08 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java @@ -831,7 +831,7 @@ public int markAsUnusedAllSegmentsInDataSource(final String dataSource) return connector.getDBI().withHandle( handle -> SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper) - .markSegmentsUnused(dataSource, Intervals.ETERNITY) + .markSegmentsUnused(dataSource, Intervals.ETERNITY, DateTimes.nowUtc()) ); } catch (RuntimeException e) { @@ -884,7 +884,7 @@ public int markAsUnusedSegmentsInInterval( return connector.getDBI().withHandle( handle -> SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables.get(), jsonMapper) - .markSegmentsUnused(dataSource, interval, versions) + .markSegmentsUnused(dataSource, interval, versions, DateTimes.nowUtc()) ); } catch (Exception e) { diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index 501c6d46134c..f86ac0064c35 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.UnmodifiableIterator; @@ -33,6 +34,7 @@ import org.apache.druid.java.util.common.jackson.JacksonUtils; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.http.DataSegmentPlus; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; @@ -44,8 +46,10 @@ import org.skife.jdbi.v2.ResultIterator; import org.skife.jdbi.v2.SQLStatement; import org.skife.jdbi.v2.Update; +import org.skife.jdbi.v2.util.StringMapper; import javax.annotation.Nullable; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -149,6 +153,46 @@ public CloseableIterator retrieveUsedSegments( ); } + public CloseableIterator retrieveUsedSegmentsPlus( + String dataSource, + Collection 
intervals + ) + { + return retrieveSegmentsPlus( + dataSource, + intervals, null, IntervalMode.OVERLAPS, true, null, null, null, null + ); + } + + public Set retrieveUnusedSegmentIdsForExactIntervalAndVersion( + String dataSource, + Interval interval, + String version + ) + { + final String sql = StringUtils.format( + "SELECT id FROM %1$s" + + " WHERE used = :used" + + " AND dataSource = :dataSource" + + " AND version = :version" + + " AND start = :start AND %2$send%2$s = :end", + dbTables.getSegmentsTable(), connector.getQuoteString() + ); + + final Query> query = handle + .createQuery(sql) + .setFetchSize(connector.getStreamingFetchSize()) + .bind("used", false) + .bind("dataSource", dataSource) + .bind("version", version) + .bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()); + + try (final ResultIterator iterator = query.map(StringMapper.FIRST).iterator()) { + return ImmutableSet.copyOf(iterator); + } + } + /** * Retrieves segments for a given datasource that are marked unused and that are fully contained by any interval * in a particular collection of intervals. If the collection of intervals is empty, this method will retrieve all @@ -244,6 +288,10 @@ public CloseableIterator retrieveUnusedSegmentsPlus( ); } + /** + * Retrieves IDs of used segments that belong to the datasource and overlap + * the given interval. + */ public Set retrieveUsedSegmentIds( final String dataSource, final Interval interval @@ -264,51 +312,64 @@ public Set retrieveUsedSegmentIds( ); } - return connector.inReadOnlyTransaction( - (handle, status) -> { - final Query> sql = handle - .createQuery(StringUtils.format(sb.toString(), dbTables.getSegmentsTable())) - .setFetchSize(connector.getStreamingFetchSize()) - .bind("used", true) - .bind("dataSource", dataSource); + final Query> sql = handle + .createQuery(StringUtils.format(sb.toString(), dbTables.getSegmentsTable())) + .setFetchSize(connector.getStreamingFetchSize()) + .bind("used", true) + .bind("dataSource", dataSource); - if (compareAsString) { - bindIntervalsToQuery(sql, Collections.singletonList(interval)); - } + if (compareAsString) { + bindIntervalsToQuery(sql, Collections.singletonList(interval)); + } + + final Set segmentIds = new HashSet<>(); + try (final ResultIterator iterator = sql.map(StringMapper.FIRST).iterator()) { + while (iterator.hasNext()) { + final String id = iterator.next(); + final SegmentId segmentId = SegmentId.tryParse(dataSource, id); + if (segmentId == null) { + throw DruidException.defensive( + "Failed to parse SegmentId for id[%s] and dataSource[%s].", + id, dataSource + ); + } + if (IntervalMode.OVERLAPS.apply(interval, segmentId.getInterval())) { + segmentIds.add(segmentId); + } + } + } + return segmentIds; - final Set segmentIds = new HashSet<>(); - try (final ResultIterator iterator = sql.map((index, r, ctx) -> r.getString(1)).iterator()) { - while (iterator.hasNext()) { - final String id = iterator.next(); - final SegmentId segmentId = SegmentId.tryParse(dataSource, id); - if (segmentId == null) { - throw DruidException.defensive( - "Failed to parse SegmentId for id[%s] and dataSource[%s].", - id, dataSource - ); - } - if (IntervalMode.OVERLAPS.apply(interval, segmentId.getInterval())) { - segmentIds.add(segmentId); - } - } - } - return segmentIds; - }); } public List retrieveSegmentsById( String datasource, Set segmentIds ) + { + try (CloseableIterator iterator + = retrieveSegmentsByIdIterator(datasource, segmentIds)) { + return ImmutableList.copyOf(iterator); + } + catch 
(IOException e) { + throw DruidException.defensive(e, "Error while retrieving segments from metadata store"); + } + } + + public CloseableIterator retrieveSegmentsByIdIterator( + String datasource, + Set segmentIds + ) { final List> partitionedSegmentIds = Lists.partition(new ArrayList<>(segmentIds), 100); - final List fetchedSegments = new ArrayList<>(segmentIds.size()); + final List> fetchedSegments + = new ArrayList<>(partitionedSegmentIds.size()); for (List partition : partitionedSegmentIds) { - fetchedSegments.addAll(retrieveSegmentBatchById(datasource, partition, false)); + fetchedSegments.add(retrieveSegmentBatchById(datasource, partition, false)); } - return fetchedSegments; + return CloseableIterators.concat(fetchedSegments); } public List retrieveSegmentsWithSchemaById( @@ -319,21 +380,29 @@ public List retrieveSegmentsWithSchemaById( final List> partitionedSegmentIds = Lists.partition(new ArrayList<>(segmentIds), 100); - final List fetchedSegments = new ArrayList<>(segmentIds.size()); + final List> fetchedSegments + = new ArrayList<>(partitionedSegmentIds.size()); for (List partition : partitionedSegmentIds) { - fetchedSegments.addAll(retrieveSegmentBatchById(datasource, partition, true)); + fetchedSegments.add(retrieveSegmentBatchById(datasource, partition, true)); + } + + try (CloseableIterator iterator + = CloseableIterators.concat(fetchedSegments)) { + return ImmutableList.copyOf(iterator); + } + catch (IOException e) { + throw DruidException.defensive(e, "Error while retrieving segments with schema from metadata store."); } - return fetchedSegments; } - private List retrieveSegmentBatchById( + private CloseableIterator retrieveSegmentBatchById( String datasource, List segmentIds, boolean includeSchemaInfo ) { if (segmentIds.isEmpty()) { - return Collections.emptyList(); + return CloseableIterators.withEmptyBaggage(Collections.emptyIterator()); } ResultIterator resultIterator; @@ -393,7 +462,7 @@ private List retrieveSegmentBatchById( .iterator(); } - return Lists.newArrayList(resultIterator); + return CloseableIterators.wrap(resultIterator, resultIterator); } /** @@ -442,9 +511,9 @@ public int markSegments(final Collection segmentIds, final boolean us * * @return Number of segments updated. */ - public int markSegmentsUnused(final String dataSource, final Interval interval) + public int markSegmentsUnused(final String dataSource, final Interval interval, final DateTime updateTime) { - return markSegmentsUnused(dataSource, interval, null); + return markSegmentsUnused(dataSource, interval, null, updateTime); } /** @@ -453,7 +522,12 @@ public int markSegmentsUnused(final String dataSource, final Interval interval) * * @return Number of segments updated. 
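 *
 * An illustrative usage sketch, not part of this change, mirroring the updated call sites in
 * SqlSegmentsMetadataManager (handle, connector, dbTables, jsonMapper, dataSource, interval
 * and versions are assumed to be in scope):
 *
 *   int numUpdated = SqlSegmentsMetadataQuery
 *       .forHandle(handle, connector, dbTables, jsonMapper)
 *       .markSegmentsUnused(dataSource, interval, versions, DateTimes.nowUtc());
 *
 * The caller now supplies the update time explicitly instead of the query computing it internally.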
*/ - public int markSegmentsUnused(final String dataSource, final Interval interval, @Nullable final List versions) + public int markSegmentsUnused( + final String dataSource, + final Interval interval, + @Nullable final List versions, + final DateTime updateTime + ) { if (versions != null && versions.isEmpty()) { return 0; @@ -477,7 +551,7 @@ public int markSegmentsUnused(final String dataSource, final Interval interval, .createStatement(sb.toString()) .bind("dataSource", dataSource) .bind("used", false) - .bind("used_status_last_updated", DateTimes.nowUtc().toString()); + .bind("used_status_last_updated", updateTime.toString()); if (versions != null) { bindColumnValuesToQueryWithInCondition("version", versions, stmt); @@ -509,7 +583,7 @@ public int markSegmentsUnused(final String dataSource, final Interval interval, .bind("used", false) .bind("start", interval.getStart().toString()) .bind("end", interval.getEnd().toString()) - .bind("used_status_last_updated", DateTimes.nowUtc().toString()); + .bind("used_status_last_updated", updateTime.toString()); if (versions != null) { bindColumnValuesToQueryWithInCondition("version", versions, stmt); @@ -581,6 +655,160 @@ public DataSegment retrieveSegmentForId(String id) return null; } + public List retrievePendingSegmentIds( + final String dataSource, + final String sequenceName, + final String sequencePreviousId + ) + { + final String sql = StringUtils.format( + "SELECT payload FROM %s WHERE " + + "dataSource = :dataSource AND " + + "sequence_name = :sequence_name AND " + + "sequence_prev_id = :sequence_prev_id", + dbTables.getPendingSegmentsTable() + ); + return handle + .createQuery(sql) + .bind("dataSource", dataSource) + .bind("sequence_name", sequenceName) + .bind("sequence_prev_id", sequencePreviousId) + .map( + (index, r, ctx) -> JacksonUtils.readValue( + jsonMapper, + r.getBytes("payload"), + SegmentIdWithShardSpec.class + ) + ) + .list(); + } + + public List retrievePendingSegmentIdsWithExactInterval( + final String dataSource, + final String sequenceName, + final Interval interval + ) + { + final String sql = StringUtils.format( + "SELECT payload FROM %s WHERE " + + "dataSource = :dataSource AND " + + "sequence_name = :sequence_name AND " + + "start = :start AND " + + "%2$send%2$s = :end", + dbTables.getPendingSegmentsTable(), + connector.getQuoteString() + ); + return handle + .createQuery(sql) + .bind("dataSource", dataSource) + .bind("sequence_name", sequenceName) + .bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()) + .map( + (index, r, ctx) -> JacksonUtils.readValue( + jsonMapper, + r.getBytes("payload"), + SegmentIdWithShardSpec.class + ) + ) + .list(); + } + + public List retrievePendingSegmentsWithExactInterval( + final String dataSource, + final Interval interval + ) + { + final String sql = StringUtils.format( + "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" + + " FROM %1$s WHERE" + + " dataSource = :dataSource" + + " AND start = :start" + + " AND %2$send%2$s = :end", + dbTables.getPendingSegmentsTable(), connector.getQuoteString() + ); + return handle + .createQuery(sql) + .bind("dataSource", dataSource) + .bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()) + .map((index, r, ctx) -> PendingSegmentRecord.fromResultSet(r, jsonMapper)) + .list(); + } + + /** + * Fetches all the pending segments, whose interval overlaps with the given + * search interval, from the metadata store. 
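 *
 * When the interval endpoints can be compared as strings (see
 * Intervals.canCompareEndpointsAsStrings), the overlap condition is pushed down into the SQL
 * WHERE clause; otherwise all pending segments of the datasource are fetched and filtered in
 * memory against the search interval.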
+ */ + public List retrievePendingSegmentsOverlappingInterval( + final String dataSource, + final Interval interval + ) + { + final boolean compareIntervalEndpointsAsStrings = Intervals.canCompareEndpointsAsStrings(interval); + + String sql = StringUtils.format( + "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" + + " FROM %1$s" + + " WHERE dataSource = :dataSource", + dbTables.getPendingSegmentsTable() + ); + if (compareIntervalEndpointsAsStrings) { + sql += " AND start < :end" + + StringUtils.format(" AND %1$send%1$s > :start", connector.getQuoteString()); + } + + Query> query = handle.createQuery(sql) + .bind("dataSource", dataSource); + if (compareIntervalEndpointsAsStrings) { + query = query.bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()); + } + + final ResultIterator pendingSegmentIterator = + query.map((index, r, ctx) -> PendingSegmentRecord.fromResultSet(r, jsonMapper)) + .iterator(); + final ImmutableList.Builder pendingSegments = ImmutableList.builder(); + while (pendingSegmentIterator.hasNext()) { + final PendingSegmentRecord pendingSegment = pendingSegmentIterator.next(); + if (compareIntervalEndpointsAsStrings || pendingSegment.getId().getInterval().overlaps(interval)) { + pendingSegments.add(pendingSegment); + } + } + pendingSegmentIterator.close(); + return pendingSegments.build(); + } + + public List retrievePendingSegmentsForTaskAllocatorId( + final String dataSource, + final String taskAllocatorId + ) + { + final String sql = StringUtils.format( + "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" + + " FROM %1$s" + + " WHERE dataSource = :dataSource AND task_allocator_id = :task_allocator_id", + dbTables.getPendingSegmentsTable() + ); + + Query> query = handle.createQuery(sql) + .bind("dataSource", dataSource) + .bind("task_allocator_id", taskAllocatorId); + + final ResultIterator pendingSegmentRecords = + query.map((index, r, ctx) -> PendingSegmentRecord.fromResultSet(r, jsonMapper)) + .iterator(); + + final List pendingSegments = new ArrayList<>(); + while (pendingSegmentRecords.hasNext()) { + pendingSegments.add(pendingSegmentRecords.next()); + } + + pendingSegmentRecords.close(); + + return pendingSegments; + } + /** * Get the condition for the interval and match mode. * @param intervals - intervals to fetch the segments for @@ -1012,7 +1240,7 @@ private static int computeNumChangedSegments(List segmentIds, int[] segm * * @implNote JDBI 3.x has better support for binding {@code IN} clauses directly. 
*/ - static String getParameterizedInConditionForColumn(final String columnName, final List values) + public static String getParameterizedInConditionForColumn(final String columnName, final List values) { if (values == null) { return ""; @@ -1037,7 +1265,7 @@ static String getParameterizedInConditionForColumn(final String columnName, fina * * @see #getParameterizedInConditionForColumn(String, List) */ - static void bindColumnValuesToQueryWithInCondition( + public static void bindColumnValuesToQueryWithInCondition( final String columnName, final List values, final SQLStatement query diff --git a/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java new file mode 100644 index 000000000000..05af79b97756 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment; + +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.metadata.PendingSegmentRecord; +import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.apache.druid.server.http.DataSegmentPlus; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.util.List; +import java.util.Set; + +/** + * Performs read operations on the segment metadata for a single datasource. + */ +public interface DatasourceSegmentMetadataReader +{ + /** + * Returns the IDs of segments (out of the given set) which already exist in + * the metadata store. + */ + Set findExistingSegmentIds(Set segments); + + /** + * Retrieves IDs of used segments that belong to the datasource and overlap + * the given interval. + */ + Set findUsedSegmentIdsOverlapping(Interval interval); + + Set findUnusedSegmentIdsWithExactIntervalAndVersion(Interval interval, String version); + + /** + * Finds used segments that overlap with any of the given intervals. + */ + CloseableIterator findUsedSegmentsOverlappingAnyOf(List intervals); + + List findUsedSegments(Set segmentIds); + + /** + * Finds used segments that overlap with any of the given intervals. 
+ */ + Set findUsedSegmentsPlusOverlappingAnyOf(List intervals); + + DataSegment findSegment(String segmentId); + + DataSegment findUsedSegment(String segmentId); + + List findSegments(Set segmentIds); + + List findSegmentsWithSchema(Set segmentIds); + + List findUnusedSegments( + Interval interval, + @Nullable List versions, + @Nullable Integer limit, + @Nullable DateTime maxUsedStatusLastUpdatedTime + ); + + List findPendingSegmentIds( + String sequenceName, + String sequencePreviousId + ); + + List findPendingSegmentIdsWithExactInterval( + String sequenceName, + Interval interval + ); + + List findPendingSegmentsOverlapping(Interval interval); + + List findPendingSegmentsWithExactInterval(Interval interval); + + List findPendingSegments(String taskAllocatorId); +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataWriter.java b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataWriter.java new file mode 100644 index 000000000000..63f05549016b --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataWriter.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment; + +import org.apache.druid.metadata.PendingSegmentRecord; +import org.apache.druid.server.http.DataSegmentPlus; +import org.apache.druid.timeline.DataSegment; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import java.util.List; +import java.util.Set; + +/** + * Performs write operations on the segment metadata of a single datasource. + */ +public interface DatasourceSegmentMetadataWriter +{ + /** + * Inserts the given segments into the metadata store. + */ + int insertSegments(Set segments); + + int insertSegmentsWithMetadata(Set segments); + + /** + * Marks the segments fully contained in the given interval as unused. 
+ */ + int markSegmentsWithinIntervalAsUnused(Interval interval, DateTime updateTime); + + int deleteSegments(Set segments); + + boolean updateSegmentPayload(DataSegment segment); + + boolean insertPendingSegment( + PendingSegmentRecord pendingSegment, + boolean skipSegmentLineageCheck + ); + + int insertPendingSegments( + List pendingSegments, + boolean skipSegmentLineageCheck + ); + + int deleteAllPendingSegments(); + + int deletePendingSegments(List segmentIdsToDelete); + + int deletePendingSegments(String taskAllocatorId); + + int deletePendingSegmentsCreatedIn(Interval interval); +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java new file mode 100644 index 000000000000..c4d0bb9d618d --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment; + +import org.skife.jdbi.v2.Handle; + +/** + * Represents a single transaction involving read/write of segment metadata into + * the metadata store. A transaction is associated with a single instance of a + * {@link Handle} and is meant to be short-lived. + */ +public interface SegmentsMetadataTransaction + extends DatasourceSegmentMetadataReader, DatasourceSegmentMetadataWriter +{ + /** + * @return The JDBI handle used in this transaction + */ + Handle getHandle(); + + /** + * Marks this transaction to be rolled back. + */ + void setRollbackOnly(); + + /** + * Completes the transaction by either committing it or rolling it back. + * This method must not be called from a {@link Callback}. + */ + void complete(); + + @FunctionalInterface + interface Callback + { + T inTransaction(SegmentsMetadataTransaction transaction) throws Exception; + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java new file mode 100644 index 000000000000..b6b0a570a2c7 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java @@ -0,0 +1,347 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment; + +import org.apache.druid.discovery.DruidLeaderSelector; +import org.apache.druid.error.InternalServerError; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.metadata.PendingSegmentRecord; +import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; +import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.apache.druid.server.http.DataSegmentPlus; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; +import org.joda.time.DateTime; +import org.joda.time.Interval; +import org.skife.jdbi.v2.Handle; + +import javax.annotation.Nullable; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; + +/** + * A {@link SegmentsMetadataTransaction} that performs reads using the cache + * and sends writes first to the metadata store and then the cache (if the + * metadata store persist succeeds). + */ +public class SqlSegmentsMetadataCachedTransaction implements SegmentsMetadataTransaction +{ + private final String dataSource; + private final SegmentsMetadataTransaction delegate; + private final SegmentsMetadataCache metadataCache; + private final DruidLeaderSelector leaderSelector; + + private final int startTerm; + + private final AtomicBoolean isRollingBack = new AtomicBoolean(false); + + public SqlSegmentsMetadataCachedTransaction( + String dataSource, + SegmentsMetadataTransaction delegate, + SegmentsMetadataCache metadataCache, + DruidLeaderSelector leaderSelector + ) + { + this.dataSource = dataSource; + this.delegate = delegate; + this.metadataCache = metadataCache; + this.leaderSelector = leaderSelector; + + if (leaderSelector.isLeader()) { + this.startTerm = leaderSelector.localTerm(); + } else { + throw InternalServerError.exception("Not leader anymore"); + } + } + + private void verifyStillLeaderWithSameTerm() + { + if (!isLeaderWithSameTerm()) { + throw InternalServerError.exception("Failing transaction. 
Not leader anymore"); + } + } + + private boolean isLeaderWithSameTerm() + { + return leaderSelector.isLeader() && startTerm == leaderSelector.localTerm(); + } + + private DatasourceSegmentMetadataReader cacheReader() + { + return metadataCache.readerForDatasource(dataSource); + } + + private DatasourceSegmentMetadataWriter cacheWriter() + { + return metadataCache.writerForDatasource(dataSource); + } + + @Override + public Handle getHandle() + { + return delegate.getHandle(); + } + + @Override + public void setRollbackOnly() + { + delegate.setRollbackOnly(); + } + + @Override + public void complete() + { + // TODO: complete this implementation + + if (isRollingBack.get()) { + // rollback the changes made to the cache + } else { + // commit the changes to the cache + // or may be we can commit right at the end + // since I don't think we ever read what we have just written + // so it should be okay to postpone the writes until the very end + // since reads from cache are going to be fast, it should be okay to hold + // a write lock for the entire duration of the transaction + + // Is there any alternative? That is also consistent? + } + + // release the lock on the cache + // What if we don't acquire any lock? + + delegate.complete(); + } + + // READ METHODS + + @Override + public Set findExistingSegmentIds(Set segments) + { + return cacheReader().findExistingSegmentIds(segments); + } + + @Override + public Set findUsedSegmentIdsOverlapping(Interval interval) + { + return cacheReader().findUsedSegmentIdsOverlapping(interval); + } + + @Override + public Set findUnusedSegmentIdsWithExactIntervalAndVersion(Interval interval, String version) + { + // TODO: we need to start caching some info of unused segments to empower this method + return delegate.findUnusedSegmentIdsWithExactIntervalAndVersion(interval, version); + } + + @Override + public List findSegments(Set segmentIds) + { + // Read from metadata store since unused segment payloads are not cached + return delegate.findSegments(segmentIds); + } + + @Override + public List findSegmentsWithSchema(Set segmentIds) + { + // Read from metadata store since unused segment payloads are not cached + return delegate.findSegmentsWithSchema(segmentIds); + } + + @Override + public CloseableIterator findUsedSegmentsOverlappingAnyOf(List intervals) + { + return cacheReader().findUsedSegmentsOverlappingAnyOf(intervals); + } + + @Override + public List findUsedSegments(Set segmentIds) + { + return cacheReader().findUsedSegments(segmentIds); + } + + @Override + public Set findUsedSegmentsPlusOverlappingAnyOf(List intervals) + { + return cacheReader().findUsedSegmentsPlusOverlappingAnyOf(intervals); + } + + @Override + public List findUnusedSegments( + Interval interval, + @Nullable List versions, + @Nullable Integer limit, + @Nullable DateTime maxUsedStatusLastUpdatedTime + ) + { + // Read from metadata store since unused segment payloads are not cached + return delegate.findUnusedSegments(interval, versions, limit, maxUsedStatusLastUpdatedTime); + } + + @Override + public DataSegment findSegment(String segmentId) + { + // Read from metadata store since unused segment payloads are not cached + return delegate.findSegment(segmentId); + } + + @Override + public DataSegment findUsedSegment(String segmentId) + { + return cacheReader().findUsedSegment(segmentId); + } + + @Override + public List findPendingSegmentIds( + String sequenceName, + String sequencePreviousId + ) + { + return cacheReader().findPendingSegmentIds(sequenceName, sequencePreviousId); + } + + 
@Override + public List findPendingSegmentIdsWithExactInterval( + String sequenceName, + Interval interval + ) + { + return cacheReader().findPendingSegmentIdsWithExactInterval(sequenceName, interval); + } + + @Override + public List findPendingSegmentsOverlapping(Interval interval) + { + return cacheReader().findPendingSegmentsOverlapping(interval); + } + + @Override + public List findPendingSegmentsWithExactInterval(Interval interval) + { + return cacheReader().findPendingSegmentsWithExactInterval(interval); + } + + @Override + public List findPendingSegments(String taskAllocatorId) + { + return cacheReader().findPendingSegments(taskAllocatorId); + } + + // WRITE METHODS + + @Override + public int insertSegments(Set segments) + { + return performWriteAction(writer -> writer.insertSegments(segments)); + } + + @Override + public int insertSegmentsWithMetadata(Set segments) + { + return performWriteAction(writer -> writer.insertSegmentsWithMetadata(segments)); + } + + @Override + public int markSegmentsWithinIntervalAsUnused(Interval interval, DateTime updateTime) + { + return performWriteAction( + writer -> writer.markSegmentsWithinIntervalAsUnused(interval, updateTime) + ); + } + + @Override + public int deleteSegments(Set segments) + { + return performWriteAction(writer -> writer.deleteSegments(segments)); + } + + @Override + public boolean updateSegmentPayload(DataSegment segment) + { + return performWriteAction(writer -> writer.updateSegmentPayload(segment)); + } + + @Override + public boolean insertPendingSegment( + PendingSegmentRecord pendingSegment, + boolean skipSegmentLineageCheck + ) + { + return performWriteAction( + writer -> writer.insertPendingSegment(pendingSegment, skipSegmentLineageCheck) + ); + } + + @Override + public int insertPendingSegments( + List pendingSegments, + boolean skipSegmentLineageCheck + ) + { + return performWriteAction( + writer -> writer.insertPendingSegments(pendingSegments, skipSegmentLineageCheck) + ); + } + + @Override + public int deleteAllPendingSegments() + { + return performWriteAction( + DatasourceSegmentMetadataWriter::deleteAllPendingSegments + ); + } + + @Override + public int deletePendingSegments(List segmentIdsToDelete) + { + return performWriteAction( + writer -> writer.deletePendingSegments(segmentIdsToDelete) + ); + } + + @Override + public int deletePendingSegments(String taskAllocatorId) + { + return performWriteAction( + writer -> writer.deletePendingSegments(taskAllocatorId) + ); + } + + @Override + public int deletePendingSegmentsCreatedIn(Interval interval) + { + return performWriteAction( + writer -> writer.deletePendingSegmentsCreatedIn(interval) + ); + } + + private T performWriteAction(Function action) + { + verifyStillLeaderWithSameTerm(); + final T result = action.apply(delegate); + + if (isLeaderWithSameTerm()) { + action.apply(cacheWriter()); + } + + return result; + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java new file mode 100644 index 000000000000..dea50e315380 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java @@ -0,0 +1,561 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.InternalServerError; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.metadata.PendingSegmentRecord; +import org.apache.druid.metadata.SQLMetadataConnector; +import org.apache.druid.metadata.SqlSegmentsMetadataQuery; +import org.apache.druid.segment.SegmentUtils; +import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.apache.druid.server.http.DataSegmentPlus; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; +import org.joda.time.DateTime; +import org.joda.time.Interval; +import org.skife.jdbi.v2.Handle; +import org.skife.jdbi.v2.PreparedBatch; +import org.skife.jdbi.v2.PreparedBatchPart; +import org.skife.jdbi.v2.TransactionStatus; +import org.skife.jdbi.v2.Update; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +public class SqlSegmentsMetadataTransaction implements SegmentsMetadataTransaction +{ + private static final int MAX_SEGMENTS_PER_BATCH = 100; + + private final String dataSource; + private final Handle handle; + private final TransactionStatus transactionStatus; + private final SQLMetadataConnector connector; + private final MetadataStorageTablesConfig dbTables; + private final ObjectMapper jsonMapper; + + private final SqlSegmentsMetadataQuery query; + + public SqlSegmentsMetadataTransaction( + String dataSource, + Handle handle, + TransactionStatus transactionStatus, + SQLMetadataConnector connector, + MetadataStorageTablesConfig dbTables, + ObjectMapper jsonMapper + ) + { + this.dataSource = dataSource; + this.handle = handle; + this.connector = connector; + this.dbTables = dbTables; + this.jsonMapper = jsonMapper; + this.transactionStatus = transactionStatus; + this.query = SqlSegmentsMetadataQuery.forHandle(handle, connector, dbTables, jsonMapper); + } + + @Override + public Handle getHandle() + { + return handle; + } + + @Override + public void setRollbackOnly() + { + transactionStatus.setRollbackOnly(); + } + + @Override + public void complete() + { + // Do nothing here, the JDBI Handle will commit or rollback the transaction as needed + } + + // READ METHODS + + @Override + public Set findExistingSegmentIds(Set segments) + { + final Set 
existingSegmentIds = new HashSet<>(); + final String sql = "SELECT id FROM %s WHERE id in (%s)"; + + List> partitions = Lists.partition(new ArrayList<>(segments), MAX_SEGMENTS_PER_BATCH); + for (List segmentList : partitions) { + String segmentIds = segmentList.stream().map( + segment -> "'" + StringUtils.escapeSql(segment.getId().toString()) + "'" + ).collect(Collectors.joining(",")); + + existingSegmentIds.addAll( + handle.createQuery(StringUtils.format(sql, dbTables.getSegmentsTable(), segmentIds)) + .mapTo(String.class) + .list() + ); + } + + return existingSegmentIds; + } + + @Override + public Set findUsedSegmentIdsOverlapping(Interval interval) + { + return query.retrieveUsedSegmentIds(dataSource, interval); + } + + @Override + public Set findUnusedSegmentIdsWithExactIntervalAndVersion(Interval interval, String version) + { + return query.retrieveUnusedSegmentIdsForExactIntervalAndVersion(dataSource, interval, version); + } + + @Override + public CloseableIterator findUsedSegmentsOverlappingAnyOf(List intervals) + { + return query.retrieveUsedSegments(dataSource, intervals); + } + + @Override + public List findUsedSegments(Set segmentIds) + { + return query.retrieveSegmentsById(dataSource, segmentIds) + .stream() + .map(DataSegmentPlus::getDataSegment) + .collect(Collectors.toList()); + } + + @Override + public Set findUsedSegmentsPlusOverlappingAnyOf(List intervals) + { + try (CloseableIterator iterator + = query.retrieveUsedSegmentsPlus(dataSource, intervals)) { + return ImmutableSet.copyOf(iterator); + } + catch (Exception e) { + throw DruidException.defensive(e, "Error while retrieving used segments"); + } + } + + @Override + public DataSegment findSegment(String segmentId) + { + return query.retrieveSegmentForId(segmentId); + } + + @Override + public DataSegment findUsedSegment(String segmentId) + { + return query.retrieveUsedSegmentForId(segmentId); + } + + @Override + public List findSegments(Set segmentIds) + { + return query.retrieveSegmentsById(dataSource, segmentIds); + } + + @Override + public List findSegmentsWithSchema(Set segmentIds) + { + return query.retrieveSegmentsWithSchemaById(dataSource, segmentIds); + } + + @Override + public List findUnusedSegments( + Interval interval, + @Nullable List versions, + @Nullable Integer limit, + @Nullable DateTime maxUsedStatusLastUpdatedTime + ) + { + try (final CloseableIterator iterator = + query.retrieveUnusedSegments( + dataSource, + List.of(interval), + versions, + limit, + null, + null, + maxUsedStatusLastUpdatedTime + ) + ) { + return ImmutableList.copyOf(iterator); + } + catch (IOException e) { + throw DruidException.defensive(e, "Error while reading unused segments"); + } + } + + // WRITE METHODS + + @Override + public int insertSegments(Set segments) + { + return insertSegmentsInBatches( + dataSource, + segments, + "INSERT INTO %1$s " + + "(id, dataSource, created_date, start, %2$send%2$s, partitioned, " + + "version, used, payload, used_status_last_updated, upgraded_from_segment_id) " + + "VALUES " + + "(:id, :dataSource, :created_date, :start, :end, :partitioned, " + + ":version, :used, :payload, :used_status_last_updated, :upgraded_from_segment_id)" + ); + } + + @Override + public int insertSegmentsWithMetadata(Set segments) + { + return insertSegmentsInBatches( + dataSource, + segments, + "INSERT INTO %1$s " + + "(id, dataSource, created_date, start, %2$send%2$s, partitioned, " + + "version, used, payload, used_status_last_updated, upgraded_from_segment_id, " + + "schema_fingerprint, num_rows) " + + "VALUES " + + 
"(:id, :dataSource, :created_date, :start, :end, :partitioned, " + + ":version, :used, :payload, :used_status_last_updated, :upgraded_from_segment_id, " + + ":schema_fingerprint, :num_rows)" + ); + } + + @Override + public int markSegmentsWithinIntervalAsUnused(Interval interval, DateTime updateTime) + { + return query.markSegmentsUnused(dataSource, interval, updateTime); + } + + @Override + public int deleteSegments(Set segments) + { + final String deleteSql = StringUtils.format("DELETE from %s WHERE id = :id", dbTables.getSegmentsTable()); + + // generate the IDs outside the transaction block + final List ids = segments.stream().map(s -> s.getId().toString()).collect(Collectors.toList()); + + final PreparedBatch batch = handle.prepareBatch(deleteSql); + + for (final String id : ids) { + batch.bind("id", id).add(); + } + + int[] deletedRows = batch.execute(); + return Arrays.stream(deletedRows).sum(); + } + + @Override + public boolean updateSegmentPayload(DataSegment segment) + { + final String sql = "UPDATE %s SET payload = :payload WHERE id = :id"; + int updatedCount = handle + .createStatement(StringUtils.format(sql, dbTables.getSegmentsTable())) + .bind("id", segment.getId().toString()) + .bind("payload", getJsonBytes(segment)) + .execute(); + + return updatedCount > 0; + } + + @Override + public List findPendingSegmentIds( + String sequenceName, + String sequencePreviousId + ) + { + return query.retrievePendingSegmentIds(dataSource, sequenceName, sequencePreviousId); + } + + @Override + public List findPendingSegmentIdsWithExactInterval( + String sequenceName, + Interval interval + ) + { + return query.retrievePendingSegmentIdsWithExactInterval(dataSource, sequenceName, interval); + } + + @Override + public List findPendingSegmentsOverlapping(Interval interval) + { + return query.retrievePendingSegmentsOverlappingInterval(dataSource, interval); + } + + @Override + public List findPendingSegmentsWithExactInterval(Interval interval) + { + return query.retrievePendingSegmentsWithExactInterval(dataSource, interval); + } + + @Override + public List findPendingSegments(String taskAllocatorId) + { + return query.retrievePendingSegmentsForTaskAllocatorId(dataSource, taskAllocatorId); + } + + @Override + public boolean insertPendingSegment( + PendingSegmentRecord pendingSegment, + boolean skipSegmentLineageCheck + ) + { + final SegmentIdWithShardSpec segmentId = pendingSegment.getId(); + final Interval interval = segmentId.getInterval(); + int updatedCount = handle.createStatement(getSqlToInsertPendingSegment()) + .bind("id", segmentId.toString()) + .bind("dataSource", dataSource) + .bind("created_date", DateTimes.nowUtc().toString()) + .bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()) + .bind("sequence_name", pendingSegment.getSequenceName()) + .bind("sequence_prev_id", pendingSegment.getSequencePrevId()) + .bind( + "sequence_name_prev_id_sha1", + pendingSegment.computeSequenceNamePrevIdSha1(skipSegmentLineageCheck) + ) + .bind("payload", getJsonBytes(segmentId)) + .bind("task_allocator_id", pendingSegment.getTaskAllocatorId()) + .bind("upgraded_from_segment_id", pendingSegment.getUpgradedFromSegmentId()) + .execute(); + + return updatedCount > 0; + } + + @Override + public int insertPendingSegments( + List pendingSegments, + boolean skipSegmentLineageCheck + ) + { + final PreparedBatch insertBatch = handle.prepareBatch(getSqlToInsertPendingSegment()); + + final String createdDate = DateTimes.nowUtc().toString(); + final Set processedSegmentIds = 
new HashSet<>(); + for (PendingSegmentRecord pendingSegment : pendingSegments) { + final SegmentIdWithShardSpec segmentId = pendingSegment.getId(); + if (processedSegmentIds.contains(segmentId)) { + continue; + } + final Interval interval = segmentId.getInterval(); + + insertBatch.add() + .bind("id", segmentId.toString()) + .bind("dataSource", dataSource) + .bind("created_date", createdDate) + .bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()) + .bind("sequence_name", pendingSegment.getSequenceName()) + .bind("sequence_prev_id", pendingSegment.getSequencePrevId()) + .bind( + "sequence_name_prev_id_sha1", + pendingSegment.computeSequenceNamePrevIdSha1(skipSegmentLineageCheck) + ) + .bind("payload", getJsonBytes(segmentId)) + .bind("task_allocator_id", pendingSegment.getTaskAllocatorId()) + .bind("upgraded_from_segment_id", pendingSegment.getUpgradedFromSegmentId()); + + processedSegmentIds.add(segmentId); + } + int[] updated = insertBatch.execute(); + return Arrays.stream(updated).sum(); + } + + @Override + public int deleteAllPendingSegments() + { + final String sql = StringUtils.format( + "DELETE FROM %s WHERE datasource = :dataSource", + dbTables.getPendingSegmentsTable() + ); + return handle.createStatement(sql).bind("dataSource", dataSource).execute(); + } + + @Override + public int deletePendingSegments(List segmentIdsToDelete) + { + if (segmentIdsToDelete.isEmpty()) { + return 0; + } + + final List> pendingSegmentIdBatches + = Lists.partition(segmentIdsToDelete, MAX_SEGMENTS_PER_BATCH); + + int numDeletedPendingSegments = 0; + for (List pendingSegmentIdBatch : pendingSegmentIdBatches) { + numDeletedPendingSegments += deletePendingSegmentsBatch(pendingSegmentIdBatch); + } + + return numDeletedPendingSegments; + } + + @Override + public int deletePendingSegments(String taskAllocatorId) + { + final String sql = StringUtils.format( + "DELETE FROM %s WHERE dataSource = :dataSource" + + " AND task_allocator_id = :task_allocator_id", + dbTables.getPendingSegmentsTable() + ); + + return handle + .createStatement(sql) + .bind("dataSource", dataSource) + .bind("task_allocator_id", taskAllocatorId) + .execute(); + } + + @Override + public int deletePendingSegmentsCreatedIn(Interval interval) + { + final String sql = StringUtils.format( + "DELETE FROM %s WHERE datasource = :dataSource" + + " AND created_date >= :start AND created_date < :end", + dbTables.getPendingSegmentsTable() + ); + return handle + .createStatement(sql) + .bind("dataSource", dataSource) + .bind("start", interval.getStart().toString()) + .bind("end", interval.getEnd().toString()) + .execute(); + } + + private int deletePendingSegmentsBatch(List segmentIdsToDelete) + { + Update query = handle.createStatement( + StringUtils.format( + "DELETE FROM %s WHERE dataSource = :dataSource %s", + dbTables.getPendingSegmentsTable(), + SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", segmentIdsToDelete) + ) + ).bind("dataSource", dataSource); + SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", segmentIdsToDelete, query); + + return query.execute(); + } + + private int insertSegmentsInBatches( + final String dataSource, + final Set segments, + String insertSql + ) + { + final List> partitionedSegments = Lists.partition( + new ArrayList<>(segments), + MAX_SEGMENTS_PER_BATCH + ); + + final boolean persistAdditionalMetadata = insertSql.contains(":schema_fingerprint"); + + // SELECT -> INSERT can fail due to races; callers must be prepared to retry. 
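+    // Per-row update counts from the batch are checked after execution so that any rows
+    // which failed to insert are surfaced to the caller.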
+ // Avoiding ON DUPLICATE KEY since it's not portable. + // Avoiding try/catch since it may cause inadvertent transaction-splitting. + final PreparedBatch batch = handle.prepareBatch( + StringUtils.format(insertSql, dbTables.getSegmentsTable(), connector.getQuoteString()) + ); + + int numInsertedSegments = 0; + for (List partition : partitionedSegments) { + for (DataSegmentPlus segmentPlus : partition) { + final DataSegment segment = segmentPlus.getDataSegment(); + PreparedBatchPart preparedBatchPart = + batch.add() + .bind("id", segment.getId().toString()) + .bind("dataSource", dataSource) + .bind("created_date", nullSafeString(segmentPlus.getCreatedDate())) + .bind("start", segment.getInterval().getStart().toString()) + .bind("end", segment.getInterval().getEnd().toString()) + .bind("partitioned", true) + .bind("version", segment.getVersion()) + .bind("used", Boolean.TRUE.equals(segmentPlus.getUsed())) + .bind("payload", getJsonBytes(segment)) + .bind("used_status_last_updated", nullSafeString(segmentPlus.getUsedStatusLastUpdatedDate())) + .bind("upgraded_from_segment_id", segmentPlus.getUpgradedFromSegmentId()); + + if (persistAdditionalMetadata) { + preparedBatchPart + .bind("num_rows", segmentPlus.getNumRows()) + .bind("schema_fingerprint", segmentPlus.getSchemaFingerprint()); + } + } + + // Execute the batch and ensure that all the segments were inserted + final int[] affectedRows = batch.execute(); + + final List failedInserts = new ArrayList<>(); + for (int i = 0; i < partition.size(); ++i) { + if (affectedRows[i] == 1) { + ++numInsertedSegments; + } else { + failedInserts.add(partition.get(i).getDataSegment()); + } + } + if (!failedInserts.isEmpty()) { + throw InternalServerError.exception( + "Failed to insert segments in metadata store: %s", + SegmentUtils.commaSeparatedIdentifiers(failedInserts) + ); + } + } + + return numInsertedSegments; + } + + private String getSqlToInsertPendingSegment() + { + return StringUtils.format( + "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, " + + "sequence_name_prev_id_sha1, payload, task_allocator_id, upgraded_from_segment_id) " + + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, " + + ":sequence_name_prev_id_sha1, :payload, :task_allocator_id, :upgraded_from_segment_id)", + dbTables.getPendingSegmentsTable(), + connector.getQuoteString() + ); + } + + private static String nullSafeString(DateTime time) + { + return time == null ? null : time.toString(); + } + + private byte[] getJsonBytes(T object) + { + try { + return jsonMapper.writeValueAsBytes(object); + } + catch (JsonProcessingException e) { + throw InternalServerError.exception("Could not serialize object[%s]", object); + } + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java new file mode 100644 index 000000000000..ac46956c4d55 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.inject.Inject; +import org.apache.druid.discovery.DruidLeaderSelector; +import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.metadata.SQLMetadataConnector; +import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; +import org.skife.jdbi.v2.Handle; +import org.skife.jdbi.v2.TransactionStatus; + +/** + * Factory for {@link SegmentsMetadataTransaction}s. If the + * {@link SegmentsMetadataCache} is enabled and ready, the transaction may + * read/write from the cache as applicable. + */ +public class SqlSegmentsMetadataTransactionFactory +{ + private final ObjectMapper jsonMapper; + private final MetadataStorageTablesConfig tablesConfig; + private final SQLMetadataConnector connector; + private final DruidLeaderSelector leaderSelector; + private final SegmentsMetadataCache segmentsMetadataCache; + + @Inject + public SqlSegmentsMetadataTransactionFactory( + ObjectMapper jsonMapper, + MetadataStorageTablesConfig tablesConfig, + SQLMetadataConnector connector, + DruidLeaderSelector leaderSelector, + SegmentsMetadataCache segmentsMetadataCache + ) + { + this.jsonMapper = jsonMapper; + this.tablesConfig = tablesConfig; + this.connector = connector; + this.leaderSelector = leaderSelector; + this.segmentsMetadataCache = segmentsMetadataCache; + } + + public SegmentsMetadataTransaction createTransactionForDatasource( + String dataSource, + Handle handle, + TransactionStatus transactionStatus + ) + { + final SegmentsMetadataTransaction metadataTransaction = new SqlSegmentsMetadataTransaction( + dataSource, + handle, + transactionStatus, + connector, + tablesConfig, + jsonMapper + ); + + return + segmentsMetadataCache.isReady() + ? new SqlSegmentsMetadataCachedTransaction( + dataSource, + metadataTransaction, + segmentsMetadataCache, + leaderSelector + ) + : metadataTransaction; + } + +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java new file mode 100644 index 000000000000..27cf5aa1d966 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment.cache; + +import com.google.common.base.Supplier; + +import java.util.concurrent.locks.ReentrantReadWriteLock; + +public abstract class BaseCache +{ + private final ReentrantReadWriteLock stateLock; + + public BaseCache(boolean fair) + { + stateLock = new ReentrantReadWriteLock(fair); + } + + public void withWriteLock(Action action) + { + withWriteLock(() -> { + action.perform(); + return 0; + }); + } + + public T withWriteLock(Supplier action) + { + stateLock.writeLock().lock(); + try { + return action.get(); + } + finally { + stateLock.writeLock().unlock(); + } + } + + public T withReadLock(Supplier action) + { + stateLock.readLock().lock(); + try { + return action.get(); + } + finally { + stateLock.readLock().unlock(); + } + } + + @FunctionalInterface + public interface Action + { + void perform(); + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java new file mode 100644 index 000000000000..c087fd51904f --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java @@ -0,0 +1,558 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.druid.metadata.segment.cache;
+
+import org.apache.druid.error.DruidException;
+import org.apache.druid.java.util.common.CloseableIterators;
+import org.apache.druid.java.util.common.parsers.CloseableIterator;
+import org.apache.druid.metadata.PendingSegmentRecord;
+import org.apache.druid.metadata.segment.DatasourceSegmentMetadataReader;
+import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter;
+import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
+import org.apache.druid.server.http.DataSegmentPlus;
+import org.apache.druid.timeline.DataSegment;
+import org.apache.druid.timeline.SegmentId;
+import org.apache.druid.timeline.SegmentTimeline;
+import org.joda.time.DateTime;
+import org.joda.time.Interval;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+
+/**
+ * Datasource-level cache for segments and pending segments.
+ *
+ * TODO: track the created date for each pending segment as it might be needed
+ * for deletion.
+ */
+class DatasourceSegmentCache
+    extends BaseCache
+    implements DatasourceSegmentMetadataReader, DatasourceSegmentMetadataWriter
+{
+  private static final DatasourceSegmentCache EMPTY_INSTANCE = new DatasourceSegmentCache();
+
+  /**
+   * Used to obtain the segment for a given ID so that it can be updated in the
+   * timeline.
+   */
+  private final Map<String, DataSegmentPlus> idToUsedSegment = new HashMap<>();
+
+  /**
+   * Current state of segments as seen by the cache.
+   */
+  private final Map<String, SegmentState> idToSegmentState = new HashMap<>();
+
+  /**
+   * Allows lookup of visible segments for a given interval.
+   */
+  private final SegmentTimeline usedSegmentTimeline = SegmentTimeline.forSegments(Set.of());
+
+  private final Map<Interval, Map<String, PendingSegmentRecord>>
+      intervalToPendingSegments = new HashMap<>();
+
+  private final Set<String> unusedSegmentIds = new HashSet<>();
+
+  static DatasourceSegmentCache empty()
+  {
+    return EMPTY_INSTANCE;
+  }
+
+  DatasourceSegmentCache()
+  {
+    super(true);
+  }
+
+  void clear()
+  {
+    withWriteLock(() -> {
+      // Remove used segments from the timeline before the ID maps are cleared
+      idToUsedSegment.values().forEach(s -> usedSegmentTimeline.remove(s.getDataSegment()));
+      idToSegmentState.clear();
+      idToUsedSegment.clear();
+      unusedSegmentIds.clear();
+    });
+  }
+
+  boolean isEmpty()
+  {
+    return withReadLock(() -> idToSegmentState.isEmpty() && intervalToPendingSegments.isEmpty());
+  }
+
+  /**
+   * Checks if a segment needs to be refreshed. A refresh is required if the
+   * cache has no known state for the given segment or if the metadata store
+   * has a more recent last_updated_time than the cache.
+   */
+  boolean shouldRefreshSegment(String segmentId, SegmentState metadataState)
+  {
+    return withReadLock(() -> {
+      final SegmentState cachedState = idToSegmentState.get(segmentId);
+      return cachedState == null
+             || cachedState.getLastUpdatedTime().isBefore(metadataState.getLastUpdatedTime());
+    });
+  }
+
+  /**
+   * Checks if a pending segment needs to be refreshed in the cache.
+ */ + boolean shouldRefreshPendingSegment(PendingSegmentRecord record) + { + final SegmentIdWithShardSpec segmentId = record.getId(); + return withReadLock( + () -> intervalToPendingSegments.getOrDefault(segmentId.getInterval(), Map.of()) + .containsKey(segmentId.toString()) + ); + } + + boolean refreshUnusedSegment(String segmentId, SegmentState newState) + { + if (newState.isUsed()) { + return false; + } + + return withWriteLock(() -> { + if (!shouldRefreshSegment(segmentId, newState)) { + return false; + } + + final SegmentState oldState = idToSegmentState.put(segmentId, newState); + + if (oldState != null && oldState.isUsed()) { + // Segment has transitioned from used to unused + DataSegmentPlus segment = idToUsedSegment.remove(segmentId); + if (segment != null) { + usedSegmentTimeline.remove(segment.getDataSegment()); + } + } + + unusedSegmentIds.add(segmentId); + return true; + }); + } + + boolean refreshUsedSegment(DataSegmentPlus segmentPlus) + { + final DataSegment segment = segmentPlus.getDataSegment(); + final String segmentId = getId(segment); + + final SegmentState newState = new SegmentState( + Boolean.TRUE.equals(segmentPlus.getUsed()), + segmentPlus.getUsedStatusLastUpdatedDate() + ); + if (!newState.isUsed()) { + return refreshUnusedSegment(segmentId, newState); + } + + return withWriteLock(() -> { + if (!shouldRefreshSegment(segmentId, newState)) { + return false; + } + + final SegmentState oldState = idToSegmentState.put(segmentId, newState); + final DataSegmentPlus oldSegmentPlus = idToUsedSegment.put(segmentId, segmentPlus); + + if (oldState == null) { + // This is a new segment + } else if (oldState.isUsed()) { + // Segment payload may have changed + if (oldSegmentPlus != null) { + usedSegmentTimeline.remove(oldSegmentPlus.getDataSegment()); + } + } else { + // Segment has transitioned from unused to used + unusedSegmentIds.remove(segmentId); + } + + usedSegmentTimeline.add(segment); + return true; + }); + } + + int removeSegmentIds(Set segmentIds) + { + return withWriteLock(() -> { + int removedCount = 0; + for (String segmentId : segmentIds) { + SegmentState state = idToSegmentState.remove(segmentId); + if (state != null) { + ++removedCount; + } + + unusedSegmentIds.remove(segmentId); + + final DataSegmentPlus segment = idToUsedSegment.remove(segmentId); + if (segment != null) { + usedSegmentTimeline.remove(segment.getDataSegment()); + } + } + + return removedCount; + }); + } + + /** + * Returns the set of segment IDs present in the cache but not present in the + * given set of known segment IDs. 
+ */ + Set getSegmentIdsNotIn(Set knownSegmentIds) + { + return withReadLock( + () -> knownSegmentIds.stream() + .filter(id -> !idToSegmentState.containsKey(id)) + .collect(Collectors.toSet()) + ); + } + + // READ METHODS + + @Override + public Set findExistingSegmentIds(Set segments) + { + return withReadLock( + () -> segments.stream() + .map(DatasourceSegmentCache::getId) + .filter(idToSegmentState::containsKey) + .collect(Collectors.toSet()) + ); + } + + @Override + public Set findUsedSegmentIdsOverlapping(Interval interval) + { + return findUsedSegmentsPlusOverlappingAnyOf(List.of(interval)) + .stream() + .map(s -> s.getDataSegment().getId()) + .collect(Collectors.toSet()); + } + + @Override + public Set findUnusedSegmentIdsWithExactIntervalAndVersion(Interval interval, String version) + { + // TODO: implement this or may be add a variant of this method to find the + // max unused segment ID for an exact interval and version + throw DruidException.defensive("Unsupported: Unused segments are not cached"); + } + + @Override + public CloseableIterator findUsedSegmentsOverlappingAnyOf(List intervals) + { + return CloseableIterators.withEmptyBaggage( + findUsedSegmentsPlusOverlappingAnyOf(intervals) + .stream() + .map(DataSegmentPlus::getDataSegment) + .iterator() + ); + } + + @Override + public List findUsedSegments(Set segmentIds) + { + return withReadLock( + () -> segmentIds.stream() + .map(idToUsedSegment::get) + .filter(Objects::nonNull) + .map(DataSegmentPlus::getDataSegment) + .collect(Collectors.toList()) + ); + } + + @Override + public Set findUsedSegmentsPlusOverlappingAnyOf(List intervals) + { + return withReadLock( + () -> idToUsedSegment.values() + .stream() + .filter(s -> anyIntervalOverlaps(intervals, s.getDataSegment().getInterval())) + .collect(Collectors.toSet()) + ); + } + + @Override + public DataSegment findSegment(String segmentId) + { + throw DruidException.defensive("Unsupported: Unused segments are not cached"); + } + + @Override + public DataSegment findUsedSegment(String segmentId) + { + return withReadLock(() -> { + final DataSegmentPlus segmentPlus = idToUsedSegment.get(segmentId); + return segmentPlus == null ? 
null : segmentPlus.getDataSegment(); + }); + } + + @Override + public List findSegments(Set segmentIds) + { + throw DruidException.defensive("Unsupported: Unused segments are not cached"); + } + + @Override + public List findSegmentsWithSchema(Set segmentIds) + { + throw DruidException.defensive("Unsupported: Unused segments are not cached"); + } + + @Override + public List findUnusedSegments( + Interval interval, + @Nullable List versions, + @Nullable Integer limit, + @Nullable DateTime maxUsedStatusLastUpdatedTime + ) + { + throw DruidException.defensive("Unsupported: Unused segments are not cached"); + } + + @Override + public List findPendingSegmentIds(String sequenceName, String sequencePreviousId) + { + return findPendingSegmentsMatching( + record -> sequenceName.equals(record.getSequenceName()) + && sequencePreviousId.equals(record.getSequencePrevId()) + ) + .stream() + .map(PendingSegmentRecord::getId) + .collect(Collectors.toList()); + } + + @Override + public List findPendingSegmentIdsWithExactInterval(String sequenceName, Interval interval) + { + return withReadLock( + () -> intervalToPendingSegments + .getOrDefault(interval, Map.of()) + .values() + .stream() + .filter(record -> record.getSequenceName().equals(sequenceName)) + .map(PendingSegmentRecord::getId) + .collect(Collectors.toList()) + ); + } + + @Override + public List findPendingSegmentsOverlapping(Interval interval) + { + return withReadLock( + () -> intervalToPendingSegments.entrySet() + .stream() + .filter(entry -> entry.getKey().overlaps(interval)) + .flatMap(entry -> entry.getValue().values().stream()) + .collect(Collectors.toList()) + ); + } + + @Override + public List findPendingSegmentsWithExactInterval(Interval interval) + { + return withReadLock( + () -> List.copyOf( + intervalToPendingSegments.getOrDefault(interval, Map.of()).values() + ) + ); + } + + @Override + public List findPendingSegments(String taskAllocatorId) + { + return findPendingSegmentsMatching(record -> taskAllocatorId.equals(record.getTaskAllocatorId())); + } + + // WRITE METHODS + + @Override + public int insertSegments(Set segments) + { + return withWriteLock(() -> { + int numInsertedSegments = 0; + for (DataSegmentPlus segmentPlus : segments) { + final DataSegment segment = segmentPlus.getDataSegment(); + final String segmentId = getId(segment); + final SegmentState state = new SegmentState( + Boolean.TRUE.equals(segmentPlus.getUsed()), + segmentPlus.getUsedStatusLastUpdatedDate() + ); + + final boolean updated = state.isUsed() + ? 
refreshUsedSegment(segmentPlus) + : refreshUnusedSegment(segmentId, state); + if (updated) { + ++numInsertedSegments; + } + } + + return numInsertedSegments; + }); + } + + @Override + public int insertSegmentsWithMetadata(Set segments) + { + return insertSegments(segments); + } + + @Override + public int markSegmentsWithinIntervalAsUnused(Interval interval, DateTime updateTime) + { + int updatedCount = 0; + try (CloseableIterator segmentIterator + = findUsedSegmentsOverlappingAnyOf(List.of(interval))) { + while (segmentIterator.hasNext()) { + boolean updated = refreshUnusedSegment( + getId(segmentIterator.next()), + new SegmentState(false, updateTime) + ); + if (updated) { + ++updatedCount; + } + } + } + catch (IOException e) { + throw DruidException.defensive("Error while updating segments in cache"); + } + + return updatedCount; + } + + @Override + public int deleteSegments(Set segments) + { + final Set segmentIdsToDelete = + segments.stream() + .map(DatasourceSegmentCache::getId) + .collect(Collectors.toSet()); + return withWriteLock(() -> removeSegmentIds(segmentIdsToDelete)); + } + + @Override + public boolean updateSegmentPayload(DataSegment segment) + { + // Segment payload updates are not supported since we don't know if the segment is used or unused + throw DruidException.defensive("Unsupported: Segment payload updates are not supported in the cache"); + } + + @Override + public boolean insertPendingSegment(PendingSegmentRecord pendingSegment, boolean skipSegmentLineageCheck) + { + return insertPendingSegments(List.of(pendingSegment), skipSegmentLineageCheck) > 0; + } + + @Override + public int insertPendingSegments(List pendingSegments, boolean skipSegmentLineageCheck) + { + return withWriteLock(() -> { + int insertedCount = 0; + for (PendingSegmentRecord record : pendingSegments) { + final SegmentIdWithShardSpec segmentId = record.getId(); + PendingSegmentRecord oldValue = + intervalToPendingSegments.computeIfAbsent(segmentId.getInterval(), interval -> new HashMap<>()) + .putIfAbsent(segmentId.toString(), record); + if (oldValue == null) { + ++insertedCount; + } + } + + return insertedCount; + }); + } + + @Override + public int deleteAllPendingSegments() + { + return withWriteLock(() -> { + int numPendingSegments = intervalToPendingSegments.values().stream().mapToInt(Map::size).sum(); + intervalToPendingSegments.clear(); + return numPendingSegments; + }); + } + + @Override + public int deletePendingSegments(List segmentIdsToDelete) + { + final Set remainingIdsToDelete = new HashSet<>(segmentIdsToDelete); + + withWriteLock(() -> intervalToPendingSegments.forEach( + (interval, pendingSegments) -> { + final Set deletedIds = + remainingIdsToDelete.stream() + .map(pendingSegments::remove) + .filter(Objects::nonNull) + .map(record -> record.getId().toString()) + .collect(Collectors.toSet()); + + remainingIdsToDelete.removeAll(deletedIds); + } + )); + + return segmentIdsToDelete.size() - remainingIdsToDelete.size(); + } + + @Override + public int deletePendingSegments(String taskAllocatorId) + { + return withWriteLock(() -> { + List idsToDelete = findPendingSegmentsMatching( + record -> taskAllocatorId.equals(record.getTaskAllocatorId()) + ).stream().map(record -> record.getId().toString()).collect(Collectors.toList()); + + return deletePendingSegments(idsToDelete); + }); + } + + @Override + public int deletePendingSegmentsCreatedIn(Interval interval) + { + // TODO + return 0; + } + + /** + * Returns all the pending segments that match the given predicate. 
+ */ + private List findPendingSegmentsMatching(Predicate predicate) + { + return withReadLock( + () -> intervalToPendingSegments.entrySet() + .stream() + .flatMap(entry -> entry.getValue().values().stream()) + .filter(predicate) + .collect(Collectors.toList()) + ); + } + + private static boolean anyIntervalOverlaps(List intervals, Interval testInterval) + { + return intervals.isEmpty() + || intervals.stream().anyMatch(interval -> interval.overlaps(testInterval)); + } + + private static String getId(DataSegment segment) + { + return segment.getId().toString(); + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentState.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentState.java new file mode 100644 index 000000000000..d4d24ff382ad --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentState.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment.cache; + +import org.apache.druid.java.util.common.DateTimes; +import org.joda.time.DateTime; + +public class SegmentState +{ + private final boolean used; + private final DateTime lastUpdatedTime; + + public SegmentState(boolean used, DateTime lastUpdatedTime) + { + this.used = used; + this.lastUpdatedTime = lastUpdatedTime == null ? DateTimes.EPOCH : lastUpdatedTime; + } + + public boolean isUsed() + { + return used; + } + + public DateTime getLastUpdatedTime() + { + return lastUpdatedTime; + } +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java new file mode 100644 index 000000000000..fec34645e044 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
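Note on SegmentState above: it deliberately coerces a null last-updated time to DateTimes.EPOCH so that a record with no recorded update time always compares as older than one that has been updated. A minimal sketch of a refresh check built on this class; the patch's actual decision lives in DatasourceSegmentCache.shouldRefreshSegment and may differ, so treat this as illustrative only:

  // Illustrative only: refresh when the used flag differs or the metadata store
  // carries a strictly newer used_status_last_updated than the cached state.
  static boolean needsRefresh(SegmentState cached, SegmentState persisted)
  {
    return cached.isUsed() != persisted.isUsed()
           || persisted.getLastUpdatedTime().isAfter(cached.getLastUpdatedTime());
  }
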
+ */ + +package org.apache.druid.metadata.segment.cache; + +import org.apache.druid.metadata.segment.DatasourceSegmentMetadataReader; +import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; + +/** + * TODO: + * -[ ] Finish polling of pending segments properly + * -[ ] Implement rollback and commit for cached transaction + * -[ ] Acquire read/write lock on datasource cache when transaction starts. + * -[ ] Add different factory methods to create read vs write transaction + * -[ ] Write a basic unit test to verify that things are working as expected + * -[ ] Wire up cache in OverlordCompactionScheduler and SqlSegmentsMetadataManager, + * otherwise we will end up having two copies of the segment timeline and stuff + * The timeline inside the cache can replace the SegmentTimeline of SqlSegmentsMetadataManager + * -[ ] Add transaction API to return timeline and/or timeline holders + * -[ ] Write unit tests + * -[ ] Write integration tests + * -[ ] Write a benchmark + */ +public interface SegmentsMetadataCache +{ + void start(); + + void stop(); + + boolean isReady(); + + DatasourceSegmentMetadataReader readerForDatasource(String dataSource); + + DatasourceSegmentMetadataWriter writerForDatasource(String dataSource); + +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java new file mode 100644 index 000000000000..34500dde4aa0 --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java @@ -0,0 +1,454 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
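A rough usage sketch for the SegmentsMetadataCache interface above, given that the SQL implementation below rejects reads until its first poll completes; the sqlReaderFor() fallback is a hypothetical helper standing in for a direct metadata-store read, not something this patch provides:

  // Illustrative only: prefer the in-memory cache once it is ready,
  // otherwise fall back to reading the metadata store directly.
  DatasourceSegmentMetadataReader readerFor(SegmentsMetadataCache cache, String dataSource)
  {
    return cache.isReady()
           ? cache.readerForDatasource(dataSource)
           : sqlReaderFor(dataSource);   // hypothetical direct-SQL fallback
  }
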
+ */ + +package org.apache.druid.metadata.segment.cache; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Supplier; +import com.google.inject.Inject; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Stopwatch; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.concurrent.ScheduledExecutorFactory; +import org.apache.druid.java.util.common.lifecycle.LifecycleStart; +import org.apache.druid.java.util.common.lifecycle.LifecycleStop; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; +import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.metadata.PendingSegmentRecord; +import org.apache.druid.metadata.SQLMetadataConnector; +import org.apache.druid.metadata.SegmentsMetadataManagerConfig; +import org.apache.druid.metadata.SqlSegmentsMetadataQuery; +import org.apache.druid.metadata.segment.DatasourceSegmentMetadataReader; +import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; +import org.apache.druid.query.DruidMetrics; +import org.apache.druid.server.http.DataSegmentPlus; +import org.joda.time.DateTime; +import org.joda.time.Duration; +import org.skife.jdbi.v2.ResultIterator; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +public class SqlSegmentsMetadataCache implements SegmentsMetadataCache +{ + private static final EmittingLogger log = new EmittingLogger(SqlSegmentsMetadataCache.class); + private static final String METRIC_PREFIX = "segment/metadataCache/"; + + private enum CacheState + { + STOPPED, STARTING, READY + } + + private final ObjectMapper jsonMapper; + private final Duration pollDuration; + private final boolean isCacheEnabled; + private final Supplier tablesConfig; + private final SQLMetadataConnector connector; + + private final ScheduledExecutorService pollExecutor; + private final ServiceEmitter emitter; + + private final AtomicReference currentCacheState + = new AtomicReference<>(CacheState.STOPPED); + + private final ConcurrentHashMap + datasourceToSegmentCache = new ConcurrentHashMap<>(); + private final AtomicReference pollFinishTime = new AtomicReference<>(); + + @Inject + public SqlSegmentsMetadataCache( + ObjectMapper jsonMapper, + Supplier config, + Supplier tablesConfig, + SQLMetadataConnector connector, + ScheduledExecutorFactory executorFactory, + ServiceEmitter emitter + ) + { + this.jsonMapper = jsonMapper; + this.isCacheEnabled = config.get().isUseCache(); + this.pollDuration = config.get().getPollDuration().toStandardDuration(); + this.tablesConfig = tablesConfig; + this.connector = connector; + this.pollExecutor = isCacheEnabled ? 
executorFactory.create(1, "SegmentsMetadataCache-%s") : null; + this.emitter = emitter; + } + + + @Override + @LifecycleStart + public synchronized void start() + { + if (isCacheEnabled && currentCacheState.compareAndSet(CacheState.STOPPED, CacheState.STARTING)) { + // Clean up any stray entries in the cache left over due to race conditions + tearDown(); + pollExecutor.schedule(this::pollMetadataStore, pollDuration.getMillis(), TimeUnit.MILLISECONDS); + } + } + + @Override + @LifecycleStop + public synchronized void stop() + { + if (isCacheEnabled) { + currentCacheState.set(CacheState.STOPPED); + tearDown(); + } + + // TODO: Handle race conditions + // T1: sees cache as ready + // T2: stops the cache + // T1: tries to read some value from the cache and fails + + // Should start-stop wait on everything else? + // When does stop happen? + // 1. Leadership changes: If leadership has changed, no point continuing the operation? + // In the current implementation, a task action would continue executing even if leadership has been lost? + // Yes, I do think so. + // Solution: If leadership has changed, transaction would fail, we wouldn't need to read or write anymore + + // 2. Service start-stop. Again no point worrying about the cache + } + + @Override + public boolean isReady() + { + return currentCacheState.get() == CacheState.READY; + } + + @Override + public DatasourceSegmentMetadataReader readerForDatasource(String dataSource) + { + verifyCacheIsReady(); + return datasourceToSegmentCache.getOrDefault(dataSource, DatasourceSegmentCache.empty()); + } + + @Override + public DatasourceSegmentMetadataWriter writerForDatasource(String dataSource) + { + verifyCacheIsReady(); + return datasourceToSegmentCache.computeIfAbsent(dataSource, ds -> new DatasourceSegmentCache()); + } + + private void verifyCacheIsReady() + { + if (!isReady()) { + throw DruidException.defensive("Segment metadata cache is not ready yet."); + } + } + + private boolean isStopped() + { + return currentCacheState.get() == CacheState.STOPPED; + } + + private void tearDown() + { + datasourceToSegmentCache.forEach((datasource, state) -> state.clear()); + datasourceToSegmentCache.clear(); + } + + private void pollMetadataStore() + { + final Stopwatch sincePollStart = Stopwatch.createStarted(); + if (isStopped()) { + tearDown(); + return; + } + + final Map> datasourceToRefreshSegmentIds = new HashMap<>(); + final Map> datasourceToKnownSegmentIds + = retrieveAllSegmentIds(datasourceToRefreshSegmentIds); + + // TODO: handle changes made to the metadata store between these two database calls + // there doesn't seem to be much point to lock the cache during this period + // so go and fetch the segments and then refresh them + // it is possible that the cache is now updated and the refresh is not needed after all + // so the refresh should be idempotent + if (isStopped()) { + tearDown(); + return; + } + + removeUnknownSegmentIdsFromCache(datasourceToKnownSegmentIds); + + if (isStopped()) { + tearDown(); + return; + } + + retrieveAndRefreshUsedSegmentsForIds(datasourceToRefreshSegmentIds); + + if (isStopped()) { + tearDown(); + return; + } + + retrieveAndRefreshAllPendingSegments(); + + emitMetric("poll/time", sincePollStart.millisElapsed()); + pollFinishTime.set(DateTimes.nowUtc()); + + if (isStopped()) { + tearDown(); + } else { + currentCacheState.compareAndSet(CacheState.STARTING, CacheState.READY); + + // Schedule the next poll + final long nextPollDelay = Math.max(pollDuration.getMillis() - sincePollStart.millisElapsed(), 0); + 
pollExecutor.schedule(this::pollMetadataStore, nextPollDelay, TimeUnit.MILLISECONDS); + } + } + + /** + * Retrieves all the segment IDs (used and unused) from the metadata store. + * + * @return Map from datasource name to set of all segment IDs present in the + * metadata store for that datasource. + */ + private Map> retrieveAllSegmentIds( + Map> datasourceToRefreshSegmentIds + ) + { + final Map> datasourceToKnownSegmentIds = new HashMap<>(); + final AtomicInteger countOfRefreshedUnusedSegments = new AtomicInteger(0); + + // TODO: should we poll all segments here or just poll used + // and then separately poll only the required stuff for unused segments + // because the number of unused segments can be very large + + final String sql = StringUtils.format( + "SELECT id, dataSource, used, used_status_last_updated FROM %s", + getSegmentsTable() + ); + + connector.inReadOnlyTransaction((handle, status) -> { + try ( + ResultIterator iterator = + handle.createQuery(sql) + .map((index, r, ctx) -> SegmentRecord.fromResultSet(r)) + .iterator() + ) { + while (iterator.hasNext()) { + final SegmentRecord record = iterator.next(); + final DatasourceSegmentCache cache = datasourceToSegmentCache.computeIfAbsent( + record.dataSource, + ds -> new DatasourceSegmentCache() + ); + + if (cache.shouldRefreshSegment(record.segmentId, record.state)) { + if (record.state.isUsed()) { + datasourceToRefreshSegmentIds.computeIfAbsent(record.dataSource, ds -> new HashSet<>()) + .add(record.segmentId); + } else if (cache.refreshUnusedSegment(record.segmentId, record.state)) { + countOfRefreshedUnusedSegments.incrementAndGet(); + emitDatasourceMetric(record.dataSource, "refreshed/unused", 1); + } + } + + datasourceToKnownSegmentIds.computeIfAbsent(record.dataSource, ds -> new HashSet<>()) + .add(record.segmentId); + } + + return 0; + } catch (Exception e) { + log.makeAlert(e, "Error while retrieving segment IDs from metadata store."); + return 1; + } + }); + + if (countOfRefreshedUnusedSegments.get() > 0) { + log.info("Refreshed total [%d] unused segments from metadata store.", countOfRefreshedUnusedSegments.get()); + } + + return datasourceToKnownSegmentIds; + } + + private void retrieveAndRefreshUsedSegmentsForIds( + Map> datasourceToRefreshSegmentIds + ) + { + final AtomicInteger countOfRefreshedUsedSegments = new AtomicInteger(0); + datasourceToRefreshSegmentIds.forEach((dataSource, segmentIds) -> { + final DatasourceSegmentCache cache + = datasourceToSegmentCache.computeIfAbsent(dataSource, ds -> new DatasourceSegmentCache()); + + int numUpdatedUsedSegments = 0; + try ( + CloseableIterator iterator = connector.inReadOnlyTransaction( + (handle, status) -> SqlSegmentsMetadataQuery + .forHandle(handle, connector, tablesConfig.get(), jsonMapper) + .retrieveSegmentsByIdIterator(dataSource, segmentIds) + ) + ) { + while (iterator.hasNext()) { + if (cache.refreshUsedSegment(iterator.next())) { + ++numUpdatedUsedSegments; + } + } + } + catch (IOException e) { + log.makeAlert(e, "Error retrieving segments for datasource[%s] from metadata store.", dataSource) + .emit(); + } + + emitDatasourceMetric(dataSource, "refresh/used", numUpdatedUsedSegments); + countOfRefreshedUsedSegments.addAndGet(numUpdatedUsedSegments); + }); + + if (countOfRefreshedUsedSegments.get() > 0) { + log.info( + "Refreshed total [%d] used segments from metadata store.", + countOfRefreshedUsedSegments.get() + ); + } + } + + private void retrieveAndRefreshAllPendingSegments() + { + final String sql = StringUtils.format( + "SELECT payload, sequence_name, 
sequence_prev_id, upgraded_from_segment_id," + + " task_allocator_id, created_date FROM %1$s", + tablesConfig.get().getPendingSegmentsTable() + ); + + connector.inReadOnlyTransaction( + (handle, status) -> handle + .createQuery(sql) + .setFetchSize(connector.getStreamingFetchSize()) + .map((index, r, ctx) -> { + try { + final PendingSegmentRecord record = PendingSegmentRecord.fromResultSet(r, jsonMapper); + final DateTime createdDate = nullSafeDate(r.getString("created_date")); + + // TODO: use the created date + + final DatasourceSegmentCache cache = datasourceToSegmentCache.computeIfAbsent( + record.getId().getDataSource(), + ds -> new DatasourceSegmentCache() + ); + + if (cache.shouldRefreshPendingSegment(record)) { + cache.insertPendingSegment(record, false); + } + + return 0; + } + catch (Exception e) { + return 1; + } + }) + ); + } + + /** + * This is safe to do since updates are always made first to metadata store + * and then to cache. + */ + private void removeUnknownSegmentIdsFromCache(Map<String, Set<String>> datasourceToKnownSegmentIds) + { + datasourceToSegmentCache.forEach((dataSource, cache) -> { + final Set<String> unknownSegmentIds = cache.getSegmentIdsNotIn( + datasourceToKnownSegmentIds.getOrDefault(dataSource, Set.of()) + ); + final int numSegmentsRemoved = cache.removeSegmentIds(unknownSegmentIds); + if (numSegmentsRemoved > 0) { + log.info( + "Removed [%d] unknown segment IDs from cache of datasource[%s].", + numSegmentsRemoved, dataSource + ); + emitDatasourceMetric(dataSource, "deleted/unknown", numSegmentsRemoved); + } + }); + } + + private String getSegmentsTable() + { + return tablesConfig.get().getSegmentsTable(); + } + + private void emitMetric(String metric, long value) + { + emitter.emit( + ServiceMetricEvent.builder().setMetric(METRIC_PREFIX + metric, value) + ); + } + + private void emitDatasourceMetric(String datasource, String metric, long value) + { + emitter.emit( + ServiceMetricEvent.builder() + .setDimension(DruidMetrics.DATASOURCE, datasource) + .setMetric(METRIC_PREFIX + metric, value) + ); + } + + @Nullable + private static DateTime nullSafeDate(String date) + { + return date == null ? 
null : DateTimes.of(date); + } + + private static class SegmentRecord + { + private final String segmentId; + private final String dataSource; + private final SegmentState state; + + SegmentRecord(String segmentId, String dataSource, SegmentState state) + { + this.segmentId = segmentId; + this.dataSource = dataSource; + this.state = state; + } + + @Nullable + static SegmentRecord fromResultSet(ResultSet r) + { + try { + final String segmentId = r.getString("id"); + final boolean isUsed = r.getBoolean("used"); + final String dataSource = r.getString("dataSource"); + final DateTime lastUpdatedTime = nullSafeDate(r.getString("used_status_last_updated")); + + final SegmentState storedState = new SegmentState(isUsed, lastUpdatedTime); + + return new SegmentRecord(segmentId, dataSource, storedState); + } catch (SQLException e) { + return null; + } + } + } + +} diff --git a/server/src/main/java/org/apache/druid/server/http/MetadataResource.java b/server/src/main/java/org/apache/druid/server/http/MetadataResource.java index a1cccd2b784a..ec8f3206ff57 100644 --- a/server/src/main/java/org/apache/druid/server/http/MetadataResource.java +++ b/server/src/main/java/org/apache/druid/server/http/MetadataResource.java @@ -425,7 +425,13 @@ public Response getSegment( } } // fallback to db - DataSegment segment = metadataStorageCoordinator.retrieveSegmentForId(segmentId, Boolean.TRUE.equals(includeUnused)); + final DataSegment segment; + if (Boolean.TRUE.equals(includeUnused)) { + segment = metadataStorageCoordinator.retrieveSegmentForId(dataSourceName, segmentId); + } else { + segment = metadataStorageCoordinator.retrieveUsedSegmentForId(dataSourceName, segmentId); + } + if (segment != null) { return Response.status(Response.Status.OK).entity(segment).build(); } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 06bbf3b7ecd8..6c0df5a6caa3 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -34,6 +34,12 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.metrics.StubServiceEmitter; +import org.apache.druid.metadata.segment.SegmentsMetadataTransaction; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.SqlSegmentsMetadataCache; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.TestDataSource; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; @@ -41,6 +47,9 @@ import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.metadata.SegmentSchemaTestUtils; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.apache.druid.server.coordinator.simulate.BlockingExecutorService; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; +import org.apache.druid.server.coordinator.simulate.WrappingScheduledExecutorService; import org.apache.druid.server.http.DataSegmentPlus; import 
org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; @@ -60,12 +69,14 @@ import org.assertj.core.api.Assertions; import org.joda.time.DateTime; import org.joda.time.Interval; +import org.junit.After; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; -import org.skife.jdbi.v2.Handle; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -81,11 +92,31 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +@RunWith(Parameterized.class) public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadataStorageCoordinatorTestBase { @Rule public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule(); + private TestDruidLeaderSelector leaderSelector; + private SegmentsMetadataCache segmentsMetadataCache; + private StubServiceEmitter emitter; + private SqlSegmentsMetadataTransactionFactory transactionFactory; + private BlockingExecutorService cachePollExecutor; + + private final boolean useSegmentCache; + + @Parameterized.Parameters(name = "useSegmentCache = {0}") + public static Object[][] testParameters() + { + return new Object[][]{{true}, {false}}; + } + + public IndexerSQLMetadataStorageCoordinatorTest(boolean useSegmentCache) + { + this.useSegmentCache = useSegmentCache; + } + @Before public void setUp() { @@ -104,7 +135,40 @@ public void setUp() segmentSchemaManager = new SegmentSchemaManager(derbyConnectorRule.metadataTablesConfigSupplier().get(), mapper, derbyConnector); segmentSchemaTestUtils = new SegmentSchemaTestUtils(derbyConnectorRule, derbyConnector, mapper); + emitter = new StubServiceEmitter(); + leaderSelector = new TestDruidLeaderSelector(); + + cachePollExecutor = new BlockingExecutorService("test-cache-poll-exec"); + segmentsMetadataCache = new SqlSegmentsMetadataCache( + mapper, + () -> new SegmentsMetadataManagerConfig(null, true), + derbyConnectorRule.metadataTablesConfigSupplier(), + derbyConnector, + (corePoolSize, nameFormat) -> new WrappingScheduledExecutorService( + nameFormat, + cachePollExecutor, + false + ), + emitter + ); + + leaderSelector.becomeLeader(); + + // Get the cache ready if required + if (useSegmentCache) { + segmentsMetadataCache.start(); + cachePollExecutor.finishNextPendingTask(); + } + + transactionFactory = new SqlSegmentsMetadataTransactionFactory( + mapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnector, + leaderSelector, + segmentsMetadataCache + ); coordinator = new IndexerSQLMetadataStorageCoordinator( + transactionFactory, mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, @@ -114,7 +178,7 @@ public void setUp() { @Override protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( - Handle handle, + SegmentsMetadataTransaction transaction, String dataSource, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata @@ -122,7 +186,7 @@ protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( { // Count number of times this method is called. 
metadataUpdateCounter.getAndIncrement(); - return super.updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata); + return super.updateDataSourceMetadataWithHandle(transaction, dataSource, startMetadata, endMetadata); } @Override @@ -133,6 +197,20 @@ public int getSqlMetadataMaxRetry() }; } + @After + public void tearDown() + { + segmentsMetadataCache.stop(); + leaderSelector.stopBeingLeader(); + } + + void refreshCache() + { + if (useSegmentCache) { + cachePollExecutor.finishNextPendingTask(); + } + } + @Test public void testCommitAppendSegments() { @@ -257,9 +335,7 @@ public void testCommitAppendSegments() ); } - derbyConnector.retryWithHandle( - handle -> coordinator.insertPendingSegmentsIntoMetastore(handle, pendingSegmentsForTask, TestDataSource.WIKI, false) - ); + insertPendingSegments(TestDataSource.WIKI, pendingSegmentsForTask, false); final Map segmentToReplaceLock = expectedSegmentsToUpgrade.stream() @@ -354,14 +430,7 @@ public void testCommitReplaceSegments_partiallyOverlappingPendingSegmentUnsuppor } segmentSchemaTestUtils.insertUsedSegments(segmentsAppendedWithReplaceLock, Collections.emptyMap()); - derbyConnector.retryWithHandle( - handle -> coordinator.insertPendingSegmentsIntoMetastore( - handle, - ImmutableList.of(pendingSegmentForInterval), - "foo", - true - ) - ); + insertPendingSegments("foo", List.of(pendingSegmentForInterval), true); insertIntoUpgradeSegmentsTable(appendedSegmentToReplaceLockMap, derbyConnectorRule.metadataTablesConfigSupplier().get()); final Set replacingSegments = new HashSet<>(); @@ -433,13 +502,10 @@ public void testCommitReplaceSegments() } segmentSchemaTestUtils.insertUsedSegments(segmentsAppendedWithReplaceLock, Collections.emptyMap()); - derbyConnector.retryWithHandle( - handle -> coordinator.insertPendingSegmentsIntoMetastore( - handle, - ImmutableList.of(pendingSegmentInInterval, pendingSegmentOutsideInterval), - "foo", - true - ) + insertPendingSegments( + "foo", + List.of(pendingSegmentInInterval, pendingSegmentOutsideInterval), + true ); insertIntoUpgradeSegmentsTable(appendedSegmentToReplaceLockMap, derbyConnectorRule.metadataTablesConfigSupplier().get()); @@ -539,13 +605,10 @@ public void testDuplicatePendingSegmentEntriesAreNotInserted() null, "taskAllocatorId" ); - final int actualInserted = derbyConnector.retryWithHandle( - handle -> coordinator.insertPendingSegmentsIntoMetastore( - handle, - ImmutableList.of(pendingSegment0, pendingSegment0, pendingSegment1, pendingSegment1, pendingSegment1), - "foo", - true - ) + final int actualInserted = insertPendingSegments( + "foo", + List.of(pendingSegment0, pendingSegment0, pendingSegment1, pendingSegment1, pendingSegment1), + true ); Assert.assertEquals(2, actualInserted); } @@ -698,6 +761,13 @@ public void testTransactionalAnnounceRetryAndSuccess() throws IOException final AtomicLong attemptCounter = new AtomicLong(); final IndexerSQLMetadataStorageCoordinator failOnceCoordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + mapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, @@ -707,7 +777,7 @@ public void testTransactionalAnnounceRetryAndSuccess() throws IOException { @Override protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( - Handle handle, + SegmentsMetadataTransaction transaction, String dataSource, 
DataSourceMetadata startMetadata, DataSourceMetadata endMetadata @@ -717,7 +787,7 @@ protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( if (attemptCounter.getAndIncrement() == 0) { return DataStoreMetadataUpdateResult.retryableFailure(null); } else { - return super.updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata); + return super.updateDataSourceMetadataWithHandle(transaction, dataSource, startMetadata, endMetadata); } } }; @@ -829,16 +899,22 @@ public void testTransactionalAnnounceFailDbNotNullWantNull() @Test public void testRetrieveUsedSegmentForId() { - segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(defaultSegment), Collections.emptyMap()); - Assert.assertEquals(defaultSegment, coordinator.retrieveSegmentForId(defaultSegment.getId().toString(), false)); + coordinator.commitSegments(Set.of(defaultSegment), null); + Assert.assertEquals( + defaultSegment, + coordinator.retrieveUsedSegmentForId(defaultSegment.getDataSource(), defaultSegment.getId().toString()) + ); } @Test public void testRetrieveSegmentForId() { - segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(defaultSegment), Collections.emptyMap()); + coordinator.commitSegments(Set.of(defaultSegment), null); markAllSegmentsUnused(ImmutableSet.of(defaultSegment), DateTimes.nowUtc()); - Assert.assertEquals(defaultSegment, coordinator.retrieveSegmentForId(defaultSegment.getId().toString(), true)); + Assert.assertEquals( + defaultSegment, + coordinator.retrieveSegmentForId(defaultSegment.getDataSource(), defaultSegment.getId().toString()) + ); } @Test @@ -2084,11 +2160,12 @@ public void testSecondHalfEternitySegmentWithStringComparison() ); } - // Known Issue: https://github.com/apache/druid/issues/12860 - @Ignore @Test public void testLargeIntervalWithStringComparison() { + // Known Issue when not using cache: https://github.com/apache/druid/issues/12860 + Assume.assumeTrue(useSegmentCache); + coordinator.commitSegments( ImmutableSet.of( hugeTimeRangeSegment4 @@ -2188,6 +2265,8 @@ public void testDeleteSegmentsInMetaDataStorage() @Test public void testUpdateSegmentsInMetaDataStorage() { + Assume.assumeFalse(useSegmentCache); + // Published segments to MetaDataStorage coordinator.commitSegments(SEGMENTS, new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION)); @@ -2426,7 +2505,7 @@ public void testAllocatePendingSegmentAfterDroppingExistingSegment() 9, 100 ); - Assert.assertTrue(segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(segment), Collections.emptyMap())); + coordinator.commitSegments(Set.of(segment), null); List ids = retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new", ids.get(0)); @@ -2447,6 +2526,7 @@ public void testAllocatePendingSegmentAfterDroppingExistingSegment() // now drop the used segment previously loaded: markAllSegmentsUnused(ImmutableSet.of(segment), DateTimes.nowUtc()); + refreshCache(); // and final load, this reproduces an issue that could happen with multiple streaming appends, // followed by a reindex, followed by a drop, and more streaming data coming in for same interval @@ -2507,7 +2587,7 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() 9, 100 ); - Assert.assertTrue(segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(segment), Collections.emptyMap())); + coordinator.commitSegments(Set.of(segment), null); List ids = 
retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", ids.get(0)); @@ -2536,7 +2616,7 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() 9, 100 ); - Assert.assertTrue(segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(segment), Collections.emptyMap())); + coordinator.commitSegments(Set.of(segment), null); ids = retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_1", ids.get(1)); @@ -2569,7 +2649,7 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() // pendings: A: 0,1,2 // used segments A: 0,1,2 // unused segments: - Assert.assertTrue(segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(segment), Collections.emptyMap())); + coordinator.commitSegments(Set.of(segment), null); ids = retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_2", ids.get(2)); @@ -2587,7 +2667,7 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() 9, 100 ); - Assert.assertTrue(segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(compactedSegment), Collections.emptyMap())); + coordinator.commitSegments(Set.of(compactedSegment), null); ids = retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_B", ids.get(3)); // 3) When overshadowing, segments are still marked as "used" in the segments table @@ -2617,6 +2697,7 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() // 5) reverted compaction (by marking B_0 as unused) // Revert compaction a manual metadata update which is basically the following two steps: markAllSegmentsUnused(ImmutableSet.of(compactedSegment), DateTimes.nowUtc()); // <- drop compacted segment + refreshCache(); // pending: version = A, id = 0,1,2 // version = B, id = 1 // @@ -2663,7 +2744,7 @@ public void testAnotherAllocatePendingSegmentAfterRevertingCompaction() // // used segment: version = A, id = 0,1,2,3 // unused segment: version = B, id = 0 - Assert.assertTrue(segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(segment), Collections.emptyMap())); + coordinator.commitSegments(Set.of(segment), null); ids = retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A_3", ids.get(3)); @@ -2825,7 +2906,8 @@ public void testNoPendingSegmentsAndOneUsedSegment() 9, 100 ); - Assert.assertTrue(segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(segment), Collections.emptyMap())); + + coordinator.commitSegments(Set.of(segment), null); List ids = retrieveUsedSegmentIds(derbyConnectorRule.metadataTablesConfigSupplier().get()); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_A", ids.get(0)); @@ -3303,7 +3385,7 @@ public void testMarkSegmentsAsUnusedWithinIntervalTwoYears() @Test public void testRetrieveUsedSegmentsAndCreatedDates() { - segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(defaultSegment), Collections.emptyMap()); + coordinator.commitSegments(Set.of(defaultSegment), null); List> resultForIntervalOnTheLeft = coordinator.retrieveUsedSegmentsAndCreatedDates(defaultSegment.getDataSource(), Collections.singletonList(Intervals.of("2000/2001"))); @@ 
-3342,8 +3424,20 @@ public void testRetrieveUsedSegmentsAndCreatedDates() @Test public void testRetrieveUsedSegmentsAndCreatedDatesFetchesEternityForAnyInterval() { - - segmentSchemaTestUtils.insertUsedSegments(ImmutableSet.of(eternitySegment, firstHalfEternityRangeSegment, secondHalfEternityRangeSegment), Collections.emptyMap()); + // Ensure that overlapping segments do not have the same version + // Otherwise they cannot be added to a timeline + coordinator.commitSegments( + Set.of(DataSegment.builder(eternitySegment).version("v1").build()), + null + ); + // Commit these segments separately so that the older one is not overshadowed + coordinator.commitSegments( + Set.of( + DataSegment.builder(firstHalfEternityRangeSegment).version("v2").build(), + DataSegment.builder(secondHalfEternityRangeSegment).version("v3").build() + ), + null + ); List> resultForRandomInterval = coordinator.retrieveUsedSegmentsAndCreatedDates(defaultSegment.getDataSource(), Collections.singletonList(defaultSegment.getInterval())); @@ -3456,6 +3550,7 @@ public void testTimelineWith1CorePartitionTombstone() // Mark the tombstone as unused markAllSegmentsUnused(tombstones, DateTimes.nowUtc()); + refreshCache(); final Collection allUsedSegments = coordinator.retrieveAllUsedSegments( TestDataSource.WIKI, @@ -3520,7 +3615,7 @@ public void testSegmentIdShouldNotBeReallocated() false, "taskAllocatorId" ); - Assert.assertNull(coordinator.retrieveSegmentForId(theId.asSegmentId().toString(), true)); + Assert.assertNull(coordinator.retrieveSegmentForId(theId.getDataSource(), theId.asSegmentId().toString())); } @Test @@ -3559,12 +3654,16 @@ public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() coordinator.commitSegments(ImmutableSet.of(usedSegmentForExactIntervalAndVersion), null); - List unusedSegmentIdsForIntervalAndVersion = - coordinator.retrieveUnusedSegmentIdsForExactIntervalAndVersion(TestDataSource.WIKI, Intervals.of("2024/2025"), "v1"); - Assert.assertEquals(1, unusedSegmentIdsForIntervalAndVersion.size()); + Set unusedSegmentIdsForIntervalAndVersion = derbyConnector.retryTransaction( + (handle, status) -> transactionFactory + .createTransactionForDatasource(TestDataSource.WIKI, handle, status) + .findUnusedSegmentIdsWithExactIntervalAndVersion(Intervals.of("2024/2025"), "v1"), + 3, + SQLMetadataConnector.DEFAULT_MAX_TRIES + ); Assert.assertEquals( - unusedSegmentForExactIntervalAndVersion.getId().toString(), - unusedSegmentIdsForIntervalAndVersion.get(0) + Set.of(unusedSegmentForExactIntervalAndVersion.getId().toString()), + unusedSegmentIdsForIntervalAndVersion ); } @@ -3747,7 +3846,7 @@ public void testRetrieveUsedSegmentsForSegmentAllocation() 0, 100 ); - insertUsedSegments(Collections.singleton(firstSegment), Collections.emptyMap()); + coordinator.commitSegments(Set.of(firstSegment), null); for (int j = 1; j < numSegmentsPerInterval; j++) { nextSegments.add( new DataSegment( @@ -3764,7 +3863,7 @@ public void testRetrieveUsedSegmentsForSegmentAllocation() ) ); } - insertUsedSegments(nextSegments, Collections.emptyMap()); + coordinator.commitSegments(nextSegments, null); } final Set expected = new HashSet<>(); @@ -3781,14 +3880,21 @@ public void testRetrieveUsedSegmentsForSegmentAllocation() } } - Assert.assertEquals(expected, - derbyConnector.retryWithHandle( - handle -> coordinator.retrieveUsedSegmentsForAllocation(handle, datasource, month) - .stream() - .map(SegmentIdWithShardSpec::fromDataSegment) - .collect(Collectors.toSet()) - ) + Set observed = derbyConnector.retryTransaction( + (handle, 
transactionStatus) -> + coordinator.retrieveUsedSegmentsForAllocation( + transactionFactory.createTransactionForDatasource(datasource, handle, transactionStatus), + datasource, + month + ) + .stream() + .map(SegmentIdWithShardSpec::fromDataSegment) + .collect(Collectors.toSet()), + 3, + SQLMetadataConnector.DEFAULT_MAX_TRIES ); + + Assert.assertEquals(expected, observed); } private SegmentIdWithShardSpec allocatePendingSegment( @@ -3816,6 +3922,21 @@ private SegmentIdWithShardSpec allocatePendingSegment( ); } + private int insertPendingSegments( + String dataSource, + List pendingSegments, + boolean skipLineageCheck + ) + { + return derbyConnector.retryTransaction( + (handle, transactionStatus) -> + transactionFactory.createTransactionForDatasource(dataSource, handle, transactionStatus) + .insertPendingSegments(pendingSegments, skipLineageCheck), + 3, + SQLMetadataConnector.DEFAULT_MAX_TRIES + ); + } + private void insertUsedSegments(Set segments, Map upgradedFromSegmentIdMap) { final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(); diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java index fc99af763215..4327b5dd6226 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java @@ -28,6 +28,9 @@ import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; +import org.apache.druid.metadata.segment.SegmentsMetadataTransaction; +import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; import org.apache.druid.segment.SchemaPayload; import org.apache.druid.segment.SchemaPayloadPlus; import org.apache.druid.segment.SegmentSchemaMapping; @@ -37,6 +40,7 @@ import org.apache.druid.segment.metadata.FingerprintGenerator; import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.metadata.SegmentSchemaTestUtils; +import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; import org.apache.druid.timeline.partition.LinearShardSpec; @@ -45,7 +49,6 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; -import org.skife.jdbi.v2.Handle; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -89,6 +92,13 @@ public void setUp() centralizedDatasourceSchemaConfig.setEnabled(true); coordinator = new IndexerSQLMetadataStorageCoordinator( + new SqlSegmentsMetadataTransactionFactory( + mapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ), mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, @@ -98,7 +108,7 @@ public void setUp() { @Override protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( - Handle handle, + SegmentsMetadataTransaction transaction, String dataSource, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata @@ -106,7 +116,7 @@ protected 
DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( { // Count number of times this method is called. metadataUpdateCounter.getAndIncrement(); - return super.updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata); + return super.updateDataSourceMetadataWithHandle(transaction, dataSource, startMetadata, endMetadata); } @Override diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java index 33ee627bef4e..36fc1c4e7da8 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java @@ -44,7 +44,7 @@ public class SqlSegmentsMetadataManagerProviderTest public void testLifecycleStartCreatesSegmentTables() throws Exception { final TestDerbyConnector connector = derbyConnectorRule.getConnector(); - final SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(); + final SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(null, null); final Lifecycle lifecycle = new Lifecycle(); final SegmentSchemaCache segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter()); SqlSegmentsMetadataManagerProvider provider = new SqlSegmentsMetadataManagerProvider( diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java index b6d62b000b14..834553655570 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java @@ -54,8 +54,7 @@ public class SqlSegmentsMetadataManagerSchemaPollTest extends SqlSegmentsMetadat public void setUp() throws Exception { connector = derbyConnectorRule.getConnector(); - SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(); - config.setPollDuration(Period.seconds(3)); + SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(Period.seconds(3), false); segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter()); segmentSchemaManager = new SegmentSchemaManager( @@ -130,8 +129,7 @@ public void testPollSegmentAndSchema() CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = new CentralizedDatasourceSchemaConfig(); centralizedDatasourceSchemaConfig.setEnabled(true); - config = new SegmentsMetadataManagerConfig(); - config.setPollDuration(Period.seconds(3)); + config = new SegmentsMetadataManagerConfig(Period.seconds(3), false); sqlSegmentsMetadataManager = new SqlSegmentsMetadataManager( jsonMapper, Suppliers.ofInstance(config), @@ -219,8 +217,7 @@ public void testPollOnlyNewSchemaVersion() CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = new CentralizedDatasourceSchemaConfig(); centralizedDatasourceSchemaConfig.setEnabled(true); - config = new SegmentsMetadataManagerConfig(); - config.setPollDuration(Period.seconds(3)); + config = new SegmentsMetadataManagerConfig(Period.seconds(3), false); sqlSegmentsMetadataManager = new SqlSegmentsMetadataManager( jsonMapper, Suppliers.ofInstance(config), diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java 
b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java index 50b2bb511b07..70908c29291a 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java @@ -110,8 +110,7 @@ private void publishWikiSegments() public void setUp() { connector = derbyConnectorRule.getConnector(); - SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(); - config.setPollDuration(Period.seconds(3)); + SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(Period.seconds(3), false); storageConfig = derbyConnectorRule.metadataTablesConfigSupplier().get(); segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance()); @@ -1331,8 +1330,7 @@ public void testIterateAllUsedNonOvershadowedSegmentsForDatasourceInterval() thr final Interval theInterval = Intervals.of("2012-03-15T00:00:00.000/2012-03-20T00:00:00.000"); // Re-create SqlSegmentsMetadataManager with a higher poll duration - final SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(); - config.setPollDuration(Period.seconds(1)); + final SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(Period.seconds(1), false); sqlSegmentsMetadataManager = new SqlSegmentsMetadataManager( jsonMapper, Suppliers.ofInstance(config), diff --git a/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java b/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java new file mode 100644 index 000000000000..bbcdd2fdb931 --- /dev/null +++ b/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
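Across the tests above, SegmentsMetadataManagerConfig is now built through a constructor instead of setters. Judging only from the call sites in this patch, the two arguments appear to be the poll period and a flag for the new segment metadata cache, with null meaning "use the default"; that interpretation is inferred, not spelled out here:

  // Poll every 3 seconds and leave the segment metadata cache disabled.
  SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(Period.seconds(3), false);

  // Both defaults, replacing the old no-arg constructor plus setters.
  SegmentsMetadataManagerConfig defaults = new SegmentsMetadataManagerConfig(null, null);
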
+ */ + +package org.apache.druid.metadata.segment.cache; + +import org.apache.druid.metadata.segment.DatasourceSegmentMetadataReader; +import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; + +public class NoopSegmentsMetadataCache implements SegmentsMetadataCache +{ + @Override + public void start() + { + + } + + @Override + public void stop() + { + + } + + @Override + public boolean isReady() + { + return false; + } + + @Override + public DatasourceSegmentMetadataReader readerForDatasource(String dataSource) + { + throw new UnsupportedOperationException(); + } + + @Override + public DatasourceSegmentMetadataWriter writerForDatasource(String dataSource) + { + throw new UnsupportedOperationException(); + } +} diff --git a/server/src/test/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCacheTest.java b/server/src/test/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCacheTest.java new file mode 100644 index 000000000000..d5c5e0e5d8c1 --- /dev/null +++ b/server/src/test/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCacheTest.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.metadata.segment.cache; + +public class SqlSegmentsMetadataCacheTest +{ + +} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index 5c18616ab58e..f5ca2760f390 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -108,8 +108,7 @@ public class KillUnusedSegmentsTest public void setup() { connector = derbyConnectorRule.getConnector(); - SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(); - config.setPollDuration(Period.millis(1)); + SegmentsMetadataManagerConfig config = new SegmentsMetadataManagerConfig(Period.millis(1), false); sqlSegmentsMetadataManager = new SqlSegmentsMetadataManager( TestHelper.makeJsonMapper(), Suppliers.ofInstance(config), diff --git a/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java b/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java index 9c52d639300c..049d1d7c5237 100644 --- a/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java +++ b/server/src/test/java/org/apache/druid/server/http/MetadataResourceTest.java @@ -121,13 +121,13 @@ public void setUp() storageCoordinator = Mockito.mock(IndexerMetadataStorageCoordinator.class); Mockito.doReturn(segments[4]) .when(storageCoordinator) - .retrieveSegmentForId(segments[4].getId().toString(), false); + .retrieveUsedSegmentForId(DATASOURCE1, segments[4].getId().toString()); Mockito.doReturn(null) .when(storageCoordinator) - .retrieveSegmentForId(segments[5].getId().toString(), false); + .retrieveUsedSegmentForId(DATASOURCE1, segments[5].getId().toString()); Mockito.doReturn(segments[5]) .when(storageCoordinator) - .retrieveSegmentForId(segments[5].getId().toString(), true); + .retrieveSegmentForId(DATASOURCE1, segments[5].getId().toString()); Mockito.doAnswer(mockIterateAllUnusedSegmentsForDatasource()) .when(segmentsMetadataManager) From 752664bcb1ca8335f41734ecf54cb92725803d7a Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Mon, 27 Jan 2025 21:02:24 +0530 Subject: [PATCH 02/11] Acquire locks, close transactions --- .../common/actions/TaskActionTestKit.java | 23 ++--- .../supervisor/SupervisorManagerTest.java | 2 +- .../SeekableStreamSupervisorStateTest.java | 4 +- .../IndexerSQLMetadataStorageCoordinator.java | 57 +++-------- .../druid/metadata/PendingSegmentRecord.java | 73 +++++++++++++- .../metadata/SqlSegmentsMetadataQuery.java | 22 ++--- .../SegmentsMetadataReadTransaction.java | 46 +++++++++ .../segment/SegmentsMetadataTransaction.java | 13 +-- .../SqlSegmentsMetadataCachedTransaction.java | 76 ++++++++------ .../SqlSegmentsMetadataTransaction.java | 12 +-- ...SqlSegmentsMetadataTransactionFactory.java | 98 ++++++++++++++++--- .../metadata/segment/cache/BaseCache.java | 26 ++++- .../segment/cache/DatasourceSegmentCache.java | 18 ++-- .../segment/cache/SegmentsMetadataCache.java | 27 +++-- .../cache/SqlSegmentsMetadataCache.java | 52 +++------- ...exerSQLMetadataStorageCoordinatorTest.java | 71 ++++++-------- ...orageCoordinatorSchemaPersistenceTest.java | 21 ++-- .../cache/NoopSegmentsMetadataCache.java | 11 +-- .../appenderator/StreamAppenderatorTest.java | 12 +-- 19 files changed, 405 insertions(+), 259 deletions(-) create mode 100644 
server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java index 4e44af07ab0b..d4a18af27766 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java @@ -98,27 +98,28 @@ public void before() ); final ObjectMapper objectMapper = new TestUtils().getTestObjectMapper(); segmentSchemaManager = new SegmentSchemaManager(metadataStorageTablesConfig, objectMapper, testDerbyConnector); - metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( - objectMapper, - metadataStorageTablesConfig, - testDerbyConnector, - new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() - ), + final SqlSegmentsMetadataTransactionFactory transactionFactory = new SqlSegmentsMetadataTransactionFactory( objectMapper, metadataStorageTablesConfig, testDerbyConnector, - segmentSchemaManager, - CentralizedDatasourceSchemaConfig.create() + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() ) { @Override - public int getSqlMetadataMaxRetry() + public int getMaxRetries() { return 2; } }; + metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( + transactionFactory, + objectMapper, + metadataStorageTablesConfig, + testDerbyConnector, + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() + ); taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator); segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance()); segmentsMetadataManager = new SqlSegmentsMetadataManager( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java index add6d4473dec..97877052fa08 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/supervisor/SupervisorManagerTest.java @@ -655,7 +655,7 @@ public void testRegisterUpgradedPendingSegmentOnSupervisor() replayAll(); - final PendingSegmentRecord pendingSegment = new PendingSegmentRecord( + final PendingSegmentRecord pendingSegment = PendingSegmentRecord.create( new SegmentIdWithShardSpec( "DS", Intervals.ETERNITY, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java index b1600d3ad549..33d5958bd16d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisorStateTest.java @@ -2025,7 +2025,7 @@ public void testRegisterNewVersionOfPendingSegment() ImmutableSet.of() ); - final PendingSegmentRecord pendingSegmentRecord0 = new PendingSegmentRecord( + final PendingSegmentRecord pendingSegmentRecord0 = PendingSegmentRecord.create( new SegmentIdWithShardSpec( "DS", Intervals.of("2024/2025"), @@ -2037,7 +2037,7 @@ 
public void testRegisterNewVersionOfPendingSegment() "someAppendedSegment0", taskGroup0.getBaseSequenceName() ); - final PendingSegmentRecord pendingSegmentRecord1 = new PendingSegmentRecord( + final PendingSegmentRecord pendingSegmentRecord1 = PendingSegmentRecord.create( new SegmentIdWithShardSpec( "DS", Intervals.of("2024/2025"), diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index d609887b11e2..efb9d531726f 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -49,6 +49,7 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; +import org.apache.druid.metadata.segment.SegmentsMetadataReadTransaction; import org.apache.druid.metadata.segment.SegmentsMetadataTransaction; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; import org.apache.druid.segment.SegmentMetadata; @@ -75,7 +76,6 @@ import org.skife.jdbi.v2.PreparedBatch; import org.skife.jdbi.v2.Query; import org.skife.jdbi.v2.ResultIterator; -import org.skife.jdbi.v2.TransactionCallback; import org.skife.jdbi.v2.exceptions.CallbackFailedException; import javax.annotation.Nullable; @@ -521,12 +521,6 @@ public SegmentPublishResult commitMetadataOnly( } } - @VisibleForTesting - public int getSqlMetadataMaxRetry() - { - return SQLMetadataConnector.DEFAULT_MAX_TRIES; - } - @Override public Map allocatePendingSegments( String dataSource, @@ -619,7 +613,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( private List upgradePendingSegmentsOverlappingWith( SegmentsMetadataTransaction transaction, Set replaceSegments - ) throws JsonProcessingException + ) { if (replaceSegments.isEmpty()) { return Collections.emptyList(); @@ -684,7 +678,7 @@ private List upgradePendingSegments( new NumberedShardSpec(++currentPartitionNumber, numCorePartitions) ); upgradedPendingSegments.add( - new PendingSegmentRecord( + PendingSegmentRecord.create( newId, UPGRADED_PENDING_SEGMENT_PREFIX + replaceVersion, pendingSegmentId.toString(), @@ -774,7 +768,7 @@ private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( return null; } - final PendingSegmentRecord record = new PendingSegmentRecord( + final PendingSegmentRecord record = PendingSegmentRecord.create( newIdentifier, createRequest.getSequenceName(), createRequest.getPreviousSegmentId(), @@ -915,7 +909,7 @@ private SegmentIdWithShardSpec allocatePendingSegment( } // always insert empty previous sequence id - final PendingSegmentRecord record = new PendingSegmentRecord( + final PendingSegmentRecord record = PendingSegmentRecord.create( newIdentifier, createRequest.getSequenceName(), "", @@ -1352,7 +1346,7 @@ private PendingSegmentRecord createNewPendingSegment( version, partialShardSpec.complete(jsonMapper, newPartitionId, 0) ); - return new PendingSegmentRecord( + return PendingSegmentRecord.create( pendingSegmentId, request.getSequenceName(), request.getPreviousSegmentId(), @@ -1388,7 +1382,7 @@ private PendingSegmentRecord createNewPendingSegment( committedMaxId == null ? 
0 : committedMaxId.getShardSpec().getNumCorePartitions() ) ); - return new PendingSegmentRecord( + return PendingSegmentRecord.create( getTrueAllocatedId(transaction, pendingSegmentId), request.getSequenceName(), request.getPreviousSegmentId(), @@ -1998,7 +1992,7 @@ private void insertIntoUpgradeSegmentsTable( failedInserts.add(partition.get(i).getKey()); } } - if (failedInserts.size() > 0) { + if (!failedInserts.isEmpty()) { throw new ISE( "Failed to insert upgrade segments in DB: %s", SegmentUtils.commaSeparatedIdentifiers(failedInserts) @@ -2008,7 +2002,7 @@ private void insertIntoUpgradeSegmentsTable( } private List retrieveSegmentsById( - SegmentsMetadataTransaction transaction, + SegmentsMetadataReadTransaction transaction, Set segmentIds ) { @@ -2565,42 +2559,15 @@ private T retryDatasourceTransaction( SegmentsMetadataTransaction.Callback callback ) { - return connector.retryTransaction( - createTransactionCallback(dataSource, callback), - 3, - getSqlMetadataMaxRetry() - ); + return transactionFactory.retryDatasourceTransaction(dataSource, callback); } private T inReadOnlyDatasourceTransaction( String dataSource, - SegmentsMetadataTransaction.Callback callback + SegmentsMetadataReadTransaction.Callback callback ) { - return connector.inReadOnlyTransaction( - createTransactionCallback(dataSource, callback) - ); - } - - private TransactionCallback createTransactionCallback( - String dataSource, - SegmentsMetadataTransaction.Callback baseCallback - ) - { - return (handle, status) -> { - final SegmentsMetadataTransaction transaction = - transactionFactory.createTransactionForDatasource(dataSource, handle, status); - try { - return baseCallback.inTransaction(transaction); - } - catch (Exception e) { - transaction.setRollbackOnly(); - throw e; - } - finally { - transaction.complete(); - } - }; + return transactionFactory.inReadOnlyDatasourceTransaction(dataSource, callback); } public static class DataStoreMetadataUpdateResult diff --git a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java index 44033b0a394d..e1a526a6332b 100644 --- a/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java +++ b/server/src/main/java/org/apache/druid/metadata/PendingSegmentRecord.java @@ -20,13 +20,16 @@ package org.apache.druid.metadata; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import com.google.common.io.BaseEncoding; +import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -54,21 +57,43 @@ public class PendingSegmentRecord private final String sequencePrevId; private final String upgradedFromSegmentId; private final String taskAllocatorId; + private final DateTime createdDate; @JsonCreator - public PendingSegmentRecord( + public static PendingSegmentRecord fromJson( @JsonProperty("id") SegmentIdWithShardSpec id, @JsonProperty("sequenceName") String sequenceName, @JsonProperty("sequencePrevId") String sequencePrevId, @JsonProperty("upgradedFromSegmentId") @Nullable String upgradedFromSegmentId, @JsonProperty("taskAllocatorId") 
@Nullable String taskAllocatorId ) + { + return new PendingSegmentRecord( + id, + sequenceName, + sequencePrevId, + upgradedFromSegmentId, + taskAllocatorId, + // Tasks don't use the createdDate of the record + DateTimes.EPOCH + ); + } + + private PendingSegmentRecord( + SegmentIdWithShardSpec id, + String sequenceName, + String sequencePrevId, + String upgradedFromSegmentId, + String taskAllocatorId, + DateTime createdDate + ) { this.id = id; this.sequenceName = sequenceName; this.sequencePrevId = sequencePrevId; this.upgradedFromSegmentId = upgradedFromSegmentId; this.taskAllocatorId = taskAllocatorId; + this.createdDate = createdDate; } @JsonProperty @@ -111,6 +136,15 @@ public String getTaskAllocatorId() return taskAllocatorId; } + /** + * This field is not serialized since tasks do not use it. + */ + @JsonIgnore + public DateTime getCreatedDate() + { + return createdDate; + } + /** * Computes a hash for this record to serve as UNIQUE key, ensuring we don't * have more than one segment per sequence per interval. @@ -141,6 +175,40 @@ public String computeSequenceNamePrevIdSha1(boolean skipSegmentLineageCheck) return BaseEncoding.base16().encode(hasher.hash().asBytes()); } + /** + * Creates a new record (with the current timestamp) that can be used to create + * a new entry in the pending segments metadata table. + */ + public static PendingSegmentRecord create( + SegmentIdWithShardSpec id, + String sequenceName, + String sequencePrevId, + @Nullable String upgradedFromSegmentId, + @Nullable String taskAllocatorId + ) + { + return new PendingSegmentRecord( + id, + sequenceName, + sequencePrevId, + upgradedFromSegmentId, + taskAllocatorId, + DateTimes.nowUtc() + ); + } + + /** + * Maps the given result set into a {@link PendingSegmentRecord}. + * The columns required in the result set are: + *
      + *
+ * <ul>
+ * <li>{@code payload}</li>
+ * <li>{@code sequence_name}</li>
+ * <li>{@code sequence_prev_id}</li>
+ * <li>{@code upgraded_from_segment_id}</li>
+ * <li>{@code task_allocator_id}</li>
+ * <li>{@code created_date}</li>
+ * </ul>
    + */ public static PendingSegmentRecord fromResultSet(ResultSet resultSet, ObjectMapper jsonMapper) { try { @@ -150,7 +218,8 @@ public static PendingSegmentRecord fromResultSet(ResultSet resultSet, ObjectMapp resultSet.getString("sequence_name"), resultSet.getString("sequence_prev_id"), resultSet.getString("upgraded_from_segment_id"), - resultSet.getString("task_allocator_id") + resultSet.getString("task_allocator_id"), + DateTimes.of(resultSet.getString("created_date")) ); } catch (Exception e) { diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index f86ac0064c35..db6321c78e98 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -720,11 +720,10 @@ public List retrievePendingSegmentsWithExactInterval( ) { final String sql = StringUtils.format( - "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" - + " FROM %1$s WHERE" - + " dataSource = :dataSource" - + " AND start = :start" - + " AND %2$send%2$s = :end", + "SELECT payload, sequence_name, sequence_prev_id," + + " task_allocator_id, upgraded_from_segment_id, created_date" + + " FROM %1$s WHERE dataSource = :dataSource" + + " AND start = :start AND %2$send%2$s = :end", dbTables.getPendingSegmentsTable(), connector.getQuoteString() ); return handle @@ -748,9 +747,9 @@ public List retrievePendingSegmentsOverlappingInterval( final boolean compareIntervalEndpointsAsStrings = Intervals.canCompareEndpointsAsStrings(interval); String sql = StringUtils.format( - "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" - + " FROM %1$s" - + " WHERE dataSource = :dataSource", + "SELECT payload, sequence_name, sequence_prev_id," + + " task_allocator_id, upgraded_from_segment_id, created_date" + + " FROM %1$s WHERE dataSource = :dataSource", dbTables.getPendingSegmentsTable() ); if (compareIntervalEndpointsAsStrings) { @@ -785,9 +784,10 @@ public List retrievePendingSegmentsForTaskAllocatorId( ) { final String sql = StringUtils.format( - "SELECT payload, sequence_name, sequence_prev_id, task_allocator_id, upgraded_from_segment_id" - + " FROM %1$s" - + " WHERE dataSource = :dataSource AND task_allocator_id = :task_allocator_id", + "SELECT payload, sequence_name, sequence_prev_id," + + " task_allocator_id, upgraded_from_segment_id, created_date" + + " FROM %1$s WHERE dataSource = :dataSource" + + " AND task_allocator_id = :task_allocator_id", dbTables.getPendingSegmentsTable() ); diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java new file mode 100644 index 000000000000..132252f998df --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment; + +import org.skife.jdbi.v2.Handle; + +import java.io.Closeable; + +public interface SegmentsMetadataReadTransaction + extends DatasourceSegmentMetadataReader, Closeable +{ + /** + * @return The JDBI handle used in this transaction + */ + Handle getHandle(); + + /** + * Completes the transaction by either committing it or rolling it back. + */ + @Override + void close(); + + @FunctionalInterface + interface Callback + { + T inTransaction(SegmentsMetadataReadTransaction transaction) throws Exception; + } + +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java index c4d0bb9d618d..84f49740d238 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java @@ -27,24 +27,13 @@ * {@link Handle} and is meant to be short-lived. */ public interface SegmentsMetadataTransaction - extends DatasourceSegmentMetadataReader, DatasourceSegmentMetadataWriter + extends SegmentsMetadataReadTransaction, DatasourceSegmentMetadataWriter { - /** - * @return The JDBI handle used in this transaction - */ - Handle getHandle(); - /** * Marks this transaction to be rolled back. */ void setRollbackOnly(); - /** - * Completes the transaction by either committing it or rolling it back. - * This method must not be called from a {@link Callback}. - */ - void complete(); - @FunctionalInterface interface Callback { diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java index b6b0a570a2c7..648d7e4f6f49 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java @@ -20,6 +20,7 @@ package org.apache.druid.metadata.segment; import org.apache.druid.discovery.DruidLeaderSelector; +import org.apache.druid.error.DruidException; import org.apache.druid.error.InternalServerError; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.PendingSegmentRecord; @@ -33,9 +34,11 @@ import org.skife.jdbi.v2.Handle; import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; import java.util.function.Function; /** @@ -43,25 +46,25 @@ * and sends writes first to the metadata store and then the cache (if the * metadata store persist succeeds). 
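 * <p>
 * A minimal sketch of the deferred-write pattern described above, with
 * illustrative names only (the real fields and methods of this class may differ):
 * <pre>{@code
 *   // write to the metadata store first, remember the action for the cache
 *   <T> T performWrite(Function<DatasourceSegmentMetadataWriter, T> action) {
 *     T result = action.apply(sqlDelegate);
 *     pendingWrites.add(action::apply);
 *     return result;
 *   }
 *
 *   // on successful close(), replay the remembered writes onto the cache
 *   public void close() {
 *     if (!rollbackOnly && isLeaderWithSameTerm()) {
 *       pendingWrites.forEach(write -> write.accept(cacheWriter()));
 *     }
 *   }
 * }</pre>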
*/ -public class SqlSegmentsMetadataCachedTransaction implements SegmentsMetadataTransaction +class SqlSegmentsMetadataCachedTransaction implements SegmentsMetadataTransaction { - private final String dataSource; private final SegmentsMetadataTransaction delegate; - private final SegmentsMetadataCache metadataCache; + private final SegmentsMetadataCache.DataSource metadataCache; private final DruidLeaderSelector leaderSelector; private final int startTerm; private final AtomicBoolean isRollingBack = new AtomicBoolean(false); + private final AtomicBoolean isClosed = new AtomicBoolean(false); - public SqlSegmentsMetadataCachedTransaction( - String dataSource, + private final List> pendingWrites = new ArrayList<>(); + + SqlSegmentsMetadataCachedTransaction( SegmentsMetadataTransaction delegate, - SegmentsMetadataCache metadataCache, + SegmentsMetadataCache.DataSource metadataCache, DruidLeaderSelector leaderSelector ) { - this.dataSource = dataSource; this.delegate = delegate; this.metadataCache = metadataCache; this.leaderSelector = leaderSelector; @@ -69,14 +72,14 @@ public SqlSegmentsMetadataCachedTransaction( if (leaderSelector.isLeader()) { this.startTerm = leaderSelector.localTerm(); } else { - throw InternalServerError.exception("Not leader anymore"); + throw InternalServerError.exception("Not leader anymore. Cannot start transaction."); } } private void verifyStillLeaderWithSameTerm() { if (!isLeaderWithSameTerm()) { - throw InternalServerError.exception("Failing transaction. Not leader anymore"); + throw InternalServerError.exception("Not leader anymore. Failing transaction."); } } @@ -87,12 +90,12 @@ private boolean isLeaderWithSameTerm() private DatasourceSegmentMetadataReader cacheReader() { - return metadataCache.readerForDatasource(dataSource); + return metadataCache; } private DatasourceSegmentMetadataWriter cacheWriter() { - return metadataCache.writerForDatasource(dataSource); + return metadataCache; } @Override @@ -104,31 +107,34 @@ public Handle getHandle() @Override public void setRollbackOnly() { + isRollingBack.set(true); delegate.setRollbackOnly(); } @Override - public void complete() + public void close() { - // TODO: complete this implementation - - if (isRollingBack.get()) { - // rollback the changes made to the cache - } else { - // commit the changes to the cache - // or may be we can commit right at the end - // since I don't think we ever read what we have just written - // so it should be okay to postpone the writes until the very end - // since reads from cache are going to be fast, it should be okay to hold - // a write lock for the entire duration of the transaction - - // Is there any alternative? That is also consistent? + if (isClosed.get()) { + return; + } else if (isRollingBack.get()) { + isClosed.set(true); + return; } - // release the lock on the cache - // What if we don't acquire any lock? - - delegate.complete(); + // Commit the changes to the cache + try { + pendingWrites.forEach(action -> { + if (isLeaderWithSameTerm()) { + action.accept(cacheWriter()); + } else { + // Leadership has been lost, cache would have been stopped and invalidated + } + }); + } + finally { + delegate.close(); + isClosed.set(true); + } } // READ METHODS @@ -335,12 +341,18 @@ public int deletePendingSegmentsCreatedIn(Interval interval) private T performWriteAction(Function action) { + if (isClosed.get()) { + throw DruidException.defensive( + "Transaction has already been committed. No more writes can be performed." 
+ ); + } + verifyStillLeaderWithSameTerm(); final T result = action.apply(delegate); - if (isLeaderWithSameTerm()) { - action.apply(cacheWriter()); - } + // TODO: verify if the write to metadata store was successful + // Otherwise, throw an exception + pendingWrites.add(action::apply); return result; } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java index dea50e315380..10b3769d5725 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java @@ -26,7 +26,6 @@ import com.google.common.collect.Lists; import org.apache.druid.error.DruidException; import org.apache.druid.error.InternalServerError; -import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.MetadataStorageTablesConfig; @@ -55,7 +54,7 @@ import java.util.Set; import java.util.stream.Collectors; -public class SqlSegmentsMetadataTransaction implements SegmentsMetadataTransaction +class SqlSegmentsMetadataTransaction implements SegmentsMetadataTransaction { private static final int MAX_SEGMENTS_PER_BATCH = 100; @@ -68,7 +67,7 @@ public class SqlSegmentsMetadataTransaction implements SegmentsMetadataTransacti private final SqlSegmentsMetadataQuery query; - public SqlSegmentsMetadataTransaction( + SqlSegmentsMetadataTransaction( String dataSource, Handle handle, TransactionStatus transactionStatus, @@ -99,7 +98,7 @@ public void setRollbackOnly() } @Override - public void complete() + public void close() { // Do nothing here, the JDBI Handle will commit or rollback the transaction as needed } @@ -335,7 +334,7 @@ public boolean insertPendingSegment( int updatedCount = handle.createStatement(getSqlToInsertPendingSegment()) .bind("id", segmentId.toString()) .bind("dataSource", dataSource) - .bind("created_date", DateTimes.nowUtc().toString()) + .bind("created_date", nullSafeString(pendingSegment.getCreatedDate())) .bind("start", interval.getStart().toString()) .bind("end", interval.getEnd().toString()) .bind("sequence_name", pendingSegment.getSequenceName()) @@ -360,7 +359,6 @@ public int insertPendingSegments( { final PreparedBatch insertBatch = handle.prepareBatch(getSqlToInsertPendingSegment()); - final String createdDate = DateTimes.nowUtc().toString(); final Set processedSegmentIds = new HashSet<>(); for (PendingSegmentRecord pendingSegment : pendingSegments) { final SegmentIdWithShardSpec segmentId = pendingSegment.getId(); @@ -372,7 +370,7 @@ public int insertPendingSegments( insertBatch.add() .bind("id", segmentId.toString()) .bind("dataSource", dataSource) - .bind("created_date", createdDate) + .bind("created_date", nullSafeString(pendingSegment.getCreatedDate())) .bind("start", interval.getStart().toString()) .bind("end", interval.getEnd().toString()) .bind("sequence_name", pendingSegment.getSequenceName()) diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java index ac46956c4d55..81a44c23a88f 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java +++ 
b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java @@ -32,9 +32,15 @@ * Factory for {@link SegmentsMetadataTransaction}s. If the * {@link SegmentsMetadataCache} is enabled and ready, the transaction may * read/write from the cache as applicable. + *
<p>
    + * This class serves as a wrapper over the {@link SQLMetadataConnector} to + * perform transactions specific to segment metadata. */ public class SqlSegmentsMetadataTransactionFactory { + private static final int QUIET_RETRIES = 3; + private static final int MAX_RETRIES = 10; + private final ObjectMapper jsonMapper; private final MetadataStorageTablesConfig tablesConfig; private final SQLMetadataConnector connector; @@ -57,13 +63,66 @@ public SqlSegmentsMetadataTransactionFactory( this.segmentsMetadataCache = segmentsMetadataCache; } - public SegmentsMetadataTransaction createTransactionForDatasource( + public int getMaxRetries() + { + return MAX_RETRIES; + } + + public T inReadOnlyDatasourceTransaction( + String dataSource, + SegmentsMetadataReadTransaction.Callback callback + ) + { + return connector.inReadOnlyTransaction((handle, status) -> { + final SegmentsMetadataTransaction sqlTransaction + = createSqlTransaction(dataSource, handle, status); + + if (segmentsMetadataCache.isReady()) { + final SegmentsMetadataCache.DataSource datasourceCache + = segmentsMetadataCache.getDatasource(dataSource); + final SegmentsMetadataReadTransaction cachedTransaction + = new SqlSegmentsMetadataCachedTransaction(sqlTransaction, datasourceCache, leaderSelector); + + return datasourceCache.withReadLock(dc -> executeRead(cachedTransaction, callback)); + } else { + return executeRead(createSqlTransaction(dataSource, handle, status), callback); + } + }); + } + + public T retryDatasourceTransaction( + String dataSource, + SegmentsMetadataTransaction.Callback callback + ) + { + return connector.retryTransaction( + (handle, status) -> { + final SegmentsMetadataTransaction sqlTransaction + = createSqlTransaction(dataSource, handle, status); + + if (segmentsMetadataCache.isReady()) { + final SegmentsMetadataCache.DataSource datasourceCache + = segmentsMetadataCache.getDatasource(dataSource); + final SegmentsMetadataTransaction cachedTransaction + = new SqlSegmentsMetadataCachedTransaction(sqlTransaction, datasourceCache, leaderSelector); + + return datasourceCache.withWriteLock(dc -> executeWrite(cachedTransaction, callback)); + } else { + return executeWrite(sqlTransaction, callback); + } + }, + QUIET_RETRIES, + getMaxRetries() + ); + } + + private SegmentsMetadataTransaction createSqlTransaction( String dataSource, Handle handle, TransactionStatus transactionStatus ) { - final SegmentsMetadataTransaction metadataTransaction = new SqlSegmentsMetadataTransaction( + return new SqlSegmentsMetadataTransaction( dataSource, handle, transactionStatus, @@ -71,16 +130,33 @@ public SegmentsMetadataTransaction createTransactionForDatasource( tablesConfig, jsonMapper ); + } + + private T executeWrite( + SegmentsMetadataTransaction transaction, + SegmentsMetadataTransaction.Callback callback + ) throws Exception + { + try { + return callback.inTransaction(transaction); + } + catch (Exception e) { + transaction.setRollbackOnly(); + throw e; + } + finally { + transaction.close(); + } + } - return - segmentsMetadataCache.isReady() - ? 
new SqlSegmentsMetadataCachedTransaction( - dataSource, - metadataTransaction, - segmentsMetadataCache, - leaderSelector - ) - : metadataTransaction; + private T executeRead( + SegmentsMetadataReadTransaction transaction, + SegmentsMetadataReadTransaction.Callback callback + ) throws Exception + { + try (transaction) { + return callback.inTransaction(transaction); + } } } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java index 27cf5aa1d966..7ab1b49e4f89 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java @@ -23,7 +23,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; -public abstract class BaseCache +public abstract class BaseCache implements SegmentsMetadataCache.DataSource { private final ReentrantReadWriteLock stateLock; @@ -62,6 +62,30 @@ public T withReadLock(Supplier action) } } + @Override + public T withReadLock(SegmentsMetadataCache.Action action) throws Exception + { + stateLock.readLock().lock(); + try { + return action.perform(this); + } + finally { + stateLock.readLock().unlock(); + } + } + + @Override + public T withWriteLock(SegmentsMetadataCache.Action action) throws Exception + { + stateLock.writeLock().lock(); + try { + return action.perform(this); + } + finally { + stateLock.writeLock().unlock(); + } + } + @FunctionalInterface public interface Action { diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java index c087fd51904f..f929dbe14e90 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java @@ -23,8 +23,6 @@ import org.apache.druid.java.util.common.CloseableIterators; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.PendingSegmentRecord; -import org.apache.druid.metadata.segment.DatasourceSegmentMetadataReader; -import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.http.DataSegmentPlus; import org.apache.druid.timeline.DataSegment; @@ -46,13 +44,8 @@ /** * Datasource-level cache for segments and pending segments. - * - * TODO: track the created date for each pending segment as it might be needed - * for delete. 
*/ -class DatasourceSegmentCache - extends BaseCache - implements DatasourceSegmentMetadataReader, DatasourceSegmentMetadataWriter +class DatasourceSegmentCache extends BaseCache { private static final DatasourceSegmentCache EMPTY_INSTANCE = new DatasourceSegmentCache(); @@ -527,8 +520,13 @@ record -> taskAllocatorId.equals(record.getTaskAllocatorId()) @Override public int deletePendingSegmentsCreatedIn(Interval interval) { - // TODO - return 0; + return withWriteLock(() -> { + List idsToDelete = findPendingSegmentsMatching( + record -> interval.contains(record.getCreatedDate()) + ).stream().map(record -> record.getId().toString()).collect(Collectors.toList()); + + return deletePendingSegments(idsToDelete); + }); } /** diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java index fec34645e044..32296860de76 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java @@ -24,15 +24,18 @@ /** * TODO: - * -[ ] Finish polling of pending segments properly - * -[ ] Implement rollback and commit for cached transaction - * -[ ] Acquire read/write lock on datasource cache when transaction starts. - * -[ ] Add different factory methods to create read vs write transaction - * -[ ] Write a basic unit test to verify that things are working as expected + * -[x] Finish polling of pending segments properly + * -[x] Implement rollback and commit for cached transaction + * -[x] Acquire read/write lock on datasource cache when transaction starts. + * -[x] Add different factory methods to create read vs write transaction + * -[x] Write a basic unit test to verify that things are working as expected + * -[ ] Write unit test for DatasourceSegmentCache and SqlSegmentsMetadataCache + * - * -[ ] Wire up cache in OverlordCompactionScheduler and SqlSegmentsMetadataManager, * otherwise we will end up having two copies of the segment timeline and stuff * The timeline inside the cache can replace the SegmentTimeline of SqlSegmentsMetadataManager * -[ ] Add transaction API to return timeline and/or timeline holders + * -[ ] Think about race conditions in the cache - leadership changes, multiple concurrent transactions * -[ ] Write unit tests * -[ ] Write integration tests * -[ ] Write a benchmark @@ -45,8 +48,18 @@ public interface SegmentsMetadataCache boolean isReady(); - DatasourceSegmentMetadataReader readerForDatasource(String dataSource); + DataSource getDatasource(String dataSource); - DatasourceSegmentMetadataWriter writerForDatasource(String dataSource); + interface DataSource extends DatasourceSegmentMetadataWriter, DatasourceSegmentMetadataReader + { + T withReadLock(Action action) throws Exception; + + T withWriteLock(Action action) throws Exception; + } + + interface Action + { + T perform(DataSource datasourceCache) throws Exception; + } } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java index 34500dde4aa0..b6720ee0f67b 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java @@ -38,8 +38,6 @@ import org.apache.druid.metadata.SQLMetadataConnector; import 
org.apache.druid.metadata.SegmentsMetadataManagerConfig; import org.apache.druid.metadata.SqlSegmentsMetadataQuery; -import org.apache.druid.metadata.segment.DatasourceSegmentMetadataReader; -import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; import org.apache.druid.query.DruidMetrics; import org.apache.druid.server.http.DataSegmentPlus; import org.joda.time.DateTime; @@ -117,6 +115,11 @@ public synchronized void start() } } + /** + * This method is called only when leadership is lost or when the service is + * being stopped. Any transaction that is in progress when this method is + * invoked will fail. + */ @Override @LifecycleStop public synchronized void stop() @@ -125,20 +128,6 @@ public synchronized void stop() currentCacheState.set(CacheState.STOPPED); tearDown(); } - - // TODO: Handle race conditions - // T1: sees cache as ready - // T2: stops the cache - // T1: tries to read some value from the cache and fails - - // Should start-stop wait on everything else? - // When does stop happen? - // 1. Leadership changes: If leadership has changed, no point continuing the operation? - // In the current implementation, a task action would continue executing even if leadership has been lost? - // Yes, I do think so. - // Solution: If leadership has changed, transaction would fail, we wouldn't need to read or write anymore - - // 2. Service start-stop. Again no point worrying about the cache } @Override @@ -148,14 +137,7 @@ public boolean isReady() } @Override - public DatasourceSegmentMetadataReader readerForDatasource(String dataSource) - { - verifyCacheIsReady(); - return datasourceToSegmentCache.getOrDefault(dataSource, DatasourceSegmentCache.empty()); - } - - @Override - public DatasourceSegmentMetadataWriter writerForDatasource(String dataSource) + public DataSource getDatasource(String dataSource) { verifyCacheIsReady(); return datasourceToSegmentCache.computeIfAbsent(dataSource, ds -> new DatasourceSegmentCache()); @@ -191,11 +173,6 @@ private void pollMetadataStore() final Map> datasourceToKnownSegmentIds = retrieveAllSegmentIds(datasourceToRefreshSegmentIds); - // TODO: handle changes made to the metadata store between these two database calls - // there doesn't seem to be much point to lock the cache during this period - // so go and fetch the segments and then refresh them - // it is possible that the cache is now updated and the refresh is not needed after all - // so the refresh should be idempotent if (isStopped()) { tearDown(); return; @@ -244,9 +221,10 @@ private Map> retrieveAllSegmentIds( final Map> datasourceToKnownSegmentIds = new HashMap<>(); final AtomicInteger countOfRefreshedUnusedSegments = new AtomicInteger(0); - // TODO: should we poll all segments here or just poll used - // and then separately poll only the required stuff for unused segments - // because the number of unused segments can be very large + // TODO: Consider improving this because the number of unused segments can be very large + // Instead of polling all segments, we could just poll the used segments + // and then fire a smarter query to determine the max unused ID or something + // But it might be tricky final String sql = StringUtils.format( "SELECT id, dataSource, used, used_status_last_updated FROM %s", @@ -282,7 +260,8 @@ private Map> retrieveAllSegmentIds( } return 0; - } catch (Exception e) { + } + catch (Exception e) { log.makeAlert(e, "Error while retrieving segment IDs from metadata store."); return 1; } @@ -350,10 +329,6 @@ private void 
retrieveAndRefreshAllPendingSegments() .map((index, r, ctx) -> { try { final PendingSegmentRecord record = PendingSegmentRecord.fromResultSet(r, jsonMapper); - final DateTime createdDate = nullSafeDate(r.getString("created_date")); - - // TODO: use the created date - final DatasourceSegmentCache cache = datasourceToSegmentCache.computeIfAbsent( record.getId().getDataSource(), ds -> new DatasourceSegmentCache() @@ -445,7 +420,8 @@ static SegmentRecord fromResultSet(ResultSet r) final SegmentState storedState = new SegmentState(isUsed, lastUpdatedTime); return new SegmentRecord(segmentId, dataSource, storedState); - } catch (SQLException e) { + } + catch (SQLException e) { return null; } } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 6c0df5a6caa3..118549b48a24 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -166,7 +166,14 @@ public void setUp() derbyConnector, leaderSelector, segmentsMetadataCache - ); + ) + { + @Override + public int getMaxRetries() + { + return MAX_SQL_MEATADATA_RETRY_FOR_TEST; + } + }; coordinator = new IndexerSQLMetadataStorageCoordinator( transactionFactory, mapper, @@ -188,12 +195,6 @@ protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( metadataUpdateCounter.getAndIncrement(); return super.updateDataSourceMetadataWithHandle(transaction, dataSource, startMetadata, endMetadata); } - - @Override - public int getSqlMetadataMaxRetry() - { - return MAX_SQL_MEATADATA_RETRY_FOR_TEST; - } }; } @@ -241,7 +242,7 @@ public void testCommitAppendSegments() expectedSegmentsToUpgrade.add(segment); // Add the same segment pendingSegmentsForTask.add( - new PendingSegmentRecord( + PendingSegmentRecord.create( SegmentIdWithShardSpec.fromDataSegment(segment), v1, segment.getId().toString(), @@ -251,7 +252,7 @@ public void testCommitAppendSegments() ); // Add upgraded pending segment pendingSegmentsForTask.add( - new PendingSegmentRecord( + PendingSegmentRecord.create( new SegmentIdWithShardSpec( TestDataSource.WIKI, Intervals.of("2023-01-01/2023-02-01"), @@ -276,7 +277,7 @@ public void testCommitAppendSegments() expectedSegmentsToUpgrade.add(segment); // Add the same segment pendingSegmentsForTask.add( - new PendingSegmentRecord( + PendingSegmentRecord.create( SegmentIdWithShardSpec.fromDataSegment(segment), v2, segment.getId().toString(), @@ -286,7 +287,7 @@ public void testCommitAppendSegments() ); // Add upgraded pending segment pendingSegmentsForTask.add( - new PendingSegmentRecord( + PendingSegmentRecord.create( new SegmentIdWithShardSpec( TestDataSource.WIKI, Intervals.of("2023-01-01/2023-02-01"), @@ -310,7 +311,7 @@ public void testCommitAppendSegments() appendSegments.add(segment); // Add the same segment pendingSegmentsForTask.add( - new PendingSegmentRecord( + PendingSegmentRecord.create( SegmentIdWithShardSpec.fromDataSegment(segment), v3, segment.getId().toString(), @@ -320,7 +321,7 @@ public void testCommitAppendSegments() ); // Add upgraded pending segment pendingSegmentsForTask.add( - new PendingSegmentRecord( + PendingSegmentRecord.create( new SegmentIdWithShardSpec( TestDataSource.WIKI, Intervals.of("2023-01-01/2023-02-01"), @@ -401,7 +402,7 @@ public void testCommitReplaceSegments_partiallyOverlappingPendingSegmentUnsuppor final 
ReplaceTaskLock replaceLock = new ReplaceTaskLock("g1", Intervals.of("2023-01-01/2023-02-01"), "2023-02-01"); final Set segmentsAppendedWithReplaceLock = new HashSet<>(); final Map appendedSegmentToReplaceLockMap = new HashMap<>(); - final PendingSegmentRecord pendingSegmentForInterval = new PendingSegmentRecord( + final PendingSegmentRecord pendingSegmentForInterval = PendingSegmentRecord.create( new SegmentIdWithShardSpec( "foo", Intervals.of("2023-01-01/2024-01-01"), @@ -461,7 +462,7 @@ public void testCommitReplaceSegments() final ReplaceTaskLock replaceLock = new ReplaceTaskLock("g1", Intervals.of("2023-01-01/2023-02-01"), "2023-02-01"); final Set segmentsAppendedWithReplaceLock = new HashSet<>(); final Map appendedSegmentToReplaceLockMap = new HashMap<>(); - final PendingSegmentRecord pendingSegmentInInterval = new PendingSegmentRecord( + final PendingSegmentRecord pendingSegmentInInterval = PendingSegmentRecord.create( new SegmentIdWithShardSpec( "foo", Intervals.of("2023-01-01/2023-01-02"), @@ -473,7 +474,7 @@ public void testCommitReplaceSegments() null, "append" ); - final PendingSegmentRecord pendingSegmentOutsideInterval = new PendingSegmentRecord( + final PendingSegmentRecord pendingSegmentOutsideInterval = PendingSegmentRecord.create( new SegmentIdWithShardSpec( "foo", Intervals.of("2023-04-01/2023-04-02"), @@ -591,14 +592,14 @@ public void testCommitReplaceSegments() @Test public void testDuplicatePendingSegmentEntriesAreNotInserted() { - final PendingSegmentRecord pendingSegment0 = new PendingSegmentRecord( + final PendingSegmentRecord pendingSegment0 = PendingSegmentRecord.create( new SegmentIdWithShardSpec("foo", Intervals.ETERNITY, "version", new NumberedShardSpec(0, 0)), "sequenceName0", "sequencePrevId0", null, "taskAllocatorId" ); - final PendingSegmentRecord pendingSegment1 = new PendingSegmentRecord( + final PendingSegmentRecord pendingSegment1 = PendingSegmentRecord.create( new SegmentIdWithShardSpec("foo", Intervals.ETERNITY, "version", new NumberedShardSpec(1, 0)), "sequenceName1", "sequencePrevId1", @@ -3654,12 +3655,10 @@ public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() coordinator.commitSegments(ImmutableSet.of(usedSegmentForExactIntervalAndVersion), null); - Set unusedSegmentIdsForIntervalAndVersion = derbyConnector.retryTransaction( - (handle, status) -> transactionFactory - .createTransactionForDatasource(TestDataSource.WIKI, handle, status) - .findUnusedSegmentIdsWithExactIntervalAndVersion(Intervals.of("2024/2025"), "v1"), - 3, - SQLMetadataConnector.DEFAULT_MAX_TRIES + Set unusedSegmentIdsForIntervalAndVersion = transactionFactory.retryDatasourceTransaction( + TestDataSource.WIKI, + transaction -> transaction + .findUnusedSegmentIdsWithExactIntervalAndVersion(Intervals.of("2024/2025"), "v1") ); Assert.assertEquals( Set.of(unusedSegmentForExactIntervalAndVersion.getId().toString()), @@ -3880,18 +3879,13 @@ public void testRetrieveUsedSegmentsForSegmentAllocation() } } - Set observed = derbyConnector.retryTransaction( - (handle, transactionStatus) -> - coordinator.retrieveUsedSegmentsForAllocation( - transactionFactory.createTransactionForDatasource(datasource, handle, transactionStatus), - datasource, - month - ) + Set observed = transactionFactory.retryDatasourceTransaction( + datasource, + transaction -> + coordinator.retrieveUsedSegmentsForAllocation(transaction, datasource, month) .stream() .map(SegmentIdWithShardSpec::fromDataSegment) - .collect(Collectors.toSet()), - 3, - SQLMetadataConnector.DEFAULT_MAX_TRIES + 
.collect(Collectors.toSet()) ); Assert.assertEquals(expected, observed); @@ -3928,12 +3922,9 @@ private int insertPendingSegments( boolean skipLineageCheck ) { - return derbyConnector.retryTransaction( - (handle, transactionStatus) -> - transactionFactory.createTransactionForDatasource(dataSource, handle, transactionStatus) - .insertPendingSegments(pendingSegments, skipLineageCheck), - 3, - SQLMetadataConnector.DEFAULT_MAX_TRIES + return transactionFactory.retryDatasourceTransaction( + dataSource, + transaction -> transaction.insertPendingSegments(pendingSegments, skipLineageCheck) ); } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java index 4327b5dd6226..deb0baac13ad 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java @@ -91,14 +91,15 @@ public void setUp() CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = new CentralizedDatasourceSchemaConfig(); centralizedDatasourceSchemaConfig.setEnabled(true); + SqlSegmentsMetadataTransactionFactory transactionFactory = new SqlSegmentsMetadataTransactionFactory( + mapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnector, + new TestDruidLeaderSelector(), + new NoopSegmentsMetadataCache() + ); coordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( - mapper, - derbyConnectorRule.metadataTablesConfigSupplier().get(), - derbyConnector, - new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() - ), + transactionFactory, mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, @@ -118,12 +119,6 @@ protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( metadataUpdateCounter.getAndIncrement(); return super.updateDataSourceMetadataWithHandle(transaction, dataSource, startMetadata, endMetadata); } - - @Override - public int getSqlMetadataMaxRetry() - { - return MAX_SQL_MEATADATA_RETRY_FOR_TEST; - } }; } diff --git a/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java b/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java index bbcdd2fdb931..22dd6b6c2894 100644 --- a/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java +++ b/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java @@ -19,9 +19,6 @@ package org.apache.druid.metadata.segment.cache; -import org.apache.druid.metadata.segment.DatasourceSegmentMetadataReader; -import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; - public class NoopSegmentsMetadataCache implements SegmentsMetadataCache { @Override @@ -43,13 +40,7 @@ public boolean isReady() } @Override - public DatasourceSegmentMetadataReader readerForDatasource(String dataSource) - { - throw new UnsupportedOperationException(); - } - - @Override - public DatasourceSegmentMetadataWriter writerForDatasource(String dataSource) + public DataSource getDatasource(String dataSource) { throw new UnsupportedOperationException(); } diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTest.java 
b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTest.java index aab13960cd9b..72a221e9040a 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTest.java @@ -1151,7 +1151,7 @@ public void testQueryBySegments_withSegmentVersionUpgrades() throws Exception appenderator.add(IDENTIFIERS.get(0), ir("2000", "foo", 2), Suppliers.ofInstance(Committers.nil())); // Segment0 for interval upgraded after appends appenderator.registerUpgradedPendingSegment( - new PendingSegmentRecord( + PendingSegmentRecord.create( si("2000/2001", "B", 1), si("2000/2001", "B", 1).asSegmentId().toString(), IDENTIFIERS.get(0).asSegmentId().toString(), @@ -1167,7 +1167,7 @@ public void testQueryBySegments_withSegmentVersionUpgrades() throws Exception appenderator.add(IDENTIFIERS.get(2), ir("2001T01", "foo", 16), Suppliers.ofInstance(Committers.nil())); // Concurrent replace registers a segment version upgrade for the second interval appenderator.registerUpgradedPendingSegment( - new PendingSegmentRecord( + PendingSegmentRecord.create( si("2001/2002", "B", 1), si("2001/2002", "B", 1).asSegmentId().toString(), IDENTIFIERS.get(2).asSegmentId().toString(), @@ -1179,7 +1179,7 @@ public void testQueryBySegments_withSegmentVersionUpgrades() throws Exception appenderator.add(IDENTIFIERS.get(2), ir("2001T03", "foo", 64), Suppliers.ofInstance(Committers.nil())); // Another Concurrent replace registers upgrade with version C for the second interval appenderator.registerUpgradedPendingSegment( - new PendingSegmentRecord( + PendingSegmentRecord.create( si("2001/2002", "C", 7), si("2001/2002", "C", 7).asSegmentId().toString(), IDENTIFIERS.get(2).asSegmentId().toString(), @@ -1634,7 +1634,7 @@ public void testQueryByIntervals_withSegmentVersionUpgrades() throws Exception appenderator.add(IDENTIFIERS.get(0), ir("2000", "foo", 2), Suppliers.ofInstance(Committers.nil())); // Segment0 for interval upgraded after appends appenderator.registerUpgradedPendingSegment( - new PendingSegmentRecord( + PendingSegmentRecord.create( si("2000/2001", "B", 1), si("2000/2001", "B", 1).asSegmentId().toString(), IDENTIFIERS.get(0).asSegmentId().toString(), @@ -1650,7 +1650,7 @@ public void testQueryByIntervals_withSegmentVersionUpgrades() throws Exception appenderator.add(IDENTIFIERS.get(2), ir("2001T01", "foo", 16), Suppliers.ofInstance(Committers.nil())); // Concurrent replace registers a segment version upgrade for the second interval appenderator.registerUpgradedPendingSegment( - new PendingSegmentRecord( + PendingSegmentRecord.create( si("2001/2002", "B", 1), si("2001/2002", "B", 1).asSegmentId().toString(), IDENTIFIERS.get(2).asSegmentId().toString(), @@ -1662,7 +1662,7 @@ public void testQueryByIntervals_withSegmentVersionUpgrades() throws Exception appenderator.add(IDENTIFIERS.get(2), ir("2001T03", "foo", 64), Suppliers.ofInstance(Committers.nil())); // Another Concurrent replace registers upgrade with version C for the second interval appenderator.registerUpgradedPendingSegment( - new PendingSegmentRecord( + PendingSegmentRecord.create( si("2001/2002", "C", 7), si("2001/2002", "C", 7).asSegmentId().toString(), IDENTIFIERS.get(2).asSegmentId().toString(), From e4da41f0155d9f49b0324be54e098f93fe26de49 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Tue, 28 Jan 2025 11:02:03 +0530 Subject: [PATCH 03/11] Keep unused max partition in cache --- 
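The idea this commit implements can be illustrated with a small self-contained sketch; the class and method names below are only illustrative and are not the actual Druid API. For every interval and version, the cache keeps just the highest partition number observed among unused segments, which is enough for segment allocation to avoid handing out an ID that already exists as an unused segment.

import java.util.HashMap;
import java.util.Map;

// Illustrative sketch only (not the actual Druid classes): for each
// interval/version key, remember just the highest partition number seen
// among unused segments.
class HighestUnusedPartitionTracker
{
  // The real cache keys by Interval and version; a joined string keeps the sketch short.
  private final Map<String, Integer> keyToHighestPartitionNum = new HashMap<>();

  void onUnusedSegment(String interval, String version, int partitionNum)
  {
    // Retain only the maximum partition number per key.
    keyToHighestPartitionNum.merge(interval + "/" + version, partitionNum, Math::max);
  }

  // Returns null if no unused segment is known for the given interval and version.
  Integer findHighestUnusedPartitionNum(String interval, String version)
  {
    return keyToHighestPartitionNum.get(interval + "/" + version);
  }
}

As the patch itself notes, removing an unused segment does not lower the tracked maximum; returning a slightly stale maximum is acceptable for allocation.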
.../IndexerSQLMetadataStorageCoordinator.java | 41 +--- .../metadata/SqlSegmentsMetadataQuery.java | 38 +++- .../DatasourceSegmentMetadataReader.java | 10 +- .../SqlSegmentsMetadataCachedTransaction.java | 5 +- .../SqlSegmentsMetadataTransaction.java | 4 +- .../segment/cache/DatasourceSegmentCache.java | 72 +++++-- .../cache/SqlSegmentsMetadataCache.java | 177 +++++++++++------- ...exerSQLMetadataStorageCoordinatorTest.java | 9 +- 8 files changed, 217 insertions(+), 139 deletions(-) diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index efb9d531726f..e63e42820f62 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -1537,9 +1537,7 @@ private SegmentIdWithShardSpec getTrueAllocatedId( } // If yes, try to compute allocated partition num using the max unused segment shard spec - SegmentId unusedMaxId = getMaxIdOfUnusedSegment( - transaction, - allocatedId.getDataSource(), + SegmentId unusedMaxId = transaction.findHighestUnusedSegmentId( allocatedId.getInterval(), allocatedId.getVersion() ); @@ -1563,43 +1561,6 @@ private SegmentIdWithShardSpec getTrueAllocatedId( ); } - /** - * Determines the highest ID amongst unused segments for the given datasource, - * interval and version. - * - * @return null if no unused segment exists for the given parameters. - */ - @Nullable - private SegmentId getMaxIdOfUnusedSegment( - SegmentsMetadataTransaction transaction, - String datasource, - Interval interval, - String version - ) - { - Set unusedSegmentIds = - transaction.findUnusedSegmentIdsWithExactIntervalAndVersion(interval, version); - log.debug( - "Found [%,d] unused segments for datasource[%s] for interval[%s] and version[%s].", - unusedSegmentIds.size(), datasource, interval, version - ); - - SegmentId unusedMaxId = null; - int maxPartitionNum = -1; - for (String id : unusedSegmentIds) { - final SegmentId segmentId = SegmentId.tryParse(datasource, id); - if (segmentId == null) { - continue; - } - int partitionNum = segmentId.getPartitionNum(); - if (maxPartitionNum < partitionNum) { - maxPartitionNum = partitionNum; - unusedMaxId = segmentId; - } - } - return unusedMaxId; - } - @Override public int deletePendingSegmentsCreatedInInterval(String dataSource, Interval deleteInterval) { diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index db6321c78e98..c8823e1663fb 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -164,7 +164,43 @@ public CloseableIterator retrieveUsedSegmentsPlus( ); } - public Set retrieveUnusedSegmentIdsForExactIntervalAndVersion( + /** + * Determines the highest ID amongst unused segments for the given datasource, + * interval and version. + * + * @return null if no unused segment exists for the given parameters. 
+ */ + @Nullable + public SegmentId retrieveHighestUnusedSegmentId( + String datasource, + Interval interval, + String version + ) + { + final Set unusedSegmentIds = + retrieveUnusedSegmentIdsForExactIntervalAndVersion(datasource, interval, version); + log.debug( + "Found [%,d] unused segments for datasource[%s] for interval[%s] and version[%s].", + unusedSegmentIds.size(), datasource, interval, version + ); + + SegmentId unusedMaxId = null; + int maxPartitionNum = -1; + for (String id : unusedSegmentIds) { + final SegmentId segmentId = SegmentId.tryParse(datasource, id); + if (segmentId == null) { + continue; + } + int partitionNum = segmentId.getPartitionNum(); + if (maxPartitionNum < partitionNum) { + maxPartitionNum = partitionNum; + unusedMaxId = segmentId; + } + } + return unusedMaxId; + } + + private Set retrieveUnusedSegmentIdsForExactIntervalAndVersion( String dataSource, Interval interval, String version diff --git a/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java index 05af79b97756..59a2572f1103 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java @@ -49,7 +49,13 @@ public interface DatasourceSegmentMetadataReader */ Set findUsedSegmentIdsOverlapping(Interval interval); - Set findUnusedSegmentIdsWithExactIntervalAndVersion(Interval interval, String version); + /** + * Retrieves the ID of the unused segment that has the highest partition + * number amongst all unused segments that exactly match the given interval + * and version. + */ + @Nullable + SegmentId findHighestUnusedSegmentId(Interval interval, String version); /** * Finds used segments that overlap with any of the given intervals. 
@@ -63,8 +69,10 @@ public interface DatasourceSegmentMetadataReader */ Set findUsedSegmentsPlusOverlappingAnyOf(List intervals); + @Nullable DataSegment findSegment(String segmentId); + @Nullable DataSegment findUsedSegment(String segmentId); List findSegments(Set segmentIds); diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java index 648d7e4f6f49..c41484d0bbf6 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java @@ -152,10 +152,9 @@ public Set findUsedSegmentIdsOverlapping(Interval interval) } @Override - public Set findUnusedSegmentIdsWithExactIntervalAndVersion(Interval interval, String version) + public SegmentId findHighestUnusedSegmentId(Interval interval, String version) { - // TODO: we need to start caching some info of unused segments to empower this method - return delegate.findUnusedSegmentIdsWithExactIntervalAndVersion(interval, version); + return cacheReader().findHighestUnusedSegmentId(interval, version); } @Override diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java index 10b3769d5725..228bde1a5b99 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java @@ -134,9 +134,9 @@ public Set findUsedSegmentIdsOverlapping(Interval interval) } @Override - public Set findUnusedSegmentIdsWithExactIntervalAndVersion(Interval interval, String version) + public SegmentId findHighestUnusedSegmentId(Interval interval, String version) { - return query.retrieveUnusedSegmentIdsForExactIntervalAndVersion(dataSource, interval, version); + return query.retrieveHighestUnusedSegmentId(dataSource, interval, version); } @Override diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java index f929dbe14e90..45a6795d1699 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java @@ -47,7 +47,7 @@ */ class DatasourceSegmentCache extends BaseCache { - private static final DatasourceSegmentCache EMPTY_INSTANCE = new DatasourceSegmentCache(); + private final String dataSource; /** * Used to obtain the segment for a given ID so that it can be updated in the @@ -68,16 +68,13 @@ class DatasourceSegmentCache extends BaseCache private final Map> intervalToPendingSegments = new HashMap<>(); - private final Set unusedSegmentIds = new HashSet<>(); + private final Map> + intervalVersionToHighestUnusedPartitionNumber = new HashMap<>(); - static DatasourceSegmentCache empty() - { - return EMPTY_INSTANCE; - } - - DatasourceSegmentCache() + DatasourceSegmentCache(String dataSource) { super(true); + this.dataSource = dataSource; } void clear() @@ -85,7 +82,7 @@ void clear() withWriteLock(() -> { idToSegmentState.clear(); idToUsedSegment.clear(); - unusedSegmentIds.clear(); + intervalVersionToHighestUnusedPartitionNumber.clear(); 
idToUsedSegment.values().forEach(s -> usedSegmentTimeline.remove(s.getDataSegment())); }); } @@ -142,7 +139,7 @@ boolean refreshUnusedSegment(String segmentId, SegmentState newState) } } - unusedSegmentIds.add(segmentId); + addUnusedSegmentId(segmentId); return true; }); } @@ -177,7 +174,7 @@ boolean refreshUsedSegment(DataSegmentPlus segmentPlus) } } else { // Segment has transitioned from unused to used - unusedSegmentIds.remove(segmentId); + removeUnusedSegmentId(segmentId); } usedSegmentTimeline.add(segment); @@ -195,7 +192,7 @@ int removeSegmentIds(Set segmentIds) ++removedCount; } - unusedSegmentIds.remove(segmentId); + removeUnusedSegmentId(segmentId); final DataSegmentPlus segment = idToUsedSegment.remove(segmentId); if (segment != null) { @@ -207,6 +204,40 @@ int removeSegmentIds(Set segmentIds) }); } + void resetMaxUnusedIds(Map> intervalVersionToHighestPartitionNumber) + { + withWriteLock(() -> { + this.intervalVersionToHighestUnusedPartitionNumber.clear(); + this.intervalVersionToHighestUnusedPartitionNumber.putAll(intervalVersionToHighestPartitionNumber); + }); + } + + private void addUnusedSegmentId(String id) + { + final SegmentId segmentId = SegmentId.tryParse(dataSource, id); + if (segmentId == null) { + return; + } + + final int partitionNum = segmentId.getPartitionNum(); + intervalVersionToHighestUnusedPartitionNumber + .computeIfAbsent(segmentId.getInterval(), i -> new HashMap<>()) + .merge(segmentId.getVersion(), partitionNum, Math::max); + } + + private void removeUnusedSegmentId(String segmentId) + { + // TODO: Do not update the highest unused id since we don't know the new max + // It is okay to keep working with the old max + + // What are the things we can do here? + // - reduce max partition number by at least 1 + // - keep a bool array for every interval / version to see which IDs are currently in use + // + // - but all of this is overkill because this is meant to handle a very rare case + // and even then it is okay to return an older max + } + /** * Returns the set of segment IDs present in the cache but not present in the * given set of known segment IDs. @@ -243,11 +274,15 @@ public Set findUsedSegmentIdsOverlapping(Interval interval) } @Override - public Set findUnusedSegmentIdsWithExactIntervalAndVersion(Interval interval, String version) + public SegmentId findHighestUnusedSegmentId(Interval interval, String version) { - // TODO: implement this or may be add a variant of this method to find the - // max unused segment ID for an exact interval and version - throw DruidException.defensive("Unsupported: Unused segments are not cached"); + final Integer highestPartitionNum = intervalVersionToHighestUnusedPartitionNumber + .getOrDefault(interval, Map.of()) + .get(version); + + return highestPartitionNum == null + ? null + : SegmentId.of(dataSource, interval, version, highestPartitionNum); } @Override @@ -553,4 +588,9 @@ private static String getId(DataSegment segment) { return segment.getId().toString(); } + + private static int nullSafeMax(Integer a, int b) + { + return (a == null || a < b) ? 
b : a; + } } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java index b6720ee0f67b..1244c77309e2 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java @@ -40,8 +40,10 @@ import org.apache.druid.metadata.SqlSegmentsMetadataQuery; import org.apache.druid.query.DruidMetrics; import org.apache.druid.server.http.DataSegmentPlus; +import org.apache.druid.timeline.SegmentId; import org.joda.time.DateTime; import org.joda.time.Duration; +import org.joda.time.Interval; import org.skife.jdbi.v2.ResultIterator; import javax.annotation.Nullable; @@ -57,6 +59,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; public class SqlSegmentsMetadataCache implements SegmentsMetadataCache { @@ -140,7 +143,12 @@ public boolean isReady() public DataSource getDatasource(String dataSource) { verifyCacheIsReady(); - return datasourceToSegmentCache.computeIfAbsent(dataSource, ds -> new DatasourceSegmentCache()); + return getCacheForDatasource(dataSource); + } + + private DatasourceSegmentCache getCacheForDatasource(String dataSource) + { + return datasourceToSegmentCache.computeIfAbsent(dataSource, DatasourceSegmentCache::new); } private void verifyCacheIsReady() @@ -169,23 +177,38 @@ private void pollMetadataStore() return; } - final Map> datasourceToRefreshSegmentIds = new HashMap<>(); - final Map> datasourceToKnownSegmentIds - = retrieveAllSegmentIds(datasourceToRefreshSegmentIds); + final Map datasourceToSummary = retrieveAllSegmentIds(); if (isStopped()) { tearDown(); return; } - removeUnknownSegmentIdsFromCache(datasourceToKnownSegmentIds); + removeUnknownDatasources(datasourceToSummary); + datasourceToSummary.forEach(this::removeUnknownSegmentIdsFromCache); + datasourceToSummary.forEach( + (datasource, summary) -> + getCacheForDatasource(datasource) + .resetMaxUnusedIds(summary.intervalVersionToMaxUnusedPartition) + ); if (isStopped()) { tearDown(); return; } - retrieveAndRefreshUsedSegmentsForIds(datasourceToRefreshSegmentIds); + final int countOfRefreshedUsedSegments = datasourceToSummary.entrySet().stream().mapToInt( + entry -> retrieveAndRefreshUsedSegments( + entry.getKey(), + entry.getValue().segmentIdsToRefresh + ) + ).sum(); + if (countOfRefreshedUsedSegments > 0) { + log.info( + "Refreshed total [%d] used segments from metadata store.", + countOfRefreshedUsedSegments + ); + } if (isStopped()) { tearDown(); @@ -211,14 +234,11 @@ private void pollMetadataStore() /** * Retrieves all the segment IDs (used and unused) from the metadata store. * - * @return Map from datasource name to set of all segment IDs present in the - * metadata store for that datasource. + * @return Map from datasource name to segment summary. 
*/ - private Map> retrieveAllSegmentIds( - Map> datasourceToRefreshSegmentIds - ) + private Map retrieveAllSegmentIds() { - final Map> datasourceToKnownSegmentIds = new HashMap<>(); + final Map datasourceToSummary = new HashMap<>(); final AtomicInteger countOfRefreshedUnusedSegments = new AtomicInteger(0); // TODO: Consider improving this because the number of unused segments can be very large @@ -240,23 +260,31 @@ private Map> retrieveAllSegmentIds( ) { while (iterator.hasNext()) { final SegmentRecord record = iterator.next(); - final DatasourceSegmentCache cache = datasourceToSegmentCache.computeIfAbsent( - record.dataSource, - ds -> new DatasourceSegmentCache() - ); + final DatasourceSegmentCache cache = getCacheForDatasource(record.dataSource); + final DatasourceSegmentSummary summary = datasourceToSummary + .computeIfAbsent(record.dataSource, ds -> new DatasourceSegmentSummary()); if (cache.shouldRefreshSegment(record.segmentId, record.state)) { if (record.state.isUsed()) { - datasourceToRefreshSegmentIds.computeIfAbsent(record.dataSource, ds -> new HashSet<>()) - .add(record.segmentId); + summary.segmentIdsToRefresh.add(record.segmentId); } else if (cache.refreshUnusedSegment(record.segmentId, record.state)) { countOfRefreshedUnusedSegments.incrementAndGet(); emitDatasourceMetric(record.dataSource, "refreshed/unused", 1); } } - datasourceToKnownSegmentIds.computeIfAbsent(record.dataSource, ds -> new HashSet<>()) - .add(record.segmentId); + if (!record.state.isUsed()) { + final SegmentId segmentId = SegmentId.tryParse(record.dataSource, record.segmentId); + if (segmentId != null) { + final int partitionNum = segmentId.getPartitionNum(); + summary + .intervalVersionToMaxUnusedPartition + .computeIfAbsent(segmentId.getInterval(), i -> new HashMap<>()) + .merge(segmentId.getVersion(), partitionNum, Math::max); + } + } + + summary.persistedSegmentIds.add(record.segmentId); } return 0; @@ -271,47 +299,36 @@ private Map> retrieveAllSegmentIds( log.info("Refreshed total [%d] unused segments from metadata store.", countOfRefreshedUnusedSegments.get()); } - return datasourceToKnownSegmentIds; + return datasourceToSummary; } - private void retrieveAndRefreshUsedSegmentsForIds( - Map> datasourceToRefreshSegmentIds + private int retrieveAndRefreshUsedSegments( + String dataSource, + Set segmentIdsToRefresh ) { - final AtomicInteger countOfRefreshedUsedSegments = new AtomicInteger(0); - datasourceToRefreshSegmentIds.forEach((dataSource, segmentIds) -> { - final DatasourceSegmentCache cache - = datasourceToSegmentCache.computeIfAbsent(dataSource, ds -> new DatasourceSegmentCache()); - - int numUpdatedUsedSegments = 0; - try ( - CloseableIterator iterator = connector.inReadOnlyTransaction( - (handle, status) -> SqlSegmentsMetadataQuery - .forHandle(handle, connector, tablesConfig.get(), jsonMapper) - .retrieveSegmentsByIdIterator(dataSource, segmentIds) - ) - ) { - while (iterator.hasNext()) { - if (cache.refreshUsedSegment(iterator.next())) { - ++numUpdatedUsedSegments; - } + final DatasourceSegmentCache cache = getCacheForDatasource(dataSource); + int numUpdatedUsedSegments = 0; + try ( + CloseableIterator iterator = connector.inReadOnlyTransaction( + (handle, status) -> SqlSegmentsMetadataQuery + .forHandle(handle, connector, tablesConfig.get(), jsonMapper) + .retrieveSegmentsByIdIterator(dataSource, segmentIdsToRefresh) + ) + ) { + while (iterator.hasNext()) { + if (cache.refreshUsedSegment(iterator.next())) { + ++numUpdatedUsedSegments; } } - catch (IOException e) { - log.makeAlert(e, "Error 
retrieving segments for datasource[%s] from metadata store.", dataSource) - .emit(); - } - - emitDatasourceMetric(dataSource, "refresh/used", numUpdatedUsedSegments); - countOfRefreshedUsedSegments.addAndGet(numUpdatedUsedSegments); - }); - - if (countOfRefreshedUsedSegments.get() > 0) { - log.info( - "Refreshed total [%d] used segments from metadata store.", - countOfRefreshedUsedSegments.get() - ); } + catch (IOException e) { + log.makeAlert(e, "Error retrieving segments for datasource[%s] from metadata store.", dataSource) + .emit(); + } + + emitDatasourceMetric(dataSource, "refresh/used", numUpdatedUsedSegments); + return numUpdatedUsedSegments; } private void retrieveAndRefreshAllPendingSegments() @@ -329,10 +346,7 @@ private void retrieveAndRefreshAllPendingSegments() .map((index, r, ctx) -> { try { final PendingSegmentRecord record = PendingSegmentRecord.fromResultSet(r, jsonMapper); - final DatasourceSegmentCache cache = datasourceToSegmentCache.computeIfAbsent( - record.getId().getDataSource(), - ds -> new DatasourceSegmentCache() - ); + final DatasourceSegmentCache cache = getCacheForDatasource(record.getId().getDataSource()); if (cache.shouldRefreshPendingSegment(record)) { cache.insertPendingSegment(record, false); @@ -347,25 +361,36 @@ private void retrieveAndRefreshAllPendingSegments() ); } + private void removeUnknownDatasources(Map datasourceToSummary) + { + final Set datasourcesNotInMetadataStore = + datasourceToSegmentCache.keySet() + .stream() + .filter(ds -> !datasourceToSummary.containsKey(ds)) + .collect(Collectors.toSet()); + + datasourcesNotInMetadataStore.forEach(datasourceToSegmentCache::remove); + } + /** * This is safe to do since updates are always made first to metadata store * and then to cache. */ - private void removeUnknownSegmentIdsFromCache(Map> datasourceToKnownSegmentIds) + private void removeUnknownSegmentIdsFromCache( + String dataSource, + DatasourceSegmentSummary summary + ) { - datasourceToSegmentCache.forEach((dataSource, cache) -> { - final Set unknownSegmentIds = cache.getSegmentIdsNotIn( - datasourceToKnownSegmentIds.getOrDefault(dataSource, Set.of()) + final DatasourceSegmentCache cache = getCacheForDatasource(dataSource); + final Set unknownSegmentIds = cache.getSegmentIdsNotIn(summary.persistedSegmentIds); + final int numSegmentsRemoved = cache.removeSegmentIds(unknownSegmentIds); + if (numSegmentsRemoved > 0) { + log.info( + "Removed [%d] unknown segment IDs from cache of datasource[%s].", + numSegmentsRemoved, dataSource ); - final int numSegmentsRemoved = cache.removeSegmentIds(unknownSegmentIds); - if (numSegmentsRemoved > 0) { - log.info( - "Removed [%d] unknown segment IDs from cache of datasource[%s].", - numSegmentsRemoved, dataSource - ); - emitDatasourceMetric(dataSource, "deleted/unknown", numSegmentsRemoved); - } - }); + emitDatasourceMetric(dataSource, "deleted/unknown", numSegmentsRemoved); + } } private String getSegmentsTable() @@ -427,4 +452,14 @@ static SegmentRecord fromResultSet(ResultSet r) } } + /** + * Summary of segments of a datasource currently present in the metadata store. 
+ */ + private static class DatasourceSegmentSummary + { + final Set persistedSegmentIds = new HashSet<>(); + final Set segmentIdsToRefresh = new HashSet<>(); + final Map> intervalVersionToMaxUnusedPartition = new HashMap<>(); + } + } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 118549b48a24..b2f490e506e9 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -3655,14 +3655,13 @@ public void testRetrieveUnusedSegmentsForExactIntervalAndVersion() coordinator.commitSegments(ImmutableSet.of(usedSegmentForExactIntervalAndVersion), null); - Set unusedSegmentIdsForIntervalAndVersion = transactionFactory.retryDatasourceTransaction( + SegmentId highestUnusedId = transactionFactory.retryDatasourceTransaction( TestDataSource.WIKI, - transaction -> transaction - .findUnusedSegmentIdsWithExactIntervalAndVersion(Intervals.of("2024/2025"), "v1") + transaction -> transaction.findHighestUnusedSegmentId(Intervals.of("2024/2025"), "v1") ); Assert.assertEquals( - Set.of(unusedSegmentForExactIntervalAndVersion.getId().toString()), - unusedSegmentIdsForIntervalAndVersion + unusedSegmentForExactIntervalAndVersion.getId(), + highestUnusedId ); } From 0df8a60eeca2753987083f8991a6f804e4382a25 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Tue, 28 Jan 2025 18:03:25 +0530 Subject: [PATCH 04/11] Add unit tests, cache only used segment states --- .../DatasourceSegmentMetadataReader.java | 3 +- .../SegmentsMetadataReadTransaction.java | 5 + .../SqlSegmentsMetadataCachedTransaction.java | 3 +- .../SqlSegmentsMetadataTransaction.java | 15 +- ...SqlSegmentsMetadataTransactionFactory.java | 4 +- .../metadata/segment/cache/BaseCache.java | 11 +- .../segment/cache/DatasourceSegmentCache.java | 176 ++++------- .../segment/cache/SegmentsMetadataCache.java | 30 +- .../cache/SqlSegmentsMetadataCache.java | 33 +- .../cache/DatasourceSegmentCacheTest.java | 293 ++++++++++++++++++ .../coordinator/CreateDataSegments.java | 48 +++ 11 files changed, 474 insertions(+), 147 deletions(-) create mode 100644 server/src/test/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCacheTest.java diff --git a/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java index 59a2572f1103..890753749d16 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java @@ -65,7 +65,8 @@ public interface DatasourceSegmentMetadataReader List findUsedSegments(Set segmentIds); /** - * Finds used segments that overlap with any of the given intervals. + * Finds used segments that overlap with any of the given intervals. If the + * given list of intervals is empty, all used segments are considered eligible. 
*/ Set findUsedSegmentsPlusOverlappingAnyOf(List intervals); diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java index 132252f998df..1e1a4838ca9f 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java @@ -23,6 +23,11 @@ import java.io.Closeable; +/** + * Represents a single transaction involving read of segment metadata into + * the metadata store. A transaction is associated with a single instance of a + * {@link Handle} and is meant to be short-lived. + */ public interface SegmentsMetadataReadTransaction extends DatasourceSegmentMetadataReader, Closeable { diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java index c41484d0bbf6..c44c4793c825 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java @@ -349,8 +349,7 @@ private T performWriteAction(Function ac verifyStillLeaderWithSameTerm(); final T result = action.apply(delegate); - // TODO: verify if the write to metadata store was successful - // Otherwise, throw an exception + // TODO: Assume that the metadata write operation succeeded pendingWrites.add(action::apply); return result; diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java index 228bde1a5b99..c5bb8c5178b3 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java @@ -334,7 +334,7 @@ public boolean insertPendingSegment( int updatedCount = handle.createStatement(getSqlToInsertPendingSegment()) .bind("id", segmentId.toString()) .bind("dataSource", dataSource) - .bind("created_date", nullSafeString(pendingSegment.getCreatedDate())) + .bind("created_date", toNonNullString(pendingSegment.getCreatedDate())) .bind("start", interval.getStart().toString()) .bind("end", interval.getEnd().toString()) .bind("sequence_name", pendingSegment.getSequenceName()) @@ -370,7 +370,7 @@ public int insertPendingSegments( insertBatch.add() .bind("id", segmentId.toString()) .bind("dataSource", dataSource) - .bind("created_date", nullSafeString(pendingSegment.getCreatedDate())) + .bind("created_date", toNonNullString(pendingSegment.getCreatedDate())) .bind("start", interval.getStart().toString()) .bind("end", interval.getEnd().toString()) .bind("sequence_name", pendingSegment.getSequenceName()) @@ -491,14 +491,14 @@ private int insertSegmentsInBatches( batch.add() .bind("id", segment.getId().toString()) .bind("dataSource", dataSource) - .bind("created_date", nullSafeString(segmentPlus.getCreatedDate())) + .bind("created_date", toNonNullString(segmentPlus.getCreatedDate())) .bind("start", segment.getInterval().getStart().toString()) .bind("end", segment.getInterval().getEnd().toString()) .bind("partitioned", true) .bind("version", segment.getVersion()) .bind("used", Boolean.TRUE.equals(segmentPlus.getUsed())) .bind("payload", 
getJsonBytes(segment)) - .bind("used_status_last_updated", nullSafeString(segmentPlus.getUsedStatusLastUpdatedDate())) + .bind("used_status_last_updated", toNonNullString(segmentPlus.getUsedStatusLastUpdatedDate())) .bind("upgraded_from_segment_id", segmentPlus.getUpgradedFromSegmentId()); if (persistAdditionalMetadata) { @@ -542,9 +542,12 @@ private String getSqlToInsertPendingSegment() ); } - private static String nullSafeString(DateTime time) + private static String toNonNullString(DateTime date) { - return time == null ? null : time.toString(); + if (date == null) { + throw DruidException.defensive("Created date cannot be null"); + } + return date.toString(); } private byte[] getJsonBytes(T object) diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java index 81a44c23a88f..ea069012ac27 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java @@ -83,7 +83,7 @@ public T inReadOnlyDatasourceTransaction( final SegmentsMetadataReadTransaction cachedTransaction = new SqlSegmentsMetadataCachedTransaction(sqlTransaction, datasourceCache, leaderSelector); - return datasourceCache.withReadLock(dc -> executeRead(cachedTransaction, callback)); + return datasourceCache.read(() -> executeRead(cachedTransaction, callback)); } else { return executeRead(createSqlTransaction(dataSource, handle, status), callback); } @@ -106,7 +106,7 @@ public T retryDatasourceTransaction( final SegmentsMetadataTransaction cachedTransaction = new SqlSegmentsMetadataCachedTransaction(sqlTransaction, datasourceCache, leaderSelector); - return datasourceCache.withWriteLock(dc -> executeWrite(cachedTransaction, callback)); + return datasourceCache.write(() -> executeWrite(cachedTransaction, callback)); } else { return executeWrite(sqlTransaction, callback); } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java index 7ab1b49e4f89..307b147cd054 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java @@ -23,6 +23,9 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; +/** + * Cache with standard read/write locking. 
+ */ public abstract class BaseCache implements SegmentsMetadataCache.DataSource { private final ReentrantReadWriteLock stateLock; @@ -63,11 +66,11 @@ public T withReadLock(Supplier action) } @Override - public T withReadLock(SegmentsMetadataCache.Action action) throws Exception + public T read(SegmentsMetadataCache.Action action) throws Exception { stateLock.readLock().lock(); try { - return action.perform(this); + return action.perform(); } finally { stateLock.readLock().unlock(); @@ -75,11 +78,11 @@ public T withReadLock(SegmentsMetadataCache.Action action) throws Excepti } @Override - public T withWriteLock(SegmentsMetadataCache.Action action) throws Exception + public T write(SegmentsMetadataCache.Action action) throws Exception { stateLock.writeLock().lock(); try { - return action.perform(this); + return action.perform(); } finally { stateLock.writeLock().unlock(); diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java index 45a6795d1699..47ec5eb1b9b6 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java @@ -43,7 +43,7 @@ import java.util.stream.Collectors; /** - * Datasource-level cache for segments and pending segments. + * Datasource-level in-memory cache for segments and pending segments. */ class DatasourceSegmentCache extends BaseCache { @@ -55,13 +55,10 @@ class DatasourceSegmentCache extends BaseCache */ private final Map idToUsedSegment = new HashMap<>(); - /** - * Current state of segments as seen by the cache. - */ - private final Map idToSegmentState = new HashMap<>(); + private final Set unusedSegmentIds = new HashSet<>(); /** - * Allows lookup of visible segments for a given interval. + * Not being used right now. Could allow lookup of visible segments for a given interval. */ private final SegmentTimeline usedSegmentTimeline = SegmentTimeline.forSegments(Set.of()); @@ -80,29 +77,24 @@ class DatasourceSegmentCache extends BaseCache void clear() { withWriteLock(() -> { - idToSegmentState.clear(); + idToUsedSegment.values().forEach(s -> usedSegmentTimeline.remove(s.getDataSegment())); idToUsedSegment.clear(); intervalVersionToHighestUnusedPartitionNumber.clear(); - idToUsedSegment.values().forEach(s -> usedSegmentTimeline.remove(s.getDataSegment())); }); } - boolean isEmpty() - { - return withReadLock(() -> idToSegmentState.isEmpty() && intervalToPendingSegments.isEmpty()); - } - /** * Checks if a segment needs to be refreshed. A refresh is required if the * cache has no known state for the given segment or if the metadata store * has a more recent last_updated_time than the cache. 
*/ - boolean shouldRefreshSegment(String segmentId, SegmentState metadataState) + boolean shouldRefreshUsedSegment(String segmentId, DateTime metadataUpdatedTime) { return withReadLock(() -> { - final SegmentState cachedState = idToSegmentState.get(segmentId); + final DataSegmentPlus cachedState = idToUsedSegment.get(segmentId); return cachedState == null - || cachedState.getLastUpdatedTime().isBefore(metadataState.getLastUpdatedTime()); + || cachedState.getUsedStatusLastUpdatedDate() == null + || cachedState.getUsedStatusLastUpdatedDate().isBefore(metadataUpdatedTime); }); } @@ -118,68 +110,72 @@ boolean shouldRefreshPendingSegment(PendingSegmentRecord record) ); } - boolean refreshUnusedSegment(String segmentId, SegmentState newState) + /** + * Adds or updates the given segment in the cache. + */ + boolean addSegment(DataSegmentPlus segmentPlus) { + if (Boolean.TRUE.equals(segmentPlus.getUsed())) { + return addUsedSegment(segmentPlus); + } else { + return addUnusedSegmentId(segmentPlus.getDataSegment().getId()); + } + } + + /** + * Adds or updates a used segment in the cache. + */ + private boolean addUsedSegment(DataSegmentPlus segmentPlus) { - if (newState.isUsed()) { + // Process only used segments + if (!Boolean.TRUE.equals(segmentPlus.getUsed())) { + addUnusedSegmentId(segmentPlus.getDataSegment().getId()); return false; } + final DataSegment segment = segmentPlus.getDataSegment(); + final String segmentId = getId(segment); + return withWriteLock(() -> { - if (!shouldRefreshSegment(segmentId, newState)) { + if (!shouldRefreshUsedSegment(segmentId, segmentPlus.getUsedStatusLastUpdatedDate())) { return false; } - final SegmentState oldState = idToSegmentState.put(segmentId, newState); - - if (oldState != null && oldState.isUsed()) { - // Segment has transitioned from used to unused - DataSegmentPlus segment = idToUsedSegment.remove(segmentId); - if (segment != null) { - usedSegmentTimeline.remove(segment.getDataSegment()); - } + final DataSegmentPlus oldSegmentPlus = idToUsedSegment.put(segmentId, segmentPlus); + if (oldSegmentPlus != null) { + // Segment payload may have changed, remove old value from timeline + usedSegmentTimeline.remove(oldSegmentPlus.getDataSegment()); } - addUnusedSegmentId(segmentId); + unusedSegmentIds.remove(segmentId); + usedSegmentTimeline.add(segment); return true; }); } - boolean refreshUsedSegment(DataSegmentPlus segmentPlus) + /** + * Adds or updates an unused segment in the cache. 
+ */ + boolean addUnusedSegmentId(SegmentId segmentId) { - final DataSegment segment = segmentPlus.getDataSegment(); - final String segmentId = getId(segment); - - final SegmentState newState = new SegmentState( - Boolean.TRUE.equals(segmentPlus.getUsed()), - segmentPlus.getUsedStatusLastUpdatedDate() - ); - if (!newState.isUsed()) { - return refreshUnusedSegment(segmentId, newState); + if (unusedSegmentIds.contains(segmentId.toString())) { + return false; } - return withWriteLock(() -> { - if (!shouldRefreshSegment(segmentId, newState)) { - return false; - } - - final SegmentState oldState = idToSegmentState.put(segmentId, newState); - final DataSegmentPlus oldSegmentPlus = idToUsedSegment.put(segmentId, segmentPlus); - - if (oldState == null) { - // This is a new segment - } else if (oldState.isUsed()) { - // Segment payload may have changed - if (oldSegmentPlus != null) { - usedSegmentTimeline.remove(oldSegmentPlus.getDataSegment()); - } - } else { - // Segment has transitioned from unused to used - removeUnusedSegmentId(segmentId); + final int partitionNum = segmentId.getPartitionNum(); + withWriteLock(() -> { + final DataSegmentPlus oldSegmentPlus = idToUsedSegment.remove(segmentId.toString()); + if (oldSegmentPlus != null) { + // Segment has transitioned from used to unused + usedSegmentTimeline.remove(oldSegmentPlus.getDataSegment()); } - usedSegmentTimeline.add(segment); - return true; + unusedSegmentIds.add(segmentId.toString()); + intervalVersionToHighestUnusedPartitionNumber + .computeIfAbsent(segmentId.getInterval(), i -> new HashMap<>()) + .merge(segmentId.getVersion(), partitionNum, Math::max); }); + + return true; } int removeSegmentIds(Set segmentIds) @@ -187,16 +183,10 @@ int removeSegmentIds(Set segmentIds) return withWriteLock(() -> { int removedCount = 0; for (String segmentId : segmentIds) { - SegmentState state = idToSegmentState.remove(segmentId); - if (state != null) { - ++removedCount; - } - - removeUnusedSegmentId(segmentId); - final DataSegmentPlus segment = idToUsedSegment.remove(segmentId); if (segment != null) { usedSegmentTimeline.remove(segment.getDataSegment()); + ++removedCount; } } @@ -204,6 +194,10 @@ int removeSegmentIds(Set segmentIds) }); } + /** + * Resets the {@link #intervalVersionToHighestUnusedPartitionNumber} with the + * new values. + */ void resetMaxUnusedIds(Map> intervalVersionToHighestPartitionNumber) { withWriteLock(() -> { @@ -212,32 +206,6 @@ void resetMaxUnusedIds(Map> intervalVersionToHigh }); } - private void addUnusedSegmentId(String id) - { - final SegmentId segmentId = SegmentId.tryParse(dataSource, id); - if (segmentId == null) { - return; - } - - final int partitionNum = segmentId.getPartitionNum(); - intervalVersionToHighestUnusedPartitionNumber - .computeIfAbsent(segmentId.getInterval(), i -> new HashMap<>()) - .merge(segmentId.getVersion(), partitionNum, Math::max); - } - - private void removeUnusedSegmentId(String segmentId) - { - // TODO: Do not update the highest unused id since we don't know the new max - // It is okay to keep working with the old max - - // What are the things we can do here? - // - reduce max partition number by at least 1 - // - keep a bool array for every interval / version to see which IDs are currently in use - // - // - but all of this is overkill because this is meant to handle a very rare case - // and even then it is okay to return an older max - } - /** * Returns the set of segment IDs present in the cache but not present in the * given set of known segment IDs. 
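
The unused-segment bookkeeping introduced above keeps only the highest partition number per (interval, version) pair: addUnusedSegmentId() can only raise the tracked maximum via Math::max, and the map is rebuilt wholesale by resetMaxUnusedIds() on the next metadata poll, so marking a segment as used or removing it does not lower the cached maximum in between. A self-contained sketch of that bookkeeping; the class name and sample values are illustrative, not part of the change:

import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.SegmentId;
import org.joda.time.Interval;

class HighestUnusedPartitionTracker
{
  // Highest unused partition number, keyed by exact interval and then version.
  private final Map<Interval, Map<String, Integer>> intervalVersionToMax = new HashMap<>();

  /** Records an unused segment, keeping only the maximum partition number. */
  void trackUnused(SegmentId segmentId)
  {
    intervalVersionToMax
        .computeIfAbsent(segmentId.getInterval(), i -> new HashMap<>())
        .merge(segmentId.getVersion(), segmentId.getPartitionNum(), Math::max);
  }

  /** Returns the highest tracked ID for the exact interval and version, or null if none. */
  @Nullable
  SegmentId getHighest(String dataSource, Interval interval, String version)
  {
    final Integer max = intervalVersionToMax.getOrDefault(interval, Map.of()).get(version);
    return max == null ? null : SegmentId.of(dataSource, interval, version, max);
  }

  /** Replaces all tracked values, as the cache does once per metadata poll. */
  void reset(Map<Interval, Map<String, Integer>> freshValues)
  {
    intervalVersionToMax.clear();
    freshValues.forEach(
        (interval, byVersion) -> intervalVersionToMax.put(interval, new HashMap<>(byVersion))
    );
  }

  public static void main(String[] args)
  {
    final Interval interval = Intervals.of("2024/2025");
    final HighestUnusedPartitionTracker tracker = new HighestUnusedPartitionTracker();
    tracker.trackUnused(SegmentId.of("wiki", interval, "v1", 3));
    // The tracked maximum stays at partition 3 until the next reset(),
    // even if that segment later becomes used or is deleted.
    System.out.println(tracker.getHighest("wiki", interval, "v1"));
    tracker.reset(Map.of());
    System.out.println(tracker.getHighest("wiki", interval, "v1")); // prints null
  }
}
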
@@ -246,7 +214,7 @@ Set getSegmentIdsNotIn(Set knownSegmentIds) { return withReadLock( () -> knownSegmentIds.stream() - .filter(id -> !idToSegmentState.containsKey(id)) + .filter(id -> !isSegmentIdCached(id)) .collect(Collectors.toSet()) ); } @@ -259,7 +227,7 @@ public Set findExistingSegmentIds(Set segments) return withReadLock( () -> segments.stream() .map(DatasourceSegmentCache::getId) - .filter(idToSegmentState::containsKey) + .filter(this::isSegmentIdCached) .collect(Collectors.toSet()) ); } @@ -419,17 +387,7 @@ public int insertSegments(Set segments) return withWriteLock(() -> { int numInsertedSegments = 0; for (DataSegmentPlus segmentPlus : segments) { - final DataSegment segment = segmentPlus.getDataSegment(); - final String segmentId = getId(segment); - final SegmentState state = new SegmentState( - Boolean.TRUE.equals(segmentPlus.getUsed()), - segmentPlus.getUsedStatusLastUpdatedDate() - ); - - final boolean updated = state.isUsed() - ? refreshUsedSegment(segmentPlus) - : refreshUnusedSegment(segmentId, state); - if (updated) { + if (addSegment(segmentPlus)) { ++numInsertedSegments; } } @@ -451,10 +409,7 @@ public int markSegmentsWithinIntervalAsUnused(Interval interval, DateTime update try (CloseableIterator segmentIterator = findUsedSegmentsOverlappingAnyOf(List.of(interval))) { while (segmentIterator.hasNext()) { - boolean updated = refreshUnusedSegment( - getId(segmentIterator.next()), - new SegmentState(false, updateTime) - ); + boolean updated = addUnusedSegmentId(segmentIterator.next().getId()); if (updated) { ++updatedCount; } @@ -578,6 +533,11 @@ private List findPendingSegmentsMatching(Predicate intervals, Interval testInterval) { return intervals.isEmpty() diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java index 32296860de76..4164c3586cd9 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java @@ -24,13 +24,8 @@ /** * TODO: - * -[x] Finish polling of pending segments properly - * -[x] Implement rollback and commit for cached transaction - * -[x] Acquire read/write lock on datasource cache when transaction starts. - * -[x] Add different factory methods to create read vs write transaction - * -[x] Write a basic unit test to verify that things are working as expected * -[ ] Write unit test for DatasourceSegmentCache and SqlSegmentsMetadataCache - * - + * * -[ ] Wire up cache in OverlordCompactionScheduler and SqlSegmentsMetadataManager, * otherwise we will end up having two copies of the segment timeline and stuff * The timeline inside the cache can replace the SegmentTimeline of SqlSegmentsMetadataManager @@ -46,20 +41,37 @@ public interface SegmentsMetadataCache void stop(); + /** + * @return true if the cache is enabled and ready for reading and writing. + */ boolean isReady(); DataSource getDatasource(String dataSource); + /** + * Cache containing segment metadata of a single datasource. + */ interface DataSource extends DatasourceSegmentMetadataWriter, DatasourceSegmentMetadataReader { - T withReadLock(Action action) throws Exception; + /** + * Performs a thread-safe read action on the cache. + */ + T read(Action action) throws Exception; - T withWriteLock(Action action) throws Exception; + /** + * Performs a thread-safe write action on the cache. 
+ */ + T write(Action action) throws Exception; } + /** + * Represents a read or write action performed on the cache within required + * locks. + */ + @FunctionalInterface interface Action { - T perform(DataSource datasourceCache) throws Exception; + T perform() throws Exception; } } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java index 1244c77309e2..fbbff73f1990 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java @@ -264,18 +264,21 @@ private Map retrieveAllSegmentIds() final DatasourceSegmentSummary summary = datasourceToSummary .computeIfAbsent(record.dataSource, ds -> new DatasourceSegmentSummary()); - if (cache.shouldRefreshSegment(record.segmentId, record.state)) { - if (record.state.isUsed()) { - summary.segmentIdsToRefresh.add(record.segmentId); - } else if (cache.refreshUnusedSegment(record.segmentId, record.state)) { - countOfRefreshedUnusedSegments.incrementAndGet(); - emitDatasourceMetric(record.dataSource, "refreshed/unused", 1); - } + // Refresh used segments if required + if (record.isUsed && cache.shouldRefreshUsedSegment(record.segmentId, record.lastUpdatedTime)) { + summary.segmentIdsToRefresh.add(record.segmentId); } - if (!record.state.isUsed()) { + // Track max partition number of unused segment if needed + if (!record.isUsed) { final SegmentId segmentId = SegmentId.tryParse(record.dataSource, record.segmentId); + if (segmentId != null) { + if (cache.addUnusedSegmentId(segmentId)) { + countOfRefreshedUnusedSegments.incrementAndGet(); + emitDatasourceMetric(record.dataSource, "refreshed/unused", 1); + } + final int partitionNum = segmentId.getPartitionNum(); summary .intervalVersionToMaxUnusedPartition @@ -317,7 +320,7 @@ private int retrieveAndRefreshUsedSegments( ) ) { while (iterator.hasNext()) { - if (cache.refreshUsedSegment(iterator.next())) { + if (cache.addSegment(iterator.next())) { ++numUpdatedUsedSegments; } } @@ -424,13 +427,15 @@ private static class SegmentRecord { private final String segmentId; private final String dataSource; - private final SegmentState state; + private final boolean isUsed; + private final DateTime lastUpdatedTime; - SegmentRecord(String segmentId, String dataSource, SegmentState state) + SegmentRecord(String segmentId, String dataSource, boolean isUsed, DateTime lastUpdatedTime) { this.segmentId = segmentId; this.dataSource = dataSource; - this.state = state; + this.isUsed = isUsed; + this.lastUpdatedTime = lastUpdatedTime; } @Nullable @@ -442,9 +447,7 @@ static SegmentRecord fromResultSet(ResultSet r) final String dataSource = r.getString("dataSource"); final DateTime lastUpdatedTime = nullSafeDate(r.getString("used_status_last_updated")); - final SegmentState storedState = new SegmentState(isUsed, lastUpdatedTime); - - return new SegmentRecord(segmentId, dataSource, storedState); + return new SegmentRecord(segmentId, dataSource, isUsed, lastUpdatedTime); } catch (SQLException e) { return null; diff --git a/server/src/test/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCacheTest.java b/server/src/test/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCacheTest.java new file mode 100644 index 000000000000..e1f3c35609fc --- /dev/null +++ 
b/server/src/test/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCacheTest.java @@ -0,0 +1,293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment.cache; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.server.coordinator.CreateDataSegments; +import org.apache.druid.server.http.DataSegmentPlus; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.SegmentId; +import org.joda.time.DateTime; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.function.ThrowingRunnable; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class DatasourceSegmentCacheTest +{ + private static final String WIKI = "wiki"; + + private DatasourceSegmentCache cache; + + @Before + public void setup() + { + cache = new DatasourceSegmentCache(WIKI); + } + + @Test + public void testEmptyCache() + { + Assert.assertNull(cache.findUsedSegment("abc")); + Assert.assertNull(cache.findHighestUnusedSegmentId(Intervals.ETERNITY, "v1")); + + Assert.assertTrue(cache.findUsedSegmentsPlusOverlappingAnyOf(List.of()).isEmpty()); + Assert.assertTrue(cache.findPendingSegmentsOverlapping(Intervals.ETERNITY).isEmpty()); + } + + @Test + public void testFindSegmentIsUnsupported() + { + verifyUnsupported(() -> cache.findSegment("abc")); + } + + @Test + public void testFindUnusedSegmentsIsUnsupported() + { + verifyUnsupported(() -> cache.findUnusedSegments(null, null, null, null)); + } + + @Test + public void testFindSegmentsIsUnsupported() + { + verifyUnsupported(() -> cache.findSegments(Set.of())); + } + + @Test + public void testFindSegmentsWithSchemaIsUnsupported() + { + verifyUnsupported(() -> cache.findSegmentsWithSchema(Set.of())); + } + + private void verifyUnsupported(ThrowingRunnable runnable) + { + DruidException exception = Assert.assertThrows(DruidException.class, runnable); + DruidExceptionMatcher.defensive() + .expectMessageIs("Unsupported: Unused segments are not cached") + .matches(exception); + } + + @Test + public void testAddUsedSegment() + { + final DataSegmentPlus segmentPlus = createUsedSegment().ofSizeInMb(100); + final DataSegment segment = segmentPlus.getDataSegment(); + + cache.addSegment(segmentPlus); + + final SegmentId segmentId = segment.getId(); + final Interval interval = segmentId.getInterval(); + + Assert.assertEquals(segment, 
cache.findUsedSegment(segmentId.toString())); + Assert.assertEquals(List.of(segment), cache.findUsedSegments(Set.of(segmentId.toString()))); + + Assert.assertEquals(Set.of(segmentId.toString()), cache.findExistingSegmentIds(Set.of(segment))); + + Assert.assertEquals(Set.of(segmentId), cache.findUsedSegmentIdsOverlapping(interval)); + Assert.assertEquals(Set.of(segmentId), cache.findUsedSegmentIdsOverlapping(Intervals.ETERNITY)); + + Assert.assertEquals(List.of(segment), asList(cache.findUsedSegmentsOverlappingAnyOf(List.of()))); + Assert.assertEquals(List.of(segment), asList(cache.findUsedSegmentsOverlappingAnyOf(List.of(interval)))); + Assert.assertEquals(List.of(segment), asList(cache.findUsedSegmentsOverlappingAnyOf(List.of(Intervals.ETERNITY)))); + + Assert.assertEquals(Set.of(segmentPlus), cache.findUsedSegmentsPlusOverlappingAnyOf(List.of())); + Assert.assertEquals(Set.of(segmentPlus), cache.findUsedSegmentsPlusOverlappingAnyOf(List.of(interval))); + Assert.assertEquals(Set.of(segmentPlus), cache.findUsedSegmentsPlusOverlappingAnyOf(List.of(Intervals.ETERNITY))); + + Assert.assertNull(cache.findHighestUnusedSegmentId(interval, segment.getVersion())); + } + + @Test + public void testUpdateUsedSegment() + { + final DataSegmentPlus segmentPlus = createUsedSegment().updatedNow().ofSizeInMb(100); + final DataSegment segment = segmentPlus.getDataSegment(); + + cache.addSegment(segmentPlus); + Assert.assertEquals(Set.of(segmentPlus), cache.findUsedSegmentsPlusOverlappingAnyOf(List.of())); + + // Update the segment and verify that the fields have been updated + final DataSegmentPlus updatedSegmentPlus + = new DataSegmentPlus(segment, null, DateTimes.EPOCH, true, null, 100L, null); + cache.addSegment(updatedSegmentPlus); + Assert.assertEquals(Set.of(segmentPlus), cache.findUsedSegmentsPlusOverlappingAnyOf(List.of())); + } + + @Test + public void testAddUnusedSegment() + { + final DataSegmentPlus segmentPlus = createUnusedSegment().ofSizeInMb(100); + final DataSegment segment = segmentPlus.getDataSegment(); + final SegmentId segmentId = segment.getId(); + + cache.addSegment(segmentPlus); + + // Verify that the segment is not returned in any of the used segment methods + Assert.assertNull(cache.findUsedSegment(segmentId.toString())); + Assert.assertTrue(cache.findUsedSegments(Set.of(segmentId.toString())).isEmpty()); + Assert.assertTrue(cache.findUsedSegmentIdsOverlapping(segment.getInterval()).isEmpty()); + Assert.assertTrue(asList(cache.findUsedSegmentsOverlappingAnyOf(List.of())).isEmpty()); + Assert.assertTrue(cache.findUsedSegmentsPlusOverlappingAnyOf(List.of()).isEmpty()); + + Assert.assertEquals(Set.of(segmentId.toString()), cache.findExistingSegmentIds(Set.of(segment))); + + // Verify unused segment methods + Assert.assertEquals(segmentId, cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); + } + + @Test + public void testShouldRefreshUsedSegment() + { + final DataSegmentPlus segmentPlus = createUsedSegment().updatedNow().ofSizeInMb(100); + final DataSegment segment = segmentPlus.getDataSegment(); + final String segmentId = segment.getId().toString(); + + Assert.assertTrue(cache.shouldRefreshUsedSegment(segmentId, segmentPlus.getUsedStatusLastUpdatedDate())); + + cache.addSegment(segmentPlus); + Assert.assertEquals(segment, cache.findUsedSegment(segmentId)); + + // Verify that segment refresh is required only if updated time has increased + final DateTime updatedTime = segmentPlus.getUsedStatusLastUpdatedDate(); + Assert.assertNotNull(updatedTime); + + 
Assert.assertFalse(cache.shouldRefreshUsedSegment(segmentId, updatedTime)); + Assert.assertFalse(cache.shouldRefreshUsedSegment(segmentId, updatedTime.minus(1))); + Assert.assertTrue(cache.shouldRefreshUsedSegment(segmentId, updatedTime.plus(1))); + } + + @Test + public void testUpdateUsedSegmentToUnused() + { + final DataSegmentPlus usedSegmentPlus = createUsedSegment().ofSizeInMb(100); + final DataSegment segment = usedSegmentPlus.getDataSegment(); + final SegmentId segmentId = segment.getId(); + + cache.addSegment(usedSegmentPlus); + + Assert.assertEquals(segment, cache.findUsedSegment(segmentId.toString())); + Assert.assertNull(cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); + + final DataSegmentPlus unusedSegmentPlus = new DataSegmentPlus( + segment, + null, + DateTimes.EPOCH, + false, + null, + null, + null + ); + + cache.addSegment(unusedSegmentPlus); + + Assert.assertNull(cache.findUsedSegment(segmentId.toString())); + Assert.assertEquals(segmentId, cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); + } + + @Test + public void testUpdateUnusedSegmentToUsed() + { + final DataSegmentPlus unusedSegmentPlus = createUnusedSegment().ofSizeInMb(100); + final DataSegment segment = unusedSegmentPlus.getDataSegment(); + final SegmentId segmentId = segment.getId(); + + cache.addSegment(unusedSegmentPlus); + + Assert.assertEquals(segmentId, cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); + Assert.assertNull(cache.findUsedSegment(segmentId.toString())); + + final DataSegmentPlus usedSegmentPlus = new DataSegmentPlus( + segment, + null, + DateTimes.EPOCH, + true, + null, + null, + null + ); + + cache.addSegment(usedSegmentPlus); + + Assert.assertEquals(segment, cache.findUsedSegment(segmentId.toString())); + } + + @Test + public void testOnlyResetUpdatesHighestId() + { + final DataSegmentPlus unusedSegmentPlus = createUnusedSegment().ofSizeInMb(100); + final DataSegment segment = unusedSegmentPlus.getDataSegment(); + final SegmentId segmentId = segment.getId(); + + cache.addSegment(unusedSegmentPlus); + Assert.assertEquals(segmentId, cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); + + // Verify that marking the segment as used does not update the highest ID + final DataSegmentPlus usedSegmentPlus = new DataSegmentPlus( + segment, + null, + DateTimes.EPOCH, + true, + null, + null, + null + ); + cache.addSegment(usedSegmentPlus); + Assert.assertEquals(segmentId, cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); + + // Verify that removing segment does not update the highest ID + cache.removeSegmentIds(Set.of(segmentId.toString())); + Assert.assertEquals(segmentId, cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); + + // Verify that only reset updates the highest ID + cache.resetMaxUnusedIds(Map.of()); + Assert.assertNull(cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); + } + + private static List asList(CloseableIterator iterator) + { + try (iterator) { + return ImmutableList.copyOf(iterator); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static CreateDataSegments createUsedSegment() + { + return CreateDataSegments.ofDatasource(WIKI).markUsed(); + } + + private static CreateDataSegments createUnusedSegment() + { + return CreateDataSegments.ofDatasource(WIKI).markUnused(); + } +} diff --git 
a/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java b/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java index b5e6879bedf0..452d50cc9ddd 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.segment.IndexIO; +import org.apache.druid.server.http.DataSegmentPlus; import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.NumberedShardSpec; @@ -54,6 +55,11 @@ public class CreateDataSegments private String version = "1"; private CompactionState compactionState = null; + // Plus fields + private Boolean used; + private DateTime lastUpdatedTime; + private String upgradedFromSegmentId; + public static CreateDataSegments ofDatasource(String datasource) { return new CreateDataSegments(datasource); @@ -101,6 +107,48 @@ public CreateDataSegments withVersion(String version) return this; } + public CreateDataSegments markUnused() + { + this.used = false; + return this; + } + + public CreateDataSegments markUsed() + { + this.used = true; + return this; + } + + public CreateDataSegments lastUpdatedOn(DateTime updatedTime) + { + this.lastUpdatedTime = updatedTime; + return this; + } + + public CreateDataSegments updatedNow() + { + return lastUpdatedOn(DateTimes.nowUtc()); + } + + public CreateDataSegments upgradedFromSegment(String segmentId) + { + this.upgradedFromSegmentId = segmentId; + return this; + } + + public DataSegmentPlus ofSizeInMb(long sizeMb) { + final DataSegment segment = eachOfSizeInMb(sizeMb).get(0); + return new DataSegmentPlus( + segment, + null, + lastUpdatedTime, + used, + null, + null, + upgradedFromSegmentId + ); + } + public List eachOfSizeInMb(long sizeMb) { return eachOfSize(sizeMb * 1_000_000); From d6dcb4294d848d7814118eeaf99bbe83fa5587a0 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Tue, 28 Jan 2025 19:04:10 +0530 Subject: [PATCH 05/11] Cleanup javadocs --- .../MaterializedViewSupervisorTest.java | 4 +- .../DatasourceOptimizerTest.java | 4 +- .../indexing/overlord/DruidOverlord.java | 6 +-- .../common/actions/TaskActionTestKit.java | 4 +- .../common/task/IngestionTestBase.java | 4 +- .../overlord/TaskLockBoxConcurrencyTest.java | 4 +- .../indexing/overlord/TaskLockboxTest.java | 6 +-- .../indexing/overlord/TaskQueueScaleTest.java | 4 +- .../indexing/overlord/http/OverlordTest.java | 4 +- .../SeekableStreamIndexTaskTestBase.java | 4 +- .../guice/SQLMetadataStorageDruidModule.java | 10 ++-- .../IndexerSQLMetadataStorageCoordinator.java | 46 +++++++++---------- ... 
=> CachedSegmentMetadataTransaction.java} | 31 +++++++------ ...n.java => SegmentMetadataTransaction.java} | 4 +- ...ava => SqlSegmentMetadataTransaction.java} | 8 +++- ...SqlSegmentsMetadataTransactionFactory.java | 44 +++++++++--------- .../metadata/segment/cache/BaseCache.java | 6 +-- .../segment/cache/DatasourceSegmentCache.java | 5 +- ...va => HeapMemorySegmentMetadataCache.java} | 17 +++---- ...taCache.java => SegmentMetadataCache.java} | 14 +----- ...exerSQLMetadataStorageCoordinatorTest.java | 24 +++++----- ...orageCoordinatorSchemaPersistenceTest.java | 8 ++-- ...che.java => NoopSegmentMetadataCache.java} | 2 +- .../coordinator/CreateDataSegments.java | 3 +- 24 files changed, 133 insertions(+), 133 deletions(-) rename server/src/main/java/org/apache/druid/metadata/segment/{SqlSegmentsMetadataCachedTransaction.java => CachedSegmentMetadataTransaction.java} (90%) rename server/src/main/java/org/apache/druid/metadata/segment/{SegmentsMetadataTransaction.java => SegmentMetadataTransaction.java} (91%) rename server/src/main/java/org/apache/druid/metadata/segment/{SqlSegmentsMetadataTransaction.java => SqlSegmentMetadataTransaction.java} (98%) rename server/src/main/java/org/apache/druid/metadata/segment/cache/{SqlSegmentsMetadataCache.java => HeapMemorySegmentMetadataCache.java} (96%) rename server/src/main/java/org/apache/druid/metadata/segment/cache/{SegmentsMetadataCache.java => SegmentMetadataCache.java} (73%) rename server/src/test/java/org/apache/druid/metadata/segment/cache/{NoopSegmentsMetadataCache.java => NoopSegmentMetadataCache.java} (94%) diff --git a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java index d6f26acab11d..4f600c369e94 100644 --- a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java +++ b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java @@ -47,7 +47,7 @@ import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.TestHelper; @@ -113,7 +113,7 @@ public void setUp() derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), diff --git a/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java b/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java index ba449ce35f4b..3a5bbf175823 100644 --- a/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java +++ 
b/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java @@ -43,7 +43,7 @@ import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.query.Query; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.QueryRunnerTestHelper; @@ -120,7 +120,7 @@ public void setUp() throws Exception derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), jsonMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java index b253d7a33285..183e62a3b296 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java @@ -40,7 +40,7 @@ import org.apache.druid.java.util.common.lifecycle.LifecycleStop; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordinator.CoordinatorOverlordServiceConfig; @@ -89,7 +89,7 @@ public DruidOverlord( final OverlordDutyExecutor overlordDutyExecutor, @IndexingService final DruidLeaderSelector overlordLeaderSelector, final SegmentAllocationQueue segmentAllocationQueue, - final SegmentsMetadataCache segmentsMetadataCache, + final SegmentMetadataCache segmentMetadataCache, final CompactionScheduler compactionScheduler, final ObjectMapper mapper, final TaskContextEnricher taskContextEnricher @@ -134,7 +134,7 @@ public void becomeLeader() // First add "half leader" services: everything required for APIs except the supervisor manager. // Then, become "half leader" so those APIs light up and supervisor initialization can proceed. 
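
// A hedged sketch (not part of this patch) of how Lifecycle.addManagedInstance()
// ties a component to the leader lifecycle described in the comment above:
// @LifecycleStart/@LifecycleStop methods of registered instances run when the
// lifecycle starts and stops. Lifecycle and the two annotations are existing
// Druid classes; PollingCache and its method bodies are illustrative assumptions,
// not the actual cache implementation.
import org.apache.druid.java.util.common.lifecycle.Lifecycle;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;

class PollingCache
{
  @LifecycleStart
  public void start()
  {
    // e.g. begin polling the metadata store
  }

  @LifecycleStop
  public void stop()
  {
    // e.g. stop polling and drop cached state
  }
}

class LeaderLifecycleSketch
{
  public static void main(String[] args) throws Exception
  {
    final Lifecycle leaderLifecycle = new Lifecycle();
    leaderLifecycle.addManagedInstance(new PollingCache());

    // start() invokes the @LifecycleStart methods of managed instances;
    // in DruidOverlord this corresponds to becoming the leader.
    leaderLifecycle.start();

    // stop() invokes the @LifecycleStop methods; in DruidOverlord this
    // corresponds to losing leadership.
    leaderLifecycle.stop();
  }
}
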
- leaderLifecycle.addManagedInstance(segmentsMetadataCache); + leaderLifecycle.addManagedInstance(segmentMetadataCache); leaderLifecycle.addManagedInstance(taskRunner); leaderLifecycle.addManagedInstance(taskQueue); leaderLifecycle.addHandler( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java index d4a18af27766..5202efc52318 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java @@ -38,7 +38,7 @@ import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaCache; import org.apache.druid.segment.metadata.SegmentSchemaManager; @@ -103,7 +103,7 @@ public void before() metadataStorageTablesConfig, testDerbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ) { @Override diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 8395929d7e0f..c9060fb9fa52 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -71,7 +71,7 @@ import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9Factory; @@ -155,7 +155,7 @@ public void setUpIngestionTestBase() throws IOException derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnectorRule.getConnector(), new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java index bdda827b172d..a4134ba6a922 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java @@ -34,7 +34,7 @@ import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import 
org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; @@ -88,7 +88,7 @@ public void setup() derby.metadataTablesConfigSupplier().get(), derbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), objectMapper, derby.metadataTablesConfigSupplier().get(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java index a3e8caa67326..690f793b6280 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java @@ -57,7 +57,7 @@ import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaManager; @@ -136,7 +136,7 @@ public void setup() tablesConfig, derbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), objectMapper, tablesConfig, @@ -477,7 +477,7 @@ public void testSyncWithUnknownTaskTypesFromModuleNotLoaded() derby.metadataTablesConfigSupplier().get(), derbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), loadedMapper, derby.metadataTablesConfigSupplier().get(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java index 7fe7c3ea1d26..d2e704175020 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java @@ -48,7 +48,7 @@ import org.apache.druid.metadata.TaskLookup; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaManager; @@ -111,7 +111,7 @@ public void setUp() derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnectorRule.getConnector(), new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), jsonMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java index 1bd7ab1f4c66..af28b48c67fd 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/http/OverlordTest.java @@ -74,7 +74,7 @@ import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.java.util.emitter.service.ServiceEmitter; -import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordinator.CoordinatorOverlordServiceConfig; import org.apache.druid.server.metrics.NoopServiceEmitter; @@ -259,7 +259,7 @@ public MockTaskRunner get() EasyMock.createNiceMock(OverlordDutyExecutor.class), new TestDruidLeaderSelector(), EasyMock.createNiceMock(SegmentAllocationQueue.class), - EasyMock.createNiceMock(SegmentsMetadataCache.class), + EasyMock.createNiceMock(SegmentMetadataCache.class), EasyMock.createNiceMock(CompactionScheduler.class), new DefaultObjectMapper(), new NoopTaskContextEnricher() diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java index 3b9dc28f6d51..5379f7314580 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java @@ -88,7 +88,7 @@ import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.query.DirectQueryProcessingPool; import org.apache.druid.query.Druids; import org.apache.druid.query.QueryPlus; @@ -595,7 +595,7 @@ protected void makeToolboxFactory(TestUtils testUtils, ServiceEmitter emitter, b derby.metadataTablesConfigSupplier().get(), derbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), objectMapper, derby.metadataTablesConfigSupplier().get(), diff --git a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java index 35baccce327c..8379bd2c0ba7 100644 --- a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java +++ b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java @@ -42,8 +42,8 @@ import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.SqlSegmentsMetadataManagerProvider; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; -import org.apache.druid.metadata.segment.cache.SqlSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.server.audit.AuditManagerConfig; import org.apache.druid.server.audit.AuditSerdeHelper; import org.apache.druid.server.audit.SQLAuditManager; @@ -75,7 +75,7 @@ public void 
createBindingChoices(Binder binder, String defaultValue) PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentsMetadataManagerProvider.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManager.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManagerProvider.class), defaultValue); - PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentsMetadataCache.class), defaultValue); + PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentMetadataCache.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(SqlSegmentsMetadataTransactionFactory.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(IndexerMetadataStorageCoordinator.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageActionHandlerFactory.class), defaultValue); @@ -108,9 +108,9 @@ public void configure(Binder binder) .to(SQLMetadataRuleManagerProvider.class) .in(LazySingleton.class); - PolyBind.optionBinder(binder, Key.get(SegmentsMetadataCache.class)) + PolyBind.optionBinder(binder, Key.get(SegmentMetadataCache.class)) .addBinding(type) - .to(SqlSegmentsMetadataCache.class) + .to(HeapMemorySegmentMetadataCache.class) .in(LazySingleton.class); PolyBind.optionBinder(binder, Key.get(SqlSegmentsMetadataTransactionFactory.class)) diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index e63e42820f62..edf9e0087e5a 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -49,8 +49,8 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; +import org.apache.druid.metadata.segment.SegmentMetadataTransaction; import org.apache.druid.metadata.segment.SegmentsMetadataReadTransaction; -import org.apache.druid.metadata.segment.SegmentsMetadataTransaction; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; import org.apache.druid.segment.SegmentMetadata; import org.apache.druid.segment.SegmentSchemaMapping; @@ -260,7 +260,7 @@ public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interv } private SegmentTimeline getTimelineForIntervals( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final List intervals ) throws IOException { @@ -611,7 +611,7 @@ public SegmentIdWithShardSpec allocatePendingSegment( * @return List of inserted pending segment records */ private List upgradePendingSegmentsOverlappingWith( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, Set replaceSegments ) { @@ -646,7 +646,7 @@ private List upgradePendingSegmentsOverlappingWith( * @return Inserted pending segment records */ private List upgradePendingSegments( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, String datasource, Map replaceIntervalToMaxId ) @@ -731,7 +731,7 @@ private boolean shouldUpgradePendingSegment( @Nullable private SegmentIdWithShardSpec allocatePendingSegmentWithSegmentLineageCheck( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final 
String dataSource, final Interval interval, final SegmentCreateRequest createRequest, @@ -799,7 +799,7 @@ public SegmentTimeline getSegmentTimelineForAllocation( } private Map allocatePendingSegments( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final String dataSource, final Interval interval, final boolean skipSegmentLineageCheck, @@ -873,7 +873,7 @@ private Map allocatePendingSegment @Nullable private SegmentIdWithShardSpec allocatePendingSegment( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final String dataSource, final Interval interval, final SegmentCreateRequest createRequest, @@ -930,7 +930,7 @@ private SegmentIdWithShardSpec allocatePendingSegment( * Returns a map from sequenceName to segment id. */ private Map getExistingSegmentIdsSkipLineageCheck( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, Interval interval, String usedSegmentVersion, List requests @@ -963,7 +963,7 @@ private Map getExistingSegme * Returns a map from sequenceName to segment id. */ private Map getExistingSegmentIdsWithLineageCheck( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, Interval interval, String usedSegmentVersion, List requests @@ -1193,7 +1193,7 @@ private SegmentPublishResult commitAppendSegmentsAndMetadataInTransaction( } private Map createNewSegments( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, String dataSource, Interval interval, boolean skipSegmentLineageCheck, @@ -1287,7 +1287,7 @@ private Map createNewSegments( @Nullable private PendingSegmentRecord createNewPendingSegment( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, SegmentCreateRequest request, String dataSource, Interval interval, @@ -1400,7 +1400,7 @@ private PendingSegmentRecord createNewPendingSegment( */ @Nullable private SegmentIdWithShardSpec createNewPendingSegment( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final String dataSource, final Interval interval, final PartialShardSpec partialShardSpec, @@ -1527,7 +1527,7 @@ private SegmentIdWithShardSpec createNewPendingSegment( * @return a segment id that isn't already used by other unused segments */ private SegmentIdWithShardSpec getTrueAllocatedId( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, SegmentIdWithShardSpec allocatedId ) { @@ -1587,7 +1587,7 @@ private boolean shouldPersistSchema(SegmentSchemaMapping segmentSchemaMapping) } private void persistSchema( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final Set segments, final SegmentSchemaMapping segmentSchemaMapping ) throws JsonProcessingException @@ -1613,7 +1613,7 @@ private void persistSchema( } private Set insertSegments( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final Set segments, @Nullable final SegmentSchemaMapping segmentSchemaMapping ) throws Exception @@ -1673,7 +1673,7 @@ private Set insertSegments( * Creates new versions of segments appended while a REPLACE task was in progress. 
*/ private Set createNewIdsOfAppendSegmentsAfterReplace( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final Set replaceSegments, final Set locksHeldByReplaceTask ) @@ -1829,7 +1829,7 @@ private static Set findNonOvershadowedSegments(Set seg * @return Set of segments inserted */ private Set insertSegments( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, Set segments, @Nullable SegmentSchemaMapping segmentSchemaMapping, Map upgradeSegmentMetadata, @@ -1916,7 +1916,7 @@ private SegmentMetadata getSegmentMetadataFromSchemaMappingOrUpgradeMetadata( * {@link #MAX_NUM_SEGMENTS_TO_ANNOUNCE_AT_ONCE}. */ private void insertIntoUpgradeSegmentsTable( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, Map segmentToReplaceLock ) { @@ -1986,7 +1986,7 @@ private List retrieveSegmentsById( * @return Map from append Segment ID to REPLACE lock version */ private Map getAppendSegmentsCommittedDuringTask( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, String taskId ) { @@ -2035,7 +2035,7 @@ private Map getAppendSegmentsCommittedDuringTask( * Read dataSource metadata as bytes, from a specific handle. Returns null if there is no metadata. */ private @Nullable byte[] retrieveDataSourceMetadataWithHandleAsBytes( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final String dataSource ) { @@ -2065,7 +2065,7 @@ private Map getAppendSegmentsCommittedDuringTask( * @throws RuntimeException if state is unknown after this call */ protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final String dataSource, final DataSourceMetadata startMetadata, final DataSourceMetadata endMetadata @@ -2321,7 +2321,7 @@ public int removeDataSourceMetadataOlderThan(long timestamp, @NotNull Set retrieveUsedSegmentsForAllocation( - final SegmentsMetadataTransaction transaction, + final SegmentMetadataTransaction transaction, final String dataSource, final Interval interval ) @@ -2517,7 +2517,7 @@ public Map> retrieveUpgradedToSegmentIds( private T retryDatasourceTransaction( String dataSource, - SegmentsMetadataTransaction.Callback callback + SegmentMetadataTransaction.Callback callback ) { return transactionFactory.retryDatasourceTransaction(dataSource, callback); diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java similarity index 90% rename from server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java rename to server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java index c44c4793c825..655dd7f31f83 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataCachedTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java @@ -24,7 +24,7 @@ import org.apache.druid.error.InternalServerError; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.PendingSegmentRecord; -import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import 
org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.http.DataSegmentPlus; import org.apache.druid.timeline.DataSegment; @@ -42,14 +42,16 @@ import java.util.function.Function; /** - * A {@link SegmentsMetadataTransaction} that performs reads using the cache - * and sends writes first to the metadata store and then the cache (if the - * metadata store persist succeeds). + * A {@link SegmentMetadataTransaction} that reads only from the cache and sends + * writes to the metadata store. If the transaction succeeds, all the writes + * made to the metadata store are also committed to the cache in {@link #close()}. + * The cache is not updated right away in case the transaction needs to be + * rolled back. */ -class SqlSegmentsMetadataCachedTransaction implements SegmentsMetadataTransaction +class CachedSegmentMetadataTransaction implements SegmentMetadataTransaction { - private final SegmentsMetadataTransaction delegate; - private final SegmentsMetadataCache.DataSource metadataCache; + private final SegmentMetadataTransaction delegate; + private final SegmentMetadataCache.DataSource metadataCache; private final DruidLeaderSelector leaderSelector; private final int startTerm; @@ -57,11 +59,11 @@ class SqlSegmentsMetadataCachedTransaction implements SegmentsMetadataTransactio private final AtomicBoolean isRollingBack = new AtomicBoolean(false); private final AtomicBoolean isClosed = new AtomicBoolean(false); - private final List> pendingWrites = new ArrayList<>(); + private final List> pendingCacheWrites = new ArrayList<>(); - SqlSegmentsMetadataCachedTransaction( - SegmentsMetadataTransaction delegate, - SegmentsMetadataCache.DataSource metadataCache, + CachedSegmentMetadataTransaction( + SegmentMetadataTransaction delegate, + SegmentMetadataCache.DataSource metadataCache, DruidLeaderSelector leaderSelector ) { @@ -123,7 +125,7 @@ public void close() // Commit the changes to the cache try { - pendingWrites.forEach(action -> { + pendingCacheWrites.forEach(action -> { if (isLeaderWithSameTerm()) { action.accept(cacheWriter()); } else { @@ -349,8 +351,9 @@ private T performWriteAction(Function ac verifyStillLeaderWithSameTerm(); final T result = action.apply(delegate); - // TODO: Assume that the metadata write operation succeeded - pendingWrites.add(action::apply); + // Assume that the metadata write operation succeeded + // Do not update the cache just yet, add to the list of pending writes + pendingCacheWrites.add(action::apply); return result; } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransaction.java similarity index 91% rename from server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java rename to server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransaction.java index 84f49740d238..bee9f2f2f721 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransaction.java @@ -26,7 +26,7 @@ * the metadata store. A transaction is associated with a single instance of a * {@link Handle} and is meant to be short-lived. 
*/ -public interface SegmentsMetadataTransaction +public interface SegmentMetadataTransaction extends SegmentsMetadataReadTransaction, DatasourceSegmentMetadataWriter { /** @@ -37,6 +37,6 @@ public interface SegmentsMetadataTransaction @FunctionalInterface interface Callback { - T inTransaction(SegmentsMetadataTransaction transaction) throws Exception; + T inTransaction(SegmentMetadataTransaction transaction) throws Exception; } } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransaction.java similarity index 98% rename from server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java rename to server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransaction.java index c5bb8c5178b3..5207054b1a26 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransaction.java @@ -54,7 +54,11 @@ import java.util.Set; import java.util.stream.Collectors; -class SqlSegmentsMetadataTransaction implements SegmentsMetadataTransaction +/** + * Implementation of {@link SegmentMetadataTransaction} that reads from and + * writes to the SQL-based metadata store directly. + */ +class SqlSegmentMetadataTransaction implements SegmentMetadataTransaction { private static final int MAX_SEGMENTS_PER_BATCH = 100; @@ -67,7 +71,7 @@ class SqlSegmentsMetadataTransaction implements SegmentsMetadataTransaction private final SqlSegmentsMetadataQuery query; - SqlSegmentsMetadataTransaction( + SqlSegmentMetadataTransaction( String dataSource, Handle handle, TransactionStatus transactionStatus, diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java index ea069012ac27..00a230f7f297 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java @@ -24,13 +24,13 @@ import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.SQLMetadataConnector; -import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.skife.jdbi.v2.Handle; import org.skife.jdbi.v2.TransactionStatus; /** - * Factory for {@link SegmentsMetadataTransaction}s. If the - * {@link SegmentsMetadataCache} is enabled and ready, the transaction may + * Factory for {@link SegmentMetadataTransaction}s. If the + * {@link SegmentMetadataCache} is enabled and ready, the transaction may * read/write from the cache as applicable. *
    * This class serves as a wrapper over the {@link SQLMetadataConnector} to @@ -45,7 +45,7 @@ public class SqlSegmentsMetadataTransactionFactory private final MetadataStorageTablesConfig tablesConfig; private final SQLMetadataConnector connector; private final DruidLeaderSelector leaderSelector; - private final SegmentsMetadataCache segmentsMetadataCache; + private final SegmentMetadataCache segmentMetadataCache; @Inject public SqlSegmentsMetadataTransactionFactory( @@ -53,14 +53,14 @@ public SqlSegmentsMetadataTransactionFactory( MetadataStorageTablesConfig tablesConfig, SQLMetadataConnector connector, DruidLeaderSelector leaderSelector, - SegmentsMetadataCache segmentsMetadataCache + SegmentMetadataCache segmentMetadataCache ) { this.jsonMapper = jsonMapper; this.tablesConfig = tablesConfig; this.connector = connector; this.leaderSelector = leaderSelector; - this.segmentsMetadataCache = segmentsMetadataCache; + this.segmentMetadataCache = segmentMetadataCache; } public int getMaxRetries() @@ -74,14 +74,14 @@ public T inReadOnlyDatasourceTransaction( ) { return connector.inReadOnlyTransaction((handle, status) -> { - final SegmentsMetadataTransaction sqlTransaction + final SegmentMetadataTransaction sqlTransaction = createSqlTransaction(dataSource, handle, status); - if (segmentsMetadataCache.isReady()) { - final SegmentsMetadataCache.DataSource datasourceCache - = segmentsMetadataCache.getDatasource(dataSource); + if (segmentMetadataCache.isReady()) { + final SegmentMetadataCache.DataSource datasourceCache + = segmentMetadataCache.getDatasource(dataSource); final SegmentsMetadataReadTransaction cachedTransaction - = new SqlSegmentsMetadataCachedTransaction(sqlTransaction, datasourceCache, leaderSelector); + = new CachedSegmentMetadataTransaction(sqlTransaction, datasourceCache, leaderSelector); return datasourceCache.read(() -> executeRead(cachedTransaction, callback)); } else { @@ -92,19 +92,19 @@ public T inReadOnlyDatasourceTransaction( public T retryDatasourceTransaction( String dataSource, - SegmentsMetadataTransaction.Callback callback + SegmentMetadataTransaction.Callback callback ) { return connector.retryTransaction( (handle, status) -> { - final SegmentsMetadataTransaction sqlTransaction + final SegmentMetadataTransaction sqlTransaction = createSqlTransaction(dataSource, handle, status); - if (segmentsMetadataCache.isReady()) { - final SegmentsMetadataCache.DataSource datasourceCache - = segmentsMetadataCache.getDatasource(dataSource); - final SegmentsMetadataTransaction cachedTransaction - = new SqlSegmentsMetadataCachedTransaction(sqlTransaction, datasourceCache, leaderSelector); + if (segmentMetadataCache.isReady()) { + final SegmentMetadataCache.DataSource datasourceCache + = segmentMetadataCache.getDatasource(dataSource); + final SegmentMetadataTransaction cachedTransaction + = new CachedSegmentMetadataTransaction(sqlTransaction, datasourceCache, leaderSelector); return datasourceCache.write(() -> executeWrite(cachedTransaction, callback)); } else { @@ -116,13 +116,13 @@ public T retryDatasourceTransaction( ); } - private SegmentsMetadataTransaction createSqlTransaction( + private SegmentMetadataTransaction createSqlTransaction( String dataSource, Handle handle, TransactionStatus transactionStatus ) { - return new SqlSegmentsMetadataTransaction( + return new SqlSegmentMetadataTransaction( dataSource, handle, transactionStatus, @@ -133,8 +133,8 @@ private SegmentsMetadataTransaction createSqlTransaction( } private T executeWrite( - SegmentsMetadataTransaction transaction, 
- SegmentsMetadataTransaction.Callback callback + SegmentMetadataTransaction transaction, + SegmentMetadataTransaction.Callback callback ) throws Exception { try { diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java index 307b147cd054..632d36284e90 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/BaseCache.java @@ -26,7 +26,7 @@ /** * Cache with standard read/write locking. */ -public abstract class BaseCache implements SegmentsMetadataCache.DataSource +public abstract class BaseCache implements SegmentMetadataCache.DataSource { private final ReentrantReadWriteLock stateLock; @@ -66,7 +66,7 @@ public T withReadLock(Supplier action) } @Override - public T read(SegmentsMetadataCache.Action action) throws Exception + public T read(SegmentMetadataCache.Action action) throws Exception { stateLock.readLock().lock(); try { @@ -78,7 +78,7 @@ public T read(SegmentsMetadataCache.Action action) throws Exception } @Override - public T write(SegmentsMetadataCache.Action action) throws Exception + public T write(SegmentMetadataCache.Action action) throws Exception { stateLock.writeLock().lock(); try { diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java index 47ec5eb1b9b6..73cbe3f41794 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java @@ -43,7 +43,7 @@ import java.util.stream.Collectors; /** - * Datasource-level in-memory cache for segments and pending segments. + * In-memory cache for segments and pending segments of a single datasource. */ class DatasourceSegmentCache extends BaseCache { @@ -113,7 +113,8 @@ boolean shouldRefreshPendingSegment(PendingSegmentRecord record) /** * Adds or updates the given segment in the cache. */ - boolean addSegment(DataSegmentPlus segmentPlus) { + boolean addSegment(DataSegmentPlus segmentPlus) + { if (Boolean.TRUE.equals(segmentPlus.getUsed())) { return addUsedSegment(segmentPlus); } else { diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java similarity index 96% rename from server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java rename to server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java index fbbff73f1990..42c076bc83d3 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/SqlSegmentsMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java @@ -47,6 +47,7 @@ import org.skife.jdbi.v2.ResultIterator; import javax.annotation.Nullable; +import javax.annotation.concurrent.ThreadSafe; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; @@ -61,9 +62,14 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; -public class SqlSegmentsMetadataCache implements SegmentsMetadataCache +/** + * In-memory implementation of {@link SegmentMetadataCache}, with no persistence + * across restarts. 
+ */ +@ThreadSafe +public class HeapMemorySegmentMetadataCache implements SegmentMetadataCache { - private static final EmittingLogger log = new EmittingLogger(SqlSegmentsMetadataCache.class); + private static final EmittingLogger log = new EmittingLogger(HeapMemorySegmentMetadataCache.class); private static final String METRIC_PREFIX = "segment/metadataCache/"; private enum CacheState @@ -88,7 +94,7 @@ private enum CacheState private final AtomicReference pollFinishTime = new AtomicReference<>(); @Inject - public SqlSegmentsMetadataCache( + public HeapMemorySegmentMetadataCache( ObjectMapper jsonMapper, Supplier config, Supplier tablesConfig, @@ -241,11 +247,6 @@ private Map retrieveAllSegmentIds() final Map datasourceToSummary = new HashMap<>(); final AtomicInteger countOfRefreshedUnusedSegments = new AtomicInteger(0); - // TODO: Consider improving this because the number of unused segments can be very large - // Instead of polling all segments, we could just poll the used segments - // and then fire a smarter query to determine the max unused ID or something - // But it might be tricky - final String sql = StringUtils.format( "SELECT id, dataSource, used, used_status_last_updated FROM %s", getSegmentsTable() diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentMetadataCache.java similarity index 73% rename from server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java rename to server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentMetadataCache.java index 4164c3586cd9..66329d401674 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentsMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentMetadataCache.java @@ -23,19 +23,9 @@ import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; /** - * TODO: - * -[ ] Write unit test for DatasourceSegmentCache and SqlSegmentsMetadataCache - * - * -[ ] Wire up cache in OverlordCompactionScheduler and SqlSegmentsMetadataManager, - * otherwise we will end up having two copies of the segment timeline and stuff - * The timeline inside the cache can replace the SegmentTimeline of SqlSegmentsMetadataManager - * -[ ] Add transaction API to return timeline and/or timeline holders - * -[ ] Think about race conditions in the cache - leadership changes, multiple concurrent transactions - * -[ ] Write unit tests - * -[ ] Write integration tests - * -[ ] Write a benchmark + * Cache for metadata of pending segments and committed segments. 
*/ -public interface SegmentsMetadataCache +public interface SegmentMetadataCache { void start(); diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index b2f490e506e9..e3fd8acb7457 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -35,11 +35,11 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.metrics.StubServiceEmitter; -import org.apache.druid.metadata.segment.SegmentsMetadataTransaction; +import org.apache.druid.metadata.segment.SegmentMetadataTransaction; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; -import org.apache.druid.metadata.segment.cache.SegmentsMetadataCache; -import org.apache.druid.metadata.segment.cache.SqlSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.TestDataSource; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; @@ -99,7 +99,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule(); private TestDruidLeaderSelector leaderSelector; - private SegmentsMetadataCache segmentsMetadataCache; + private SegmentMetadataCache segmentMetadataCache; private StubServiceEmitter emitter; private SqlSegmentsMetadataTransactionFactory transactionFactory; private BlockingExecutorService cachePollExecutor; @@ -139,7 +139,7 @@ public void setUp() leaderSelector = new TestDruidLeaderSelector(); cachePollExecutor = new BlockingExecutorService("test-cache-poll-exec"); - segmentsMetadataCache = new SqlSegmentsMetadataCache( + segmentMetadataCache = new HeapMemorySegmentMetadataCache( mapper, () -> new SegmentsMetadataManagerConfig(null, true), derbyConnectorRule.metadataTablesConfigSupplier(), @@ -156,7 +156,7 @@ public void setUp() // Get the cache ready if required if (useSegmentCache) { - segmentsMetadataCache.start(); + segmentMetadataCache.start(); cachePollExecutor.finishNextPendingTask(); } @@ -165,7 +165,7 @@ public void setUp() derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, leaderSelector, - segmentsMetadataCache + segmentMetadataCache ) { @Override @@ -185,7 +185,7 @@ public int getMaxRetries() { @Override protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, String dataSource, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata @@ -201,7 +201,7 @@ protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( @After public void tearDown() { - segmentsMetadataCache.stop(); + segmentMetadataCache.stop(); leaderSelector.stopBeingLeader(); } @@ -767,7 +767,7 @@ public void testTransactionalAnnounceRetryAndSuccess() throws IOException 
derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ), mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), @@ -778,7 +778,7 @@ public void testTransactionalAnnounceRetryAndSuccess() throws IOException { @Override protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, String dataSource, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java index deb0baac13ad..1f0fda5a7b8d 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java @@ -28,9 +28,9 @@ import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; -import org.apache.druid.metadata.segment.SegmentsMetadataTransaction; +import org.apache.druid.metadata.segment.SegmentMetadataTransaction; import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentsMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.SchemaPayload; import org.apache.druid.segment.SchemaPayloadPlus; import org.apache.druid.segment.SegmentSchemaMapping; @@ -96,7 +96,7 @@ public void setUp() derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, new TestDruidLeaderSelector(), - new NoopSegmentsMetadataCache() + new NoopSegmentMetadataCache() ); coordinator = new IndexerSQLMetadataStorageCoordinator( transactionFactory, @@ -109,7 +109,7 @@ public void setUp() { @Override protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( - SegmentsMetadataTransaction transaction, + SegmentMetadataTransaction transaction, String dataSource, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata diff --git a/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java b/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java similarity index 94% rename from server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java rename to server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java index 22dd6b6c2894..5ef2c2511ae6 100644 --- a/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentsMetadataCache.java +++ b/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java @@ -19,7 +19,7 @@ package org.apache.druid.metadata.segment.cache; -public class NoopSegmentsMetadataCache implements SegmentsMetadataCache +public class NoopSegmentMetadataCache implements SegmentMetadataCache { @Override public void start() diff --git a/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java b/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java index 452d50cc9ddd..3717f1c14b12 100644 --- 
a/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/CreateDataSegments.java @@ -136,7 +136,8 @@ public CreateDataSegments upgradedFromSegment(String segmentId) return this; } - public DataSegmentPlus ofSizeInMb(long sizeMb) { + public DataSegmentPlus ofSizeInMb(long sizeMb) + { final DataSegment segment = eachOfSizeInMb(sizeMb).get(0); return new DataSegmentPlus( segment, From afe1b5fa90b42d159a0039148de4178583a2a58f Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Tue, 28 Jan 2025 20:20:04 +0530 Subject: [PATCH 06/11] More javadocs and cleanup --- .../MaterializedViewSupervisorTest.java | 4 ++-- .../DatasourceOptimizerTest.java | 4 ++-- .../common/actions/TaskActionTestKit.java | 4 ++-- .../common/task/IngestionTestBase.java | 4 ++-- .../overlord/TaskLockBoxConcurrencyTest.java | 4 ++-- .../indexing/overlord/TaskLockboxTest.java | 6 +++--- .../indexing/overlord/TaskQueueScaleTest.java | 4 ++-- .../SeekableStreamIndexTaskTestBase.java | 4 ++-- .../guice/SQLMetadataStorageDruidModule.java | 8 ++++---- .../IndexerSQLMetadataStorageCoordinator.java | 6 +++--- .../CachedSegmentMetadataTransaction.java | 7 +++++-- ... SqlSegmentMetadataTransactionFactory.java} | 16 ++++++++-------- .../segment/cache/DatasourceSegmentCache.java | 18 +++++++----------- ...dexerSQLMetadataStorageCoordinatorTest.java | 8 ++++---- ...torageCoordinatorSchemaPersistenceTest.java | 4 ++-- 15 files changed, 50 insertions(+), 51 deletions(-) rename server/src/main/java/org/apache/druid/metadata/segment/{SqlSegmentsMetadataTransactionFactory.java => SqlSegmentMetadataTransactionFactory.java} (90%) diff --git a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java index 4f600c369e94..6c39d21c1f59 100644 --- a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java +++ b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java @@ -46,7 +46,7 @@ import org.apache.druid.metadata.MetadataSupervisorManager; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; @@ -108,7 +108,7 @@ public void setUp() derbyConnector ); indexerMetadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java b/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java index 3a5bbf175823..999a38ffd3a0 100644 --- 
a/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java +++ b/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java @@ -42,7 +42,7 @@ import org.apache.druid.java.util.http.client.HttpClient; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.query.Query; import org.apache.druid.query.QueryRunnerFactoryConglomerate; @@ -115,7 +115,7 @@ public void setUp() throws Exception ); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( jsonMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java index 5202efc52318..036adf2e346f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java @@ -37,7 +37,7 @@ import org.apache.druid.metadata.SegmentsMetadataManagerConfig; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaCache; @@ -98,7 +98,7 @@ public void before() ); final ObjectMapper objectMapper = new TestUtils().getTestObjectMapper(); segmentSchemaManager = new SegmentSchemaManager(metadataStorageTablesConfig, objectMapper, testDerbyConnector); - final SqlSegmentsMetadataTransactionFactory transactionFactory = new SqlSegmentsMetadataTransactionFactory( + final SqlSegmentMetadataTransactionFactory transactionFactory = new SqlSegmentMetadataTransactionFactory( objectMapper, metadataStorageTablesConfig, testDerbyConnector, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index c9060fb9fa52..6d99240a61b0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -70,7 +70,7 @@ import org.apache.druid.metadata.SegmentsMetadataManagerConfig; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.DataSegmentsWithSchemas; 
import org.apache.druid.segment.IndexIO; @@ -150,7 +150,7 @@ public void setUpIngestionTestBase() throws IOException ); storageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnectorRule.getConnector(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java index a4134ba6a922..6436ad3d35ca 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java @@ -33,7 +33,7 @@ import org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaManager; @@ -83,7 +83,7 @@ public void setup() lockbox = new TaskLockbox( taskStorage, new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( objectMapper, derby.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java index 690f793b6280..8e72d21fda81 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java @@ -56,7 +56,7 @@ import org.apache.druid.metadata.LockFilterPolicy; import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; @@ -131,7 +131,7 @@ public void setup() EasyMock.replay(emitter); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( objectMapper, tablesConfig, derbyConnector, @@ -472,7 +472,7 @@ public void testSyncWithUnknownTaskTypesFromModuleNotLoaded() ); IndexerMetadataStorageCoordinator loadedMetadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( loadedMapper, derby.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java index d2e704175020..80026f137269 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java @@ -47,7 +47,7 @@ import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TaskLookup; import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; @@ -106,7 +106,7 @@ public void setUp() final ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); segmentSchemaManager = new SegmentSchemaManager(derbyConnectorRule.metadataTablesConfigSupplier().get(), jsonMapper, derbyConnectorRule.getConnector()); final IndexerSQLMetadataStorageCoordinator storageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( jsonMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnectorRule.getConnector(), diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java index 5379f7314580..f75f14f86770 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java @@ -87,7 +87,7 @@ import org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.query.DirectQueryProcessingPool; import org.apache.druid.query.Druids; @@ -590,7 +590,7 @@ protected void makeToolboxFactory(TestUtils testUtils, ServiceEmitter emitter, b ); segmentSchemaManager = new SegmentSchemaManager(derby.metadataTablesConfigSupplier().get(), objectMapper, derbyConnector); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( objectMapper, derby.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java index 8379bd2c0ba7..f6b9e6cd4592 100644 --- a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java +++ b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java @@ -41,7 +41,7 @@ import org.apache.druid.metadata.SegmentsMetadataManagerProvider; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.SqlSegmentsMetadataManagerProvider; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; 
import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache; import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.server.audit.AuditManagerConfig; @@ -76,7 +76,7 @@ public void createBindingChoices(Binder binder, String defaultValue) PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManager.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManagerProvider.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentMetadataCache.class), defaultValue); - PolyBind.createChoiceWithDefault(binder, prop, Key.get(SqlSegmentsMetadataTransactionFactory.class), defaultValue); + PolyBind.createChoiceWithDefault(binder, prop, Key.get(SqlSegmentMetadataTransactionFactory.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(IndexerMetadataStorageCoordinator.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageActionHandlerFactory.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageUpdaterJobHandler.class), defaultValue); @@ -113,9 +113,9 @@ public void configure(Binder binder) .to(HeapMemorySegmentMetadataCache.class) .in(LazySingleton.class); - PolyBind.optionBinder(binder, Key.get(SqlSegmentsMetadataTransactionFactory.class)) + PolyBind.optionBinder(binder, Key.get(SqlSegmentMetadataTransactionFactory.class)) .addBinding(type) - .to(SqlSegmentsMetadataTransactionFactory.class) + .to(SqlSegmentMetadataTransactionFactory.class) .in(LazySingleton.class); PolyBind.optionBinder(binder, Key.get(IndexerMetadataStorageCoordinator.class)) diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index edf9e0087e5a..5aedfb72dff4 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -51,7 +51,7 @@ import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; import org.apache.druid.metadata.segment.SegmentMetadataTransaction; import org.apache.druid.metadata.segment.SegmentsMetadataReadTransaction; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.segment.SegmentMetadata; import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.SegmentUtils; @@ -113,11 +113,11 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; private final boolean schemaPersistEnabled; - private final SqlSegmentsMetadataTransactionFactory transactionFactory; + private final SqlSegmentMetadataTransactionFactory transactionFactory; @Inject public IndexerSQLMetadataStorageCoordinator( - SqlSegmentsMetadataTransactionFactory transactionFactory, + SqlSegmentMetadataTransactionFactory transactionFactory, ObjectMapper jsonMapper, MetadataStorageTablesConfig dbTables, SQLMetadataConnector connector, diff --git a/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java index 655dd7f31f83..26cd0d9bd0d3 100644 --- 
a/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java @@ -46,7 +46,8 @@ * writes to the metadata store. If the transaction succeeds, all the writes * made to the metadata store are also committed to the cache in {@link #close()}. * The cache is not updated right away in case the transaction needs to be - * rolled back. + * rolled back. This is okay since we assume that a transaction does not read + * what it writes. */ class CachedSegmentMetadataTransaction implements SegmentMetadataTransaction { @@ -353,7 +354,9 @@ private T performWriteAction(Function ac // Assume that the metadata write operation succeeded // Do not update the cache just yet, add to the list of pending writes - pendingCacheWrites.add(action::apply); + pendingCacheWrites.add(writer -> { + T ignored = action.apply(writer); + }); return result; } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransactionFactory.java similarity index 90% rename from server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java rename to server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransactionFactory.java index 00a230f7f297..52cabaeb50cf 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentsMetadataTransactionFactory.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransactionFactory.java @@ -36,7 +36,7 @@ * This class serves as a wrapper over the {@link SQLMetadataConnector} to * perform transactions specific to segment metadata. 
*/ -public class SqlSegmentsMetadataTransactionFactory +public class SqlSegmentMetadataTransactionFactory { private static final int QUIET_RETRIES = 3; private static final int MAX_RETRIES = 10; @@ -48,7 +48,7 @@ public class SqlSegmentsMetadataTransactionFactory private final SegmentMetadataCache segmentMetadataCache; @Inject - public SqlSegmentsMetadataTransactionFactory( + public SqlSegmentMetadataTransactionFactory( ObjectMapper jsonMapper, MetadataStorageTablesConfig tablesConfig, SQLMetadataConnector connector, @@ -83,9 +83,9 @@ public T inReadOnlyDatasourceTransaction( final SegmentsMetadataReadTransaction cachedTransaction = new CachedSegmentMetadataTransaction(sqlTransaction, datasourceCache, leaderSelector); - return datasourceCache.read(() -> executeRead(cachedTransaction, callback)); + return datasourceCache.read(() -> executeReadAndClose(cachedTransaction, callback)); } else { - return executeRead(createSqlTransaction(dataSource, handle, status), callback); + return executeReadAndClose(createSqlTransaction(dataSource, handle, status), callback); } }); } @@ -106,9 +106,9 @@ public T retryDatasourceTransaction( final SegmentMetadataTransaction cachedTransaction = new CachedSegmentMetadataTransaction(sqlTransaction, datasourceCache, leaderSelector); - return datasourceCache.write(() -> executeWrite(cachedTransaction, callback)); + return datasourceCache.write(() -> executeWriteAndClose(cachedTransaction, callback)); } else { - return executeWrite(sqlTransaction, callback); + return executeWriteAndClose(sqlTransaction, callback); } }, QUIET_RETRIES, @@ -132,7 +132,7 @@ private SegmentMetadataTransaction createSqlTransaction( ); } - private T executeWrite( + private T executeWriteAndClose( SegmentMetadataTransaction transaction, SegmentMetadataTransaction.Callback callback ) throws Exception @@ -149,7 +149,7 @@ private T executeWrite( } } - private T executeRead( + private T executeReadAndClose( SegmentsMetadataReadTransaction transaction, SegmentsMetadataReadTransaction.Callback callback ) throws Exception diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java index 73cbe3f41794..8f52765edd15 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java @@ -48,13 +48,7 @@ class DatasourceSegmentCache extends BaseCache { private final String dataSource; - - /** - * Used to obtain the segment for a given ID so that it can be updated in the - * timeline. - */ private final Map idToUsedSegment = new HashMap<>(); - private final Set unusedSegmentIds = new HashSet<>(); /** @@ -62,6 +56,9 @@ class DatasourceSegmentCache extends BaseCache */ private final SegmentTimeline usedSegmentTimeline = SegmentTimeline.forSegments(Set.of()); + /** + * Map from interval to segment ID to pending segment record. + */ private final Map> intervalToPendingSegments = new HashMap<>(); @@ -74,9 +71,13 @@ class DatasourceSegmentCache extends BaseCache this.dataSource = dataSource; } + /** + * Removes all entries from the cache. 
+ */ void clear() { withWriteLock(() -> { + unusedSegmentIds.clear(); idToUsedSegment.values().forEach(s -> usedSegmentTimeline.remove(s.getDataSegment())); idToUsedSegment.clear(); intervalVersionToHighestUnusedPartitionNumber.clear(); @@ -549,9 +550,4 @@ private static String getId(DataSegment segment) { return segment.getId().toString(); } - - private static int nullSafeMax(Integer a, int b) - { - return (a == null || a < b) ? b : a; - } } diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index e3fd8acb7457..f7219100ab8a 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -36,7 +36,7 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.metrics.StubServiceEmitter; import org.apache.druid.metadata.segment.SegmentMetadataTransaction; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; @@ -101,7 +101,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata private TestDruidLeaderSelector leaderSelector; private SegmentMetadataCache segmentMetadataCache; private StubServiceEmitter emitter; - private SqlSegmentsMetadataTransactionFactory transactionFactory; + private SqlSegmentMetadataTransactionFactory transactionFactory; private BlockingExecutorService cachePollExecutor; private final boolean useSegmentCache; @@ -160,7 +160,7 @@ public void setUp() cachePollExecutor.finishNextPendingTask(); } - transactionFactory = new SqlSegmentsMetadataTransactionFactory( + transactionFactory = new SqlSegmentMetadataTransactionFactory( mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, @@ -762,7 +762,7 @@ public void testTransactionalAnnounceRetryAndSuccess() throws IOException final AtomicLong attemptCounter = new AtomicLong(); final IndexerSQLMetadataStorageCoordinator failOnceCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentsMetadataTransactionFactory( + new SqlSegmentMetadataTransactionFactory( mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java index 1f0fda5a7b8d..9bed46b2c3a9 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java @@ -29,7 +29,7 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.segment.SegmentMetadataTransaction; -import org.apache.druid.metadata.segment.SqlSegmentsMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; import 
org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.segment.SchemaPayload; import org.apache.druid.segment.SchemaPayloadPlus; @@ -91,7 +91,7 @@ public void setUp() CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = new CentralizedDatasourceSchemaConfig(); centralizedDatasourceSchemaConfig.setEnabled(true); - SqlSegmentsMetadataTransactionFactory transactionFactory = new SqlSegmentsMetadataTransactionFactory( + SqlSegmentMetadataTransactionFactory transactionFactory = new SqlSegmentMetadataTransactionFactory( mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnector, From 47424a8c54331c2b69a375fbd2ad52f9440cfd3f Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Tue, 28 Jan 2025 21:41:47 +0530 Subject: [PATCH 07/11] Fix some tests --- .../mysql/MySQLMetadataStorageModuleTest.java | 9 +++++++++ .../druid/timeline/SegmentTimelineTest.java | 16 +++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLMetadataStorageModuleTest.java b/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLMetadataStorageModuleTest.java index b1c4922ea643..0657194ef0c8 100644 --- a/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLMetadataStorageModuleTest.java +++ b/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLMetadataStorageModuleTest.java @@ -33,6 +33,9 @@ import org.apache.druid.guice.MetadataConfigModule; import org.apache.druid.guice.annotations.Json; import org.apache.druid.guice.security.EscalatorModule; +import org.apache.druid.java.util.common.concurrent.ScheduledExecutorFactory; +import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; +import org.apache.druid.java.util.common.lifecycle.Lifecycle; import org.apache.druid.java.util.emitter.core.NoopEmitter; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.junit.Assert; @@ -129,6 +132,12 @@ public ServiceEmitter getEmitter() { return new ServiceEmitter("test", "localhost", new NoopEmitter()); } + + @Provides + public ScheduledExecutorFactory getScheduledExecutorFactory(Lifecycle lifecycle) + { + return ScheduledExecutors.createFactory(lifecycle); + } } ) ); diff --git a/processing/src/test/java/org/apache/druid/timeline/SegmentTimelineTest.java b/processing/src/test/java/org/apache/druid/timeline/SegmentTimelineTest.java index 64f0ee776d26..d00188145238 100644 --- a/processing/src/test/java/org/apache/druid/timeline/SegmentTimelineTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/SegmentTimelineTest.java @@ -27,10 +27,10 @@ import java.util.Arrays; import java.util.Collections; +import java.util.Set; public class SegmentTimelineTest { - @Test public void testIsOvershadowed() { @@ -65,6 +65,20 @@ public void testIsOvershadowed() ); } + @Test + public void testAddRemoveSegment() + { + final DataSegment segment = createSegment("2022-01-01/P1D", "v1", 0, 1); + + final SegmentTimeline timeline = SegmentTimeline.forSegments(Set.of()); + timeline.add(segment); + Assert.assertEquals(1, timeline.getNumObjects()); + + timeline.remove(segment); + Assert.assertEquals(0, timeline.getNumObjects()); + Assert.assertTrue(timeline.isEmpty()); + } + private DataSegment createSegment(String interval, String version, int partitionNum, int totalNumPartitions) { return new 
DataSegment( From c0f4bad333d1b5c739becbf9e22662b3b8516725 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Wed, 29 Jan 2025 21:01:55 +0530 Subject: [PATCH 08/11] Enable more UTs to use segment metadata cache --- .../mysql/MySQLMetadataStorageModuleTest.java | 11 +- .../indexing/overlord/DruidOverlord.java | 3 +- .../actions/SegmentAllocateActionTest.java | 23 +- .../actions/SegmentAllocationQueueTest.java | 13 +- .../common/actions/TaskActionTestKit.java | 73 ++++-- .../guice/SQLMetadataStorageDruidModule.java | 15 +- .../IndexerSQLMetadataStorageCoordinator.java | 34 +-- .../CachedSegmentMetadataTransaction.java | 3 +- .../DatasourceSegmentMetadataReader.java | 3 +- ...va => SegmentMetadataReadTransaction.java} | 4 +- .../segment/SegmentMetadataTransaction.java | 2 +- .../SegmentMetadataTransactionFactory.java | 43 ++++ .../SqlSegmentMetadataTransaction.java | 17 +- .../SqlSegmentMetadataTransactionFactory.java | 25 +- .../segment/cache/DatasourceSegmentCache.java | 29 +-- .../cache/HeapMemorySegmentMetadataCache.java | 238 +++++++++++------- .../cache/NoopSegmentMetadataCache.java | 14 +- .../segment/cache/SegmentMetadataCache.java | 20 +- ...exerSQLMetadataStorageCoordinatorTest.java | 4 +- .../cache/DatasourceSegmentCacheTest.java | 21 +- .../org/apache/druid/cli/CliOverlord.java | 6 + 21 files changed, 373 insertions(+), 228 deletions(-) rename server/src/main/java/org/apache/druid/metadata/segment/{SegmentsMetadataReadTransaction.java => SegmentMetadataReadTransaction.java} (91%) create mode 100644 server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransactionFactory.java rename server/src/{test => main}/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java (89%) diff --git a/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLMetadataStorageModuleTest.java b/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLMetadataStorageModuleTest.java index 0657194ef0c8..fee116bfa3a0 100644 --- a/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLMetadataStorageModuleTest.java +++ b/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLMetadataStorageModuleTest.java @@ -26,6 +26,8 @@ import com.google.inject.Key; import com.google.inject.Module; import com.google.inject.Provides; +import org.apache.druid.client.indexing.IndexingService; +import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.guice.GuiceInjectors; import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.guice.JsonConfigurator; @@ -33,9 +35,6 @@ import org.apache.druid.guice.MetadataConfigModule; import org.apache.druid.guice.annotations.Json; import org.apache.druid.guice.security.EscalatorModule; -import org.apache.druid.java.util.common.concurrent.ScheduledExecutorFactory; -import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; -import org.apache.druid.java.util.common.lifecycle.Lifecycle; import org.apache.druid.java.util.emitter.core.NoopEmitter; import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.junit.Assert; @@ -134,9 +133,11 @@ public ServiceEmitter getEmitter() } @Provides - public ScheduledExecutorFactory getScheduledExecutorFactory(Lifecycle lifecycle) + @IndexingService + public DruidLeaderSelector getLeaderSelector() { - return ScheduledExecutors.createFactory(lifecycle); + // A provider for DruidLeaderSelector is 
needed by SqlSegmentMetadataTransactionFactory + return null; } } ) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java index 183e62a3b296..c0d3201a15da 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/DruidOverlord.java @@ -134,7 +134,6 @@ public void becomeLeader() // First add "half leader" services: everything required for APIs except the supervisor manager. // Then, become "half leader" so those APIs light up and supervisor initialization can proceed. - leaderLifecycle.addManagedInstance(segmentMetadataCache); leaderLifecycle.addManagedInstance(taskRunner); leaderLifecycle.addManagedInstance(taskQueue); leaderLifecycle.addHandler( @@ -142,6 +141,7 @@ public void becomeLeader() @Override public void start() { + segmentMetadataCache.becomeLeader(); segmentAllocationQueue.becomeLeader(); taskMaster.becomeHalfLeader(taskRunner, taskQueue); } @@ -149,6 +149,7 @@ public void start() @Override public void stop() { + segmentMetadataCache.stopBeingLeader(); taskMaster.stopBeingLeader(); segmentAllocationQueue.stopBeingLeader(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java index c538853daeac..af6406d4c8df 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java @@ -56,6 +56,7 @@ import org.joda.time.Period; import org.junit.After; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -93,28 +94,32 @@ public class SegmentAllocateActionTest private SegmentAllocationQueue allocationQueue; - @Parameterized.Parameters(name = "granularity = {0}, useBatch = {1}, skipSegmentPayloadFetchForAllocation = {2}") + @Parameterized.Parameters(name = "lock={0}, useBatch={1}, useSegmentCache={2}, reduceMetadataIO={3}") public static Iterable constructorFeeder() { + // reduceMetadataIO is applicable only with batch allocation return ImmutableList.of( - new Object[]{LockGranularity.SEGMENT, true, true}, - new Object[]{LockGranularity.SEGMENT, true, false}, - new Object[]{LockGranularity.SEGMENT, false, false}, - new Object[]{LockGranularity.TIME_CHUNK, true, true}, - new Object[]{LockGranularity.TIME_CHUNK, true, false}, - new Object[]{LockGranularity.TIME_CHUNK, false, false} + new Object[]{LockGranularity.SEGMENT, true, true, true}, + new Object[]{LockGranularity.SEGMENT, true, false, false}, + new Object[]{LockGranularity.SEGMENT, false, false, false}, + new Object[]{LockGranularity.TIME_CHUNK, true, true, true}, + new Object[]{LockGranularity.TIME_CHUNK, true, false, false}, + new Object[]{LockGranularity.TIME_CHUNK, false, false, false}, + new Object[]{LockGranularity.TIME_CHUNK, false, true, false} ); } public SegmentAllocateActionTest( LockGranularity lockGranularity, boolean useBatch, + boolean useSegmentMetadataCache, boolean skipSegmentPayloadFetchForAllocation ) { this.lockGranularity = lockGranularity; this.useBatch = useBatch; this.taskActionTestKit.setSkipSegmentPayloadFetchForAllocation(skipSegmentPayloadFetchForAllocation); + 
this.taskActionTestKit.setUseSegmentMetadataCache(useSegmentMetadataCache); } @Before @@ -141,9 +146,7 @@ public void tearDown() @Test public void testManySegmentsSameInterval_noLineageCheck() throws Exception { - if (lockGranularity == LockGranularity.SEGMENT) { - return; - } + Assume.assumeTrue(lockGranularity == LockGranularity.TIME_CHUNK); final Task task = NoopTask.create(); final int numTasks = 2; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocationQueueTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocationQueueTest.java index 9aa9f4c9d263..0b39b2cbef3e 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocationQueueTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocationQueueTest.java @@ -59,15 +59,22 @@ public class SegmentAllocationQueueTest private final boolean reduceMetadataIO; - @Parameterized.Parameters(name = "reduceMetadataIO = {0}") + @Parameterized.Parameters(name = "reduceMetadataIO = {0}, useSegmentCache = {1}") public static Object[][] getTestParameters() { - return new Object[][]{{true}, {false}}; + return new Object[][]{ + {true, true}, + {true, false}, + {false, true}, + {false, false} + }; } - public SegmentAllocationQueueTest(boolean reduceMetadataIO) + public SegmentAllocationQueueTest(boolean reduceMetadataIO, boolean useSegmentMetadataCache) { this.reduceMetadataIO = reduceMetadataIO; + + taskActionTestKit.setUseSegmentMetadataCache(useSegmentMetadataCache); } @Before diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java index 036adf2e346f..79c140323a21 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java @@ -38,11 +38,14 @@ import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; +import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaCache; import org.apache.druid.segment.metadata.SegmentSchemaManager; +import org.apache.druid.server.coordinator.simulate.BlockingExecutorService; import org.apache.druid.server.coordinator.simulate.TestDruidLeaderSelector; +import org.apache.druid.server.coordinator.simulate.WrappingScheduledExecutorService; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.easymock.EasyMock; import org.joda.time.Period; @@ -60,7 +63,10 @@ public class TaskActionTestKit extends ExternalResource private TaskActionToolbox taskActionToolbox; private SegmentSchemaManager segmentSchemaManager; private SegmentSchemaCache segmentSchemaCache; + private SegmentMetadataCache segmentMetadataCache; + private BlockingExecutorService metadataCachePollExec; + private boolean useSegmentMetadataCache = new SegmentsMetadataManagerConfig(null, null).isUseCache(); private boolean skipSegmentPayloadFetchForAllocation = new 
TaskLockConfig().isBatchAllocationReduceMetadataIO(); public TaskLockbox getTaskLockbox() @@ -88,6 +94,11 @@ public void setSkipSegmentPayloadFetchForAllocation(boolean skipSegmentPayloadFe this.skipSegmentPayloadFetchForAllocation = skipSegmentPayloadFetchForAllocation; } + public void setUseSegmentMetadataCache(boolean useSegmentMetadataCache) + { + this.useSegmentMetadataCache = useSegmentMetadataCache; + } + @Override public void before() { @@ -98,20 +109,8 @@ public void before() ); final ObjectMapper objectMapper = new TestUtils().getTestObjectMapper(); segmentSchemaManager = new SegmentSchemaManager(metadataStorageTablesConfig, objectMapper, testDerbyConnector); - final SqlSegmentMetadataTransactionFactory transactionFactory = new SqlSegmentMetadataTransactionFactory( - objectMapper, - metadataStorageTablesConfig, - testDerbyConnector, - new TestDruidLeaderSelector(), - new NoopSegmentMetadataCache() - ) - { - @Override - public int getMaxRetries() - { - return 2; - } - }; + + final SqlSegmentMetadataTransactionFactory transactionFactory = setupTransactionFactory(objectMapper); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( transactionFactory, objectMapper, @@ -133,12 +132,6 @@ public int getMaxRetries() ); final TaskLockConfig taskLockConfig = new TaskLockConfig() { - @Override - public boolean isBatchSegmentAllocation() - { - return true; - } - @Override public long getBatchAllocationWaitTime() { @@ -175,6 +168,41 @@ public boolean isBatchAllocationReduceMetadataIO() testDerbyConnector.createConfigTable(); testDerbyConnector.createTaskTables(); testDerbyConnector.createAuditTable(); + + segmentMetadataCache.start(); + segmentMetadataCache.becomeLeader(); + metadataCachePollExec.finishNextPendingTask(); + } + + private SqlSegmentMetadataTransactionFactory setupTransactionFactory(ObjectMapper objectMapper) + { + metadataCachePollExec = new BlockingExecutorService("test-cache-poll-exec"); + segmentMetadataCache = new HeapMemorySegmentMetadataCache( + objectMapper, + Suppliers.ofInstance(new SegmentsMetadataManagerConfig(Period.seconds(1), useSegmentMetadataCache)), + Suppliers.ofInstance(metadataStorageTablesConfig), + testDerbyConnector, + (poolSize, name) -> new WrappingScheduledExecutorService(name, metadataCachePollExec, false), + new NoopServiceEmitter() + ); + + final TestDruidLeaderSelector leaderSelector = new TestDruidLeaderSelector(); + leaderSelector.becomeLeader(); + + return new SqlSegmentMetadataTransactionFactory( + objectMapper, + metadataStorageTablesConfig, + testDerbyConnector, + leaderSelector, + segmentMetadataCache + ) + { + @Override + public int getMaxRetries() + { + return 2; + } + }; } @Override @@ -187,5 +215,8 @@ public void after() metadataStorageCoordinator = null; segmentsMetadataManager = null; taskActionToolbox = null; + segmentMetadataCache.stopBeingLeader(); + segmentMetadataCache.stop(); + useSegmentMetadataCache = new SegmentsMetadataManagerConfig(null, null).isUseCache(); } } diff --git a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java index f6b9e6cd4592..0d8c64b21ca7 100644 --- a/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java +++ b/server/src/main/java/org/apache/druid/guice/SQLMetadataStorageDruidModule.java @@ -41,8 +41,9 @@ import org.apache.druid.metadata.SegmentsMetadataManagerProvider; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import 
org.apache.druid.metadata.SqlSegmentsMetadataManagerProvider; +import org.apache.druid.metadata.segment.SegmentMetadataTransactionFactory; import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; -import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache; +import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.server.audit.AuditManagerConfig; import org.apache.druid.server.audit.AuditSerdeHelper; @@ -75,8 +76,7 @@ public void createBindingChoices(Binder binder, String defaultValue) PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentsMetadataManagerProvider.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManager.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataRuleManagerProvider.class), defaultValue); - PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentMetadataCache.class), defaultValue); - PolyBind.createChoiceWithDefault(binder, prop, Key.get(SqlSegmentMetadataTransactionFactory.class), defaultValue); + PolyBind.createChoiceWithDefault(binder, prop, Key.get(SegmentMetadataTransactionFactory.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(IndexerMetadataStorageCoordinator.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageActionHandlerFactory.class), defaultValue); PolyBind.createChoiceWithDefault(binder, prop, Key.get(MetadataStorageUpdaterJobHandler.class), defaultValue); @@ -108,12 +108,11 @@ public void configure(Binder binder) .to(SQLMetadataRuleManagerProvider.class) .in(LazySingleton.class); - PolyBind.optionBinder(binder, Key.get(SegmentMetadataCache.class)) - .addBinding(type) - .to(HeapMemorySegmentMetadataCache.class) - .in(LazySingleton.class); + binder.bind(SegmentMetadataCache.class) + .to(NoopSegmentMetadataCache.class) + .in(LazySingleton.class); - PolyBind.optionBinder(binder, Key.get(SqlSegmentMetadataTransactionFactory.class)) + PolyBind.optionBinder(binder, Key.get(SegmentMetadataTransactionFactory.class)) .addBinding(type) .to(SqlSegmentMetadataTransactionFactory.class) .in(LazySingleton.class); diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 5aedfb72dff4..6dc8f26235fa 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -47,11 +47,10 @@ import org.apache.druid.java.util.common.jackson.JacksonUtils; import org.apache.druid.java.util.common.lifecycle.LifecycleStart; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.segment.DatasourceSegmentMetadataWriter; +import org.apache.druid.metadata.segment.SegmentMetadataReadTransaction; import org.apache.druid.metadata.segment.SegmentMetadataTransaction; -import org.apache.druid.metadata.segment.SegmentsMetadataReadTransaction; -import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; +import org.apache.druid.metadata.segment.SegmentMetadataTransactionFactory; import org.apache.druid.segment.SegmentMetadata; import org.apache.druid.segment.SegmentSchemaMapping; import 
org.apache.druid.segment.SegmentUtils; @@ -92,7 +91,6 @@ import java.util.Objects; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -113,11 +111,11 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; private final boolean schemaPersistEnabled; - private final SqlSegmentMetadataTransactionFactory transactionFactory; + private final SegmentMetadataTransactionFactory transactionFactory; @Inject public IndexerSQLMetadataStorageCoordinator( - SqlSegmentMetadataTransactionFactory transactionFactory, + SegmentMetadataTransactionFactory transactionFactory, ObjectMapper jsonMapper, MetadataStorageTablesConfig dbTables, SQLMetadataConnector connector, @@ -183,7 +181,7 @@ private Set doRetrieveUsedSegments( final SegmentTimeline timeline = getTimelineForIntervals(transaction, intervals); return timeline.findNonOvershadowedObjectsInInterval(Intervals.ETERNITY, Partitions.ONLY_COMPLETE); } else { - return asSet(() -> transaction.findUsedSegmentsOverlappingAnyOf(intervals)); + return transaction.findUsedSegmentsOverlappingAnyOf(intervals); } } ); @@ -262,21 +260,11 @@ public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interv private SegmentTimeline getTimelineForIntervals( final SegmentMetadataTransaction transaction, final List intervals - ) throws IOException - { - try (final CloseableIterator iterator - = transaction.findUsedSegmentsOverlappingAnyOf(intervals)) { - return SegmentTimeline.forSegments(iterator); - } - } - - private static Set asSet(Supplier> iteratorSupplier) throws IOException + ) { - try (CloseableIterator iterator = iteratorSupplier.get()) { - final Set retVal = new HashSet<>(); - iterator.forEachRemaining(retVal::add); - return retVal; - } + return SegmentTimeline.forSegments( + transaction.findUsedSegmentsOverlappingAnyOf(intervals) + ); } @Override @@ -1963,7 +1951,7 @@ private void insertIntoUpgradeSegmentsTable( } private List retrieveSegmentsById( - SegmentsMetadataReadTransaction transaction, + SegmentMetadataReadTransaction transaction, Set segmentIds ) { @@ -2525,7 +2513,7 @@ private T retryDatasourceTransaction( private T inReadOnlyDatasourceTransaction( String dataSource, - SegmentsMetadataReadTransaction.Callback callback + SegmentMetadataReadTransaction.Callback callback ) { return transactionFactory.inReadOnlyDatasourceTransaction(dataSource, callback); diff --git a/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java index 26cd0d9bd0d3..37192ace350c 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/CachedSegmentMetadataTransaction.java @@ -22,7 +22,6 @@ import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.error.DruidException; import org.apache.druid.error.InternalServerError; -import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; @@ -175,7 +174,7 @@ public List findSegmentsWithSchema(Set segmentIds) } @Override - 
public CloseableIterator findUsedSegmentsOverlappingAnyOf(List intervals) + public Set findUsedSegmentsOverlappingAnyOf(List intervals) { return cacheReader().findUsedSegmentsOverlappingAnyOf(intervals); } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java index 890753749d16..54bee4ff00e1 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/DatasourceSegmentMetadataReader.java @@ -19,7 +19,6 @@ package org.apache.druid.metadata.segment; -import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.http.DataSegmentPlus; @@ -60,7 +59,7 @@ public interface DatasourceSegmentMetadataReader /** * Finds used segments that overlap with any of the given intervals. */ - CloseableIterator findUsedSegmentsOverlappingAnyOf(List intervals); + Set findUsedSegmentsOverlappingAnyOf(List intervals); List findUsedSegments(Set segmentIds); diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataReadTransaction.java similarity index 91% rename from server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java rename to server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataReadTransaction.java index 1e1a4838ca9f..cb02f7c8f38a 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SegmentsMetadataReadTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataReadTransaction.java @@ -28,7 +28,7 @@ * the metadata store. A transaction is associated with a single instance of a * {@link Handle} and is meant to be short-lived. */ -public interface SegmentsMetadataReadTransaction +public interface SegmentMetadataReadTransaction extends DatasourceSegmentMetadataReader, Closeable { /** @@ -45,7 +45,7 @@ public interface SegmentsMetadataReadTransaction @FunctionalInterface interface Callback { - T inTransaction(SegmentsMetadataReadTransaction transaction) throws Exception; + T inTransaction(SegmentMetadataReadTransaction transaction) throws Exception; } } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransaction.java index bee9f2f2f721..2aef63f1925e 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransaction.java @@ -27,7 +27,7 @@ * {@link Handle} and is meant to be short-lived. */ public interface SegmentMetadataTransaction - extends SegmentsMetadataReadTransaction, DatasourceSegmentMetadataWriter + extends SegmentMetadataReadTransaction, DatasourceSegmentMetadataWriter { /** * Marks this transaction to be rolled back. 
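For readers following the new transaction API: callers obtain a read-only or retryable read-write transaction from the factory and pass a callback that runs against it. The snippet below is a minimal illustrative sketch, not part of this patch; it assumes the SegmentMetadataTransactionFactory and SegmentMetadataReadTransaction types shown in the surrounding hunks, and "wikipedia" is just a placeholder datasource name.

    // Sketch only: fetch used segments for one datasource through the factory.
    // When the segment metadata cache is enabled and synced, the factory wraps the
    // SQL transaction in a CachedSegmentMetadataTransaction so this read is served
    // from the in-memory cache; otherwise it reads directly from the metadata store.
    Set<DataSegment> findUsedSegments(SegmentMetadataTransactionFactory transactionFactory)
    {
      return transactionFactory.inReadOnlyDatasourceTransaction(
          "wikipedia",
          transaction -> transaction.findUsedSegmentsOverlappingAnyOf(List.of(Intervals.ETERNITY))
      );
    }

The callback interface is functional, so a lambda suffices; the factory closes the transaction and releases the underlying handle once the callback returns.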
diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransactionFactory.java b/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransactionFactory.java new file mode 100644 index 000000000000..33e72d1cb52b --- /dev/null +++ b/server/src/main/java/org/apache/druid/metadata/segment/SegmentMetadataTransactionFactory.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.metadata.segment; + +/** + * Factory for {@link SegmentMetadataTransaction}s. + */ +public interface SegmentMetadataTransactionFactory +{ + /** + * Creates and executes a new read-only transaction for the given datasource. + */ + T inReadOnlyDatasourceTransaction( + String dataSource, + SegmentMetadataReadTransaction.Callback callback + ); + + /** + * Creates and executes a new read-write transaction for the given datasource. + * The implementation may retry the transaction until it succeeds. + */ + T retryDatasourceTransaction( + String dataSource, + SegmentMetadataTransaction.Callback callback + ); +} diff --git a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransaction.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransaction.java index 5207054b1a26..1067d583ee6d 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransaction.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransaction.java @@ -115,7 +115,10 @@ public Set findExistingSegmentIds(Set segments) final Set existingSegmentIds = new HashSet<>(); final String sql = "SELECT id FROM %s WHERE id in (%s)"; - List> partitions = Lists.partition(new ArrayList<>(segments), MAX_SEGMENTS_PER_BATCH); + List> partitions = Lists.partition( + new ArrayList<>(segments), + MAX_SEGMENTS_PER_BATCH + ); for (List segmentList : partitions) { String segmentIds = segmentList.stream().map( segment -> "'" + StringUtils.escapeSql(segment.getId().toString()) + "'" @@ -144,9 +147,17 @@ public SegmentId findHighestUnusedSegmentId(Interval interval, String version) } @Override - public CloseableIterator findUsedSegmentsOverlappingAnyOf(List intervals) + public Set findUsedSegmentsOverlappingAnyOf(List intervals) { - return query.retrieveUsedSegments(dataSource, intervals); + try (CloseableIterator iterator + = query.retrieveUsedSegments(dataSource, intervals)) { + final Set segments = new HashSet<>(); + iterator.forEachRemaining(segments::add); + return segments; + } + catch (IOException e) { + throw InternalServerError.exception(e, "Error while fetching segments overlapping intervals[%s].", intervals); + } } @Override diff --git 
a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransactionFactory.java b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransactionFactory.java index 52cabaeb50cf..83fe5b2a91d4 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransactionFactory.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/SqlSegmentMetadataTransactionFactory.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.inject.Inject; +import org.apache.druid.client.indexing.IndexingService; import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.SQLMetadataConnector; @@ -36,7 +37,7 @@ * This class serves as a wrapper over the {@link SQLMetadataConnector} to * perform transactions specific to segment metadata. */ -public class SqlSegmentMetadataTransactionFactory +public class SqlSegmentMetadataTransactionFactory implements SegmentMetadataTransactionFactory { private static final int QUIET_RETRIES = 3; private static final int MAX_RETRIES = 10; @@ -52,7 +53,7 @@ public SqlSegmentMetadataTransactionFactory( ObjectMapper jsonMapper, MetadataStorageTablesConfig tablesConfig, SQLMetadataConnector connector, - DruidLeaderSelector leaderSelector, + @IndexingService DruidLeaderSelector leaderSelector, SegmentMetadataCache segmentMetadataCache ) { @@ -68,19 +69,20 @@ public int getMaxRetries() return MAX_RETRIES; } + @Override public T inReadOnlyDatasourceTransaction( String dataSource, - SegmentsMetadataReadTransaction.Callback callback + SegmentMetadataReadTransaction.Callback callback ) { return connector.inReadOnlyTransaction((handle, status) -> { final SegmentMetadataTransaction sqlTransaction = createSqlTransaction(dataSource, handle, status); - if (segmentMetadataCache.isReady()) { + if (segmentMetadataCache.isEnabled()) { final SegmentMetadataCache.DataSource datasourceCache = segmentMetadataCache.getDatasource(dataSource); - final SegmentsMetadataReadTransaction cachedTransaction + final SegmentMetadataReadTransaction cachedTransaction = new CachedSegmentMetadataTransaction(sqlTransaction, datasourceCache, leaderSelector); return datasourceCache.read(() -> executeReadAndClose(cachedTransaction, callback)); @@ -90,6 +92,7 @@ public T inReadOnlyDatasourceTransaction( }); } + @Override public T retryDatasourceTransaction( String dataSource, SegmentMetadataTransaction.Callback callback @@ -100,7 +103,7 @@ public T retryDatasourceTransaction( final SegmentMetadataTransaction sqlTransaction = createSqlTransaction(dataSource, handle, status); - if (segmentMetadataCache.isReady()) { + if (segmentMetadataCache.isEnabled()) { final SegmentMetadataCache.DataSource datasourceCache = segmentMetadataCache.getDatasource(dataSource); final SegmentMetadataTransaction cachedTransaction @@ -124,11 +127,7 @@ private SegmentMetadataTransaction createSqlTransaction( { return new SqlSegmentMetadataTransaction( dataSource, - handle, - transactionStatus, - connector, - tablesConfig, - jsonMapper + handle, transactionStatus, connector, tablesConfig, jsonMapper ); } @@ -150,8 +149,8 @@ private T executeWriteAndClose( } private T executeReadAndClose( - SegmentsMetadataReadTransaction transaction, - SegmentsMetadataReadTransaction.Callback callback + SegmentMetadataReadTransaction transaction, + SegmentMetadataReadTransaction.Callback callback ) throws Exception { try (transaction) { diff --git 
a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java index 8f52765edd15..051244a6d7fa 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCache.java @@ -20,8 +20,6 @@ package org.apache.druid.metadata.segment.cache; import org.apache.druid.error.DruidException; -import org.apache.druid.java.util.common.CloseableIterators; -import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.http.DataSegmentPlus; @@ -32,7 +30,6 @@ import org.joda.time.Interval; import javax.annotation.Nullable; -import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -256,14 +253,12 @@ public SegmentId findHighestUnusedSegmentId(Interval interval, String version) } @Override - public CloseableIterator findUsedSegmentsOverlappingAnyOf(List intervals) + public Set findUsedSegmentsOverlappingAnyOf(List intervals) { - return CloseableIterators.withEmptyBaggage( - findUsedSegmentsPlusOverlappingAnyOf(intervals) - .stream() - .map(DataSegmentPlus::getDataSegment) - .iterator() - ); + return findUsedSegmentsPlusOverlappingAnyOf(intervals) + .stream() + .map(DataSegmentPlus::getDataSegment) + .collect(Collectors.toSet()); } @Override @@ -408,18 +403,12 @@ public int insertSegmentsWithMetadata(Set segments) public int markSegmentsWithinIntervalAsUnused(Interval interval, DateTime updateTime) { int updatedCount = 0; - try (CloseableIterator segmentIterator - = findUsedSegmentsOverlappingAnyOf(List.of(interval))) { - while (segmentIterator.hasNext()) { - boolean updated = addUnusedSegmentId(segmentIterator.next().getId()); - if (updated) { - ++updatedCount; - } + for (DataSegment segment : findUsedSegmentsOverlappingAnyOf(List.of(interval))) { + boolean updated = addUnusedSegmentId(segment.getId()); + if (updated) { + ++updatedCount; } } - catch (IOException e) { - throw DruidException.defensive("Error while updating segments in cache"); - } return updatedCount; } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java index 42c076bc83d3..3f79eb489f41 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java @@ -47,6 +47,7 @@ import org.skife.jdbi.v2.ResultIterator; import javax.annotation.Nullable; +import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.ThreadSafe; import java.io.IOException; import java.sql.ResultSet; @@ -58,13 +59,11 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; /** - * In-memory implementation of {@link SegmentMetadataCache}, with no persistence - * across restarts. + * In-memory implementation of {@link SegmentMetadataCache}. 
*/ @ThreadSafe public class HeapMemorySegmentMetadataCache implements SegmentMetadataCache @@ -74,20 +73,22 @@ public class HeapMemorySegmentMetadataCache implements SegmentMetadataCache private enum CacheState { - STOPPED, STARTING, READY + STOPPED, STANDBY, SYNC_PENDING, SYNC_STARTED, READY } private final ObjectMapper jsonMapper; private final Duration pollDuration; private final boolean isCacheEnabled; - private final Supplier tablesConfig; + private final MetadataStorageTablesConfig tablesConfig; private final SQLMetadataConnector connector; private final ScheduledExecutorService pollExecutor; private final ServiceEmitter emitter; - private final AtomicReference currentCacheState - = new AtomicReference<>(CacheState.STOPPED); + private final Object cacheStateLock = new Object(); + + @GuardedBy("cacheStateLock") + private volatile CacheState currentCacheState = CacheState.STOPPED; private final ConcurrentHashMap datasourceToSegmentCache = new ConcurrentHashMap<>(); @@ -106,43 +107,66 @@ public HeapMemorySegmentMetadataCache( this.jsonMapper = jsonMapper; this.isCacheEnabled = config.get().isUseCache(); this.pollDuration = config.get().getPollDuration().toStandardDuration(); - this.tablesConfig = tablesConfig; + this.tablesConfig = tablesConfig.get(); this.connector = connector; - this.pollExecutor = isCacheEnabled ? executorFactory.create(1, "SegmentsMetadataCache-%s") : null; + this.pollExecutor = isCacheEnabled ? executorFactory.create(1, "SegmentMetadataCache-%s") : null; this.emitter = emitter; } @Override @LifecycleStart - public synchronized void start() + public void start() { - if (isCacheEnabled && currentCacheState.compareAndSet(CacheState.STOPPED, CacheState.STARTING)) { - // Clean up any stray entries in the cache left over due to race conditions - tearDown(); - pollExecutor.schedule(this::pollMetadataStore, pollDuration.getMillis(), TimeUnit.MILLISECONDS); + synchronized (cacheStateLock) { + if (isCacheEnabled && currentCacheState == CacheState.STOPPED) { + log.info("Starting poll of metadata store. Cache is now in STANDBY mode."); + pollExecutor.schedule(this::pollMetadataStore, pollDuration.getMillis(), TimeUnit.MILLISECONDS); + currentCacheState = CacheState.STANDBY; + } } } - /** - * This method is called only when leadership is lost or when the service is - * being stopped. Any transaction that is in progress when this method is - * invoked will fail. - */ @Override @LifecycleStop - public synchronized void stop() + public void stop() { - if (isCacheEnabled) { - currentCacheState.set(CacheState.STOPPED); - tearDown(); + synchronized (cacheStateLock) { + if (isCacheEnabled) { + log.info("Stopping poll of metadata store. Cache is now STOPPED."); + currentCacheState = CacheState.STOPPED; + pollExecutor.shutdownNow(); + tearDown(); + } + } + } + + @Override + public void becomeLeader() + { + synchronized (cacheStateLock) { + if (isCacheEnabled) { + log.info("We are now leader. Waiting to sync latest updates from metadata store."); + currentCacheState = CacheState.SYNC_PENDING; + } + } + } + + @Override + public void stopBeingLeader() + { + synchronized (cacheStateLock) { + if (isCacheEnabled) { + log.info("Not leader anymore. 
Cache is now in STANDBY mode."); + currentCacheState = CacheState.STANDBY; + } } } @Override - public boolean isReady() + public boolean isEnabled() { - return currentCacheState.get() == CacheState.READY; + return isCacheEnabled; } @Override @@ -159,14 +183,47 @@ private DatasourceSegmentCache getCacheForDatasource(String dataSource) private void verifyCacheIsReady() { - if (!isReady()) { - throw DruidException.defensive("Segment metadata cache is not ready yet."); + synchronized (cacheStateLock) { + switch (currentCacheState) { + case STOPPED: + case STANDBY: + throw DruidException.defensive("Segment metadata cache has not been started."); + case SYNC_PENDING: + case SYNC_STARTED: + waitForCacheToFinishWarmup(); + verifyCacheIsReady(); + case READY: + // Cache is now ready for use + } } } - private boolean isStopped() + private void waitForCacheToFinishWarmup() { - return currentCacheState.get() == CacheState.STOPPED; + synchronized (cacheStateLock) { + while (currentCacheState == CacheState.SYNC_PENDING + || currentCacheState == CacheState.SYNC_STARTED) { + try { + cacheStateLock.wait(); + } + catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + } + + private void markCacheAsReadyIfLeader() + { + synchronized (cacheStateLock) { + if (currentCacheState == CacheState.SYNC_STARTED) { + log.info("Sync has finished. Cache is now READY to serve requests."); + currentCacheState = CacheState.READY; + + // State has changed from STARTING to READY, notify waiting threads + cacheStateLock.notifyAll(); + } + } } private void tearDown() @@ -178,63 +235,38 @@ private void tearDown() private void pollMetadataStore() { final Stopwatch sincePollStart = Stopwatch.createStarted(); - if (isStopped()) { - tearDown(); - return; + + synchronized (cacheStateLock) { + if (currentCacheState == CacheState.SYNC_PENDING) { + log.info("Started sync of latest updates from metadata store."); + currentCacheState = CacheState.SYNC_STARTED; + } } final Map datasourceToSummary = retrieveAllSegmentIds(); - - if (isStopped()) { - tearDown(); - return; - } + log.info("Found segments for datasources: %s", datasourceToSummary); removeUnknownDatasources(datasourceToSummary); datasourceToSummary.forEach(this::removeUnknownSegmentIdsFromCache); + datasourceToSummary.forEach( (datasource, summary) -> getCacheForDatasource(datasource) .resetMaxUnusedIds(summary.intervalVersionToMaxUnusedPartition) ); - if (isStopped()) { - tearDown(); - return; - } - - final int countOfRefreshedUsedSegments = datasourceToSummary.entrySet().stream().mapToInt( - entry -> retrieveAndRefreshUsedSegments( - entry.getKey(), - entry.getValue().segmentIdsToRefresh - ) - ).sum(); - if (countOfRefreshedUsedSegments > 0) { - log.info( - "Refreshed total [%d] used segments from metadata store.", - countOfRefreshedUsedSegments - ); - } - - if (isStopped()) { - tearDown(); - return; - } + datasourceToSummary.forEach(this::retrieveAndRefreshUsedSegments); retrieveAndRefreshAllPendingSegments(); emitMetric("poll/time", sincePollStart.millisElapsed()); pollFinishTime.set(DateTimes.nowUtc()); - if (isStopped()) { - tearDown(); - } else { - currentCacheState.compareAndSet(CacheState.STARTING, CacheState.READY); + markCacheAsReadyIfLeader(); - // Schedule the next poll - final long nextPollDelay = Math.max(pollDuration.getMillis() - sincePollStart.millisElapsed(), 0); - pollExecutor.schedule(this::pollMetadataStore, nextPollDelay, TimeUnit.MILLISECONDS); - } + // Schedule the next poll + final long nextPollDelay = 
Math.max(pollDuration.getMillis() - sincePollStart.millisElapsed(), 0); + pollExecutor.schedule(this::pollMetadataStore, nextPollDelay, TimeUnit.MILLISECONDS); } /** @@ -245,11 +277,10 @@ private void pollMetadataStore() private Map retrieveAllSegmentIds() { final Map datasourceToSummary = new HashMap<>(); - final AtomicInteger countOfRefreshedUnusedSegments = new AtomicInteger(0); final String sql = StringUtils.format( "SELECT id, dataSource, used, used_status_last_updated FROM %s", - getSegmentsTable() + tablesConfig.getSegmentsTable() ); connector.inReadOnlyTransaction((handle, status) -> { @@ -276,8 +307,7 @@ private Map retrieveAllSegmentIds() if (segmentId != null) { if (cache.addUnusedSegmentId(segmentId)) { - countOfRefreshedUnusedSegments.incrementAndGet(); - emitDatasourceMetric(record.dataSource, "refreshed/unused", 1); + summary.numUnusedSegmentsRefreshed++; } final int partitionNum = segmentId.getPartitionNum(); @@ -299,25 +329,26 @@ private Map retrieveAllSegmentIds() } }); - if (countOfRefreshedUnusedSegments.get() > 0) { - log.info("Refreshed total [%d] unused segments from metadata store.", countOfRefreshedUnusedSegments.get()); - } - return datasourceToSummary; } - private int retrieveAndRefreshUsedSegments( + private void retrieveAndRefreshUsedSegments( String dataSource, - Set segmentIdsToRefresh + DatasourceSegmentSummary summary ) { + if (summary.segmentIdsToRefresh.isEmpty()) { + return; + } + final DatasourceSegmentCache cache = getCacheForDatasource(dataSource); int numUpdatedUsedSegments = 0; + try ( CloseableIterator iterator = connector.inReadOnlyTransaction( (handle, status) -> SqlSegmentsMetadataQuery - .forHandle(handle, connector, tablesConfig.get(), jsonMapper) - .retrieveSegmentsByIdIterator(dataSource, segmentIdsToRefresh) + .forHandle(handle, connector, tablesConfig, jsonMapper) + .retrieveSegmentsByIdIterator(dataSource, summary.segmentIdsToRefresh) ) ) { while (iterator.hasNext()) { @@ -332,7 +363,12 @@ private int retrieveAndRefreshUsedSegments( } emitDatasourceMetric(dataSource, "refresh/used", numUpdatedUsedSegments); - return numUpdatedUsedSegments; + if (numUpdatedUsedSegments > 0) { + log.info( + "Refreshed [%d] used segments for datasource[%s] from metadata store.", + numUpdatedUsedSegments, dataSource + ); + } } private void retrieveAndRefreshAllPendingSegments() @@ -340,9 +376,10 @@ private void retrieveAndRefreshAllPendingSegments() final String sql = StringUtils.format( "SELECT payload, sequence_name, sequence_prev_id, upgraded_from_segment_id" + " task_allocator_id, created_date FROM %1$s", - tablesConfig.get().getPendingSegmentsTable() + tablesConfig.getPendingSegmentsTable() ); + final Map datasourceToUpdatedCount = new HashMap<>(); connector.inReadOnlyTransaction( (handle, status) -> handle .createQuery(sql) @@ -352,16 +389,22 @@ private void retrieveAndRefreshAllPendingSegments() final PendingSegmentRecord record = PendingSegmentRecord.fromResultSet(r, jsonMapper); final DatasourceSegmentCache cache = getCacheForDatasource(record.getId().getDataSource()); - if (cache.shouldRefreshPendingSegment(record)) { - cache.insertPendingSegment(record, false); + if (cache.shouldRefreshPendingSegment(record) + && cache.insertPendingSegment(record, false)) { + datasourceToUpdatedCount.merge(record.getId().getDataSource(), 1, Integer::sum); } - - return 0; } catch (Exception e) { - return 1; + log.makeAlert(e, "Error retrieving pending segments from metadata store.").emit(); + return 0; } - }) + + return 0; + }).list() + ); + 
datasourceToUpdatedCount.forEach( + (dataSource, updatedCount) -> + log.info("Refreshed [%d] pending segments for datasource[%s].", updatedCount, dataSource) ); } @@ -373,7 +416,10 @@ private void removeUnknownDatasources(Map data .filter(ds -> !datasourceToSummary.containsKey(ds)) .collect(Collectors.toSet()); - datasourcesNotInMetadataStore.forEach(datasourceToSegmentCache::remove); + if (!datasourcesNotInMetadataStore.isEmpty()) { + datasourcesNotInMetadataStore.forEach(datasourceToSegmentCache::remove); + log.info("Removed unknown datasources[%s] from cache.", datasourcesNotInMetadataStore); + } } /** @@ -397,11 +443,6 @@ private void removeUnknownSegmentIdsFromCache( } } - private String getSegmentsTable() - { - return tablesConfig.get().getSegmentsTable(); - } - private void emitMetric(String metric, long value) { emitter.emit( @@ -464,6 +505,17 @@ private static class DatasourceSegmentSummary final Set persistedSegmentIds = new HashSet<>(); final Set segmentIdsToRefresh = new HashSet<>(); final Map> intervalVersionToMaxUnusedPartition = new HashMap<>(); + int numUnusedSegmentsRefreshed = 0; + + @Override + public String toString() + { + return "DatasourceSegmentSummary{" + + "persistedSegmentIds=" + persistedSegmentIds + + ", segmentIdsToRefresh=" + segmentIdsToRefresh + + ", intervalVersionToMaxUnusedPartition=" + intervalVersionToMaxUnusedPartition + + '}'; + } } } diff --git a/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java similarity index 89% rename from server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java rename to server/src/main/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java index 5ef2c2511ae6..0b344aa26f9c 100644 --- a/server/src/test/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/NoopSegmentMetadataCache.java @@ -34,7 +34,19 @@ public void stop() } @Override - public boolean isReady() + public void becomeLeader() + { + + } + + @Override + public void stopBeingLeader() + { + + } + + @Override + public boolean isEnabled() { return false; } diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentMetadataCache.java index 66329d401674..96b8fd69758e 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/SegmentMetadataCache.java @@ -27,14 +27,30 @@ */ public interface SegmentMetadataCache { + /** + * Starts the cache on service start. + */ void start(); + /** + * Stops the cache on service stop. + */ void stop(); /** - * @return true if the cache is enabled and ready for reading and writing. + * Refreshes the cache once the service is elected leader. + */ + void becomeLeader(); + + /** + * Notifies the cache that the service has lost leadership. 
+ */ + void stopBeingLeader(); + + /** + * @return true if the cache is enabled */ - boolean isReady(); + boolean isEnabled(); DataSource getDatasource(String dataSource); diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index f7219100ab8a..394f4a6d7dc2 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -141,7 +141,7 @@ public void setUp() cachePollExecutor = new BlockingExecutorService("test-cache-poll-exec"); segmentMetadataCache = new HeapMemorySegmentMetadataCache( mapper, - () -> new SegmentsMetadataManagerConfig(null, true), + () -> new SegmentsMetadataManagerConfig(null, useSegmentCache), derbyConnectorRule.metadataTablesConfigSupplier(), derbyConnector, (corePoolSize, nameFormat) -> new WrappingScheduledExecutorService( @@ -157,6 +157,7 @@ public void setUp() // Get the cache ready if required if (useSegmentCache) { segmentMetadataCache.start(); + segmentMetadataCache.becomeLeader(); cachePollExecutor.finishNextPendingTask(); } @@ -201,6 +202,7 @@ protected DataStoreMetadataUpdateResult updateDataSourceMetadataWithHandle( @After public void tearDown() { + segmentMetadataCache.stopBeingLeader(); segmentMetadataCache.stop(); leaderSelector.stopBeingLeader(); } diff --git a/server/src/test/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCacheTest.java b/server/src/test/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCacheTest.java index e1f3c35609fc..4ecc731ace45 100644 --- a/server/src/test/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCacheTest.java +++ b/server/src/test/java/org/apache/druid/metadata/segment/cache/DatasourceSegmentCacheTest.java @@ -19,12 +19,10 @@ package org.apache.druid.metadata.segment.cache; -import com.google.common.collect.ImmutableList; import org.apache.druid.error.DruidException; import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; -import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.server.coordinator.CreateDataSegments; import org.apache.druid.server.http.DataSegmentPlus; import org.apache.druid.timeline.DataSegment; @@ -36,7 +34,6 @@ import org.junit.Test; import org.junit.function.ThrowingRunnable; -import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; @@ -114,9 +111,9 @@ public void testAddUsedSegment() Assert.assertEquals(Set.of(segmentId), cache.findUsedSegmentIdsOverlapping(interval)); Assert.assertEquals(Set.of(segmentId), cache.findUsedSegmentIdsOverlapping(Intervals.ETERNITY)); - Assert.assertEquals(List.of(segment), asList(cache.findUsedSegmentsOverlappingAnyOf(List.of()))); - Assert.assertEquals(List.of(segment), asList(cache.findUsedSegmentsOverlappingAnyOf(List.of(interval)))); - Assert.assertEquals(List.of(segment), asList(cache.findUsedSegmentsOverlappingAnyOf(List.of(Intervals.ETERNITY)))); + Assert.assertEquals(Set.of(segment), cache.findUsedSegmentsOverlappingAnyOf(List.of())); + Assert.assertEquals(Set.of(segment), cache.findUsedSegmentsOverlappingAnyOf(List.of(interval))); + Assert.assertEquals(Set.of(segment), cache.findUsedSegmentsOverlappingAnyOf(List.of(Intervals.ETERNITY))); 
Assert.assertEquals(Set.of(segmentPlus), cache.findUsedSegmentsPlusOverlappingAnyOf(List.of())); Assert.assertEquals(Set.of(segmentPlus), cache.findUsedSegmentsPlusOverlappingAnyOf(List.of(interval))); @@ -154,7 +151,7 @@ public void testAddUnusedSegment() Assert.assertNull(cache.findUsedSegment(segmentId.toString())); Assert.assertTrue(cache.findUsedSegments(Set.of(segmentId.toString())).isEmpty()); Assert.assertTrue(cache.findUsedSegmentIdsOverlapping(segment.getInterval()).isEmpty()); - Assert.assertTrue(asList(cache.findUsedSegmentsOverlappingAnyOf(List.of())).isEmpty()); + Assert.assertTrue(cache.findUsedSegmentsOverlappingAnyOf(List.of()).isEmpty()); Assert.assertTrue(cache.findUsedSegmentsPlusOverlappingAnyOf(List.of()).isEmpty()); Assert.assertEquals(Set.of(segmentId.toString()), cache.findExistingSegmentIds(Set.of(segment))); @@ -271,16 +268,6 @@ public void testOnlyResetUpdatesHighestId() Assert.assertNull(cache.findHighestUnusedSegmentId(segment.getInterval(), segment.getVersion())); } - private static List asList(CloseableIterator iterator) - { - try (iterator) { - return ImmutableList.copyOf(iterator); - } - catch (IOException e) { - throw new RuntimeException(e); - } - } - private static CreateDataSegments createUsedSegment() { return CreateDataSegments.ofDatasource(WIKI).markUsed(); diff --git a/services/src/main/java/org/apache/druid/cli/CliOverlord.java b/services/src/main/java/org/apache/druid/cli/CliOverlord.java index 81be83947cd8..18213730b54d 100644 --- a/services/src/main/java/org/apache/druid/cli/CliOverlord.java +++ b/services/src/main/java/org/apache/druid/cli/CliOverlord.java @@ -110,6 +110,8 @@ import org.apache.druid.metadata.SegmentsMetadataManager; import org.apache.druid.metadata.SegmentsMetadataManagerProvider; import org.apache.druid.metadata.input.InputSourceModule; +import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.query.lookup.LookupSerdeModule; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; @@ -228,6 +230,10 @@ public void configure(Binder binder) JsonConfigProvider.bind(binder, "druid.indexer.task.default", DefaultTaskConfig.class); binder.bind(RetryPolicyFactory.class).in(LazySingleton.class); + binder.bind(SegmentMetadataCache.class) + .to(HeapMemorySegmentMetadataCache.class) + .in(ManageLifecycle.class); + binder.bind(DruidOverlord.class).in(ManageLifecycle.class); binder.bind(TaskMaster.class).in(ManageLifecycle.class); binder.bind(TaskCountStatsProvider.class).to(TaskMaster.class); From 271a34812fdb24bac414c6b946810fdc1410fb99 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Wed, 29 Jan 2025 21:14:17 +0530 Subject: [PATCH 09/11] Remove forbidden APIs --- .../segment/cache/HeapMemorySegmentMetadataCache.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java index 3f79eb489f41..239d299905bb 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java @@ -21,6 +21,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Supplier; +import 
com.google.errorprone.annotations.ThreadSafe; +import com.google.errorprone.annotations.concurrent.GuardedBy; import com.google.inject.Inject; import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.DateTimes; @@ -47,8 +49,6 @@ import org.skife.jdbi.v2.ResultIterator; import javax.annotation.Nullable; -import javax.annotation.concurrent.GuardedBy; -import javax.annotation.concurrent.ThreadSafe; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; From 12dc6f272aced857bdbc7c384c6f2fb1324349b0 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Thu, 30 Jan 2025 08:11:14 +0530 Subject: [PATCH 10/11] Run all ITs with cache enabled --- .../cases/cluster/Common/environment-configs/overlord.env | 3 +++ integration-tests/docker/environment-configs/overlord | 2 ++ 2 files changed, 5 insertions(+) diff --git a/integration-tests-ex/cases/cluster/Common/environment-configs/overlord.env b/integration-tests-ex/cases/cluster/Common/environment-configs/overlord.env index 1e73199ee58c..0d92fe9e09d4 100644 --- a/integration-tests-ex/cases/cluster/Common/environment-configs/overlord.env +++ b/integration-tests-ex/cases/cluster/Common/environment-configs/overlord.env @@ -28,3 +28,6 @@ druid_indexer_storage_type=metadata druid_indexer_runner_type=remote druid_auth_basic_common_cacheDirectory=/tmp/authCache/overlord druid_server_https_crlPath=/tls/revocations.crl + +druid_segments_manager_pollDuration=PT5S +druid_segments_manager_useCache=true diff --git a/integration-tests/docker/environment-configs/overlord b/integration-tests/docker/environment-configs/overlord index d6f7f01303e6..1f3e7fcb602e 100644 --- a/integration-tests/docker/environment-configs/overlord +++ b/integration-tests/docker/environment-configs/overlord @@ -33,3 +33,5 @@ druid_indexer_storage_type=metadata druid_indexer_runner_type=remote druid_auth_basic_common_cacheDirectory=/tmp/authCache/overlord druid_server_https_crlPath=/tls/revocations.crl +druid_segments_manager_pollDuration=PT2S +druid_segments_manager_useCache=true From 08dc96f2d8bb64456cf94aba83c2dc8b5c2e73bc Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Thu, 30 Jan 2025 12:53:53 +0530 Subject: [PATCH 11/11] Allow ingestion tests to use segment metadata cache --- .../common/task/IngestionTestBase.java | 59 +++++++++++++++-- ...stractParallelIndexSupervisorTaskTest.java | 10 +++ .../SinglePhaseParallelIndexingTest.java | 25 ++++---- .../cache/HeapMemorySegmentMetadataCache.java | 64 +++++++++++-------- 4 files changed, 111 insertions(+), 47 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 6d99240a61b0..c21075b8473d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; +import com.google.common.base.Suppliers; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import org.apache.druid.data.input.InputFormat; @@ -63,6 +64,7 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; 
import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.SQLMetadataConnector; @@ -71,7 +73,9 @@ import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.metadata.segment.SqlSegmentMetadataTransactionFactory; +import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache; import org.apache.druid.metadata.segment.cache.NoopSegmentMetadataCache; +import org.apache.druid.metadata.segment.cache.SegmentMetadataCache; import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9Factory; @@ -92,6 +96,7 @@ import org.apache.druid.server.security.AuthTestUtils; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.DataSegment; +import org.joda.time.Period; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -121,6 +126,7 @@ public abstract class IngestionTestBase extends InitializedNullHandlingTest protected final TestUtils testUtils = new TestUtils(); private final ObjectMapper objectMapper = testUtils.getTestObjectMapper(); + private final boolean useSegmentMetadataCache; private SegmentCacheManagerFactory segmentCacheManagerFactory; private TaskStorage taskStorage; private IndexerSQLMetadataStorageCoordinator storageCoordinator; @@ -129,8 +135,20 @@ public abstract class IngestionTestBase extends InitializedNullHandlingTest private File baseDir; private SupervisorManager supervisorManager; private TestDataSegmentKiller dataSegmentKiller; + private SegmentMetadataCache segmentMetadataCache; protected File reportsFile; + protected IngestionTestBase() + { + this(false); + } + + protected IngestionTestBase(final boolean useSegmentMetadataCache) + { + this.useSegmentMetadataCache = useSegmentMetadataCache; + } + + @Before public void setUpIngestionTestBase() throws IOException { @@ -142,6 +160,7 @@ public void setUpIngestionTestBase() throws IOException connector.createTaskTables(); connector.createSegmentSchemasTable(); connector.createSegmentTable(); + connector.createPendingSegmentsTable(); taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null)); SegmentSchemaManager segmentSchemaManager = new SegmentSchemaManager( derbyConnectorRule.metadataTablesConfigSupplier().get(), @@ -150,13 +169,7 @@ public void setUpIngestionTestBase() throws IOException ); storageCoordinator = new IndexerSQLMetadataStorageCoordinator( - new SqlSegmentMetadataTransactionFactory( - objectMapper, - derbyConnectorRule.metadataTablesConfigSupplier().get(), - derbyConnectorRule.getConnector(), - new TestDruidLeaderSelector(), - new NoopSegmentMetadataCache() - ), + createTransactionFactory(), objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), derbyConnectorRule.getConnector(), @@ -177,12 +190,17 @@ public void setUpIngestionTestBase() throws IOException segmentCacheManagerFactory = new SegmentCacheManagerFactory(TestIndex.INDEX_IO, getObjectMapper()); reportsFile = temporaryFolder.newFile(); dataSegmentKiller = new TestDataSegmentKiller(); + + segmentMetadataCache.start(); + segmentMetadataCache.becomeLeader(); } @After public void tearDownIngestionTestBase() { temporaryFolder.delete(); + segmentMetadataCache.stopBeingLeader(); + segmentMetadataCache.stop(); } public TestLocalTaskActionClientFactory createActionClientFactory() @@ -296,6 +314,33 @@ 
public TaskToolbox createTaskToolbox(TaskConfig config, Task task, SupervisorMan .build(); } + private SqlSegmentMetadataTransactionFactory createTransactionFactory() + { + if (useSegmentMetadataCache) { + segmentMetadataCache = new HeapMemorySegmentMetadataCache( + objectMapper, + Suppliers.ofInstance(new SegmentsMetadataManagerConfig(Period.millis(10), true)), + derbyConnectorRule.metadataTablesConfigSupplier(), + derbyConnectorRule.getConnector(), + ScheduledExecutors::fixed, + NoopServiceEmitter.instance() + ); + } else { + segmentMetadataCache = new NoopSegmentMetadataCache(); + } + + final TestDruidLeaderSelector leaderSelector = new TestDruidLeaderSelector(); + leaderSelector.becomeLeader(); + + return new SqlSegmentMetadataTransactionFactory( + objectMapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + derbyConnectorRule.getConnector(), + leaderSelector, + segmentMetadataCache + ); + } + public IndexIO getIndexIO() { return testUtils.getTestIndexIO(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index eb470709405b..c599854055f2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -213,6 +213,16 @@ protected AbstractParallelIndexSupervisorTaskTest( double transientApiCallFailureRate ) { + this(transientTaskFailureRate, transientApiCallFailureRate, false); + } + + protected AbstractParallelIndexSupervisorTaskTest( + double transientTaskFailureRate, + double transientApiCallFailureRate, + boolean useSegmentMetadataCache + ) + { + super(useSegmentMetadataCache); this.transientTaskFailureRate = transientTaskFailureRate; this.transientApiCallFailureRate = transientApiCallFailureRate; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java index b51224908644..790a57786e13 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java @@ -63,6 +63,7 @@ import org.joda.time.Interval; import org.junit.After; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -92,13 +93,13 @@ public class SinglePhaseParallelIndexingTest extends AbstractParallelIndexSuperv @Rule public ExpectedException expectedException = ExpectedException.none(); - @Parameterized.Parameters(name = "{0}, useInputFormatApi={1}") + @Parameterized.Parameters(name = "{0}, useInputFormatApi={1}, useSegmentCache={2}") public static Iterable constructorFeeder() { return ImmutableList.of( - new Object[]{LockGranularity.TIME_CHUNK, false}, - new Object[]{LockGranularity.TIME_CHUNK, true}, - new Object[]{LockGranularity.SEGMENT, true} + new Object[]{LockGranularity.TIME_CHUNK, false, false}, + new Object[]{LockGranularity.TIME_CHUNK, true, true}, + new Object[]{LockGranularity.SEGMENT, true, false} ); } @@ 
-110,9 +111,13 @@ public static Iterable constructorFeeder() private File inputDir; - public SinglePhaseParallelIndexingTest(LockGranularity lockGranularity, boolean useInputFormatApi) + public SinglePhaseParallelIndexingTest( + LockGranularity lockGranularity, + boolean useInputFormatApi, + boolean useSegmentMetadataCache + ) { - super(DEFAULT_TRANSIENT_TASK_FAILURE_RATE, DEFAULT_TRANSIENT_API_FAILURE_RATE); + super(DEFAULT_TRANSIENT_TASK_FAILURE_RATE, DEFAULT_TRANSIENT_API_FAILURE_RATE, useSegmentMetadataCache); this.lockGranularity = lockGranularity; this.useInputFormatApi = useInputFormatApi; } @@ -378,9 +383,7 @@ public void testGetRunningTaskReports() throws Exception @Test public void testRunInParallelIngestNullColumn() { - if (!useInputFormatApi) { - return; - } + Assume.assumeTrue(useInputFormatApi); // Ingest all data. final List dimensionSchemas = DimensionsSpec.getDefaultSchemas( Arrays.asList("ts", "unknownDim", "dim") @@ -430,9 +433,7 @@ public void testRunInParallelIngestNullColumn() @Test public void testRunInParallelIngestNullColumn_storeEmptyColumnsOff_shouldNotStoreEmptyColumns() { - if (!useInputFormatApi) { - return; - } + Assume.assumeTrue(useInputFormatApi); // Ingest all data. final List dimensionSchemas = DimensionsSpec.getDefaultSchemas( Arrays.asList("ts", "unknownDim", "dim") diff --git a/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java b/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java index 239d299905bb..a8272fc9d09f 100644 --- a/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java +++ b/server/src/main/java/org/apache/druid/metadata/segment/cache/HeapMemorySegmentMetadataCache.java @@ -190,7 +190,7 @@ private void verifyCacheIsReady() throw DruidException.defensive("Segment metadata cache has not been started."); case SYNC_PENDING: case SYNC_STARTED: - waitForCacheToFinishWarmup(); + waitForCacheToFinishSync(); verifyCacheIsReady(); case READY: // Cache is now ready for use @@ -198,9 +198,10 @@ private void verifyCacheIsReady() } } - private void waitForCacheToFinishWarmup() + private void waitForCacheToFinishSync() { synchronized (cacheStateLock) { + log.info("Waiting for cache to finish sync with metadata store."); while (currentCacheState == CacheState.SYNC_PENDING || currentCacheState == CacheState.SYNC_STARTED) { try { @@ -210,6 +211,7 @@ private void waitForCacheToFinishWarmup() throw new RuntimeException(e); } } + log.info("Wait complete. 
Cache is now in state[%s].", currentCacheState); } } @@ -235,38 +237,44 @@ private void tearDown() private void pollMetadataStore() { final Stopwatch sincePollStart = Stopwatch.createStarted(); - - synchronized (cacheStateLock) { - if (currentCacheState == CacheState.SYNC_PENDING) { - log.info("Started sync of latest updates from metadata store."); - currentCacheState = CacheState.SYNC_STARTED; + try { + synchronized (cacheStateLock) { + if (currentCacheState == CacheState.SYNC_PENDING) { + log.info("Started sync of latest updates from metadata store."); + currentCacheState = CacheState.SYNC_STARTED; + } } - } - - final Map datasourceToSummary = retrieveAllSegmentIds(); - log.info("Found segments for datasources: %s", datasourceToSummary); - removeUnknownDatasources(datasourceToSummary); - datasourceToSummary.forEach(this::removeUnknownSegmentIdsFromCache); + final Map datasourceToSummary = retrieveAllSegmentIds(); - datasourceToSummary.forEach( - (datasource, summary) -> - getCacheForDatasource(datasource) - .resetMaxUnusedIds(summary.intervalVersionToMaxUnusedPartition) - ); + removeUnknownDatasources(datasourceToSummary); + datasourceToSummary.forEach(this::removeUnknownSegmentIdsFromCache); - datasourceToSummary.forEach(this::retrieveAndRefreshUsedSegments); + datasourceToSummary.forEach( + (datasource, summary) -> + getCacheForDatasource(datasource) + .resetMaxUnusedIds(summary.intervalVersionToMaxUnusedPartition) + ); - retrieveAndRefreshAllPendingSegments(); + datasourceToSummary.forEach(this::retrieveAndRefreshUsedSegments); - emitMetric("poll/time", sincePollStart.millisElapsed()); - pollFinishTime.set(DateTimes.nowUtc()); + retrieveAndRefreshAllPendingSegments(); - markCacheAsReadyIfLeader(); + final long pollDurationMillis = sincePollStart.millisElapsed(); + emitMetric("poll/time", pollDurationMillis); + pollFinishTime.set(DateTimes.nowUtc()); - // Schedule the next poll - final long nextPollDelay = Math.max(pollDuration.getMillis() - sincePollStart.millisElapsed(), 0); - pollExecutor.schedule(this::pollMetadataStore, nextPollDelay, TimeUnit.MILLISECONDS); + markCacheAsReadyIfLeader(); + } + catch (Throwable t) { + log.error(t, "Error occurred while polling metadata store"); + log.makeAlert(t, "Error occurred while polling metadata store"); + } + finally { + // Schedule the next poll + final long nextPollDelay = Math.max(pollDuration.getMillis() - sincePollStart.millisElapsed(), 0); + pollExecutor.schedule(this::pollMetadataStore, nextPollDelay, TimeUnit.MILLISECONDS); + } } /** @@ -374,8 +382,8 @@ private void retrieveAndRefreshUsedSegments( private void retrieveAndRefreshAllPendingSegments() { final String sql = StringUtils.format( - "SELECT payload, sequence_name, sequence_prev_id, upgraded_from_segment_id" - + " task_allocator_id, created_date FROM %1$s", + "SELECT payload, sequence_name, sequence_prev_id," + + " upgraded_from_segment_id, task_allocator_id, created_date FROM %1$s", tablesConfig.getPendingSegmentsTable() );
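
For reference, a minimal sketch (not part of the patch) of how a service is expected to drive the new leader-aware cache lifecycle. The constructor arguments and lifecycle methods are the ones that appear in the diffs above; the surrounding wiring (mapper, tablesConfig, connector) and the class/method names of the sketch itself are assumptions made purely for illustration.

// Illustrative sketch only -- not part of this patch. Shows the intended call
// order for the leader-aware SegmentMetadataCache lifecycle. Wiring of mapper,
// tablesConfig and connector is assumed for the example.
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Suppliers;
import org.apache.druid.java.util.common.concurrent.ScheduledExecutors;
import org.apache.druid.metadata.MetadataStorageTablesConfig;
import org.apache.druid.metadata.SQLMetadataConnector;
import org.apache.druid.metadata.SegmentsMetadataManagerConfig;
import org.apache.druid.metadata.segment.cache.HeapMemorySegmentMetadataCache;
import org.apache.druid.metadata.segment.cache.SegmentMetadataCache;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.joda.time.Period;

class SegmentMetadataCacheLifecycleSketch
{
  static void demo(
      ObjectMapper mapper,
      MetadataStorageTablesConfig tablesConfig,
      SQLMetadataConnector connector
  )
  {
    final SegmentMetadataCache cache = new HeapMemorySegmentMetadataCache(
        mapper,
        Suppliers.ofInstance(new SegmentsMetadataManagerConfig(Period.seconds(1), true)),
        Suppliers.ofInstance(tablesConfig),
        connector,
        ScheduledExecutors::fixed,
        NoopServiceEmitter.instance()
    );

    cache.start();            // STOPPED -> STANDBY: background polling is scheduled
    cache.becomeLeader();     // STANDBY -> SYNC_PENDING: the next poll becomes a full sync
    // Next poll: SYNC_PENDING -> SYNC_STARTED -> READY. Callers that reach
    // verifyCacheIsReady() while the sync is in progress block until notifyAll() fires.

    cache.stopBeingLeader();  // -> STANDBY: polling continues, but transactions no longer read the cache
    cache.stop();             // -> STOPPED: the poll executor is shut down and cached state is torn down
  }
}

The integration-test environments above exercise this path by enabling the cache (druid_segments_manager_useCache=true) with a short pollDuration, while unit tests drive the same transitions explicitly via start(), becomeLeader(), stopBeingLeader() and stop().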