-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Only run normalization when needed (#16794)
Only run normalization when records have been committed
- Loading branch information
Showing
10 changed files
with
265 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
...rsistence/src/main/java/io/airbyte/persistence/job/models/AttemptNormalizationStatus.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
/* | ||
* Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.persistence.job.models; | ||
|
||
import java.util.Optional; | ||
|
||
public record AttemptNormalizationStatus(long attemptNumber, Optional<Long> recordsCommitted, boolean normalizationFailed) {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
...ers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationSummaryCheckActivity.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
/* | ||
* Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.workers.temporal.sync; | ||
|
||
import io.temporal.activity.ActivityInterface; | ||
import io.temporal.activity.ActivityMethod; | ||
import java.io.IOException; | ||
import java.util.Optional; | ||
|
||
@ActivityInterface | ||
public interface NormalizationSummaryCheckActivity { | ||
|
||
@ActivityMethod | ||
boolean shouldRunNormalization(Long jobId, Long attemptId, Optional<Long> numCommittedRecords) throws IOException; | ||
|
||
} |
78 changes: 78 additions & 0 deletions
78
...src/main/java/io/airbyte/workers/temporal/sync/NormalizationSummaryCheckActivityImpl.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* | ||
* Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.workers.temporal.sync; | ||
|
||
import io.airbyte.persistence.job.JobPersistence; | ||
import io.airbyte.persistence.job.models.AttemptNormalizationStatus; | ||
import java.io.IOException; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
import java.util.Optional; | ||
import java.util.concurrent.atomic.AtomicBoolean; | ||
import java.util.concurrent.atomic.AtomicLong; | ||
import javax.inject.Singleton; | ||
import lombok.extern.slf4j.Slf4j; | ||
|
||
@Slf4j | ||
@Singleton | ||
public class NormalizationSummaryCheckActivityImpl implements NormalizationSummaryCheckActivity { | ||
|
||
private final Optional<JobPersistence> jobPersistence; | ||
|
||
public NormalizationSummaryCheckActivityImpl(final Optional<JobPersistence> jobPersistence) { | ||
this.jobPersistence = jobPersistence; | ||
} | ||
|
||
@Override | ||
@SuppressWarnings("PMD.AvoidLiteralsInIfCondition") | ||
public boolean shouldRunNormalization(final Long jobId, final Long attemptNumber, final Optional<Long> numCommittedRecords) throws IOException { | ||
// if job persistence is unavailable, default to running normalization | ||
if (jobPersistence.isEmpty()) { | ||
return true; | ||
} | ||
|
||
// if the count of committed records for this attempt is > 0 OR if it is null, | ||
// then we should run normalization | ||
if (numCommittedRecords.get() == null || numCommittedRecords.get() > 0) { | ||
return true; | ||
} | ||
|
||
final List<AttemptNormalizationStatus> attemptNormalizationStatuses = jobPersistence.get().getAttemptNormalizationStatusesForJob(jobId); | ||
final AtomicLong totalRecordsCommitted = new AtomicLong(0L); | ||
final AtomicBoolean shouldReturnTrue = new AtomicBoolean(false); | ||
|
||
attemptNormalizationStatuses.stream().sorted(Comparator.comparing(AttemptNormalizationStatus::attemptNumber).reversed()).toList() | ||
.forEach(n -> { | ||
if (n.attemptNumber() == attemptNumber) { | ||
return; | ||
} | ||
|
||
// if normalization succeeded from a previous attempt succeeded, | ||
// we can stop looking for previous attempts | ||
if (!n.normalizationFailed()) { | ||
return; | ||
} | ||
|
||
// if normalization failed on past attempt, add number of records committed on that attempt to total | ||
// committed number | ||
// if there is no data recorded for the number of committed records, we should assume that there | ||
// were committed records and run normalization | ||
if (n.recordsCommitted().isEmpty()) { | ||
shouldReturnTrue.set(true); | ||
return; | ||
} else if (n.recordsCommitted().get() != 0L) { | ||
totalRecordsCommitted.addAndGet(n.recordsCommitted().get()); | ||
} | ||
}); | ||
|
||
if (shouldReturnTrue.get() || totalRecordsCommitted.get() > 0L) { | ||
return true; | ||
} | ||
|
||
return false; | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 73 additions & 0 deletions
73
...airbyte/workers/temporal/scheduling/activities/NormalizationSummaryCheckActivityTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/* | ||
* Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.workers.temporal.scheduling.activities; | ||
|
||
import static org.mockito.Mockito.mock; | ||
|
||
import io.airbyte.persistence.job.JobPersistence; | ||
import io.airbyte.persistence.job.models.AttemptNormalizationStatus; | ||
import io.airbyte.workers.temporal.sync.NormalizationSummaryCheckActivityImpl; | ||
import java.io.IOException; | ||
import java.util.List; | ||
import java.util.Optional; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.assertj.core.api.Assertions; | ||
import org.junit.jupiter.api.BeforeAll; | ||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.api.extension.ExtendWith; | ||
import org.mockito.Mockito; | ||
import org.mockito.junit.jupiter.MockitoExtension; | ||
|
||
@Slf4j | ||
@ExtendWith(MockitoExtension.class) | ||
class NormalizationSummaryCheckActivityTest { | ||
|
||
private static final Long JOB_ID = 10L; | ||
static private NormalizationSummaryCheckActivityImpl normalizationSummaryCheckActivity; | ||
static private JobPersistence mJobPersistence; | ||
|
||
@BeforeAll | ||
static void setUp() { | ||
mJobPersistence = mock(JobPersistence.class); | ||
normalizationSummaryCheckActivity = new NormalizationSummaryCheckActivityImpl(Optional.of(mJobPersistence)); | ||
} | ||
|
||
@Test | ||
void testShouldRunNormalizationRecordsCommittedOnFirstAttemptButNotCurrentAttempt() throws IOException { | ||
// Attempt 1 committed records, but normalization failed | ||
// Attempt 2 did not commit records, normalization failed (or did not run) | ||
final AttemptNormalizationStatus attempt1 = new AttemptNormalizationStatus(1, Optional.of(10L), true); | ||
final AttemptNormalizationStatus attempt2 = new AttemptNormalizationStatus(2, Optional.of(0L), true); | ||
Mockito.when(mJobPersistence.getAttemptNormalizationStatusesForJob(JOB_ID)).thenReturn(List.of(attempt1, attempt2)); | ||
|
||
Assertions.assertThat(true).isEqualTo(normalizationSummaryCheckActivity.shouldRunNormalization(JOB_ID, 3L, Optional.of(0L))); | ||
} | ||
|
||
@Test | ||
void testShouldRunNormalizationRecordsCommittedOnCurrentAttempt() throws IOException { | ||
Assertions.assertThat(true).isEqualTo(normalizationSummaryCheckActivity.shouldRunNormalization(JOB_ID, 3L, Optional.of(30L))); | ||
} | ||
|
||
@Test | ||
void testShouldRunNormalizationNoRecordsCommittedOnCurrentAttemptOrPreviousAttempts() throws IOException { | ||
// No attempts committed any records | ||
// Normalization did not run on any attempts | ||
final AttemptNormalizationStatus attempt1 = new AttemptNormalizationStatus(1, Optional.of(0L), true); | ||
final AttemptNormalizationStatus attempt2 = new AttemptNormalizationStatus(2, Optional.of(0L), true); | ||
Mockito.when(mJobPersistence.getAttemptNormalizationStatusesForJob(JOB_ID)).thenReturn(List.of(attempt1, attempt2)); | ||
Assertions.assertThat(false).isEqualTo(normalizationSummaryCheckActivity.shouldRunNormalization(JOB_ID, 3L, Optional.of(0L))); | ||
} | ||
|
||
@Test | ||
void testShouldRunNormalizationNoRecordsCommittedOnCurrentAttemptPreviousAttemptsSucceeded() throws IOException { | ||
// Records committed on first two attempts and normalization succeeded | ||
// No records committed on current attempt and normalization has not yet run | ||
final AttemptNormalizationStatus attempt1 = new AttemptNormalizationStatus(1, Optional.of(10L), false); | ||
final AttemptNormalizationStatus attempt2 = new AttemptNormalizationStatus(2, Optional.of(20L), false); | ||
Mockito.when(mJobPersistence.getAttemptNormalizationStatusesForJob(JOB_ID)).thenReturn(List.of(attempt1, attempt2)); | ||
Assertions.assertThat(false).isEqualTo(normalizationSummaryCheckActivity.shouldRunNormalization(JOB_ID, 3L, Optional.of(0L))); | ||
} | ||
|
||
} |
Oops, something went wrong.