Skip to content

Commit

Permalink
Increase Database Source SELECT Batch Size (#19514)
Browse files Browse the repository at this point in the history
* Increase Database Source SELECT Batch Size

* 60% is a little safer

* comment

* format...

* Update byte-size tests

* fixup tests

* test fixes

* format

* readmes and connector versions

* match strict-encrypt versions

* auto-bump connector version

* auto-bump connector version

Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
  • Loading branch information
evantahler and octavia-squidington-iii authored Nov 28, 2022
1 parent 1598126 commit e73a533
Show file tree
Hide file tree
Showing 13 changed files with 142 additions and 122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -983,7 +983,7 @@
- name: MySQL
sourceDefinitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad
dockerRepository: airbyte/source-mysql
dockerImageTag: 1.0.13
dockerImageTag: 1.0.14
documentationUrl: https://docs.airbyte.com/integrations/sources/mysql
icon: mysql.svg
sourceType: database
Expand Down Expand Up @@ -1221,7 +1221,7 @@
- name: Postgres
sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750
dockerRepository: airbyte/source-postgres
dockerImageTag: 1.0.27
dockerImageTag: 1.0.28
documentationUrl: https://docs.airbyte.com/integrations/sources/postgres
icon: postgresql.svg
sourceType: database
Expand Down
4 changes: 2 additions & 2 deletions airbyte-config/init/src/main/resources/seed/source_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8709,7 +8709,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-mysql:1.0.13"
- dockerImage: "airbyte/source-mysql:1.0.14"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/mysql"
connectionSpecification:
Expand Down Expand Up @@ -11153,7 +11153,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-postgres:1.0.27"
- dockerImage: "airbyte/source-postgres:1.0.28"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/postgres"
connectionSpecification:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ public static long getEstimatedByteSize(final Object rowData) {
// the whole method only provides an estimation. Please never convert
// the string to byte[] to get the exact length. That conversion is known
// to introduce a lot of memory overhead.
return Jsons.serialize(rowData).length() * 4L;
//
// We are using 3L as the median byte-size of a serialized char here assuming that most chars fit
// into the ASCII space (fewer bytes)

return Jsons.serialize(rowData).length() * 3L;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public final class FetchSizeConstants {
// This size is not enforced. It is only used to calculate a proper
// fetch size. The max row size the connector can handle is actually
// limited by the heap size.
public static final double TARGET_BUFFER_SIZE_RATIO = 0.5;
public static final double TARGET_BUFFER_SIZE_RATIO = 0.6;
public static final long MIN_BUFFER_BYTE_SIZE = 250L * 1024L * 1024L; // 250 MB
public static final long MAX_BUFFER_BYTE_SIZE = 1024L * 1024L * 1024L; // 1GB
// sample size for making the first estimation of the row size
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ class BaseSizeEstimatorTest {
@Test
void testGetEstimatedByteSize() {
assertEquals(0L, BaseSizeEstimator.getEstimatedByteSize(null));
assertEquals(28L, BaseSizeEstimator.getEstimatedByteSize("12345"));
assertEquals(60L, BaseSizeEstimator.getEstimatedByteSize(Jsons.jsonNode(Map.of("key", "value"))));
assertEquals(21L, BaseSizeEstimator.getEstimatedByteSize("12345"));
assertEquals(45L, BaseSizeEstimator.getEstimatedByteSize(Jsons.jsonNode(Map.of("key", "value"))));
}

public static class TestSizeEstimator extends BaseSizeEstimator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ void testIt() {
sizeEstimator.accept("11111");
final Optional<Integer> fetchSize = sizeEstimator.getFetchSize();
assertTrue(fetchSize.isPresent());
final long expectedMaxByteSize = 28L;
final long expectedMaxByteSize = 21L;
assertEquals(expectedMaxByteSize, Math.round(sizeEstimator.getMaxRowByteSize()));
assertEquals(bufferByteSize / expectedMaxByteSize, fetchSize.get().longValue());
assertEquals((bufferByteSize / expectedMaxByteSize) + 1, fetchSize.get().longValue()); // + 1 needed for int remainder rounding
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,38 @@ void testIt() {

double maxByteSize = initialByteSize;

// size: 3 * 4 = 12, not sampled
// size: 3 * 3 = 9, not sampled
sizeEstimator.accept("1");
assertFalse(sizeEstimator.getFetchSize().isPresent());
assertEquals(maxByteSize, sizeEstimator.getMaxRowByteSize());

// size: 4 * 4 = 16, not sampled
// size: 4 * 3 = 12, not sampled
sizeEstimator.accept("11");
assertFalse(sizeEstimator.getFetchSize().isPresent());
assertEquals(maxByteSize, sizeEstimator.getMaxRowByteSize());

// size: 5 * 4 = 20, sampled, fetch size is ready
// size: 5 * 3 = 15, sampled, fetch size is ready
sizeEstimator.accept("111");
final Optional<Integer> fetchSize1 = sizeEstimator.getFetchSize();
maxByteSize = 20;
assertDoubleEquals(20, sizeEstimator.getMaxRowByteSize());
maxByteSize = 15;
assertDoubleEquals(15, sizeEstimator.getMaxRowByteSize());
assertDoubleEquals(bufferByteSize / maxByteSize, fetchSize1.get().doubleValue());

// size: 6 * 4 = 24, not sampled
// size: 6 * 3 = 18, not sampled
sizeEstimator.accept("1111");
assertFalse(sizeEstimator.getFetchSize().isPresent());
assertDoubleEquals(maxByteSize, sizeEstimator.getMaxRowByteSize());

// size: 7 * 4 = 28, not sampled
// size: 7 * 3 = 21, not sampled
sizeEstimator.accept("11111");
assertFalse(sizeEstimator.getFetchSize().isPresent());
assertDoubleEquals(maxByteSize, sizeEstimator.getMaxRowByteSize());

// size: 8 * 4 = 32, sampled, fetch size is ready
// size: 8 * 3 = 24, sampled, fetch size is ready
sizeEstimator.accept("111111");
final Optional<Integer> fetchSize2 = sizeEstimator.getFetchSize();
assertTrue(fetchSize2.isPresent());
maxByteSize = 32;
maxByteSize = 24;
assertDoubleEquals(maxByteSize, sizeEstimator.getMaxRowByteSize());
assertDoubleEquals(bufferByteSize / maxByteSize, fetchSize2.get().doubleValue());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ ENV APPLICATION source-mysql-strict-encrypt

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.13
LABEL io.airbyte.version=1.0.14

LABEL io.airbyte.name=airbyte/source-mysql-strict-encrypt
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-mysql/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ ENV APPLICATION source-mysql

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.13
LABEL io.airbyte.version=1.0.14

LABEL io.airbyte.name=airbyte/source-mysql
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres-strict-encrypt

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.27
LABEL io.airbyte.version=1.0.28
LABEL io.airbyte.name=airbyte/source-postgres-strict-encrypt
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-postgres/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION source-postgres

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.27
LABEL io.airbyte.version=1.0.28
LABEL io.airbyte.name=airbyte/source-postgres
Loading

0 comments on commit e73a533

Please sign in to comment.