🐛 Destination S3: Fix Parquet LZO compression (airbytehq#26284)
* Revert "Move hadoop-lzo to test dependency (airbytehq#21085)"

This reverts commit 1241569.

* add basic test

* Automated Change

* version bumps, changelog

* Automated Change

* unused import

* Ran ./gradlew :spotlessJavaApply to trigger GitHub build

* regenerate registry

---------

Co-authored-by: edgao <edgao@users.noreply.github.com>
Co-authored-by: ryankfu <ryan.fu@airbyte.io>
3 people authored and marcosmarxm committed Jun 8, 2023
1 parent 1dd4cbe commit 31d26c3
Showing 6 changed files with 39 additions and 4 deletions.
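
For context on the fix: the S3 destination writes Parquet through parquet-avro, and the LZO option in CompressionCodecName maps to the com.hadoop.compression.lzo.LzoCodec class provided by hadoop-lzo. That codec is loaded reflectively while the connector is writing data, so the library has to sit on the connector's runtime classpath; keeping it as a test-only dependency (the change being reverted here) left the published image unable to load it. The snippet below is only a rough sketch of that wiring, with a made-up schema and output path, not the connector's actual writer code:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class LzoParquetSketch {

  public static void main(final String[] args) throws Exception {
    // Hypothetical one-field schema, just to have something to write.
    final Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"example\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");

    // CompressionCodecName.LZO resolves to com.hadoop.compression.lzo.LzoCodec at runtime,
    // so hadoop-lzo must be available when the connector writes data, not only when tests run.
    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
        .<GenericRecord>builder(new Path("/tmp/example-lzo.parquet"))
        .withSchema(schema)
        .withConf(new Configuration())
        .withCompressionCodec(CompressionCodecName.LZO)
        .build()) {
      final GenericRecord record = new GenericData.Record(schema);
      record.put("id", 1L);
      writer.write(record);
    }
  }
}

If the native GPL LZO bindings are missing on the host, the write still fails at compression time, which is the "blind spots related to cpu architecture" caveat the new acceptance test below mentions.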
2 changes: 1 addition & 1 deletion airbyte-integrations/bases/base-java-s3/build.gradle
@@ -13,6 +13,7 @@ dependencies {

implementation ('org.apache.parquet:parquet-avro:1.12.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'}
implementation ('com.github.airbytehq:json-avro-converter:1.1.0') { exclude group: 'ch.qos.logback', module: 'logback-classic'}
implementation group: 'com.hadoop.gplcompression', name: 'hadoop-lzo', version: '0.4.20'

// parquet
implementation ('org.apache.hadoop:hadoop-common:3.3.3') {
@@ -35,5 +36,4 @@ dependencies {

testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.1'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.1'
testImplementation group: 'com.hadoop.gplcompression', name: 'hadoop-lzo', version: '0.4.20'
}
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/destination-s3/Dockerfile
@@ -40,5 +40,5 @@ RUN /bin/bash -c 'set -e && \
echo "unknown arch" ;\
fi'

LABEL io.airbyte.version=0.4.0
LABEL io.airbyte.version=0.4.1
LABEL io.airbyte.name=airbyte/destination-s3
@@ -33,7 +33,7 @@ dependencies {
}
implementation ('org.apache.parquet:parquet-avro:1.12.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'}
implementation ('com.github.airbytehq:json-avro-converter:1.1.0') { exclude group: 'ch.qos.logback', module: 'logback-classic'}

implementation group: 'com.hadoop.gplcompression', name: 'hadoop-lzo', version: '0.4.20'
testImplementation 'org.apache.commons:commons-lang3:3.11'
testImplementation 'org.xerial.snappy:snappy-java:1.1.8.4'
testImplementation "org.mockito:mockito-inline:4.1.0"
@@ -2,7 +2,7 @@ data:
connectorSubtype: file
connectorType: destination
definitionId: 4816b78f-1489-44c1-9060-4b19d5fa9362
dockerImageTag: 0.4.0
dockerImageTag: 0.4.1
dockerRepository: airbyte/destination-s3
githubIssueLabel: destination-s3
icon: s3.svg
@@ -5,8 +5,19 @@
package io.airbyte.integrations.destination.s3;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.resources.MoreResources;
import io.airbyte.integrations.standardtest.destination.ProtocolVersion;
import io.airbyte.integrations.standardtest.destination.argproviders.DataArgumentsProvider;
import io.airbyte.integrations.standardtest.destination.comparator.TestDataComparator;
import io.airbyte.protocol.models.v0.AirbyteCatalog;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.CatalogHelpers;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;

public class S3ParquetDestinationAcceptanceTest extends S3BaseParquetDestinationAcceptanceTest {

@@ -25,4 +36,27 @@ protected JsonNode getBaseConfigJson() {
return S3DestinationTestUtils.getBaseConfigJsonFilePath();
}

/**
* Quick and dirty test to verify that lzo compression works. Probably has some blind spots related
* to cpu architecture.
* <p>
* Only verifies that it runs successfully, which is sufficient to catch any issues with installing
* the lzo libraries.
*/
@Test
public void testLzoCompression() throws Exception {
final JsonNode config = getConfig().deepCopy();
((ObjectNode) config.get("format")).put("compression_codec", "LZO");

final AirbyteCatalog catalog = Jsons.deserialize(
MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getCatalogFileVersion(ProtocolVersion.V0)), AirbyteCatalog.class);
final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog);
final List<AirbyteMessage> messages =
MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.getMessageFileVersion(ProtocolVersion.V0)).lines()
.map(record -> Jsons.deserialize(record, AirbyteMessage.class))
.collect(Collectors.toList());

runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false);
}

}
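
The classpath problem that the acceptance test above exercises end-to-end can also be illustrated with a much smaller probe: resolving and instantiating the Hadoop codec class behind CompressionCodecName.LZO already fails when hadoop-lzo is absent. What follows is only an assumed standalone sketch of that idea, not code from this commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class LzoCodecSmokeCheck {

  public static void main(final String[] args) throws Exception {
    // Resolves to "com.hadoop.compression.lzo.LzoCodec"; the lookup only succeeds
    // when hadoop-lzo ships with the connector.
    final String codecClassName = CompressionCodecName.LZO.getHadoopCompressionCodecClassName();
    final Class<?> codecClass = Class.forName(codecClassName);

    // Instantiating the codec with a Hadoop Configuration is roughly what Parquet's
    // codec factory does before compressing page data.
    final CompressionCodec codec =
        (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration());
    System.out.println("Resolved LZO codec: " + codec.getClass().getName());
  }
}

Either step fails fast when the dependency is missing, which is the situation the dependency change and version bump in this commit repair.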
1 change: 1 addition & 0 deletions docs/integrations/destinations/s3.md
@@ -343,6 +343,7 @@ In order for everything to work correctly, it is also necessary that the user wh

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:-----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------|
| 0.4.1 | 2023-05-18 | [#26284](https://github.com/airbytehq/airbyte/pull/26284) | Fix: reenable LZO compression for Parquet output |
| 0.4.0 | 2023-04-28 | [#25570](https://github.com/airbytehq/airbyte/pull/25570) | Fix: all integer schemas should be converted to Avro longs |
| 0.3.25 | 2023-04-27 | [#25346](https://github.com/airbytehq/airbyte/pull/25346) | Internal code cleanup |
| 0.3.23 | 2023-03-30 | [#24736](https://github.com/airbytehq/airbyte/pull/24736) | Improve behavior when throttled by AWS API |
