Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add preserveAsciiControlCharacters to CsvOptions #2143

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,20 @@ If you are using Maven without BOM, add this to your dependencies:
If you are using Gradle 5.x or later, add this to your dependencies

```Groovy
implementation platform('com.google.cloud:libraries-bom:25.4.0')
implementation platform('com.google.cloud:libraries-bom:26.1.0')

implementation 'com.google.cloud:google-cloud-bigquery'
```
If you are using Gradle without BOM, add this to your dependencies

```Groovy
implementation 'com.google.cloud:google-cloud-bigquery:2.13.6'
implementation 'com.google.cloud:google-cloud-bigquery:2.15.0'
```

If you are using SBT, add this to your dependencies

```Scala
libraryDependencies += "com.google.cloud" % "google-cloud-bigquery" % "2.13.6"
libraryDependencies += "com.google.cloud" % "google-cloud-bigquery" % "2.15.0"
```

## Authentication
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public final class CsvOptions extends FormatOptions {
private final String fieldDelimiter;
private final String quote;
private final Long skipLeadingRows;
private final Boolean preserveAsciiControlCharacters;

public static final class Builder {

Expand All @@ -43,6 +44,7 @@ public static final class Builder {
private String fieldDelimiter;
private String quote;
private Long skipLeadingRows;
private Boolean preserveAsciiControlCharacters;

private Builder() {}

Expand All @@ -53,6 +55,7 @@ private Builder(CsvOptions csvOptions) {
this.fieldDelimiter = csvOptions.fieldDelimiter;
this.quote = csvOptions.quote;
this.skipLeadingRows = csvOptions.skipLeadingRows;
this.preserveAsciiControlCharacters = csvOptions.preserveAsciiControlCharacters;
}

/**
Expand Down Expand Up @@ -130,6 +133,15 @@ public Builder setSkipLeadingRows(long skipLeadingRows) {
return this;
}

/**
 * Sets whether BigQuery should allow ASCII control characters in a CSV file. By default ASCII
 * control characters are not allowed.
 *
 * @param preserveAsciiControlCharacters {@code true} to allow ASCII control characters
 * @return this builder, so that calls can be chained
 */
public Builder setPreserveAsciiControlCharacters(boolean preserveAsciiControlCharacters) {
  // The backing field is a boxed Boolean; box the primitive explicitly before storing it.
  this.preserveAsciiControlCharacters = Boolean.valueOf(preserveAsciiControlCharacters);
  return this;
}

/** Creates a {@code CsvOptions} object. */
public CsvOptions build() {
return new CsvOptions(this);
Expand All @@ -144,6 +156,7 @@ private CsvOptions(Builder builder) {
this.fieldDelimiter = builder.fieldDelimiter;
this.quote = builder.quote;
this.skipLeadingRows = builder.skipLeadingRows;
this.preserveAsciiControlCharacters = builder.preserveAsciiControlCharacters;
}

/**
Expand Down Expand Up @@ -192,6 +205,14 @@ public Long getSkipLeadingRows() {
return skipLeadingRows;
}

/**
 * Returns whether BigQuery should allow ASCII control characters in a CSV file. By default ASCII
 * control characters are not allowed.
 *
 * @return the value copied from the builder, as a boxed {@code Boolean}; presumably {@code null}
 *     when the option was never set on the builder (TODO confirm)
 */
public Boolean getPreserveAsciiControlCharacters() {
  return this.preserveAsciiControlCharacters;
}

/** Returns a builder for the {@code CsvOptions} object. */
public Builder toBuilder() {
return new Builder(this);
Expand All @@ -207,6 +228,7 @@ public String toString() {
.add("fieldDelimiter", fieldDelimiter)
.add("quote", quote)
.add("skipLeadingRows", skipLeadingRows)
.add("preserveAsciiControlCharacters", preserveAsciiControlCharacters)
.toString();
}

Expand All @@ -219,7 +241,8 @@ public int hashCode() {
encoding,
fieldDelimiter,
quote,
skipLeadingRows);
skipLeadingRows,
preserveAsciiControlCharacters);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,8 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
.setAllowJaggedRows(csvOptions.allowJaggedRows())
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
.setEncoding(csvOptions.getEncoding())
.setQuote(csvOptions.getQuote());
.setQuote(csvOptions.getQuote())
.setPreserveAsciiControlCharacters(csvOptions.getPreserveAsciiControlCharacters());
if (csvOptions.getSkipLeadingRows() != null) {
// todo(mziccard) remove checked cast or comment when #1044 is closed
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.getSkipLeadingRows()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public class CsvOptionsTest {
private static final String FIELD_DELIMITER = ",";
private static final String QUOTE = "\"";
private static final long SKIP_LEADING_ROWS = 42L;

private static final boolean PRESERVE_ASCII_CONTROL_CHARACTERS = true;
private static final CsvOptions CSV_OPTIONS =
CsvOptions.newBuilder()
.setAllowJaggedRows(ALLOW_JAGGED_ROWS)
Expand All @@ -38,6 +40,7 @@ public class CsvOptionsTest {
.setFieldDelimiter(FIELD_DELIMITER)
.setQuote(QUOTE)
.setSkipLeadingRows(SKIP_LEADING_ROWS)
.setPreserveAsciiControlCharacters(PRESERVE_ASCII_CONTROL_CHARACTERS)
.build();

@Test
Expand All @@ -64,6 +67,8 @@ public void testBuilder() {
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.getFieldDelimiter());
assertEquals(QUOTE, CSV_OPTIONS.getQuote());
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.getSkipLeadingRows());
assertEquals(
PRESERVE_ASCII_CONTROL_CHARACTERS, CSV_OPTIONS.getPreserveAsciiControlCharacters());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4449,4 +4449,30 @@ public void testLocation() throws Exception {
bigquery.delete(dataset.getDatasetId(), DatasetDeleteOption.deleteContents());
}
}

/**
 * Writes a single NUL character through a CSV write channel that has
 * {@code preserveAsciiControlCharacters} enabled, then checks that the resulting job reports no
 * error, that the first stored value is the same NUL character, and that the table can be
 * deleted afterwards.
 */
@Test
public void testPreserveAsciiControlCharacters()
    throws InterruptedException, IOException, TimeoutException {
  TableId targetTable = TableId.of(DATASET, "test_preserve_ascii_control_characters");

  // CSV options with the flag under test switched on.
  FormatOptions csvKeepingControlChars =
      FormatOptions.csv().toBuilder().setPreserveAsciiControlCharacters(true).build();
  WriteChannelConfiguration writeConfig =
      WriteChannelConfiguration.newBuilder(targetTable)
          .setFormatOptions(csvKeepingControlChars)
          .setCreateDisposition(JobInfo.CreateDisposition.CREATE_IF_NEEDED)
          .setSchema(SIMPLE_SCHEMA)
          .build();

  // The payload is one NUL character, i.e. an ASCII control character.
  byte[] payload = "\u0000".getBytes(StandardCharsets.UTF_8);
  TableDataWriteChannel writer = bigquery.writer(writeConfig);
  try {
    writer.write(ByteBuffer.wrap(payload));
  } finally {
    // The job is read from the channel only after it has been closed.
    writer.close();
  }

  Job loadJob = writer.getJob().waitFor();
  assertNull(loadJob.getStatus().getError());

  Page<FieldValueList> firstPage = bigquery.listTableData(targetTable);
  FieldValueList firstRow = firstPage.getValues().iterator().next();
  assertEquals("\u0000", firstRow.get(0).getStringValue());

  assertTrue(bigquery.delete(targetTable));
}
}