Skip to content

Commit

Permalink
feat: Add preserveAsciiControlCharacters to CsvOptions
Browse files Browse the repository at this point in the history
  • Loading branch information
takayahilton committed Jun 28, 2022
1 parent e1e58d3 commit ff800b0
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public final class CsvOptions extends FormatOptions {
private final String fieldDelimiter;
private final String quote;
private final Long skipLeadingRows;
private final Boolean preserveAsciiControlCharacters;

public static final class Builder {

Expand All @@ -43,6 +44,7 @@ public static final class Builder {
private String fieldDelimiter;
private String quote;
private Long skipLeadingRows;
private Boolean preserveAsciiControlCharacters;

private Builder() {}

Expand All @@ -53,6 +55,7 @@ private Builder(CsvOptions csvOptions) {
this.fieldDelimiter = csvOptions.fieldDelimiter;
this.quote = csvOptions.quote;
this.skipLeadingRows = csvOptions.skipLeadingRows;
this.preserveAsciiControlCharacters = csvOptions.preserveAsciiControlCharacters;
}

/**
Expand Down Expand Up @@ -130,6 +133,15 @@ public Builder setSkipLeadingRows(long skipLeadingRows) {
return this;
}

/**
 * Sets whether ASCII control characters in a CSV file should be allowed by BigQuery. By default
 * ASCII control characters are not allowed.
 *
 * @param preserveAsciiControlCharacters {@code true} to allow ASCII control characters,
 *     {@code false} otherwise
 * @return this builder, so that calls can be chained
 */
public Builder setPreserveAsciiControlCharacters(boolean preserveAsciiControlCharacters) {
  // The builder stores the option as a boxed Boolean; box the primitive explicitly.
  this.preserveAsciiControlCharacters = Boolean.valueOf(preserveAsciiControlCharacters);
  return this;
}

/** Creates a {@code CsvOptions} object. */
public CsvOptions build() {
return new CsvOptions(this);
Expand All @@ -144,6 +156,7 @@ private CsvOptions(Builder builder) {
this.fieldDelimiter = builder.fieldDelimiter;
this.quote = builder.quote;
this.skipLeadingRows = builder.skipLeadingRows;
this.preserveAsciiControlCharacters = builder.preserveAsciiControlCharacters;
}

/**
Expand Down Expand Up @@ -192,6 +205,14 @@ public Long getSkipLeadingRows() {
return skipLeadingRows;
}

/**
 * Returns whether BigQuery should allow ASCII control characters in a CSV file. By default ASCII
 * control characters are not allowed.
 *
 * @return the configured value as a boxed {@code Boolean}; the value is copied unchanged from
 *     the builder, so it can be {@code null} when the option was never set
 */
public Boolean getPreserveAsciiControlCharacters() {
  return this.preserveAsciiControlCharacters;
}

/** Returns a builder for the {@code CsvOptions} object. */
public Builder toBuilder() {
return new Builder(this);
Expand All @@ -207,6 +228,7 @@ public String toString() {
.add("fieldDelimiter", fieldDelimiter)
.add("quote", quote)
.add("skipLeadingRows", skipLeadingRows)
.add("preserveAsciiControlCharacters", preserveAsciiControlCharacters)
.toString();
}

Expand All @@ -219,7 +241,8 @@ public int hashCode() {
encoding,
fieldDelimiter,
quote,
skipLeadingRows);
skipLeadingRows,
preserveAsciiControlCharacters);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,8 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
.setAllowJaggedRows(csvOptions.allowJaggedRows())
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
.setEncoding(csvOptions.getEncoding())
.setQuote(csvOptions.getQuote());
.setQuote(csvOptions.getQuote())
.setPreserveAsciiControlCharacters(csvOptions.getPreserveAsciiControlCharacters());
if (csvOptions.getSkipLeadingRows() != null) {
// todo(mziccard) remove checked cast or comment when #1044 is closed
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.getSkipLeadingRows()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public class CsvOptionsTest {
private static final String FIELD_DELIMITER = ",";
private static final String QUOTE = "\"";
private static final long SKIP_LEADING_ROWS = 42L;

private static final boolean PRESERVE_ASCII_CONTROL_CHARACTERS = true;
private static final CsvOptions CSV_OPTIONS =
CsvOptions.newBuilder()
.setAllowJaggedRows(ALLOW_JAGGED_ROWS)
Expand All @@ -38,6 +40,7 @@ public class CsvOptionsTest {
.setFieldDelimiter(FIELD_DELIMITER)
.setQuote(QUOTE)
.setSkipLeadingRows(SKIP_LEADING_ROWS)
.setPreserveAsciiControlCharacters(PRESERVE_ASCII_CONTROL_CHARACTERS)
.build();

@Test
Expand All @@ -64,6 +67,8 @@ public void testBuilder() {
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.getFieldDelimiter());
assertEquals(QUOTE, CSV_OPTIONS.getQuote());
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.getSkipLeadingRows());
assertEquals(
PRESERVE_ASCII_CONTROL_CHARACTERS, CSV_OPTIONS.getPreserveAsciiControlCharacters());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public class WriteChannelConfigurationTest {
CsvOptions.newBuilder()
.setAllowJaggedRows(true)
.setAllowQuotedNewLines(false)
.setPreserveAsciiControlCharacters(true)
.setEncoding(StandardCharsets.UTF_8)
.build();
private static final TableId TABLE_ID = TableId.of("dataset", "table");
Expand Down

0 comments on commit ff800b0

Please sign in to comment.