Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finish support for RFC4180 for CSV bulk insert operations #2338

Merged
merged 2 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,47 @@ private void initFileReader(InputStreamReader sr, String encoding, String demlim
}
}

/* RFC4180 specifies the rules for quoted fields. It allows quoted string data to contain newline characters,
provided the contents otherwise conform to the rules for escaping quotes. For example, the following is valid:
"a","b","c"
"aaa","b <-- newline is retained in data field
bb","c"
"aa","bb","cc"

We cannot simply use fileReader.readLine() to read these records but instead must continue reading until we reach
a newline that is not contained within quotes.
*/
/**
 * Reads the next CSV record from {@code fileReader}, treating line terminators that occur inside a quoted
 * field as data rather than as the end of the record.
 *
 * A record ends at the first {@code \n}, {@code \r} or {@code \r\n} seen while an even number of
 * double-quote characters has been read (i.e. outside of quotes), or at end of stream. The terminator is
 * not included in the returned text.
 *
 * Relies on {@code fileReader} supporting {@code mark}/{@code reset} for the one-character look-ahead.
 *
 * @return the record text without its terminator (possibly empty for a blank line), or {@code null} when the
 *         stream is exhausted and no characters were read, mirroring {@code readLine()}
 * @throws SQLServerException
 *         if the stream ends while inside a quoted field, or if reading from the stream fails
 */
private String readLineEscapeDelimiters() throws SQLServerException {
    int quoteCount = 0;
    boolean terminatorFound = false;
    StringBuilder sb = new StringBuilder();
    try {
        int c;
        while ((c = fileReader.read()) != -1) {
            if ((c == '\n' || c == '\r') && quoteCount % 2 == 0) { // newlines only end the record if we are not in quotes
                terminatorFound = true;
                if (c == '\r') {
                    // Only a \r can be the first half of a \r\n pair. Peeking after a bare \n would swallow
                    // a following blank line.
                    fileReader.mark(1);
                    if (fileReader.read() != '\n') {
                        fileReader.reset(); // lone \r: unread the char so it goes into the next record
                    }
                }
                break;
            }
            sb.append((char) c);
            if (c == '"') {
                quoteCount++;
            }
        }
        if (!terminatorFound) { // the loop ended because the stream is exhausted
            if (quoteCount % 2 != 0) { // stream ended, but we are within quotes -- data problem
                throw new SQLServerException(SQLServerException.getErrString("R_InvalidCSVQuotes"), null, 0, null);
            }
            if (sb.length() == 0) { // keep semantics of readLine(): null only when there is no more data
                return null;
            }
            // Otherwise fall through: the last record had no trailing newline and must still be returned.
        }
    } catch (IOException e) {
        throw new SQLServerException(e.getMessage(), null, 0, e);
    }
    return sb.toString();
}

/* Sets the inherited loggerPackageName field to the name used for this record type's logger. */
private void initLoggerResources() {
    final String packageName = "com.microsoft.sqlserver.jdbc.SQLServerBulkCSVFileRecord";
    super.loggerPackageName = packageName;
}
Expand Down Expand Up @@ -526,7 +567,7 @@ else if ((null != columnNames) && (columnNames.length >= positionInSource))
@Override
public boolean next() throws SQLServerException {
try {
currentLine = fileReader.readLine();
currentLine = escapeDelimiters ? readLineEscapeDelimiters() : fileReader.readLine();
} catch (IOException e) {
throw new SQLServerException(e.getMessage(), null, 0, e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public void testEscapeColumnDelimitersCSV() throws Exception {
/*
* The list below is a copy of inputFileDelimiterEscape with quotes removed.
*/
String[][] expectedEscaped = new String[11][4];
String[][] expectedEscaped = new String[12][4];
expectedEscaped[0] = new String[] {"test", " test\"", "no@split", " testNoQuote", ""};
expectedEscaped[1] = new String[] {null, null, null, null, ""};
expectedEscaped[2] = new String[] {"\"", "test\"test", "test@\" test", null, ""};
Expand All @@ -166,6 +166,7 @@ public void testEscapeColumnDelimitersCSV() throws Exception {
expectedEscaped[8] = new String[] {"1997", "Ford", "E350", "Super@ \"luxurious\" truck", ""};
expectedEscaped[9] = new String[] {"1997", "Ford", "E350", "E63", ""};
expectedEscaped[10] = new String[] {"1997", "Ford", "E350", " Super luxurious truck ", ""};
expectedEscaped[11] = new String[] {"1997", "F\r\no\r\nr\r\nd", "E350", "\"Super\" \"luxurious\" \"truck\"", ""};

try (Connection con = getConnection(); Statement stmt = con.createStatement();
SQLServerBulkCopy bulkCopy = new SQLServerBulkCopy(con);
Expand Down
4 changes: 4 additions & 0 deletions src/test/resources/BulkCopyCSVTestInputDelimiterEscape.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@
9@1997@Ford@E350@"Super@ ""luxurious"" truck"@
10@1997@ "Ford" @E350@ "E63"@
11@1997@Ford@E350@" Super luxurious truck "@
12@1997@"F
o
r
d"@"E350"@"""Super"" ""luxurious"" ""truck"""@
funkyjive marked this conversation as resolved.
Show resolved Hide resolved