-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARROW-3966 [Java] JDBC Column Metadata in Arrow Field Metadata #3134
Changes from 32 commits
5af1b5b
523387f
a78c770
da77cbe
b270044
df632e3
fe097c8
e34a9e7
4f1260c
8d6cf00
b5b0cb1
68c91e7
5bfd6a2
a6fb1be
bb3165b
7e9ce37
7b4527c
72d64cc
03091a8
881c6c8
1ceac9e
d847ebc
3b17c29
e5b19ee
789c8c8
509a1cc
4a6de86
69022c2
2928513
cfb2ba6
cc6cc88
65741a9
e9a9b2b
7049c36
02f2f34
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.arrow.adapter.jdbc; | ||
|
||
/**
 * String constants used by the JDBC adapter as keys in Arrow field metadata.
 *
 * <p>When metadata inclusion is enabled, the schema conversion stores one entry per key
 * for each JDBC column, with values read from the column's {@code ResultSetMetaData}.
 *
 * <p>This class only holds constants and cannot be instantiated.
 */
public final class Constants {

  /** Metadata key under which the column's catalog name is stored. */
  public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME";

  /** Metadata key under which the column's table name is stored. */
  public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME";

  /** Metadata key under which the column's name is stored. */
  public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME";

  /** Metadata key under which the column's SQL type name is stored. */
  public static final String SQL_TYPE_KEY = "SQL_TYPE";

  private Constants() {
    // Constants holder: prevent instantiation and subclassing.
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,7 +36,11 @@ | |
import java.sql.Types; | ||
import java.util.ArrayList; | ||
import java.util.Calendar; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Locale; | ||
import java.util.Map; | ||
import java.util.TimeZone; | ||
|
||
import org.apache.arrow.memory.RootAllocator; | ||
import org.apache.arrow.vector.BaseFixedWidthVector; | ||
|
@@ -103,7 +107,14 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar | |
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); | ||
Preconditions.checkNotNull(calendar, "Calendar object can't be null"); | ||
|
||
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); | ||
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar, false)); | ||
} | ||
|
||
/** | ||
* Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. | ||
*/ | ||
public static Calendar getUtcCalendar() { | ||
return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); | ||
} | ||
|
||
/** | ||
|
@@ -148,75 +159,93 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig | |
List<Field> fields = new ArrayList<>(); | ||
int columnCount = rsmd.getColumnCount(); | ||
for (int i = 1; i <= columnCount; i++) { | ||
String columnName = rsmd.getColumnName(i); | ||
final String columnName = rsmd.getColumnName(i); | ||
final FieldType fieldType; | ||
|
||
final Map<String, String> metadata; | ||
if (config.shouldIncludeMetadata()) { | ||
metadata = new HashMap<>(); | ||
metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i)); | ||
metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i)); | ||
metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName); | ||
metadata.put(Constants.SQL_TYPE_KEY, rsmd.getColumnTypeName(i)); | ||
|
||
} else { | ||
metadata = null; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (minor nit) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I can do that, but then I can't make the metadata variable final. I've worked on teams in the past where the best practice was to mark things final as often as possible; is that the same with Arrow? |
||
} | ||
|
||
switch (rsmd.getColumnType(i)) { | ||
case Types.BOOLEAN: | ||
case Types.BIT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Bool()), null)); | ||
fieldType = new FieldType(true, new ArrowType.Bool(), null, metadata); | ||
break; | ||
case Types.TINYINT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(8, true)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Int(8, true), null, metadata); | ||
break; | ||
case Types.SMALLINT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(16, true)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Int(16, true), null, metadata); | ||
break; | ||
case Types.INTEGER: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(32, true)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Int(32, true), null, metadata); | ||
break; | ||
case Types.BIGINT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(64, true)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Int(64, true), null, metadata); | ||
break; | ||
case Types.NUMERIC: | ||
case Types.DECIMAL: | ||
int precision = rsmd.getPrecision(i); | ||
int scale = rsmd.getScale(i); | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Decimal(precision, scale)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Decimal(precision, scale), null, metadata); | ||
break; | ||
case Types.REAL: | ||
case Types.FLOAT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(SINGLE)), null)); | ||
fieldType = new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null, metadata); | ||
break; | ||
case Types.DOUBLE: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(DOUBLE)), null)); | ||
fieldType = new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null, metadata); | ||
break; | ||
case Types.CHAR: | ||
case Types.NCHAR: | ||
case Types.VARCHAR: | ||
case Types.NVARCHAR: | ||
case Types.LONGVARCHAR: | ||
case Types.LONGNVARCHAR: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null)); | ||
case Types.CLOB: | ||
fieldType = new FieldType(true, new ArrowType.Utf8(), null, metadata); | ||
break; | ||
case Types.DATE: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Date(DateUnit.MILLISECOND)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Date(DateUnit.MILLISECOND), null, metadata); | ||
break; | ||
case Types.TIME: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Time(TimeUnit.MILLISECOND, 32)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata); | ||
break; | ||
case Types.TIMESTAMP: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, | ||
config.getCalendar().getTimeZone().getID())), null)); | ||
fieldType = | ||
new FieldType( | ||
true, | ||
new ArrowType.Timestamp(TimeUnit.MILLISECOND, config.getCalendar().getTimeZone().getID()), | ||
null, | ||
metadata); | ||
break; | ||
case Types.BINARY: | ||
case Types.VARBINARY: | ||
case Types.LONGVARBINARY: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null)); | ||
break; | ||
case Types.ARRAY: | ||
// TODO Need to handle this type | ||
// fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null)); | ||
break; | ||
case Types.CLOB: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null)); | ||
break; | ||
case Types.BLOB: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null)); | ||
fieldType = new FieldType(true, new ArrowType.Binary(), null, metadata); | ||
break; | ||
|
||
case Types.ARRAY: | ||
// TODO Need to handle this type | ||
// fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null)); | ||
default: | ||
// no-op, shouldn't get here | ||
fieldType = null; | ||
break; | ||
} | ||
|
||
if (fieldType != null) { | ||
fields.add(new Field(columnName, fieldType, null)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like the use of a FieldType object instead of creating a Field object in each case statement. I am fine with this. |
||
} | ||
} | ||
|
||
return new Schema(fields, null); | ||
|
@@ -250,7 +279,7 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen | |
Preconditions.checkNotNull(root, "Vector Schema cannot be null"); | ||
Preconditions.checkNotNull(calendar, "Calendar object can't be null"); | ||
|
||
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); | ||
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar, false)); | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(minor nit - again, ignore if you don't agree)
Can we default the include-metadata flag in the builder so that only the clients that need it will override it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense. Will change tonight.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hang on, I'm confused - `includeMetadata` is initialized to `false` in the builder. This is the behavior you asked for, right? Only people who call `setIncludeMetadata(true)` will have the metadata generated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I saw the constructors using the false flag in other places and was misled. This looks OK.