Skip to content

Commit

Permalink
ARROW-4142: [Java] JDBC Array -> Arrow ListVector
Browse files Browse the repository at this point in the history
https://issues.apache.org/jira/browse/ARROW-4142

This adds support for reading JDBC arrays and converting them to Arrow ListVectors.  JDBC does not provide a great way to get the array type; there is a ResultSet object for walking the array, but its ResultSetMetaData may not contain the right value type (H2, for example, returns a JDBC type of NULL).

This is based on #3134, which includes ARROW-3965 and ARROW-3966.

I found Arrow arrays to be very confusing, and I am not sure if I am using them correctly here.  One thing I noticed was if I added a null array to a ListVector of VarCharVectors, the next value in the VarCharVector would be empty.  I would appreciate any help on why!  The ListVector unit tests weren't very helpful.

For all other cases, this code seems to work.  I look forward to your review!

Author: Mike Pigott <mpigott@gmail.com>

Closes #3294 from mikepigott/jdbc-array-field and squashes the following commits:

66376dd <Mike Pigott> Support for reading Array records to ListVector from JDBC
  • Loading branch information
Mike Pigott authored and Pindikura Ravindra committed Feb 19, 2019
1 parent 24a98e4 commit 5aaf700
Show file tree
Hide file tree
Showing 9 changed files with 1,043 additions and 238 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.adapter.jdbc;

import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;

import com.google.common.base.Preconditions;

/**
* This class represents the information about a JDBC ResultSet Field that is
* needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}.
* Currently, this is:
* <ul>
* <li>The JDBC {@link java.sql.Types} type.</li>
* <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li>
* <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li>
* </ul>
*/
public class JdbcFieldInfo {
private final int jdbcType;
private final int precision;
private final int scale;

/**
* Builds a <code>JdbcFieldInfo</code> using only the {@link java.sql.Types} type. Do not use this constructor
* if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and
* scale will be set to <code>0</code>.
*
* @param jdbcType The {@link java.sql.Types} type.
* @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}.
*/
public JdbcFieldInfo(int jdbcType) {
Preconditions.checkArgument(
(jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC),
"DECIMAL and NUMERIC types require a precision and scale; please use another constructor.");

this.jdbcType = jdbcType;
this.precision = 0;
this.scale = 0;
}

/**
* Builds a <code>JdbcFieldInfo</code> from the {@link java.sql.Types} type, precision, and scale.
* Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types.
*
* @param jdbcType The {@link java.sql.Types} type.
* @param precision The field's numeric precision.
* @param scale The field's numeric scale.
*/
public JdbcFieldInfo(int jdbcType, int precision, int scale) {
this.jdbcType = jdbcType;
this.precision = precision;
this.scale = scale;
}

/**
* Builds a <code>JdbcFieldInfo</code> from the corresponding {@link java.sql.ResultSetMetaData} column.
*
* @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from.
* @param column The column to get the field information for (on a 1-based index).
* @throws SQLException If the column information cannot be retrieved.
* @throws NullPointerException if <code>rsmd</code> is <code>null</code>.
* @throws IllegalArgumentException if <code>column</code> is out of bounds.
*/
public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException {
Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null.");
Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1.");
Preconditions.checkArgument(
column <= rsmd.getColumnCount(),
"The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount());

this.jdbcType = rsmd.getColumnType(column);
this.precision = rsmd.getPrecision(column);
this.scale = rsmd.getScale(column);
}

/**
* The {@link java.sql.Types} type.
*/
public int getJdbcType() {
return jdbcType;
}

/**
* The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
*/
public int getPrecision() {
return precision;
}

/**
* The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types.
*/
public int getScale() {
return scale;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");

JdbcToArrowConfig config =
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
return sqlToArrow(connection, query, config);
}

Expand Down Expand Up @@ -116,7 +116,7 @@ public static VectorSchemaRoot sqlToArrow(
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar, false));
return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
}

/**
Expand Down Expand Up @@ -170,7 +170,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

JdbcToArrowConfig config =
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar());
return sqlToArrow(resultSet, config);
}

Expand All @@ -184,8 +184,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");

return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false));
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
}

/**
Expand All @@ -205,7 +204,7 @@ public static VectorSchemaRoot sqlToArrow(
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar, false));
return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;

import java.util.Calendar;
import java.util.Map;

import org.apache.arrow.memory.BaseAllocator;

Expand All @@ -28,16 +29,29 @@
* <p>
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
* and the calendar is used to define the time zone of any {@link org.apahe.arrow.vector.pojo.ArrowType.Timestamp}
* fields that are created during the conversion.
* fields that are created during the conversion. Neither field may be <code>null</code>.
* </p>
* <p>
* Neither field may be <code>null</code>.
* If the <code>includeMetadata</code> flag is set, the Arrow field metadata will contain information
* from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the
* {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding
* {@link org.apache.arrow.vector.FieldVector}.
* </p>
* <p>
* If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding
* {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type
* information cannot be retrieved from all JDBC implementations (H2 for example, returns
* {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index
* or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion.
* </p>
*/
public final class JdbcToArrowConfig {

private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;

/**
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
Expand All @@ -46,20 +60,47 @@ public final class JdbcToArrowConfig {
*
* @param allocator The memory allocator to construct the Arrow vectors with.
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
*/
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");

this.allocator = allocator;
this.calendar = calendar;
this.includeMetadata = false;
this.arraySubTypesByColumnIndex = null;
this.arraySubTypesByColumnName = null;
}

/**
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
* is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
* Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>.
*
* @param allocator The memory allocator to construct the Arrow vectors with.
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
* @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based).
* @param arraySubTypesByColumnName The type of the JDBC array at the column name.
*/
JdbcToArrowConfig(
BaseAllocator allocator,
Calendar calendar,
boolean includeMetadata,
Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex,
Map<String, JdbcFieldInfo> arraySubTypesByColumnName) {

this(allocator, calendar);

this.includeMetadata = includeMetadata;
this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex;
this.arraySubTypesByColumnName = arraySubTypesByColumnName;
}

/**
* The calendar to use when defining Arrow Timestamp fields
* and retrieving {@link Date}, {@link Time}, or {@link Timestamp}
* data types from the {@link ResultSet}, or <code>null</code> if not converting.
*
* @return the calendar.
*/
public Calendar getCalendar() {
Expand All @@ -82,4 +123,32 @@ public BaseAllocator getAllocator() {
public boolean shouldIncludeMetadata() {
return includeMetadata;
}

/**
* Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index.
*
* @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type.
* @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined.
*/
public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) {
if (arraySubTypesByColumnIndex == null) {
return null;
} else {
return arraySubTypesByColumnIndex.get(index);
}
}

/**
* Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name.
*
* @param index The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type.
* @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined.
*/
public JdbcFieldInfo getArraySubTypeByColumnName(String name) {
if (arraySubTypesByColumnName == null) {
return null;
} else {
return arraySubTypesByColumnName.get(name);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;

import java.util.Calendar;
import java.util.Map;

import org.apache.arrow.memory.BaseAllocator;

Expand All @@ -30,6 +31,8 @@ public class JdbcToArrowConfigBuilder {
private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;

/**
* Default constructor for the <code>JdbcToArrowConfigBuilder}</code>.
Expand All @@ -40,6 +43,8 @@ public JdbcToArrowConfigBuilder() {
this.allocator = null;
this.calendar = null;
this.includeMetadata = false;
this.arraySubTypesByColumnIndex = null;
this.arraySubTypesByColumnName = null;
}

/**
Expand Down Expand Up @@ -126,6 +131,29 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
return this;
}

/**
* Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
* The column index is 1-based, to match the JDBC column index.
*
* @param map The mapping.
* @return This instance of the <code>JdbcToArrowConfig</code>, for chaining.
*/
public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) {
this.arraySubTypesByColumnIndex = map;
return this;
}

/**
* Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
*
* @param map The mapping.
* @return This instance of the <code>JdbcToArrowConfig</code>, for chaining.
*/
public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) {
this.arraySubTypesByColumnName = map;
return this;
}

/**
* This builds the {@link JdbcToArrowConfig} from the provided
* {@link BaseAllocator} and {@link Calendar}.
Expand All @@ -134,6 +162,11 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
* @throws NullPointerException if either the allocator or calendar was not set.
*/
public JdbcToArrowConfig build() {
return new JdbcToArrowConfig(allocator, calendar, includeMetadata);
return new JdbcToArrowConfig(
allocator,
calendar,
includeMetadata,
arraySubTypesByColumnIndex,
arraySubTypesByColumnName);
}
}
Loading

0 comments on commit 5aaf700

Please sign in to comment.