diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java new file mode 100644 index 00000000000..c4ea55d47d8 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; + +/** + * Defines the schema of a tuple: either the top-level row or a nested + * "map" (really structure). A schema is a collection of columns (backed + * by vectors in the loader itself.) Columns are accessible by name or + * index. New columns may be added at any time; the new column takes the + * next available index. + */ + +public class TupleSchema implements TupleMetadata { + + public static abstract class BaseColumnMetadata implements ColumnMetadata { + private final int index; + private final TupleSchema parent; + protected final MaterializedField schema; + + public BaseColumnMetadata(int index, TupleSchema parent, MaterializedField schema) { + this.index = index; + this.parent = parent; + this.schema = schema; + } + + @Override + public abstract StructureType structureType(); + @Override + public abstract TupleMetadata mapSchema(); + @Override + public int index() { return index; } + @Override + public MaterializedField schema() { return schema; } + @Override + public String name() { return schema.getName(); } + @Override + public MajorType majorType() { return schema.getType(); } + @Override + public MinorType type() { return schema.getType().getMinorType(); } + @Override + public DataMode mode() { return schema.getDataMode(); } + @Override + public TupleMetadata parent() { return parent; } + public MapColumnMetadata parentMap() { return parent.map(); } + + @Override + public String fullName( ) { + MapColumnMetadata parentMap = parentMap(); + if (parentMap == null) { + return name(); + } else { + return parentMap.fullName() + "." 
+ name(); + } + } + + @Override + public boolean isEquivalent(ColumnMetadata other) { + return schema.isEquivalent(other.schema()); + } + } + + public static class PrimitiveColumnMetadata extends BaseColumnMetadata { + + public PrimitiveColumnMetadata(int index, TupleSchema parent, + MaterializedField schema) { + super(index, parent, schema); + } + + @Override + public StructureType structureType() { return StructureType.PRIMITIVE; } + @Override + public TupleMetadata mapSchema() { return null; } + } + + public static class MapColumnMetadata extends BaseColumnMetadata { + private final TupleMetadata mapSchema; + + public MapColumnMetadata(int index, TupleSchema parent, MaterializedField schema) { + super(index, parent, schema); + mapSchema = new TupleSchema(this); + for (MaterializedField child : schema.getChildren()) { + mapSchema.add(child); + } + } + + @Override + public StructureType structureType() { return StructureType.TUPLE; } + @Override + public TupleMetadata mapSchema() { return mapSchema; } + } + + private final MapColumnMetadata parentMap; + private final TupleNameSpace nameSpace = new TupleNameSpace<>(); + + public TupleSchema() { this((MapColumnMetadata) null); } + + public TupleSchema(MapColumnMetadata parentMap) { + this.parentMap = parentMap; + } + + public static TupleMetadata fromFields(MapColumnMetadata parent, Iterable fields) { + TupleMetadata tuple = new TupleSchema(parent); + for (MaterializedField field : fields) { + tuple.add(field); + } + return tuple; + } + + public static TupleMetadata fromFields(Iterable fields) { + return fromFields(null, fields); + } + + public TupleMetadata copy() { + TupleMetadata tuple = new TupleSchema(); + for (ColumnMetadata md : this) { + tuple.add(md.schema()); + } + return tuple; + } + + @Override + public void add(MaterializedField field) { + int index = nameSpace.count(); + ColumnMetadata md; + if (field.getType().getMinorType() == MinorType.MAP) { + md = new MapColumnMetadata(index, this, field); + } else { + md = new PrimitiveColumnMetadata(index, this, field); + } + nameSpace.add(field.getName(), md); + } + + @Override + public MaterializedField column(String name) { + ColumnMetadata md = metadata(name); + return md == null ? null : md.schema(); + } + + @Override + public ColumnMetadata metadata(String name) { + return nameSpace.get(name); + } + + @Override + public int index(String name) { + return nameSpace.indexOf(name); + } + + @Override + public MaterializedField column(int index) { + ColumnMetadata md = metadata(index); + return md == null ? null : md.schema(); + } + + @Override + public ColumnMetadata metadata(int index) { + return nameSpace.get(index); + } + + public MapColumnMetadata map() { return parentMap; } + @Override + public int size() { return nameSpace.count(); } + + @Override + public boolean isEmpty() { return nameSpace.count( ) == 0; } + + @Override + public Iterator iterator() { + return nameSpace.iterator(); + } + + @Override + public boolean isEquivalent(TupleMetadata other) { + TupleSchema otherSchema = (TupleSchema) other; + if (nameSpace.count() != otherSchema.nameSpace.count()) { + return false; + } + for (int i = 0; i < nameSpace.count(); i++) { + if (! 
nameSpace.get(i).isEquivalent(otherSchema.nameSpace.get(i))) { + return false; + } + } + return true; + } + + @Override + public List toFieldList() { + List cols = new ArrayList<>(); + for (ColumnMetadata md : nameSpace) { + cols.add(md.schema()); + } + return cols; + } + + public BatchSchema toBatchSchema(SelectionVectorMode svMode) { + return new BatchSchema(svMode, toFieldList()); + } +} diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java index 3392a216099..b5b8804f61d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java @@ -40,6 +40,7 @@ import org.apache.drill.exec.store.sys.PersistentStoreProvider; import org.apache.drill.exec.util.AssertionUtil; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -51,7 +52,8 @@ public class SystemOptionManager extends BaseOptionManager implements OptionManager, AutoCloseable { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(SystemOptionManager.class); - private static final CaseInsensitiveMap VALIDATORS; + @VisibleForTesting + public static final CaseInsensitiveMap VALIDATORS; static { final OptionValidator[] validators = new OptionValidator[]{ diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java b/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java index 05670c54ca2..3b1e3782179 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java @@ -17,7 +17,6 @@ */ package org.apache.drill.exec.cache; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.BufferedInputStream; @@ -35,7 +34,7 @@ import org.apache.drill.test.OperatorFixture; import org.apache.drill.test.rowSet.RowSet; import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; -import org.apache.drill.test.rowSet.RowSet.RowSetWriter; +import org.apache.drill.test.rowSet.RowSetWriter; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetComparison; import org.apache.drill.test.rowSet.RowSetUtilities; @@ -76,7 +75,7 @@ public SingleRowSet makeNullableRowSet(BatchSchema schema, int rowCount) { if (i % 2 == 0) { RowSetUtilities.setFromInt(writer, 0, i); } else { - writer.column(0).setNull(); + writer.scalar(0).setNull(); } writer.save(); } @@ -169,9 +168,9 @@ public void testTypes() throws IOException { private SingleRowSet buildMapSet(BatchSchema schema) { return fixture.rowSetBuilder(schema) - .add(1, 100, "first") - .add(2, 200, "second") - .add(3, 300, "third") + .add(1, new Object[] {100, "first"}) + .add(2, new Object[] {200, "second"}) + .add(3, new Object[] {300, "third"}) .build(); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java index 1a4d4b21956..9eda4c32603 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java +++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java @@ -33,11 +33,11 @@ import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.TupleMetadata; import org.apache.drill.test.OperatorFixture; import org.apache.drill.test.rowSet.DirectRowSet; import org.apache.drill.test.rowSet.RowSet; import org.apache.drill.test.rowSet.RowSetComparison; -import org.apache.drill.test.rowSet.RowSetSchema; import org.apache.drill.test.rowSet.SchemaBuilder; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; @@ -92,7 +92,7 @@ public void addOutput(SingleRowSet output) { public void run() throws Exception { PriorityQueueCopierWrapper copier = makeCopier(fixture, sortOrder, nullOrder); List batches = new ArrayList<>(); - RowSetSchema schema = null; + TupleMetadata schema = null; for (SingleRowSet rowSet : rowSets) { batches.add(new BatchGroup.InputBatch(rowSet.container(), rowSet.getSv2(), fixture.allocator(), rowSet.size())); @@ -103,7 +103,7 @@ public void run() throws Exception { int rowCount = outputRowCount(); VectorContainer dest = new VectorContainer(); @SuppressWarnings("resource") - BatchMerger merger = copier.startMerge(schema.toBatchSchema(SelectionVectorMode.NONE), + BatchMerger merger = copier.startMerge(new BatchSchema(SelectionVectorMode.NONE, schema.toFieldList()), batches, dest, rowCount); verifyResults(merger, dest); @@ -121,7 +121,7 @@ public int outputRowCount() { protected void verifyResults(BatchMerger merger, VectorContainer dest) { for (RowSet expectedSet : expected) { assertTrue(merger.next()); - RowSet rowSet = new DirectRowSet(fixture.allocator(), dest); + RowSet rowSet = DirectRowSet.fromContainer(fixture.allocator(), dest); new RowSetComparison(expectedSet) .verifyAndClearAll(rowSet); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java index 0050747b665..0014c4f68b1 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java @@ -28,15 +28,13 @@ import org.apache.drill.exec.physical.impl.xsort.managed.SortTestUtilities.CopierTester; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.VectorContainer; -import org.apache.drill.test.DrillTest; import org.apache.drill.test.OperatorFixture; +import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; -import org.apache.drill.test.rowSet.RowSet.RowSetWriter; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetUtilities; +import org.apache.drill.test.rowSet.RowSetWriter; import org.apache.drill.test.rowSet.SchemaBuilder; -import org.junit.AfterClass; -import org.junit.BeforeClass; import org.junit.Test; /** @@ -48,19 +46,7 @@ * then additional tests should be added to re-validate the code. 
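The `SortTestUtilities` change above swaps `RowSetSchema` for the new `TupleMetadata` interface and rebuilds the `BatchSchema` by hand. For context, a minimal sketch of that round trip using only methods this patch introduces (`TupleSchema.fromFields`, `metadata`, `toFieldList`); the column name is illustrative:

```java
// BatchSchema implements Iterable<MaterializedField>, so it can seed a TupleSchema.
TupleMetadata tuple = TupleSchema.fromFields(batchSchema);

// Columns are now addressable by name or index, with metadata attached.
ColumnMetadata keyCol = tuple.metadata("key");

// Convert back for APIs that still expect a BatchSchema.
BatchSchema rebuilt = new BatchSchema(SelectionVectorMode.NONE, tuple.toFieldList());
```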
*/ -public class TestCopier extends DrillTest { - - public static OperatorFixture fixture; - - @BeforeClass - public static void setup() { - fixture = OperatorFixture.builder().build(); - } - - @AfterClass - public static void tearDown() throws Exception { - fixture.close(); - } +public class TestCopier extends SubOperatorTest { @Test public void testEmptyInput() throws Exception { @@ -129,7 +115,7 @@ public static SingleRowSet makeDataSet(BatchSchema schema, int first, int step, int value = first; for (int i = 0; i < count; i++, value += step) { RowSetUtilities.setFromInt(writer, 0, value); - writer.column(1).setString(Integer.toString(value)); + writer.scalar(1).setString(Integer.toString(value)); writer.save(); } writer.done(); @@ -354,22 +340,22 @@ public void testMapType(OperatorFixture fixture) throws Exception { CopierTester tester = new CopierTester(fixture); tester.addInput(fixture.rowSetBuilder(schema) - .add(1, 10, 100) - .add(5, 50, 500) + .add(1, new Object[] {10, new Object[] {100}}) + .add(5, new Object[] {50, new Object[] {500}}) .withSv2() .build()); tester.addInput(fixture.rowSetBuilder(schema) - .add(2, 20, 200) - .add(6, 60, 600) + .add(2, new Object[] {20, new Object[] {200}}) + .add(6, new Object[] {60, new Object[] {600}}) .withSv2() .build()); tester.addOutput(fixture.rowSetBuilder(schema) - .add(1, 10, 100) - .add(2, 20, 200) - .add(5, 50, 500) - .add(6, 60, 600) + .add(1, new Object[] {10, new Object[] {100}}) + .add(2, new Object[] {20, new Object[] {200}}) + .add(5, new Object[] {50, new Object[] {500}}) + .add(6, new Object[] {60, new Object[] {600}}) .build()); tester.run(); diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java index e249c197ea6..24dde4cad0e 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java @@ -45,8 +45,8 @@ import org.apache.drill.test.rowSet.IndirectRowSet; import org.apache.drill.test.rowSet.RowSet; import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; -import org.apache.drill.test.rowSet.RowSet.RowSetReader; -import org.apache.drill.test.rowSet.RowSet.RowSetWriter; +import org.apache.drill.test.rowSet.RowSetReader; +import org.apache.drill.test.rowSet.RowSetWriter; import org.apache.drill.test.rowSet.RowSetBuilder; import org.apache.drill.test.rowSet.RowSetComparison; import org.apache.drill.test.rowSet.SchemaBuilder; @@ -192,9 +192,9 @@ private static RowSet toRowSet(OperatorFixture fixture, SortResults results, Vec if (results.getSv4() != null) { return new HyperRowSetImpl(fixture.allocator(), dest, results.getSv4()); } else if (results.getSv2() != null) { - return new IndirectRowSet(fixture.allocator(), dest, results.getSv2()); + return IndirectRowSet.fromContainer(fixture.allocator(), dest, results.getSv2()); } else { - return new DirectRowSet(fixture.allocator(), dest); + return DirectRowSet.fromContainer(fixture.allocator(), dest); } } @@ -297,7 +297,7 @@ public void testTwoBatches() throws Exception { /** * Crude-but-effective data generator that produces pseudo-random data - * that can be easily verified. The pseudo-random data is generate by the + * that can be easily verified. The pseudo-random data is generated by the * simple means of incrementing a counter using a random value, and wrapping. 
* This ensures we visit each value twice, and that the sorted output will * be a continuous run of numbers in proper order. @@ -384,7 +384,7 @@ public void validate(RowSet output) { RowSetReader reader = output.reader(); while (reader.next()) { assertEquals("Value of " + batchCount + ":" + rowCount, - rowCount, reader.column(0).getInt()); + rowCount, reader.scalar(0).getInt()); rowCount++; } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java index 9da8968995c..be0b7052b1a 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java @@ -31,20 +31,18 @@ import org.apache.drill.exec.ops.OperExecContext; import org.apache.drill.exec.physical.config.Sort; import org.apache.drill.exec.record.BatchSchema; -import org.apache.drill.test.DrillTest; import org.apache.drill.test.OperatorFixture; +import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet; import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; -import org.apache.drill.test.rowSet.RowSet.RowSetReader; -import org.apache.drill.test.rowSet.RowSet.RowSetWriter; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetBuilder; import org.apache.drill.test.rowSet.RowSetComparison; +import org.apache.drill.test.rowSet.RowSetReader; import org.apache.drill.test.rowSet.RowSetUtilities; +import org.apache.drill.test.rowSet.RowSetWriter; import org.apache.drill.test.rowSet.SchemaBuilder; import org.joda.time.Period; -import org.junit.AfterClass; -import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; @@ -54,19 +52,7 @@ * Tests the generated per-batch sort code via its wrapper layer. 
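The data generator described in `TestSortImpl` above uses a standard trick: advancing a counter by a fixed step, modulo the range, visits every value in the range exactly once per cycle when the step and the range are co-prime. A standalone illustration (not Drill code):

```java
// Step 7 is co-prime with range 10, so ten increments touch all ten values once.
int range = 10;
int step = 7;
int value = 0;
for (int i = 0; i < range; i++) {
  System.out.print(value + " ");   // prints: 0 7 4 1 8 5 2 9 6 3
  value = (value + step) % range;
}
```

Sorting the emitted values therefore yields the continuous run 0..9 that the validator checks row by row.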
*/ -public class TestSorter extends DrillTest { - - public static OperatorFixture fixture; - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - fixture = OperatorFixture.builder().build(); - } - - @AfterClass - public static void tearDownAfterClass() throws Exception { - fixture.close(); - } +public class TestSorter extends SubOperatorTest { public static Sort makeSortConfig(String key, String sortOrder, String nullOrder) { FieldReference expr = FieldReference.getWithQuotedRef(key); @@ -204,11 +190,11 @@ public SingleRowSet makeDataSet(BufferAllocator allocator, BatchSchema schema, D for (int i = 0; i < items.length; i++) { DataItem item = items[i]; if (nullable && item.isNull) { - writer.column(0).setNull(); + writer.scalar(0).setNull(); } else { RowSetUtilities.setFromInt(writer, 0, item.key); } - writer.column(1).setString(Integer.toString(item.value)); + writer.scalar(1).setString(Integer.toString(item.value)); writer.save(); } writer.done(); @@ -218,7 +204,7 @@ public SingleRowSet makeDataSet(BufferAllocator allocator, BatchSchema schema, D private void verify(RowSet actual) { DataItem expected[] = Arrays.copyOf(data, data.length); doSort(expected); - RowSet expectedRows = makeDataSet(actual.allocator(), actual.schema().batch(), expected); + RowSet expectedRows = makeDataSet(actual.allocator(), actual.batchSchema(), expected); doVerify(expected, expectedRows, actual); } @@ -380,7 +366,7 @@ private void verify(SingleRowSet output) { int prevMonths = 0; long prevMs = 0; while (reader.next()) { - Period period = reader.column(0).getPeriod().normalizedStandard(); + Period period = reader.scalar(0).getPeriod().normalizedStandard(); int years = period.getYears(); assertTrue(prevYears <= years); if (prevYears != years) { diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/ClusterTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/ClusterTest.java index e204fded5e7..fc3b6b93e04 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/ClusterTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/ClusterTest.java @@ -21,7 +21,6 @@ import org.apache.drill.TestBuilder; import org.apache.drill.common.AutoCloseables; -import org.apache.drill.test.DrillTest; import org.junit.AfterClass; /** diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java index a770d3e0232..cc5469e68f5 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java @@ -164,8 +164,8 @@ public void fourthTest() throws Exception { .maxParallelization(1) // Set some session options .sessionOption(ExecConstants.MAX_QUERY_MEMORY_PER_NODE_KEY, 2L * 1024 * 1024 * 1024) - .sessionOption(PlannerSettings.EXCHANGE.getOptionName(), true) - .sessionOption(PlannerSettings.HASHAGG.getOptionName(), false) + .sessionOption(PlannerSettings.EXCHANGE, true) + .sessionOption(PlannerSettings.HASHAGG, false) ; try (LogFixture logs = logBuilder.build(); @@ -175,6 +175,9 @@ public void fourthTest() throws Exception { cluster.defineWorkspace("dfs", "data", "/tmp/drill-test", "psv"); String sql = "select * from `dfs.data`.`example.tbl` order by columns[0]"; QuerySummary results = client.queryBuilder().sql(sql).run(); + + // Example of explaining the query plan as JSON. 
+ System.out.println(client.queryBuilder().sql(sql).explainJson()); assertEquals( 2, results.recordCount() ); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/FixtureBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/FixtureBuilder.java index b305609c147..519ad4d04a9 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/FixtureBuilder.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/FixtureBuilder.java @@ -24,6 +24,7 @@ import org.apache.drill.exec.ExecConstants; import org.apache.drill.exec.ZookeeperHelper; +import org.apache.drill.exec.server.options.OptionValidator; /** * Build a Drillbit and client with the options provided. The simplest @@ -141,6 +142,10 @@ public FixtureBuilder sessionOption(String key, Object value) { return this; } + public FixtureBuilder sessionOption(OptionValidator key, Object value) { + return sessionOption(key.getOptionName(), value); + } + /** * Provide a system option to be set once the Drillbit * is started. @@ -159,6 +164,10 @@ public FixtureBuilder systemOption(String key, Object value) { return this; } + public FixtureBuilder systemOption(OptionValidator key, Object value) { + return systemOption(key.getOptionName(), value); + } + /** * Set the maximum parallelization (max width per node). Defaults * to 2. diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java b/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java index 976812c5950..34abecc9f48 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java @@ -41,11 +41,15 @@ import org.apache.drill.exec.ops.OperatorStatReceiver; import org.apache.drill.exec.physical.base.PhysicalOperator; import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleSchema; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.server.options.BaseOptionManager; import org.apache.drill.exec.server.options.OptionSet; +import org.apache.drill.exec.server.options.OptionValidator; import org.apache.drill.exec.server.options.OptionValue; import org.apache.drill.exec.server.options.OptionValue.OptionType; +import org.apache.drill.exec.server.options.SystemOptionManager; import org.apache.drill.exec.testing.ExecutionControls; import org.apache.drill.test.rowSet.DirectRowSet; import org.apache.drill.test.rowSet.HyperRowSetImpl; @@ -112,14 +116,15 @@ public OperatorFixture build() { public static class TestOptionSet extends BaseOptionManager { + private boolean withDefaults; private Map values = new HashMap<>(); public TestOptionSet() { - // Crashes in FunctionImplementationRegistry if not set - set(ExecConstants.CAST_TO_NULLABLE_NUMERIC, false); - // Crashes in the Dynamic UDF code if not disabled - set(ExecConstants.USE_DYNAMIC_UDFS_KEY, false); -// set(ExecConstants.CODE_GEN_EXP_IN_METHOD_SIZE_VALIDATOR, false); + this(true); + } + + public TestOptionSet(boolean withDefaults) { + this.withDefaults = withDefaults; } public void set(String key, int value) { @@ -144,7 +149,14 @@ public void set(String key, String value) { @Override public OptionValue getOption(String name) { - return values.get(name); + OptionValue value = values.get(name); + if (value == null && withDefaults) { + OptionValidator validator = SystemOptionManager.VALIDATORS.get(name); + if (validator != null) { + value = 
SystemOptionManager.VALIDATORS.get(name).getDefault(); + } + } + return value; } } @@ -310,11 +322,19 @@ public OperExecContext newOperExecContext(PhysicalOperator opDefn) { } public RowSetBuilder rowSetBuilder(BatchSchema schema) { + return rowSetBuilder(TupleSchema.fromFields(schema)); + } + + public RowSetBuilder rowSetBuilder(TupleMetadata schema) { return new RowSetBuilder(allocator, schema); } public ExtendableRowSet rowSet(BatchSchema schema) { - return new DirectRowSet(allocator, schema); + return DirectRowSet.fromSchema(allocator, schema); + } + + public ExtendableRowSet rowSet(TupleMetadata schema) { + return DirectRowSet.fromSchema(allocator, schema); } public RowSet wrap(VectorContainer container) { @@ -322,9 +342,9 @@ public RowSet wrap(VectorContainer container) { case FOUR_BYTE: return new HyperRowSetImpl(allocator(), container, container.getSelectionVector4()); case NONE: - return new DirectRowSet(allocator(), container); + return DirectRowSet.fromContainer(allocator(), container); case TWO_BYTE: - return new IndirectRowSet(allocator(), container); + return IndirectRowSet.fromContainer(allocator(), container); default: throw new IllegalStateException( "Unexpected selection mode" ); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java index f2a27c8de4a..b2340708217 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java @@ -50,7 +50,7 @@ import org.apache.drill.test.BufferingQueryEventListener.QueryEvent; import org.apache.drill.test.rowSet.DirectRowSet; import org.apache.drill.test.rowSet.RowSet; -import org.apache.drill.test.rowSet.RowSet.RowSetReader; +import org.apache.drill.test.rowSet.RowSetReader; import com.google.common.base.Preconditions; @@ -314,7 +314,7 @@ public DirectRowSet rowSet() throws RpcException { dataBatch.release(); VectorContainer container = loader.getContainer(); container.setRecordCount(loader.getRecordCount()); - return new DirectRowSet(client.allocator(), container); + return DirectRowSet.fromContainer(client.allocator(), container); } catch (SchemaChangeException e) { throw new IllegalStateException(e); } @@ -336,7 +336,7 @@ public long singletonLong() throws RpcException { } RowSetReader reader = rowSet.reader(); reader.next(); - long value = reader.column(0).getLong(); + long value = reader.scalar(0).getLong(); rowSet.clear(); return value; } @@ -357,7 +357,7 @@ public int singletonInt() throws RpcException { } RowSetReader reader = rowSet.reader(); reader.next(); - int value = reader.column(0).getInt(); + int value = reader.scalar(0).getInt(); rowSet.clear(); return value; } @@ -379,10 +379,10 @@ public String singletonString() throws RpcException { RowSetReader reader = rowSet.reader(); reader.next(); String value; - if (reader.column(0).isNull()) { + if (reader.scalar(0).isNull()) { value = null; } else { - value = reader.column(0).getString(); + value = reader.scalar(0).getString(); } rowSet.clear(); return value; diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java index a32262a9e4a..5ce88da8095 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java @@ -19,12 +19,14 @@ import 
org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleMetadata.ColumnMetadata; import org.apache.drill.exec.record.VectorAccessible; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.vector.SchemaChangeCallBack; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnAccessor.RowIndex; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader; -import org.apache.drill.exec.vector.accessor.impl.TupleReaderImpl; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.reader.AbstractObjectReader; +import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter; /** * Basic implementation of a row set for both the single and multiple @@ -40,30 +42,18 @@ public abstract class AbstractRowSet implements RowSet { * must call next() to advance to the first row. */ - public static abstract class RowSetIndex implements RowIndex { - protected int rowIndex = -1; - - public int position() { return rowIndex; } - public abstract boolean next(); - public abstract int size(); - public abstract boolean valid(); - public void set(int index) { rowIndex = index; } - } - - /** - * Bounded (read-only) version of the row set index. When reading, - * the row count is fixed, and set here. - */ - - public static abstract class BoundedRowIndex extends RowSetIndex { + public static abstract class RowSetReaderIndex implements ColumnReaderIndex { + protected int rowIndex = -1; protected final int rowCount; - public BoundedRowIndex(int rowCount) { + public RowSetReaderIndex(int rowCount) { this.rowCount = rowCount; } - @Override + public int position() { return rowIndex; } + public void set(int index) { rowIndex = index; } + public boolean next() { if (++rowIndex < rowCount ) { return true; @@ -73,76 +63,142 @@ public boolean next() { } } - @Override public int size() { return rowCount; } - @Override public boolean valid() { return rowIndex < rowCount; } } /** - * Reader implementation for a row set. + * Common interface to access a tuple backed by a vector container or a + * map vector. */ - public class RowSetReaderImpl extends TupleReaderImpl implements RowSetReader { + public interface TupleStorage { + TupleMetadata tupleSchema(); + int size(); + AbstractRowSet.ColumnStorage storage(int index); + AbstractObjectReader[] readers(); + AbstractObjectWriter[] writers(); + void allocate(BufferAllocator allocator, int rowCount); + } + + /** + * Represents a column within a tuple, including the tuple metadata + * and column storage. A wrapper around a vector to include metadata + * and handle nested tuples. + */ - protected final RowSetIndex index; + public static abstract class ColumnStorage { + protected final ColumnMetadata schema; - public RowSetReaderImpl(TupleSchema schema, RowSetIndex index, AbstractColumnReader[] readers) { - super(schema, readers); - this.index = index; + public ColumnStorage(ColumnMetadata schema) { + this.schema = schema; } - @Override - public boolean next() { return index.next(); } + public ColumnMetadata columnSchema() { return schema; } + public abstract AbstractObjectReader reader(); + public abstract AbstractObjectWriter writer(); + public abstract void allocate(BufferAllocator allocator, int rowCount); + } + + + /** + * Wrapper around a map vector to provide both a column and tuple view of + * a single or repeated map. 
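Returning to the `OperatorFixture` change a few hunks back: `TestOptionSet` now falls back to the `SystemOptionManager` validators when a test has not set an option explicitly. A sketch of the resulting behavior (the keys are real Drill constants, but their use here is illustrative):

```java
TestOptionSet options = new TestOptionSet();  // withDefaults == true

// Options a test sets explicitly still win.
options.set(ExecConstants.MAX_QUERY_MEMORY_PER_NODE_KEY, 2 * 1024 * 1024);

// Unset options now return the validator's default rather than null, so
// tests no longer need to pre-set options (such as the dynamic-UDF switch)
// just to avoid crashes in the function registry.
OptionValue udfs = options.getOption(ExecConstants.USE_DYNAMIC_UDFS_KEY);
```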
+ */ + + public static abstract class BaseMapColumnStorage extends ColumnStorage implements TupleStorage { + + protected final ColumnStorage columns[]; + + public BaseMapColumnStorage(ColumnMetadata schema, ColumnStorage columns[]) { + super(schema); + this.columns = columns; + } @Override - public boolean valid() { return index.valid(); } + public int size() { return schema.mapSchema().size(); } @Override - public int index() { return index.position(); } + public TupleMetadata tupleSchema() { return schema.mapSchema(); } @Override - public int size() { return index.size(); } + public ColumnStorage storage(int index) { return columns[index]; } + } + + + /** + * Wrapper around a vector container to map the vector container into the common + * tuple format. + */ + + public static abstract class BaseRowStorage implements TupleStorage { + private final TupleMetadata schema; + private final VectorContainer container; + private final ColumnStorage columns[]; + + public BaseRowStorage(TupleMetadata schema, VectorContainer container, ColumnStorage columns[]) { + this.schema = schema; + this.container = container; + this.columns = columns; + } @Override - public int rowIndex() { return index.index(); } + public int size() { return schema.size(); } @Override - public int batchIndex() { return index.batch(); } + public TupleMetadata tupleSchema() { return schema; } + + public VectorContainer container() { return container; } @Override - public void set(int index) { this.index.set(index); } + public ColumnStorage storage(int index) { return columns[index]; } + + protected static AbstractObjectReader[] readers(AbstractRowSet.TupleStorage storage) { + AbstractObjectReader[] readers = new AbstractObjectReader[storage.tupleSchema().size()]; + for (int i = 0; i < readers.length; i++) { + readers[i] = storage.storage(i).reader(); + } + return readers; + } + + protected static AbstractObjectWriter[] writers(AbstractRowSet.TupleStorage storage) { + AbstractObjectWriter[] writers = new AbstractObjectWriter[storage.size()]; + for (int i = 0; i < writers.length; i++) { + writers[i] = storage.storage(i).writer(); + } + return writers; + } } protected final BufferAllocator allocator; - protected final RowSetSchema schema; - protected final VectorContainer container; protected SchemaChangeCallBack callBack = new SchemaChangeCallBack(); + protected final BaseRowStorage rowStorage; - public AbstractRowSet(BufferAllocator allocator, BatchSchema schema, VectorContainer container) { + + public AbstractRowSet(BufferAllocator allocator, BaseRowStorage rowStorage) { this.allocator = allocator; - this.schema = new RowSetSchema(schema); - this.container = container; + this.rowStorage = rowStorage; } @Override - public VectorAccessible vectorAccessible() { return container; } + public VectorAccessible vectorAccessible() { return container(); } @Override - public VectorContainer container() { return container; } + public VectorContainer container() { return rowStorage.container(); } @Override - public int rowCount() { return container.getRecordCount(); } + public int rowCount() { return container().getRecordCount(); } @Override public void clear() { + VectorContainer container = container(); container.zeroVectors(); container.setRecordCount(0); } @Override - public RowSetSchema schema() { return schema; } + public TupleMetadata schema() { return rowStorage.tupleSchema(); } @Override public BufferAllocator allocator() { return allocator; } @@ -158,7 +214,5 @@ public int size() { } @Override - public BatchSchema batchSchema() { 
- return container.getSchema(); - } + public BatchSchema batchSchema() { return container().getSchema(); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java index d8176dedec0..b7df8f31f06 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java @@ -17,23 +17,27 @@ */ package org.apache.drill.test.rowSet; -import org.apache.drill.common.types.TypeProtos.MajorType; -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer; -import org.apache.drill.exec.record.BatchSchema; -import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleMetadata.ColumnMetadata; +import org.apache.drill.exec.record.TupleMetadata.StructureType; +import org.apache.drill.exec.record.TupleSchema; import org.apache.drill.exec.record.VectorContainer; -import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.AllocationHelper; import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader; import org.apache.drill.exec.vector.accessor.impl.ColumnAccessorFactory; +import org.apache.drill.exec.vector.accessor.reader.AbstractObjectReader; +import org.apache.drill.exec.vector.accessor.reader.MapReader; +import org.apache.drill.exec.vector.accessor.reader.ObjectArrayReader; +import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter; +import org.apache.drill.exec.vector.accessor.writer.MapWriter; +import org.apache.drill.exec.vector.accessor.writer.ObjectArrayWriter; +import org.apache.drill.exec.vector.complex.AbstractMapVector; import org.apache.drill.exec.vector.complex.MapVector; +import org.apache.drill.exec.vector.complex.RepeatedMapVector; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; -import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema; -import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn; -import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema; /** * Base class for row sets backed by a single record batch. @@ -42,150 +46,178 @@ public abstract class AbstractSingleRowSet extends AbstractRowSet implements SingleRowSet { /** - * Internal helper class to organize a set of value vectors for use by the - * row set class. Subclasses either build vectors from a schema, or map an - * existing vector container into the row set structure. The row set - * structure is based on a flattened structure; all vectors appear in - * a single vector array. Maps are set aside in a separate map list. + * Wrapper around a primitive (non-map, non-list) column vector. 
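Throughout these tests, `reader.column(n)` becomes `reader.scalar(n)`: in the new accessor model, scalars, tuples, and arrays are distinct object-reader kinds rather than one flat column list. The reading idiom the updated tests rely on (column indexes and types assumed):

```java
RowSetReader reader = rowSet.reader();
while (reader.next()) {                        // advance to the next row
  int key = reader.scalar(0).getInt();         // primitive column by index
  String label = reader.scalar(1).getString();
  if (reader.scalar(2).isNull()) {             // null checks stay on the scalar
    // handle the null case
  }
}
```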
*/ - public abstract static class StructureBuilder { - protected final PhysicalSchema schema; - protected final BufferAllocator allocator; - protected final ValueVector[] valueVectors; - protected final MapVector[] mapVectors; - protected int vectorIndex; - protected int mapIndex; - - public StructureBuilder(BufferAllocator allocator, RowSetSchema schema) { - this.allocator = allocator; - this.schema = schema.physical(); - FlattenedSchema flatSchema = schema.flatAccess(); - valueVectors = new ValueVector[flatSchema.count()]; - if (flatSchema.mapCount() == 0) { - mapVectors = null; - } else { - mapVectors = new MapVector[flatSchema.mapCount()]; - } + public static class PrimitiveColumnStorage extends ColumnStorage { + protected final ValueVector vector; + + public PrimitiveColumnStorage(ColumnMetadata schema, ValueVector vector) { + super(schema); + this.vector = vector; + } + + @Override + public AbstractObjectReader reader() { + return ColumnAccessorFactory.buildColumnReader(vector); + } + + @Override + public void allocate(BufferAllocator allocator, int rowCount) { + // TODO: Use better estimates + + AllocationHelper.allocate(vector, rowCount, 50, 10); + } + + @Override + public AbstractObjectWriter writer() { + return ColumnAccessorFactory.buildColumnWriter(vector); } } /** - * Create a set of value vectors given a schema, then map them into both - * the value container and the row set structure. + * Wrapper around a map vector to provide both a column and tuple view of + * a single or repeated map. */ - public static class VectorBuilder extends StructureBuilder { + public static class MapColumnStorage extends BaseMapColumnStorage { + + private final AbstractMapVector vector; + + public MapColumnStorage(ColumnMetadata schema, AbstractMapVector vector, ColumnStorage columns[]) { + super(schema, columns); + this.vector = vector; + } - public VectorBuilder(BufferAllocator allocator, RowSetSchema schema) { - super(allocator, schema); + public static MapColumnStorage fromMap(ColumnMetadata schema, AbstractMapVector vector) { + return new MapColumnStorage(schema, vector, buildColumns(schema, vector)); } - public ValueVector[] buildContainer(VectorContainer container) { - for (int i = 0; i < schema.count(); i++) { - LogicalColumn colSchema = schema.column(i); + private static ColumnStorage[] buildColumns(ColumnMetadata schema, AbstractMapVector vector) { + TupleMetadata mapSchema = schema.mapSchema(); + ColumnStorage columns[] = new ColumnStorage[mapSchema.size()]; + for (int i = 0; i < mapSchema.size(); i++) { + ColumnMetadata colSchema = mapSchema.metadata(i); @SuppressWarnings("resource") - ValueVector v = TypeHelper.getNewVector(colSchema.field, allocator, null); - container.add(v); - if (colSchema.field.getType().getMinorType() == MinorType.MAP) { - MapVector mv = (MapVector) v; - mapVectors[mapIndex++] = mv; - buildMap(mv, colSchema.mapSchema); + ValueVector child = vector.getChildByOrdinal(i); + if (colSchema.structureType() == StructureType.TUPLE) { + columns[i] = MapColumnStorage.fromMap(colSchema, (AbstractMapVector) child); } else { - valueVectors[vectorIndex++] = v; + columns[i] = new PrimitiveColumnStorage(colSchema, child); } } - container.buildSchema(SelectionVectorMode.NONE); - return valueVectors; + return columns; } - private void buildMap(MapVector mapVector, PhysicalSchema mapSchema) { - for (int i = 0; i < mapSchema.count(); i++) { - LogicalColumn colSchema = mapSchema.column(i); - MajorType type = colSchema.field.getType(); - Class vectorClass = 
TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode()); - @SuppressWarnings("resource") - ValueVector v = mapVector.addOrGet(colSchema.field.getName(), type, vectorClass); - if (type.getMinorType() == MinorType.MAP) { - MapVector mv = (MapVector) v; - mapVectors[mapIndex++] = mv; - buildMap(mv, colSchema.mapSchema); - } else { - valueVectors[vectorIndex++] = v; - } + @Override + public AbstractObjectReader[] readers() { + return RowStorage.readers(this); + } + + @Override + public AbstractObjectWriter[] writers() { + return RowStorage.writers(this); + } + + @Override + public void allocate(BufferAllocator allocator, int rowCount) { + RowStorage.allocate(this, allocator, rowCount); + } + + @Override + public AbstractObjectWriter writer() { + if (schema.mode() == DataMode.REPEATED) { + RepeatedMapVector repeatedMapVector = (RepeatedMapVector) vector; + AbstractObjectWriter mapWriter = MapWriter.build(columnSchema(), repeatedMapVector, writers()); + return ObjectArrayWriter.build(repeatedMapVector, mapWriter); + } else { + return MapWriter.build(columnSchema(), (MapVector) vector, writers()); } } + + @Override + public AbstractObjectReader reader() { + AbstractObjectReader mapReader = MapReader.build(columnSchema(), readers()); + if (schema.mode() != DataMode.REPEATED) { + return mapReader; + } + return ObjectArrayReader.build((RepeatedMapVector) vector, mapReader); + } } /** - * Build a row set given an existing vector container. In this case, - * the vectors exist and we simply need to pull them out of the container - * and maps and put them into the row set arrays. + * Wrapper around a vector container to map the vector container into the common + * tuple format. */ - public static class VectorMapper extends StructureBuilder { + public static class RowStorage extends BaseRowStorage { - public VectorMapper(BufferAllocator allocator, RowSetSchema schema) { - super(allocator, schema); + public RowStorage(TupleMetadata schema, VectorContainer container, ColumnStorage columns[]) { + super(schema, container, columns); } - public ValueVector[] mapContainer(VectorContainer container) { - for (VectorWrapper w : container) { - @SuppressWarnings("resource") - ValueVector v = w.getValueVector(); - if (v.getField().getType().getMinorType() == MinorType.MAP) { - MapVector mv = (MapVector) v; - mapVectors[mapIndex++] = mv; - buildMap(mv); - } else { - valueVectors[vectorIndex++] = v; - } - } - return valueVectors; + public static RowStorage fromSchema(BufferAllocator allocator, TupleMetadata schema) { + VectorContainer container = RowSetUtilities.buildVectors(allocator, schema); + return new RowStorage(schema, container, buildChildren(schema, container)); + } + + public static RowStorage fromContainer(TupleMetadata schema, VectorContainer container) { + return new RowStorage(schema, container, buildChildren(schema, container)); + } + + public static RowStorage fromContainer(VectorContainer container) { + return fromContainer(TupleSchema.fromFields(container.getSchema()), container); } - private void buildMap(MapVector mapVector) { - for (ValueVector v : mapVector) { - if (v.getField().getType().getMinorType() == MinorType.MAP) { - MapVector mv = (MapVector) v; - mapVectors[mapIndex++] = mv; - buildMap(mv); + private static ColumnStorage[] buildChildren(TupleMetadata schema, VectorContainer container) { + assert schema.size() == container.getNumberOfColumns(); + ColumnStorage colStorage[] = new ColumnStorage[schema.size()]; + for (int i = 0; i < schema.size(); i++) { + ColumnMetadata 
colSchema = schema.metadata(i); + @SuppressWarnings("resource") + ValueVector vector = container.getValueVector(i).getValueVector(); + if (colSchema.structureType() == StructureType.TUPLE) { + colStorage[i] = MapColumnStorage.fromMap(colSchema, (AbstractMapVector) vector); } else { - valueVectors[vectorIndex++] = v; + colStorage[i] = new PrimitiveColumnStorage(colSchema, vector); } } + return colStorage; } - } - /** - * Flattened representation of value vectors using a depth-first - * traversal of maps. Order of vectors here correspond to the column - * indexes used to access columns in a reader or writer. - */ + @Override + public AbstractObjectReader[] readers() { + return readers(this); + } - protected final ValueVector[] valueVectors; + @Override + public AbstractObjectWriter[] writers() { + return writers(this); + } - public AbstractSingleRowSet(BufferAllocator allocator, BatchSchema schema) { - super(allocator, schema, new VectorContainer()); - valueVectors = new VectorBuilder(allocator, super.schema).buildContainer(container); - } + @Override + public void allocate(BufferAllocator allocator, int rowCount) { + allocate(this, allocator, rowCount); + } - public AbstractSingleRowSet(BufferAllocator allocator, VectorContainer container) { - super(allocator, container.getSchema(), container); - valueVectors = new VectorMapper(allocator, super.schema).mapContainer(container); + protected static void allocate(TupleStorage storage, BufferAllocator allocator, int rowCount) { + for (int i = 0; i < storage.size(); i++) { + storage.storage(i).allocate(allocator, rowCount); + } + } } public AbstractSingleRowSet(AbstractSingleRowSet rowSet) { - super(rowSet.allocator, rowSet.schema.batch(), rowSet.container); - valueVectors = rowSet.valueVectors; + super(rowSet.allocator, rowSet.rowStorage); } - @Override - public ValueVector[] vectors() { return valueVectors; } + public AbstractSingleRowSet(BufferAllocator allocator, RowStorage storage) { + super(allocator, storage); + } @Override public int size() { - RecordBatchSizer sizer = new RecordBatchSizer(container); + RecordBatchSizer sizer = new RecordBatchSizer(container()); return sizer.actualSize(); } @@ -197,21 +229,7 @@ public int size() { * (non-map) vectors. 
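A recurring mechanical change in this patch: row sets are no longer built with constructors but with factory methods that name the source of the vectors. The variants visible in these hunks:

```java
// New, empty row set allocated from a schema (BatchSchema or TupleMetadata):
ExtendableRowSet fresh = DirectRowSet.fromSchema(fixture.allocator(), schema);

// Wrap vectors that already exist, with or without an indirection vector:
RowSet direct   = DirectRowSet.fromContainer(fixture.allocator(), container);
RowSet indirect = IndirectRowSet.fromContainer(fixture.allocator(), container, sv2);
```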
*/ - protected RowSetReader buildReader(RowSetIndex rowIndex) { - FlattenedSchema accessSchema = schema().flatAccess(); - ValueVector[] valueVectors = vectors(); - AbstractColumnReader[] readers = new AbstractColumnReader[valueVectors.length]; - for (int i = 0; i < readers.length; i++) { - MinorType type = accessSchema.column(i).getType().getMinorType(); - if (type == MinorType.MAP) { - readers[i] = null; // buildMapAccessor(i); - } else if (type == MinorType.LIST) { - readers[i] = null; // buildListAccessor(i); - } else { - readers[i] = ColumnAccessorFactory.newReader(valueVectors[i].getField().getType()); - readers[i].bind(rowIndex, valueVectors[i]); - } - } - return new RowSetReaderImpl(accessSchema, rowIndex, readers); + protected RowSetReader buildReader(RowSetReaderIndex rowIndex) { + return new RowSetReaderImpl(rowStorage.tupleSchema(), rowIndex, rowStorage.readers()); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java index 29a1702fd5b..6103be33887 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java @@ -20,17 +20,13 @@ import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleSchema; import org.apache.drill.exec.record.VectorAccessible; -import org.apache.drill.exec.record.VectorAccessibleUtilities; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.record.selection.SelectionVector2; -import org.apache.drill.exec.vector.AllocationHelper; -import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnWriter; -import org.apache.drill.exec.vector.accessor.impl.ColumnAccessorFactory; -import org.apache.drill.exec.vector.accessor.impl.TupleWriterImpl; import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; +import org.apache.drill.test.rowSet.RowSetWriterImpl.WriterIndexImpl; /** * Implementation of a single row set with no indirection (selection) @@ -46,118 +42,41 @@ public class DirectRowSet extends AbstractSingleRowSet implements ExtendableRowS * the first. (This is the JDBC RecordSet convention.) */ - private static class DirectRowIndex extends BoundedRowIndex { + private static class DirectRowIndex extends RowSetReaderIndex { public DirectRowIndex(int rowCount) { super(rowCount); } @Override - public int index() { return rowIndex; } + public int vectorIndex() { return rowIndex; } @Override - public int batch() { return 0; } + public int batchIndex() { return 0; } } - /** - * Writer index that points to each row in the row set. The index starts at - * the 0th row and advances one row on each increment. This allows writers to - * start positioned at the first row. Writes happen in the current row. - * Calling next() advances to the next position, effectively saving - * the current row. The most recent row can be abandoned easily simply by not - * calling next(). This means that the number of completed rows is - * the same as the row index. 
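The removed javadoc above describes the writer protocol that survives in the new `RowSetWriterImpl`: the writer starts positioned on the first row, writes happen in place, and `save()` commits the row and advances. In the updated tests the idiom looks like this (schema assumed to be an INT key plus a VARCHAR value):

```java
RowSetWriter writer = rowSet.writer();
for (int i = 0; i < 10; i++) {
  writer.scalar(0).setInt(i);
  writer.scalar(1).setString(Integer.toString(i));
  writer.save();      // commit this row and move to the next
}
writer.done();        // fix the row count; the row set is now readable
```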
- */ - - private static class ExtendableRowIndex extends RowSetIndex { - - private final int maxSize; - - public ExtendableRowIndex(int maxSize) { - this.maxSize = maxSize; - rowIndex = 0; - } - - @Override - public int index() { return rowIndex; } - - @Override - public boolean next() { - if (++rowIndex <= maxSize ) { - return true; - } else { - rowIndex--; - return false; - } - } - - @Override - public int size() { return rowIndex; } - - @Override - public boolean valid() { return rowIndex < maxSize; } - - @Override - public int batch() { return 0; } + private DirectRowSet(BufferAllocator allocator, RowStorage storage) { + super(allocator, storage); } - /** - * Implementation of a row set writer. Only available for newly-created, - * empty, direct, single row sets. Rewriting is not allowed, nor is writing - * to a hyper row set. - */ - - public class RowSetWriterImpl extends TupleWriterImpl implements RowSetWriter { - - private final ExtendableRowIndex index; - private final ExtendableRowSet rowSet; - - protected RowSetWriterImpl(ExtendableRowSet rowSet, TupleSchema schema, ExtendableRowIndex index, AbstractColumnWriter[] writers) { - super(schema, writers); - this.rowSet = rowSet; - this.index = index; - start(); - } - - @Override - public void setRow(Object...values) { - if (! index.valid()) { - throw new IndexOutOfBoundsException("Write past end of row set"); - } - for (int i = 0; i < values.length; i++) { - set(i, values[i]); - } - save(); - } - - @Override - public boolean valid() { return index.valid(); } - - @Override - public int index() { return index.position(); } - - @Override - public void save() { - index.next(); - start(); - } + public DirectRowSet(AbstractSingleRowSet from) { + super(from); + } - @Override - public void done() { - rowSet.setRowCount(index.size()); - } + public static DirectRowSet fromSchema(BufferAllocator allocator, BatchSchema schema) { + return fromSchema(allocator, TupleSchema.fromFields(schema)); } - public DirectRowSet(BufferAllocator allocator, BatchSchema schema) { - super(allocator, schema); + public static DirectRowSet fromSchema(BufferAllocator allocator, TupleMetadata schema) { + return new DirectRowSet(allocator, RowStorage.fromSchema(allocator, schema)); } - public DirectRowSet(BufferAllocator allocator, VectorContainer container) { - super(allocator, container); + public static DirectRowSet fromContainer(BufferAllocator allocator, VectorContainer container) { + return new DirectRowSet(allocator, RowStorage.fromContainer(container)); } - public DirectRowSet(BufferAllocator allocator, VectorAccessible va) { - super(allocator, toContainer(va, allocator)); + public static DirectRowSet fromVectorAccessible(BufferAllocator allocator, VectorAccessible va) { + return fromContainer(allocator, toContainer(va, allocator)); } private static VectorContainer toContainer(VectorAccessible va, BufferAllocator allocator) { @@ -169,15 +88,7 @@ private static VectorContainer toContainer(VectorAccessible va, BufferAllocator @Override public void allocate(int recordCount) { - for (final ValueVector v : valueVectors) { - AllocationHelper.allocate(v, recordCount, 50, 10); - } - } - - @Override - public void setRowCount(int rowCount) { - container.setRecordCount(rowCount); - VectorAccessibleUtilities.setValueCount(container, rowCount); + rowStorage.allocate(allocator, recordCount); } @Override @@ -187,29 +98,12 @@ public RowSetWriter writer() { @Override public RowSetWriter writer(int initialRowCount) { - if (container.hasRecordCount()) { + if 
(container().hasRecordCount()) { throw new IllegalStateException("Row set already contains data"); } allocate(initialRowCount); - return buildWriter(new ExtendableRowIndex(Character.MAX_VALUE)); - } - - /** - * Build writer objects for each column based on the column type. - * - * @param rowIndex the index which points to each row - * @return an array of writers - */ - - protected RowSetWriter buildWriter(ExtendableRowIndex rowIndex) { - ValueVector[] valueVectors = vectors(); - AbstractColumnWriter[] writers = new AbstractColumnWriter[valueVectors.length]; - for (int i = 0; i < writers.length; i++) { - writers[i] = ColumnAccessorFactory.newWriter(valueVectors[i].getField().getType()); - writers[i].bind(rowIndex, valueVectors[i]); - } - TupleSchema accessSchema = schema().hierarchicalAccess(); - return new RowSetWriterImpl(this, accessSchema, rowIndex, writers); + WriterIndexImpl index = new WriterIndexImpl(); + return new RowSetWriterImpl(this, rowStorage.tupleSchema(), index, rowStorage.writers()); } @Override @@ -236,6 +130,6 @@ public SingleRowSet toIndirect() { @Override public RowSet merge(RowSet other) { - return new DirectRowSet(allocator, container().merge(other.container())); + return fromContainer(allocator, container().merge(other.container())); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java index afc2e6e1633..fb719ab214b 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java @@ -17,27 +17,29 @@ */ package org.apache.drill.test.rowSet; -import java.util.ArrayList; -import java.util.List; - -import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; -import org.apache.drill.exec.record.HyperVectorWrapper; -import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleMetadata.ColumnMetadata; +import org.apache.drill.exec.record.TupleMetadata.StructureType; +import org.apache.drill.exec.record.TupleSchema; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.record.VectorWrapper; import org.apache.drill.exec.record.selection.SelectionVector4; import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.exec.vector.accessor.AccessorUtilities; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader.VectorAccessor; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities; import org.apache.drill.exec.vector.accessor.impl.ColumnAccessorFactory; +import org.apache.drill.exec.vector.accessor.reader.AbstractObjectReader; +import org.apache.drill.exec.vector.accessor.reader.MapReader; +import org.apache.drill.exec.vector.accessor.reader.ObjectArrayReader; +import org.apache.drill.exec.vector.accessor.reader.VectorAccessor; +import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter; import org.apache.drill.exec.vector.complex.AbstractMapVector; +import org.apache.drill.test.rowSet.AbstractSingleRowSet.MapColumnStorage; +import 
org.apache.drill.test.rowSet.AbstractSingleRowSet.RowStorage; import org.apache.drill.test.rowSet.RowSet.HyperRowSet; -import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema; -import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn; -import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema; /** * Implements a row set wrapper around a collection of "hyper vectors." @@ -57,7 +59,7 @@ public class HyperRowSetImpl extends AbstractRowSet implements HyperRowSet { * values mapping via an SV4. */ - public static class HyperRowIndex extends BoundedRowIndex { + public static class HyperRowIndex extends RowSetReaderIndex { private final SelectionVector4 sv4; @@ -67,12 +69,12 @@ public HyperRowIndex(SelectionVector4 sv4) { } @Override - public int index() { + public int vectorIndex() { return AccessorUtilities.sv4Index(sv4.get(rowIndex)); } @Override - public int batch( ) { + public int batchIndex( ) { return AccessorUtilities.sv4Batch(sv4.get(rowIndex)); } } @@ -85,143 +87,161 @@ public int batch( ) { public static class HyperVectorAccessor implements VectorAccessor { - private final HyperRowIndex rowIndex; private final ValueVector[] vectors; + private ColumnReaderIndex rowIndex; - public HyperVectorAccessor(HyperVectorWrapper hvw, HyperRowIndex rowIndex) { - this.rowIndex = rowIndex; - vectors = hvw.getValueVectors(); + public HyperVectorAccessor(VectorWrapper vw) { + vectors = vw.getValueVectors(); + } + + @Override + public void bind(ColumnReaderIndex index) { + rowIndex = index; } @Override public ValueVector vector() { - return vectors[rowIndex.batch()]; + return vectors[rowIndex.batchIndex()]; } } /** - * Build a hyper row set by restructuring a hyper vector bundle into a uniform - * shape. Consider this schema:

-   * { a: 10, b: { c: 20, d: { e: 30 } } }
- *

- * The hyper container, with two batches, has this structure:
- *
- *   Batch | a          | b
- *   ------+------------+-----------------------------------------------
- *   0     | Int vector | Map Vector(Int vector, Map Vector(Int vector))
- *   1     | Int vector | Map Vector(Int vector, Map Vector(Int vector))
- *

- * The above table shows that top-level scalar vectors (such as the Int Vector for column - * a) appear "end-to-end" as a hyper-vector. Maps also appear end-to-end. But, the - * contents of the map (column c) do not appear end-to-end. Instead, they appear as - * contents in the map vector. To get to c, one indexes into the map vector, steps inside - * the map to find c and indexes to the right row. - *

- * Similarly, the maps for d do not appear end-to-end, one must step to the right batch - * in b, then step to d. - *

- * Finally, to get to e, one must step - * into the hyper vector for b, then steps to the proper batch, steps to d, step to e - * and finally step to the row within e. This is a very complex, costly indexing scheme - * that differs depending on map nesting depth. - *
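As a concrete sketch of the first step of that scheme, decoding one SV4 entry into a batch number and an in-batch offset uses the AccessorUtilities helpers that HyperRowIndex above already calls; the variable names here are illustrative only:

    // Decode one SV4 entry into (batch, offset).
    int entry  = sv4.get(rowIndex);                  // 4-byte encoded entry
    int batch  = AccessorUtilities.sv4Batch(entry);  // which stacked batch
    int offset = AccessorUtilities.sv4Index(entry);  // row within that batch
    // Reaching a nested column such as e then still requires stepping
    // through the map vectors of the chosen batch, as described above.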

- * To simplify access, this class restructures the maps to flatten the scalar vectors - * into end-to-end hyper vectors. For example, for the above: - *

- *
- *   Batch | a          | c          | d
- *   ------+------------+------------+-----------
- *   0     | Int vector | Int vector | Int vector
- *   1     | Int vector | Int vector | Int vector
- * - * The maps are still available as hyper vectors, but separated into map fields. - * (Scalar access no longer needs to access the maps.) The result is a uniform - * addressing scheme for both top-level and nested vectors. + * Wrapper around a primitive (non-map, non-list) column vector. */ - public static class HyperVectorBuilder { - - protected final HyperVectorWrapper valueVectors[]; - protected final HyperVectorWrapper mapVectors[]; - private final List nestedScalars[]; - private int vectorIndex; - private int mapIndex; - private final PhysicalSchema physicalSchema; - - @SuppressWarnings("unchecked") - public HyperVectorBuilder(RowSetSchema schema) { - physicalSchema = schema.physical(); - FlattenedSchema flatSchema = schema.flatAccess(); - valueVectors = new HyperVectorWrapper[schema.hierarchicalAccess().count()]; - if (flatSchema.mapCount() == 0) { - mapVectors = null; - nestedScalars = null; - } else { - mapVectors = (HyperVectorWrapper[]) - new HyperVectorWrapper[flatSchema.mapCount()]; - nestedScalars = new ArrayList[flatSchema.count()]; - } + public static class PrimitiveHyperColumnStorage extends ColumnStorage { + protected final VectorWrapper vectors; + + public PrimitiveHyperColumnStorage(ColumnMetadata schema, VectorWrapper vectors) { + super(schema); + this.vectors = vectors; + } + + @Override + public AbstractObjectReader reader() { + return ColumnAccessorFactory.buildColumnReader(schema.majorType(), new HyperVectorAccessor(vectors)); } - @SuppressWarnings("unchecked") - public HyperVectorWrapper[] mapContainer(VectorContainer container) { - int i = 0; - for (VectorWrapper w : container) { - HyperVectorWrapper hvw = (HyperVectorWrapper) w; - if (w.getField().getType().getMinorType() == MinorType.MAP) { - HyperVectorWrapper mw = (HyperVectorWrapper) hvw; - mapVectors[mapIndex++] = mw; - buildHyperMap(physicalSchema.column(i).mapSchema(), mw); + @Override + public void allocate(BufferAllocator allocator, int rowCount) { + throw new IllegalStateException("Cannot allocate a hyper-vector."); + } + + @Override + public AbstractObjectWriter writer() { + throw new IllegalStateException("Cannot write to a hyper-vector."); + } + } + + /** + * Wrapper around a map vector to provide both a column and tuple view of + * a single or repeated map. 
+ */ + + public static class MapHyperColumnStorage extends BaseMapColumnStorage { + private final VectorWrapper vectors; + + public MapHyperColumnStorage(ColumnMetadata schema, VectorWrapper vectors, ColumnStorage columns[]) { + super(schema, columns); + this.vectors = vectors; + } + + public static MapHyperColumnStorage fromMap(ColumnMetadata schema, VectorWrapper vectors) { + return new MapHyperColumnStorage(schema, vectors, buildColumns(schema, vectors)); + } + + private static ColumnStorage[] buildColumns(ColumnMetadata schema, VectorWrapper vectors) { + TupleMetadata mapSchema = schema.mapSchema(); + ColumnStorage columns[] = new ColumnStorage[mapSchema.size()]; + for (int i = 0; i < mapSchema.size(); i++) { + ColumnMetadata colSchema = mapSchema.metadata(i); + VectorWrapper child = vectors.getChildWrapper(new int[] {i}); + if (colSchema.structureType() == StructureType.TUPLE) { + // Nested maps within a hyper vector are themselves hyper vectors, + // so recurse with the hyper variant rather than casting the + // wrapper to a single map vector. + columns[i] = MapHyperColumnStorage.fromMap(colSchema, child); + } else { + columns[i] = new PrimitiveHyperColumnStorage(colSchema, child); + } + } + return columns; + } + + @Override + public AbstractObjectReader[] readers() { + return HyperRowStorage.readers(this); + } + + @Override + public AbstractObjectWriter[] writers() { + throw new IllegalStateException("Cannot write to a hyper-vector."); + } + + @Override + public void allocate(BufferAllocator allocator, int rowCount) { + throw new IllegalStateException("Cannot allocate a hyper-vector."); + } + + @Override + public AbstractObjectWriter writer() { + throw new IllegalStateException("Cannot write to a hyper-vector."); + } + + @Override + public AbstractObjectReader reader() { + AbstractObjectReader mapReader = MapReader.build(columnSchema(), readers()); + if (schema.mode() != DataMode.REPEATED) { + return mapReader; + } + return ObjectArrayReader.build(new HyperVectorAccessor(vectors), mapReader); + } + } + + /** + * Wrapper around a vector container to map the vector container into the common + * tuple format. 
+ */ + + public static class HyperRowStorage extends BaseRowStorage { - private void createHyperVectors(PhysicalSchema mapSchema) { - for (int i = 0; i < mapSchema.count(); i++) { - LogicalColumn col = mapSchema.column(i); - if (col.isMap()) { - createHyperVectors(col.mapSchema); + public HyperRowStorage(TupleMetadata schema, VectorContainer container, ColumnStorage columns[]) { + super(schema, container, columns); + } + + public static RowStorage fromContainer(TupleMetadata schema, VectorContainer container) { + // Must construct the hyper variant so that the hyper readers() and + // writers() overrides below take effect. + return new HyperRowStorage(schema, container, buildChildren(schema, container)); + } + + public static RowStorage fromContainer(VectorContainer container) { + return fromContainer(TupleSchema.fromFields(container.getSchema()), container); + } + + private static ColumnStorage[] buildChildren(TupleMetadata schema, VectorContainer container) { + assert schema.size() == container.getNumberOfColumns(); + ColumnStorage colStorage[] = new ColumnStorage[schema.size()]; + for (int i = 0; i < schema.size(); i++) { + ColumnMetadata colSchema = schema.metadata(i); + VectorWrapper vectors = container.getValueVector(i); + if (colSchema.structureType() == StructureType.TUPLE) { + colStorage[i] = MapHyperColumnStorage.fromMap(colSchema, vectors); } else { - nestedScalars[col.accessIndex()] = new ArrayList(); + colStorage[i] = new PrimitiveHyperColumnStorage(colSchema, vectors); } } + return colStorage; } - private void buildNestedHyperVectors() { - for (int i = 0; i < nestedScalars.length; i++) { - if (nestedScalars[i] == null) { - continue; - } - ValueVector vectors[] = new ValueVector[nestedScalars[i].size()]; - nestedScalars[i].toArray(vectors); - assert valueVectors[i] == null; - valueVectors[i] = new HyperVectorWrapper(vectors[0].getField(), vectors, false); - } + @Override + public AbstractObjectReader[] readers() { + return readers(this); + } + + @Override + public AbstractObjectWriter[] writers() { + throw new IllegalStateException("Cannot write to a hyper-vector."); + } + + @Override + public void allocate(BufferAllocator allocator, int rowCount) { + throw new IllegalStateException("Cannot allocate a hyper-vector."); } } @@ -231,18 +251,9 @@ private void buildNestedHyperVectors() { private final SelectionVector4 sv4; - /** - * Collection of hyper vectors in flattened order: a left-to-right, - * depth first ordering of vectors in maps. Order here corresponds to - * the order used for column indexes in the row set reader. 
- */ - - private final HyperVectorWrapper hvw[]; - public HyperRowSetImpl(BufferAllocator allocator, VectorContainer container, SelectionVector4 sv4) { - super(allocator, container.getSchema(), container); + super(allocator, HyperRowStorage.fromContainer(container)); this.sv4 = sv4; - hvw = new HyperVectorBuilder(schema).mapContainer(container); } @Override @@ -251,11 +262,6 @@ public HyperRowSetImpl(BufferAllocator allocator, VectorContainer container, Sel @Override public boolean isWritable() { return false; } - @Override - public RowSetWriter writer() { - throw new UnsupportedOperationException("Cannot write to a hyper vector"); - } - @Override public RowSetReader reader() { return buildReader(new HyperRowIndex(sv4)); @@ -270,15 +276,7 @@ public RowSetReader reader() { */ protected RowSetReader buildReader(HyperRowIndex rowIndex) { - FlattenedSchema accessSchema = schema().flatAccess(); - AbstractColumnReader readers[] = new AbstractColumnReader[accessSchema.count()]; - for (int i = 0; i < readers.length; i++) { - MaterializedField field = accessSchema.column(i); - readers[i] = ColumnAccessorFactory.newReader(field.getType()); - HyperVectorWrapper hvw = getHyperVector(i); - readers[i].bind(rowIndex, field, new HyperVectorAccessor(hvw, rowIndex)); - } - return new RowSetReaderImpl(accessSchema, rowIndex, readers); + return new RowSetReaderImpl(rowStorage.tupleSchema(), rowIndex, rowStorage.readers()); } @Override @@ -287,9 +285,6 @@ protected RowSetReader buildReader(HyperRowIndex rowIndex) { @Override public SelectionVector4 getSv4() { return sv4; } - @Override - public HyperVectorWrapper getHyperVector(int i) { return hvw[i]; } - @Override public int rowCount() { return sv4.getCount(); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java index 17a0ac8f092..ad4b1aa7120 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java @@ -33,14 +33,14 @@ public class IndirectRowSet extends AbstractSingleRowSet { /** * Reader index that points to each row indirectly through the - * selection vector. The {@link #index()} method points to the + * selection vector. The {@link #vectorIndex()} method points to the * actual data row, while the {@link #position()} method gives * the position relative to the indirection vector. That is, * the position increases monotonically, but the index jumps * around as specified by the indirection vector. 
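In code terms, the two views differ by a single indirection; a sketch built on the same SV2 call the index class below uses:

    int position = rowIndex;               // monotonic reader position
    int dataRow  = sv2.getIndex(rowIndex); // actual row in the data vectors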
*/ - private static class IndirectRowIndex extends BoundedRowIndex { + private static class IndirectRowIndex extends RowSetReaderIndex { private final SelectionVector2 sv2; @@ -50,21 +50,25 @@ public IndirectRowIndex(SelectionVector2 sv2) { } @Override - public int index() { return sv2.getIndex(rowIndex); } + public int vectorIndex() { return sv2.getIndex(rowIndex); } @Override - public int batch() { return 0; } + public int batchIndex() { return 0; } } private final SelectionVector2 sv2; - public IndirectRowSet(BufferAllocator allocator, VectorContainer container) { - this(allocator, container, makeSv2(allocator, container)); + private IndirectRowSet(BufferAllocator allocator, RowStorage storage, SelectionVector2 sv2) { + super(allocator, storage); + this.sv2 = sv2; } - public IndirectRowSet(BufferAllocator allocator, VectorContainer container, SelectionVector2 sv2) { - super(allocator, container); - this.sv2 = sv2; + public static IndirectRowSet fromContainer(BufferAllocator allocator, VectorContainer container) { + return new IndirectRowSet(allocator, RowStorage.fromContainer(container), makeSv2(allocator, container)); + } + + public static IndirectRowSet fromContainer(BufferAllocator allocator, VectorContainer container, SelectionVector2 sv2) { + return new IndirectRowSet(allocator, RowStorage.fromContainer(container), sv2); } private static SelectionVector2 makeSv2(BufferAllocator allocator, VectorContainer container) { @@ -83,7 +87,7 @@ private static SelectionVector2 makeSv2(BufferAllocator allocator, VectorContain public IndirectRowSet(DirectRowSet directRowSet) { super(directRowSet); - sv2 = makeSv2(allocator, container); + sv2 = makeSv2(allocator, container()); } @Override @@ -95,11 +99,6 @@ public void clear() { getSv2().clear(); } - @Override - public RowSetWriter writer() { - throw new UnsupportedOperationException("Cannot write to an existing row set"); - } - @Override public RowSetReader reader() { return buildReader(new IndirectRowIndex(getSv2())); @@ -119,12 +118,12 @@ public RowSetReader reader() { @Override public int size() { - RecordBatchSizer sizer = new RecordBatchSizer(container, sv2); + RecordBatchSizer sizer = new RecordBatchSizer(container(), sv2); return sizer.actualSize(); } @Override public RowSet merge(RowSet other) { - return new IndirectRowSet(allocator, container().merge(other.container()), sv2); + return IndirectRowSet.fromContainer(allocator, container().merge(other.container()), sv2); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java index b6bbd4f2d96..71defec61c6 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java @@ -20,21 +20,18 @@ import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; -import org.apache.drill.exec.record.HyperVectorWrapper; +import org.apache.drill.exec.record.TupleMetadata; import org.apache.drill.exec.record.VectorAccessible; import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.record.selection.SelectionVector2; import org.apache.drill.exec.record.selection.SelectionVector4; -import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.exec.vector.accessor.ColumnReader; -import org.apache.drill.exec.vector.accessor.ColumnWriter; -import 
org.apache.drill.exec.vector.accessor.TupleReader; -import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.ScalarWriter; /** * A row set is a collection of rows stored as value vectors. Elsewhere in * Drill we call this a "record batch", but that term has been overloaded to - * mean the runtime implementation of an operator... + * mean the runtime implementation of an operator. *

* A row set encapsulates a set of vectors and provides access to Drill's * various "views" of vectors: {@link VectorContainer}, @@ -52,7 +49,7 @@ * Drill provides a large number of vector (data) types. Each requires a * type-specific way to set data. The row set writer uses a {@link ScalarWriter} * to set each value in a way unique to the specific data type. Similarly, the - * row set reader provides a {@link ColumnReader} interface. In both cases, + * row set reader provides a {@link ScalarReader} interface. In both cases, * columns can be accessed by index number (as defined in the schema) or * by name. *
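A sketch of the read side under the new interfaces; column(int), scalar(), and next() appear verbatim in this patch, while the by-name overload and the getInt()/getString() getters are assumed from the accessor API:

    RowSetReader reader = rowSet.reader();
    while (reader.next()) {
      int a    = reader.column(0).scalar().getInt();      // by index
      String b = reader.column("b").scalar().getString(); // by name (assumed)
    }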

@@ -78,56 +75,6 @@ public interface RowSet { - /** - * Interface for writing values to a row set. Only available - * for newly-created, single, direct row sets. Eventually, if - * we want to allow updating a row set, we have to create a - * new row set with the updated columns, then merge the new - * and old row sets to create a new immutable row set. - */ - - public interface RowSetWriter extends TupleWriter { - void setRow(Object...values); - boolean valid(); - int index(); - void save(); - void done(); - } - - /** - * Reader for all types of row sets. - */ - - public interface RowSetReader extends TupleReader { - - /** - * Total number of rows in the row set. - * @return total number of rows - */ - int size(); - - boolean next(); - int index(); - void set(int index); - - /** - * Batch index: 0 for a single batch, batch for the current - * row is a hyper-batch. - * @return index of the batch for the current row - */ - int batchIndex(); - - /** - * The index of the underlying row which may be indexed by an - * Sv2 or Sv4. - * - * @return - */ - - int rowIndex(); - boolean valid(); - } - boolean isExtendable(); boolean isWritable(); @@ -138,13 +85,11 @@ public interface RowSetReader extends TupleReader { int rowCount(); - RowSetWriter writer(); - RowSetReader reader(); void clear(); - RowSetSchema schema(); + TupleMetadata schema(); BufferAllocator allocator(); @@ -171,7 +116,6 @@ public interface RowSetReader extends TupleReader { */ public interface SingleRowSet extends RowSet { - ValueVector[] vectors(); SingleRowSet toIndirect(); SelectionVector2 getSv2(); } @@ -184,7 +128,7 @@ public interface SingleRowSet extends RowSet { public interface ExtendableRowSet extends SingleRowSet { void allocate(int recordCount); - void setRowCount(int rowCount); + RowSetWriter writer(); RowSetWriter writer(int initialRowCount); } @@ -195,6 +139,5 @@ public interface ExtendableRowSet extends SingleRowSet { public interface HyperRowSet extends RowSet { SelectionVector4 getSv4(); - HyperVectorWrapper getHyperVector(int i); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java index 80e8ae4224f..aa4367eec31 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java @@ -19,7 +19,10 @@ import org.apache.drill.exec.memory.BufferAllocator; import org.apache.drill.exec.record.BatchSchema; -import org.apache.drill.test.rowSet.RowSet.RowSetWriter; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleSchema; +import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.test.OperatorFixture; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; /** @@ -40,14 +43,20 @@ public final class RowSetBuilder { private boolean withSv2; public RowSetBuilder(BufferAllocator allocator, BatchSchema schema) { + this(allocator, TupleSchema.fromFields(schema), 10); + } + + public RowSetBuilder(BufferAllocator allocator, TupleMetadata schema) { this(allocator, schema, 10); } - public RowSetBuilder(BufferAllocator allocator, BatchSchema schema, int capacity) { - rowSet = new DirectRowSet(allocator, schema); + public RowSetBuilder(BufferAllocator allocator, TupleMetadata schema, int capacity) { + rowSet = DirectRowSet.fromSchema(allocator, schema); writer = rowSet.writer(capacity); } + public TupleWriter writer() { return writer; } + /** * 
Add a new row using column values passed as variable-length arguments. Expects * map values to be flattened. A schema of (a:int, b:map(c:varchar)) would be @@ -56,8 +65,9 @@ public RowSetBuilder(BufferAllocator allocator, BatchSchema schema, int capacity * add(10, new int[] {100, 200});
* @param values column values in column index order * @return this builder - * @see {@link #addSingleCol(Object)} to create a row of a single column when - * the value to add() is ambiguous + * @throws IllegalStateException if the batch, or any vector in the batch, + * becomes full. This method is designed to be used in tests where we will + * seldom create a full vector of data. */ public RowSetBuilder add(Object...values) { @@ -110,10 +120,10 @@ public RowSetBuilder withSv2() { } public SingleRowSet build() { - writer.done(); + SingleRowSet result = writer.done(); if (withSv2) { - return rowSet.toIndirect(); + return result.toIndirect(); } - return rowSet; + return result; } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java index ea500744f32..8172ade87ed 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java @@ -21,27 +21,58 @@ import static org.junit.Assert.assertTrue; import org.apache.drill.exec.vector.accessor.ArrayReader; -import org.apache.drill.exec.vector.accessor.ColumnReader; -import org.apache.drill.test.rowSet.RowSet.RowSetReader; +import org.apache.drill.exec.vector.accessor.ObjectReader; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.TupleReader; import org.bouncycastle.util.Arrays; /** * For testing, compare the contents of two row sets (record batches) * to verify that they are identical. Supports masks to exclude certain * columns from comparison. + *

+ * Drill rows are analogous to JSON documents: they can have scalars, + * arrays and maps, with maps and lists holding maps, arrays and scalars. + * This class walks the row structure tree to compare each structure + * of two row sets checking counts, types and values to ensure that the + * "actual" result set (result of a test) matches the "expected" result + * set. + *
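Typical test usage, assuming the expected and actual row sets are built elsewhere in the test:

    RowSet expected = ...  // golden copy, usually from RowSetBuilder
    RowSet actual = ...    // produced by the code under test
    // Compare, then (per the method name) release both row sets.
    new RowSetComparison(expected).verifyAndClearAll(actual);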

+ * This class acts as an example of how to use the suite of reader + * abstractions. */ public class RowSetComparison { + /** + * Row set with the expected outcome of a test. This is the "golden" + * copy defined in the test itself. + */ private RowSet expected; + /** + * Some tests wish to ignore certain (top-level) columns. If a + * mask is provided, then only those columns whose mask entry is true + * will be verified. + */ private boolean mask[]; + /** + * Floats and doubles do not compare exactly. This delta is used + * by JUnit for such comparisons. + */ private double delta = 0.001; + /** + * Tests can skip the first n rows. + */ private int offset; private int span = -1; public RowSetComparison(RowSet expected) { this.expected = expected; - mask = new boolean[expected.schema().hierarchicalAccess().count()]; + + // TODO: The mask only works at the top level presently + + mask = new boolean[expected.schema().size()]; for (int i = 0; i < mask.length; i++) { mask[i] = true; } @@ -132,7 +163,8 @@ public void verify(RowSet actual) { for (int i = 0; i < testLength; i++) { er.next(); ar.next(); - verifyRow(er, ar); + String label = Integer.toString(er.index() + 1); + verifyRow(label, er, ar); } } @@ -165,22 +197,50 @@ public void verifyAndClearAll(RowSet actual) { } } - private void verifyRow(RowSetReader er, RowSetReader ar) { + private void verifyRow(String label, TupleReader er, TupleReader ar) { + String prefix = label + ":"; for (int i = 0; i < mask.length; i++) { if (! mask[i]) { continue; } - ColumnReader ec = er.column(i); - ColumnReader ac = ar.column(i); - String label = (er.index() + 1) + ":" + i; - assertEquals(label, ec.valueType(), ac.valueType()); - if (ec.isNull()) { - assertTrue(label + " - column not null", ac.isNull()); - continue; - } - if (! ec.isNull()) { - assertTrue(label + " - column is null", ! ac.isNull()); - } + verifyColumn(prefix + i, er.column(i), ar.column(i)); + } + } + + private void verifyColumn(String label, ObjectReader ec, ObjectReader ac) { + assertEquals(label, ec.type(), ac.type()); + switch (ec.type()) { + case ARRAY: + verifyArray(label, ec.array(), ac.array()); + break; + case SCALAR: + verifyScalar(label, ec.scalar(), ac.scalar()); + break; + case TUPLE: + verifyTuple(label, ec.tuple(), ac.tuple()); + break; + default: + throw new IllegalStateException( "Unexpected type: " + ec.type()); + } + } + + private void verifyTuple(String label, TupleReader er, TupleReader ar) { + assertEquals(label, er.columnCount(), ar.columnCount()); + String prefix = label + ":"; + for (int i = 0; i < er.columnCount(); i++) { + verifyColumn(prefix + i, er.column(i), ar.column(i)); + } + } + + private void verifyScalar(String label, ScalarReader ec, ScalarReader ac) { + assertEquals(label, ec.valueType(), ac.valueType()); + if (ec.isNull()) { + assertTrue(label + " - column not null", ac.isNull()); + return; + } + if (! ec.isNull()) { + assertTrue(label + " - column is null", ! 
ac.isNull()); + } switch (ec.valueType()) { case BYTES: { byte expected[] = ac.getBytes(); @@ -207,24 +267,42 @@ private void verifyRow(RowSetReader er, RowSetReader ar) { case PERIOD: assertEquals(label, ec.getPeriod(), ac.getPeriod()); break; - case ARRAY: - verifyArray(label, ec.array(), ac.array()); - break; default: throw new IllegalStateException( "Unexpected type: " + ec.valueType()); - } } } - private void verifyArray(String colLabel, ArrayReader ea, + private void verifyArray(String label, ArrayReader ea, ArrayReader aa) { + assertEquals(label, ea.entryType(), aa.entryType()); + assertEquals(label, ea.size(), aa.size()); + switch (ea.entryType()) { + case ARRAY: + throw new UnsupportedOperationException(); + case SCALAR: + verifyScalarArray(label, ea.elements(), aa.elements()); + break; + case TUPLE: + verifyTupleArray(label, ea, aa); + break; + default: + throw new IllegalStateException( "Unexpected type: " + ea.entryType()); + } + } + + private void verifyTupleArray(String label, ArrayReader ea, ArrayReader aa) { + for (int i = 0; i < ea.size(); i++) { + verifyTuple(label + "[" + i + "]", ea.tuple(i), aa.tuple(i)); + } + } + + private void verifyScalarArray(String colLabel, ScalarElementReader ea, + ScalarElementReader aa) { assertEquals(colLabel, ea.valueType(), aa.valueType()); assertEquals(colLabel, ea.size(), aa.size()); for (int i = 0; i < ea.size(); i++) { String label = colLabel + "[" + i + "]"; switch (ea.valueType()) { - case ARRAY: - throw new IllegalStateException("Arrays of arrays not supported yet"); case BYTES: { byte expected[] = ea.getBytes(i); byte actual[] = aa.getBytes(i); diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java index 601abb13f64..8d85832979b 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java @@ -20,8 +20,7 @@ import java.io.PrintStream; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; -import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema; -import org.apache.drill.test.rowSet.RowSet.RowSetReader; +import org.apache.drill.exec.record.TupleMetadata; /** * Print a row set in CSV-like format. Primarily for debugging. 
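Usage is a single call from a test; the constructor shown is assumed from the class's existing API, which this hunk does not change:

    new RowSetPrinter(rowSet).print(); // header line, then one CSV-like line per row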
@@ -41,21 +40,21 @@ public void print() { public void print(PrintStream out) { SelectionVectorMode selectionMode = rowSet.indirectionType(); RowSetReader reader = rowSet.reader(); - int colCount = reader.schema().count(); - printSchema(out, selectionMode); + int colCount = reader.schema().size(); + printSchema(out, selectionMode, reader); while (reader.next()) { printHeader(out, reader, selectionMode); for (int i = 0; i < colCount; i++) { if (i > 0) { out.print(", "); } - out.print(reader.getAsString(i)); + out.print(reader.column(i).getAsString()); } out.println(); } } - private void printSchema(PrintStream out, SelectionVectorMode selectionMode) { + private void printSchema(PrintStream out, SelectionVectorMode selectionMode, RowSetReader reader) { out.print("#"); switch (selectionMode) { case FOUR_BYTE: @@ -68,8 +67,8 @@ private void printSchema(PrintStream out, SelectionVectorMode selectionMode) { break; } out.print(": "); - TupleSchema schema = rowSet.schema().hierarchicalAccess(); - for (int i = 0; i < schema.count(); i++) { + TupleMetadata schema = reader.schema(); + for (int i = 0; i < schema.size(); i++) { if (i > 0) { out.print(", "); } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReader.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReader.java new file mode 100644 index 00000000000..3e27529b733 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReader.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet; + +import org.apache.drill.exec.vector.accessor.TupleReader; + +/** + * Reader for all types of row sets. + */ + +public interface RowSetReader extends TupleReader { + + /** + * Total number of rows in the row set. + * @return total number of rows + */ + int rowCount(); + + boolean next(); + int index(); + void set(int index); + + /** + * Batch index: 0 for a single batch, batch for the current + * row is a hyper-batch. + * @return index of the batch for the current row + */ + int batchIndex(); + + /** + * The index of the underlying row which may be indexed by an + * Sv2 or Sv4. + * + * @return + */ + + int rowIndex(); + boolean valid(); +} \ No newline at end of file diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReaderImpl.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReaderImpl.java new file mode 100644 index 00000000000..8a7f1b86be6 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReaderImpl.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet; + +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.accessor.reader.AbstractObjectReader; +import org.apache.drill.exec.vector.accessor.reader.AbstractTupleReader; +import org.apache.drill.test.rowSet.AbstractRowSet.RowSetReaderIndex; + +/** + * Reader implementation for a row set. + */ + +public class RowSetReaderImpl extends AbstractTupleReader implements RowSetReader { + + protected final RowSetReaderIndex readerIndex; + + public RowSetReaderImpl(TupleMetadata schema, RowSetReaderIndex index, AbstractObjectReader[] readers) { + super(schema, readers); + this.readerIndex = index; + bindIndex(index); + } + + @Override + public boolean next() { + if (! readerIndex.next()) { + return false; + } + reposition(); + return true; + } + + @Override + public boolean valid() { return readerIndex.valid(); } + + @Override + public int index() { return readerIndex.position(); } + + @Override + public int rowCount() { return readerIndex.size(); } + + @Override + public int rowIndex() { return readerIndex.vectorIndex(); } + + @Override + public int batchIndex() { return readerIndex.batchIndex(); } + + @Override + public void set(int index) { this.readerIndex.set(index); } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetSchema.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetSchema.java deleted file mode 100644 index 55b5f121020..00000000000 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetSchema.java +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.drill.test.rowSet; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.record.BatchSchema; -import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; -import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema; -import org.apache.drill.exec.record.MaterializedField; - -/** - * Row set schema presented as a number of distinct "views" for various - * purposes: - *

    - *
- * - Batch schema: the schema used by a VectorContainer.
- * - Physical schema: the schema expressed as a hierarchy of
- *   tuples with the top tuple representing the row, nested tuples
- *   representing maps.
- * - Access schema: a flattened schema with all scalar columns
- *   at the top level, and with map columns pulled out into a separate
- *   collection. The flattened-scalar view is the one used to write to,
- *   and read from, the row set.
- *
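The deleted views were consumed roughly as follows; each call appears in the removed code later in this file:

    RowSetSchema schema = new RowSetSchema(batchSchema);
    FlattenedSchema flat = schema.flatAccess();        // scalars, depth-first
    MaterializedField col0 = flat.column(0);           // first flattened column
    TupleSchema rowView = schema.hierarchicalAccess(); // row-as-tuple view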
- * Allows easy creation of multiple row sets from the same schema. - * Each schema is immutable, which is fine for tests in which we - * want known inputs and outputs. - */ - -public class RowSetSchema { - - /** - * Logical description of a column. A logical column is a - * materialized field. For maps, also includes a logical schema - * of the map. - */ - - public static class LogicalColumn { - protected final String fullName; - protected final int accessIndex; - protected int flatIndex; - protected final MaterializedField field; - - /** - * Schema of the map. Includes only those fields directly within - * the map; does not include fields from nested tuples. - */ - - protected PhysicalSchema mapSchema; - - public LogicalColumn(String fullName, int accessIndex, MaterializedField field) { - this.fullName = fullName; - this.accessIndex = accessIndex; - this.field = field; - } - - private void updateStructure(int index, PhysicalSchema children) { - flatIndex = index; - mapSchema = children; - } - - public int accessIndex() { return accessIndex; } - public int flatIndex() { return flatIndex; } - public boolean isMap() { return mapSchema != null; } - public PhysicalSchema mapSchema() { return mapSchema; } - public MaterializedField field() { return field; } - public String fullName() { return fullName; } - } - - /** - * Implementation of a tuple name space. Tuples allow both indexed and - * named access to their members. - * - * @param the type of object representing each column - */ - - public static class NameSpace { - private final Map nameSpace = new HashMap<>(); - private final List columns = new ArrayList<>(); - - public int add(String key, T value) { - int index = columns.size(); - nameSpace.put(key, index); - columns.add(value); - return index; - } - - public T get(int index) { - return columns.get(index); - } - - public T get(String key) { - int index = getIndex(key); - if (index == -1) { - return null; - } - return get(index); - } - - public int getIndex(String key) { - Integer index = nameSpace.get(key); - if (index == null) { - return -1; - } - return index; - } - - public int count() { return columns.size(); } - } - - /** - * Provides a non-flattened, physical view of the schema. The top-level - * row includes maps, maps expand to a nested tuple schema. This view - * corresponds, more-or-less, to the physical storage of vectors in - * a vector accessible or vector container. - */ - - private static class TupleSchemaImpl implements TupleSchema { - - private NameSpace columns; - - public TupleSchemaImpl(NameSpace ns) { - this.columns = ns; - } - - @Override - public MaterializedField column(int index) { - return logicalColumn(index).field(); - } - - public LogicalColumn logicalColumn(int index) { return columns.get(index); } - - @Override - public MaterializedField column(String name) { - LogicalColumn col = columns.get(name); - return col == null ? null : col.field(); - } - - @Override - public int columnIndex(String name) { - return columns.getIndex(name); - } - - @Override - public int count() { return columns.count(); } - } - - /** - * Represents the flattened view of the schema used to get and set columns. - * Represents a left-to-right, depth-first traversal of the row and map - * columns. Holds only materialized vectors (non-maps). For completeness, - * provides access to maps also via separate methods, but this is generally - * of little use. 
- */ - - public static class FlattenedSchema extends TupleSchemaImpl { - protected final TupleSchemaImpl maps; - - public FlattenedSchema(NameSpace cols, NameSpace maps) { - super(cols); - this.maps = new TupleSchemaImpl(maps); - } - - public LogicalColumn logicalMap(int index) { return maps.logicalColumn(index); } - public MaterializedField map(int index) { return maps.column(index); } - public MaterializedField map(String name) { return maps.column(name); } - public int mapIndex(String name) { return maps.columnIndex(name); } - public int mapCount() { return maps.count(); } - } - - /** - * Physical schema of a row set showing the logical hierarchy of fields - * with map fields as first-class fields. Map members appear as children - * under the map, much as they appear in the physical value-vector - * implementation. - */ - - public static class PhysicalSchema { - protected final NameSpace schema = new NameSpace<>(); - - public LogicalColumn column(int index) { - return schema.get(index); - } - - public LogicalColumn column(String name) { - return schema.get(name); - } - - public int count() { return schema.count(); } - - public NameSpace nameSpace() { return schema; } - } - - private static class SchemaExpander { - private final PhysicalSchema physicalSchema; - private final NameSpace cols = new NameSpace<>(); - private final NameSpace maps = new NameSpace<>(); - - public SchemaExpander(BatchSchema schema) { - physicalSchema = expand("", schema); - } - - private PhysicalSchema expand(String prefix, Iterable fields) { - PhysicalSchema physical = new PhysicalSchema(); - for (MaterializedField field : fields) { - String name = prefix + field.getName(); - int index; - LogicalColumn colSchema = new LogicalColumn(name, physical.count(), field); - physical.schema.add(field.getName(), colSchema); - PhysicalSchema children = null; - if (field.getType().getMinorType() == MinorType.MAP) { - index = maps.add(name, colSchema); - children = expand(name + ".", field.getChildren()); - } else { - index = cols.add(name, colSchema); - } - colSchema.updateStructure(index, children); - } - return physical; - } - } - - private final BatchSchema batchSchema; - private final TupleSchemaImpl accessSchema; - private final FlattenedSchema flatSchema; - private final PhysicalSchema physicalSchema; - - public RowSetSchema(BatchSchema schema) { - batchSchema = schema; - SchemaExpander expander = new SchemaExpander(schema); - physicalSchema = expander.physicalSchema; - accessSchema = new TupleSchemaImpl(physicalSchema.nameSpace()); - flatSchema = new FlattenedSchema(expander.cols, expander.maps); - } - - /** - * A hierarchical schema that includes maps, with maps expanding - * to a nested tuple schema. Not used at present; this is intended - * to be the bases of non-flattened accessors if we find the need. - * @return the hierarchical access schema - */ - - public TupleSchema hierarchicalAccess() { return accessSchema; } - - /** - * A flattened (left-to-right, depth-first traversal) of the non-map - * columns in the row. Used to define the column indexes in the - * get methods for row readers and the set methods for row writers. - * @return the flattened access schema - */ - - public FlattenedSchema flatAccess() { return flatSchema; } - - /** - * Internal physical schema in hierarchical order. Mostly used to create - * the other schemas, but may be of use in special cases. Has the same - * structure as the batch schema, but with additional information. 
- * @return a tree-structured physical schema - */ - - public PhysicalSchema physical() { return physicalSchema; } - - /** - * The batch schema used by the Drill runtime. Represents a tree-structured - * list of top-level fields, including maps. Maps contain a nested schema. - * @return the batch schema used by the Drill runtime - */ - - public BatchSchema batch() { return batchSchema; } - - /** - * Convert this schema to a new batch schema that includes the specified - * selection vector mode. - * @param svMode selection vector mode for the new schema - * @return the new batch schema - */ - - public BatchSchema toBatchSchema(SelectionVectorMode svMode) { - List fields = new ArrayList<>(); - for (MaterializedField field : batchSchema) { - fields.add(field); - } - return new BatchSchema(svMode, fields); - } -} diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java index 261a9c180a0..ede6a899ca3 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java @@ -17,12 +17,27 @@ */ package org.apache.drill.test.rowSet; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.math.BigDecimal; + +import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleMetadata.ColumnMetadata; +import org.apache.drill.exec.record.TupleMetadata.StructureType; +import org.apache.drill.exec.record.VectorContainer; import org.apache.drill.exec.record.selection.SelectionVector2; -import org.apache.drill.exec.vector.accessor.AccessorUtilities; -import org.apache.drill.exec.vector.accessor.ColumnAccessor.ValueType; -import org.apache.drill.exec.vector.accessor.ColumnWriter; -import org.apache.drill.test.rowSet.RowSet.RowSetWriter; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.apache.drill.exec.vector.complex.AbstractMapVector; +import org.bouncycastle.util.Arrays; import org.joda.time.Duration; import org.joda.time.Period; @@ -62,11 +77,42 @@ public static void reverse(SelectionVector2 sv2) { */ public static void setFromInt(RowSetWriter rowWriter, int index, int value) { - ColumnWriter writer = rowWriter.column(index); - if (writer.valueType() == ValueType.PERIOD) { - setPeriodFromInt(writer, rowWriter.schema().column(index).getType().getMinorType(), value); - } else { - AccessorUtilities.setFromInt(writer, value); + ScalarWriter writer = rowWriter.scalar(index); + MaterializedField field = rowWriter.schema().column(index); + writer.setObject(testDataFromInt(writer.valueType(), field.getType(), value)); + } + + public static Object testDataFromInt(ValueType valueType, MajorType dataType, int value) { + switch (valueType) { + case BYTES: + return Integer.toHexString(value).getBytes(); + case DOUBLE: + return (double) value; + case INTEGER: + switch (dataType.getMinorType()) { + case BIT: + return value & 0x01; + case SMALLINT: + return 
value % 32768; + case UINT2: + return value & 0xFFFF; + case TINYINT: + return value % 128; + case UINT1: + return value & 0xFF; + default: + return value; + } + case LONG: + return (long) value; + case STRING: + return Integer.toString(value); + case DECIMAL: + return BigDecimal.valueOf(value, dataType.getScale()); + case PERIOD: + return periodFromInt(dataType.getMinorType(), value); + default: + throw new IllegalStateException("Unknown writer type: " + valueType); } } @@ -81,26 +127,83 @@ public static void setFromInt(RowSetWriter rowWriter, int index, int value) { * @param writer column writer for a period column * @param minorType the Drill data type * @param value the integer value to apply + * @throws VectorOverflowException */ - public static void setPeriodFromInt(ColumnWriter writer, MinorType minorType, - int value) { + public static Period periodFromInt(MinorType minorType, int value) { switch (minorType) { case INTERVAL: - writer.setPeriod(Duration.millis(value).toPeriod()); - break; + return Duration.millis(value).toPeriod(); case INTERVALYEAR: - writer.setPeriod(Period.years(value / 12).withMonths(value % 12)); - break; + return Period.years(value / 12).withMonths(value % 12); case INTERVALDAY: int sec = value % 60; value = value / 60; int min = value % 60; value = value / 60; - writer.setPeriod(Period.days(value).withMinutes(min).withSeconds(sec)); - break; + return Period.days(value).withMinutes(min).withSeconds(sec); default: throw new IllegalArgumentException("Writer is not an interval: " + minorType); } } + + public static VectorContainer buildVectors(BufferAllocator allocator, TupleMetadata schema) { + VectorContainer container = new VectorContainer(allocator); + for (int i = 0; i < schema.size(); i++) { + ColumnMetadata colSchema = schema.metadata(i); + @SuppressWarnings("resource") + ValueVector vector = TypeHelper.getNewVector(colSchema.schema(), allocator, null); + container.add(vector); + if (colSchema.structureType() == StructureType.TUPLE) { + buildMap(allocator, (AbstractMapVector) vector, colSchema.mapSchema()); + } + } + container.buildSchema(SelectionVectorMode.NONE); + return container; + } + + private static void buildMap(BufferAllocator allocator, AbstractMapVector mapVector, TupleMetadata mapSchema) { + for (int i = 0; i < mapSchema.size(); i++) { + ColumnMetadata colSchema = mapSchema.metadata(i); + @SuppressWarnings("resource") + ValueVector vector = TypeHelper.getNewVector(colSchema.schema(), allocator, null); + mapVector.putChild(colSchema.name(), vector); + if (colSchema.structureType() == StructureType.TUPLE) { + buildMap(allocator, (AbstractMapVector) vector, colSchema.mapSchema()); + } + } + } + + public static void assertEqualValues(ValueType type, Object expectedObj, Object actualObj) { + assertEqualValues(type.toString(), type, expectedObj, actualObj); + } + + public static void assertEqualValues(String msg, ValueType type, Object expectedObj, Object actualObj) { + switch (type) { + case BYTES: { + byte expected[] = (byte[]) expectedObj; + byte actual[] = (byte[]) actualObj; + assertEquals(msg + " - byte lengths differ", expected.length, actual.length); + assertTrue(msg, Arrays.areEqual(expected, actual)); + break; + } + case DOUBLE: + assertEquals(msg, (double) expectedObj, (double) actualObj, 0.0001); + break; + case INTEGER: + case LONG: + case STRING: + case DECIMAL: + assertEquals(msg, expectedObj, actualObj); + break; + case PERIOD: { + Period expected = (Period) expectedObj; + Period actual = (Period) actualObj; + assertEquals(msg, 
expected.normalizedStandard(), actual.normalizedStandard()); + break; + } + default: + throw new IllegalStateException( "Unexpected type: " + type); + } + } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriter.java new file mode 100644 index 00000000000..15900b18c4e --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriter.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet; + +import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.test.rowSet.RowSet.SingleRowSet; + +/** + * Interface for writing values to a row set. Only available + * for newly-created, single, direct row sets. Eventually, if + * we want to allow updating a row set, we have to create a + * new row set with the updated columns, then merge the new + * and old row sets to create a new immutable row set. + *

+ * Typical usage: + *


+ * void writeABatch() {
+ *   RowSetWriter writer = ...
+ *   while (! writer.isFull()) {
+ *     writer.scalar(0).setInt(10);
+ *     writer.scalar(1).setString("foo");
+ *     ...
+ *     writer.save();
+ *   }
+ * }
+ * The above writes until the batch is full, based on size. If values + * are large enough to potentially cause vector overflow, do the + * following instead: + *

+ * void writeABatch() {
+ *   RowSetWriter writer = ...
+ *   while (! writer.isFull()) {
+ *     writer.scalar(0).setInt(10);
+ *     try {
+ *       writer.scalar(1).setString("foo");
+ *     } catch (IndexOutOfBoundsException e) { break; }
+ *     ...
+ *     writer.save();
+ *   }
+ *   // Do something with the partially-written last row.
+ * }
+ *

+ * This writer is for testing, so no provision is available to handle a + * partial last row. (Elsewhere in Drill there are classes that handle that case.) */ + +public interface RowSetWriter extends TupleWriter { + + /** + * Write a row of values, given by Java objects. Object type must + * match expected column type. Throws IndexOutOfBoundsException if + * any value causes vector overflow; this test writer does not + * handle overflow rows. + * + * @param values variable-length argument list of column values + */ + void setRow(Object...values); + + /** + * Indicates whether the writer is full: that is, whether the current + * row position can no longer be written. Will be false on the first + * row, and all subsequent rows until either the maximum number of rows + * are written, or a vector overflows. After that, will return true. The + * method returns true as soon as any column writer overflows, + * even in the middle of a row write. That is, this writer + * does not automatically handle overflow rows because that + * added complexity is seldom needed for tests. + * + * @return true if the batch is full and the current row can no + * longer be written, false otherwise + */ + + boolean isFull(); + int rowIndex(); + + /** + * Saves the current row and moves to the next row. + * Done automatically if using setRow(). + */ + + void save(); + + /** + * Finish writing and finalize the row set being + * written. + * @return the completed, read-only row set without a + * selection vector + */ + + SingleRowSet done(); +} \ No newline at end of file diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriterImpl.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriterImpl.java new file mode 100644 index 00000000000..8e22cb3c48e --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriterImpl.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet; + +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter; +import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter; +import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; +import org.apache.drill.test.rowSet.RowSet.SingleRowSet; + +/** + * Implementation of a row set writer. Only available for newly-created, + * empty, direct, single row sets. Rewriting is not allowed, nor is writing + * to a hyper row set. + */ + +public class RowSetWriterImpl extends AbstractTupleWriter implements RowSetWriter { + + /** + * Writer index that points to each row in the row set. 
The index starts at + * the 0th row and advances one row on each increment. This allows writers to + * start positioned at the first row. Writes happen in the current row. + * Calling next() advances to the next position, effectively saving + * the current row. The most recent row can be abandoned easily simply by not + * calling next(). This means that the number of completed rows is + * the same as the row index. + */ + + static class WriterIndexImpl implements ColumnWriterIndex { + + public enum State { OK, VECTOR_OVERFLOW, END_OF_BATCH } + + private int rowIndex = 0; + private State state = State.OK; + + @Override + public final int vectorIndex() { return rowIndex; } + + public final boolean next() { + if (++rowIndex < ValueVector.MAX_ROW_COUNT) { + return true; + } + // Should not call next() again once batch is full. + assert rowIndex == ValueVector.MAX_ROW_COUNT; + rowIndex = ValueVector.MAX_ROW_COUNT; + state = state == State.OK ? State.END_OF_BATCH : state; + return false; + } + + public int size() { + // The index always points to the next slot past the + // end of valid rows. + return rowIndex; + } + + public boolean valid() { return state == State.OK; } + + public boolean hasOverflow() { return state == State.VECTOR_OVERFLOW; } + + @Override + public void overflowed() { + state = State.VECTOR_OVERFLOW; + throw new IndexOutOfBoundsException("Row sets don't support vector overflow"); + } + + @Override + public boolean legal() { + return true; + } + + @Override + public final void nextElement() { } + } + + private final WriterIndexImpl writerIndex; + private final ExtendableRowSet rowSet; + + protected RowSetWriterImpl(ExtendableRowSet rowSet, TupleMetadata schema, WriterIndexImpl index, AbstractObjectWriter[] writers) { + super(schema, writers); + this.rowSet = rowSet; + this.writerIndex = index; + bindIndex(index); + startWrite(); + } + + @Override + public void setRow(Object...values) { + setObject(values); + save(); + } + + @Override + public int rowIndex() { return writerIndex.vectorIndex(); } + + @Override + public void save() { + endValue(); + if (writerIndex.next()) { + startValue(); + } + } + + @Override + public boolean isFull( ) { return ! writerIndex.valid(); } + + @Override + public SingleRowSet done() { + endWrite(); + rowSet.container().setRecordCount(writerIndex.vectorIndex()); + return rowSet; + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java index 39b012874fa..bbda91055c4 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java @@ -17,15 +17,15 @@ */ package org.apache.drill.test.rowSet; -import java.util.ArrayList; -import java.util.List; - import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleMetadata.ColumnMetadata; +import org.apache.drill.exec.record.TupleSchema; /** * Builder of a row set schema expressed as a list of materialized @@ -59,8 +59,6 @@ public class SchemaBuilder { * need scale and precision, and so on. 
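For reference, a schema with a nested map can be declared with this builder; buildSchema() and addMapArray() are added in the hunks below, and the column names are illustrative:

    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMap("b")
          .addNullable("c", MinorType.VARCHAR)
          .buildMap()
        .buildSchema();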
*/ - // TODO: Add map methods - public static class ColumnBuilder { private final String name; private final MajorType.Builder typeBuilder; @@ -101,10 +99,14 @@ public MaterializedField build() { public static class MapBuilder extends SchemaBuilder { private final SchemaBuilder parent; private final String memberName; + private final DataMode mode; - public MapBuilder(SchemaBuilder parent, String memberName) { + public MapBuilder(SchemaBuilder parent, String memberName, DataMode mode) { this.parent = parent; this.memberName = memberName; + // Optional maps not supported in Drill + assert mode != DataMode.OPTIONAL; + this.mode = mode; } @Override @@ -114,9 +116,12 @@ public BatchSchema build() { @Override public SchemaBuilder buildMap() { - MaterializedField col = columnSchema(memberName, MinorType.MAP, DataMode.REQUIRED); - for (MaterializedField childCol : columns) { - col.addChild(childCol); + // TODO: Use the map schema directly rather than + // rebuilding it as is done here. + + MaterializedField col = columnSchema(memberName, MinorType.MAP, mode); + for (ColumnMetadata md : schema) { + col.addChild(md.schema()); } parent.finishMap(col); return parent; @@ -128,7 +133,7 @@ public SchemaBuilder withSVMode(SelectionVectorMode svMode) { } } - protected List columns = new ArrayList<>( ); + protected TupleSchema schema = new TupleSchema(); private SelectionVectorMode svMode = SelectionVectorMode.NONE; public SchemaBuilder() { } @@ -144,59 +149,60 @@ public SchemaBuilder(BatchSchema baseSchema) { } } - public SchemaBuilder add(String pathName, MajorType type) { - return add(MaterializedField.create(pathName, type)); + public SchemaBuilder add(String name, MajorType type) { + return add(MaterializedField.create(name, type)); } public SchemaBuilder add(MaterializedField col) { - columns.add(col); + schema.add(col); return this; } /** * Create a column schema using the "basic three" properties of name, type and * cardinality (AKA "data mode.") Use the {@link ColumnBuilder} for to set - * other schema attributes. + * other schema attributes. Name is relative to the enclosing map or tuple; + * it is not the fully qualified path name. 
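+ * <p>
+ * For example (an illustrative sketch):
+ * <pre>
+ * MaterializedField f = columnSchema("a", MinorType.INT, DataMode.OPTIONAL);
+ * </pre>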
*/ - public static MaterializedField columnSchema(String pathName, MinorType type, DataMode mode) { - return MaterializedField.create(pathName, + public static MaterializedField columnSchema(String name, MinorType type, DataMode mode) { + return MaterializedField.create(name, MajorType.newBuilder() .setMinorType(type) .setMode(mode) .build()); } - public SchemaBuilder add(String pathName, MinorType type, DataMode mode) { - return add(columnSchema(pathName, type, mode)); + public SchemaBuilder add(String name, MinorType type, DataMode mode) { + return add(columnSchema(name, type, mode)); } - public SchemaBuilder add(String pathName, MinorType type) { - return add(pathName, type, DataMode.REQUIRED); + public SchemaBuilder add(String name, MinorType type) { + return add(name, type, DataMode.REQUIRED); } - public SchemaBuilder add(String pathName, MinorType type, int width) { - MaterializedField field = new SchemaBuilder.ColumnBuilder(pathName, type) + public SchemaBuilder add(String name, MinorType type, int width) { + MaterializedField field = new SchemaBuilder.ColumnBuilder(name, type) .setMode(DataMode.REQUIRED) .setWidth(width) .build(); return add(field); } - public SchemaBuilder addNullable(String pathName, MinorType type) { - return add(pathName, type, DataMode.OPTIONAL); + public SchemaBuilder addNullable(String name, MinorType type) { + return add(name, type, DataMode.OPTIONAL); } - public SchemaBuilder addNullable(String pathName, MinorType type, int width) { - MaterializedField field = new SchemaBuilder.ColumnBuilder(pathName, type) + public SchemaBuilder addNullable(String name, MinorType type, int width) { + MaterializedField field = new SchemaBuilder.ColumnBuilder(name, type) .setMode(DataMode.OPTIONAL) .setWidth(width) .build(); return add(field); } - public SchemaBuilder addArray(String pathName, MinorType type) { - return add(pathName, type, DataMode.REPEATED); + public SchemaBuilder addArray(String name, MinorType type) { + return add(name, type, DataMode.REPEATED); } /** @@ -209,7 +215,11 @@ public SchemaBuilder addArray(String pathName, MinorType type) { */ public MapBuilder addMap(String pathName) { - return new MapBuilder(this, pathName); + return new MapBuilder(this, pathName, DataMode.REQUIRED); + } + + public MapBuilder addMapArray(String pathName) { + return new MapBuilder(this, pathName, DataMode.REPEATED); } public SchemaBuilder withSVMode(SelectionVectorMode svMode) { @@ -218,14 +228,18 @@ public SchemaBuilder withSVMode(SelectionVectorMode svMode) { } public BatchSchema build() { - return new BatchSchema(svMode, columns); + return schema.toBatchSchema(svMode); } void finishMap(MaterializedField map) { - columns.add(map); + schema.add(map); } public SchemaBuilder buildMap() { throw new IllegalStateException("Cannot build map for a top-level schema"); } + + public TupleMetadata buildSchema() { + return schema; + } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java new file mode 100644 index 00000000000..9ec810bb69b --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java @@ -0,0 +1,289 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet.test; + +import java.util.concurrent.TimeUnit; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.IntVector; +import org.apache.drill.exec.vector.NullableIntVector; +import org.apache.drill.exec.vector.RepeatedIntVector; +import org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter.ArrayObjectWriter; +import org.apache.drill.exec.vector.accessor.writer.NullableScalarWriter; +import org.apache.drill.exec.vector.accessor.writer.ScalarArrayWriter; +import org.apache.drill.test.OperatorFixture; +import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; +import org.apache.drill.test.rowSet.RowSetWriter; +import org.apache.drill.test.rowSet.SchemaBuilder; + +import com.google.common.base.Stopwatch; + +/** + * Tests the performance of the writers compared to using the value + * vector mutators directly. In order to achieve apples-to-apples + * comparison, the tests work directly with individual columns in + * the writer case; the row writer level is omitted as the row writer + * simulates the reader logic previously used to write to vectors. + *

+ * Current results:
+ * <ul>
+ * <li>Writer is 42% faster than a required mutator.</li>
+ * <li>Writer is 73% faster than a nullable mutator.</li>
+ * <li>Writer is 407% faster than a repeated mutator.</li>
+ * </ul>
+ * Since performance is critical for this component (this is the
+ * ultimate "inner loop"), please run these tests periodically to
+ * ensure that performance does not drop; it is very easy to add
+ * a bit of code here or there that greatly impacts performance.
+ * <p>

+ * This is not a JUnit test. Rather, it is a stand-alone program + * which must be run explicitly. One handy way is to run it from + * your IDE. If using Eclipse, monitor the system to wait for Eclipse + * to finish its background processing before launching. + */ + +public class PerformanceTool { + + public static final int ROW_COUNT = 16 * 1024 * 1024 / 4; + public static final int ITERATIONS = 300; + + public static abstract class PerfTester { + final TupleMetadata rowSchema; + final MaterializedField field; + final OperatorFixture fixture; + final String label; + final Stopwatch timer = Stopwatch.createUnstarted(); + + public PerfTester(OperatorFixture fixture, DataMode mode, String label) { + this.fixture = fixture; + this.label = label; + field = SchemaBuilder.columnSchema("a", MinorType.INT, mode); + rowSchema = new SchemaBuilder() + .add(field) + .buildSchema(); + } + + public void runTest() { + for (int i = 0; i < ITERATIONS; i++) { + doTest(); + } + System.out.println(label + ": " + timer.elapsed(TimeUnit.MILLISECONDS)); + } + + public abstract void doTest(); + } + + public static class RequiredVectorTester extends PerfTester { + + public RequiredVectorTester(OperatorFixture fixture) { + super(fixture, DataMode.REQUIRED, "Required vector"); + } + + @Override + public void doTest() { + try (IntVector vector = new IntVector(field, fixture.allocator());) { + vector.allocateNew(4096); + IntVector.Mutator mutator = vector.getMutator(); + timer.start(); + for (int i = 0; i < ROW_COUNT; i++) { + mutator.setSafe(i, 1234); + } + timer.stop(); + } + } + } + + public static class NullableVectorTester extends PerfTester { + + public NullableVectorTester(OperatorFixture fixture) { + super(fixture, DataMode.OPTIONAL, "Nullable vector"); + } + + @Override + public void doTest() { + try (NullableIntVector vector = new NullableIntVector(field, fixture.allocator());) { + vector.allocateNew(4096); + NullableIntVector.Mutator mutator = vector.getMutator(); + timer.start(); + for (int i = 0; i < ROW_COUNT; i++) { + mutator.setSafe(i, 1234); + } + timer.stop(); + } + } + } + + public static class RepeatedVectorTester extends PerfTester { + + public RepeatedVectorTester(OperatorFixture fixture) { + super(fixture, DataMode.REQUIRED, "Repeated vector"); + } + + @Override + public void doTest() { + try (RepeatedIntVector vector = new RepeatedIntVector(field, fixture.allocator());) { + vector.allocateNew(4096, 5); + RepeatedIntVector.Mutator mutator = vector.getMutator(); + timer.start(); + for (int i = 0; i < ROW_COUNT / 5; i++) { + mutator.startNewValue(i); + mutator.addSafe(i, 12341); + mutator.addSafe(i, 12342); + mutator.addSafe(i, 12343); + mutator.addSafe(i, 12344); + mutator.addSafe(i, 12345); + } + timer.stop(); + } + } + } + + private static class TestWriterIndex implements ColumnWriterIndex { + + public int index; + + @Override + public int vectorIndex() { return index; } + + @Override + public void overflowed() { + throw new IllegalStateException(); + } + + @Override + public boolean legal() { return true; } + + @Override + public void nextElement() { index++; } + } + + public static class RequiredWriterTester extends PerfTester { + + public RequiredWriterTester(OperatorFixture fixture) { + super(fixture, DataMode.REQUIRED, "Required writer"); + } + + @Override + public void doTest() { + try (IntVector vector = new IntVector(rowSchema.column(0), fixture.allocator());) { + vector.allocateNew(4096); + IntColumnWriter colWriter = new IntColumnWriter(); + colWriter.bindVector(vector); + 
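+        // Bind a bare test index: setInt() advances it via
+        // nextElement(), so the loop below moves through the rows.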
TestWriterIndex index = new TestWriterIndex(); + colWriter.bindIndex(index); + timer.start(); + while (index.index < ROW_COUNT) { + colWriter.setInt(1234); + } + timer.stop(); + } + } + } + + public static class NullableWriterTester extends PerfTester { + + public NullableWriterTester(OperatorFixture fixture) { + super(fixture, DataMode.OPTIONAL, "Nullable writer"); + } + + @Override + public void doTest() { + try (NullableIntVector vector = new NullableIntVector(rowSchema.column(0), fixture.allocator());) { + vector.allocateNew(4096); + NullableScalarWriter colWriter = new NullableScalarWriter(new IntColumnWriter()); + colWriter.bindVector(vector); + TestWriterIndex index = new TestWriterIndex(); + colWriter.bindIndex(index); + timer.start(); + while (index.index < ROW_COUNT) { + colWriter.setInt(1234); + } + timer.stop(); + } + } + } + + public static class ArrayWriterTester extends PerfTester { + + public ArrayWriterTester(OperatorFixture fixture) { + super(fixture, DataMode.REQUIRED, "Array writer"); + } + + @Override + public void doTest() { + try (RepeatedIntVector vector = new RepeatedIntVector(rowSchema.column(0), fixture.allocator());) { + vector.allocateNew(4096, 5); + IntColumnWriter colWriter = new IntColumnWriter(); + ArrayObjectWriter arrayWriter = ScalarArrayWriter.build(vector, colWriter); + TestWriterIndex index = new TestWriterIndex(); + arrayWriter.bindIndex(index); + arrayWriter.startWrite(); + timer.start(); + for ( ; index.index < ROW_COUNT / 5; index.index++) { + arrayWriter.startValue(); + colWriter.setInt(12341); + colWriter.setInt(12342); + colWriter.setInt(12343); + colWriter.setInt(12344); + colWriter.setInt(12345); + arrayWriter.endValue(); + } + arrayWriter.endWrite(); + timer.stop(); + } + } + } + + public static void main(String args[]) { + try (OperatorFixture fixture = OperatorFixture.standardFixture();) { + for (int i = 0; i < 2; i++) { + System.out.println((i==0) ? 
"Warmup" : "Test run"); + new RequiredVectorTester(fixture).runTest(); + new RequiredWriterTester(fixture).runTest(); + new NullableVectorTester(fixture).runTest(); + new NullableWriterTester(fixture).runTest(); + new RepeatedVectorTester(fixture).runTest(); + new ArrayWriterTester(fixture).runTest(); + } + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + @SuppressWarnings("unused") + private static void testWriter2(TupleMetadata rowSchema, + OperatorFixture fixture, Stopwatch timer) { + ExtendableRowSet rs = fixture.rowSet(rowSchema); + RowSetWriter writer = rs.writer(4096); + ScalarWriter colWriter = writer.scalar(0); + timer.start(); + for (int i = 0; i < ROW_COUNT; i++) { + colWriter.setInt(i); + writer.save(); + } + timer.stop(); + writer.done().clear(); + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java index 03417ff2490..2609504b74b 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java @@ -19,363 +19,510 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.UnsupportedEncodingException; -import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.accessor.ArrayReader; import org.apache.drill.exec.vector.accessor.ArrayWriter; -import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.TupleReader; +import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.apache.drill.exec.vector.complex.MapVector; +import org.apache.drill.exec.vector.complex.RepeatedMapVector; import org.apache.drill.test.SubOperatorTest; import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; -import org.apache.drill.test.rowSet.RowSet.RowSetReader; -import org.apache.drill.test.rowSet.RowSet.RowSetWriter; import org.apache.drill.test.rowSet.RowSet.SingleRowSet; import org.apache.drill.test.rowSet.RowSetComparison; -import org.apache.drill.test.rowSet.RowSetSchema; -import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema; -import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema; +import org.apache.drill.test.rowSet.RowSetReader; +import org.apache.drill.test.rowSet.RowSetWriter; import org.apache.drill.test.rowSet.SchemaBuilder; +import org.bouncycastle.util.Arrays; import org.junit.Test; -import com.google.common.base.Splitter; +/** + * Test row sets. Since row sets are a thin wrapper around vectors, + * readers and writers, this is also a test of those constructs. + *

+ * Tests basic protocol of the writers:
+ * <pre><code>
+ * row : tuple
+ * tuple : column *
+ * column : scalar obj | array obj | tuple obj
+ * scalar obj : scalar
+ * array obj : array writer
+ * array writer : element
+ * element : column
+ * tuple obj : tuple
+ * </code></pre>
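+ * <p>
+ * In API terms, one row under this protocol looks like the following
+ * sketch (mirroring the scalar test below):
+ * <pre><code>
+ * RowSetWriter writer = rowSet.writer();
+ * writer.scalar("a").setInt(10);
+ * writer.save();
+ * SingleRowSet result = writer.done();
+ * </code></pre>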
+ */ public class RowSetTest extends SubOperatorTest { /** - * Test a simple physical schema with no maps. + * Test the simplest constructs: a row with top-level scalar + * columns. + *

+ * The focus here is the structure of the readers and writers, along + * with the row set loader and verifier that use those constructs. + * That is, while this test uses the int vector, this test is not + * focused on that vector. + * + * @throws VectorOverflowException should never occur */ @Test - public void testSchema() { - BatchSchema batchSchema = new SchemaBuilder() - .add("c", MinorType.INT) - .add("a", MinorType.INT, DataMode.REPEATED) - .addNullable("b", MinorType.VARCHAR) - .build(); + public void testScalarStructure() { + TupleMetadata schema = new SchemaBuilder() + .add("a", MinorType.INT) + .buildSchema(); + ExtendableRowSet rowSet = fixture.rowSet(schema); + RowSetWriter writer = rowSet.writer(); + + // Required Int + // Verify the invariants of the "full" and "simple" access paths + + assertEquals(ObjectType.SCALAR, writer.column("a").type()); + assertSame(writer.column("a"), writer.column(0)); + assertSame(writer.scalar("a"), writer.scalar(0)); + assertSame(writer.column("a").scalar(), writer.scalar("a")); + assertSame(writer.column(0).scalar(), writer.scalar(0)); + assertEquals(ValueType.INTEGER, writer.scalar(0).valueType()); + + // Sanity checks + + try { + writer.column(0).array(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + try { + writer.column(0).tuple(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } - assertEquals("c", batchSchema.getColumn(0).getName()); - assertEquals("a", batchSchema.getColumn(1).getName()); - assertEquals("b", batchSchema.getColumn(2).getName()); - - RowSetSchema schema = new RowSetSchema(batchSchema); - TupleSchema access = schema.hierarchicalAccess(); - assertEquals(3, access.count()); - - crossCheck(access, 0, "c", MinorType.INT); - assertEquals(DataMode.REQUIRED, access.column(0).getDataMode()); - assertEquals(DataMode.REQUIRED, access.column(0).getType().getMode()); - assertTrue(! access.column(0).isNullable()); - - crossCheck(access, 1, "a", MinorType.INT); - assertEquals(DataMode.REPEATED, access.column(1).getDataMode()); - assertEquals(DataMode.REPEATED, access.column(1).getType().getMode()); - assertTrue(! access.column(1).isNullable()); - - crossCheck(access, 2, "b", MinorType.VARCHAR); - assertEquals(MinorType.VARCHAR, access.column(2).getType().getMinorType()); - assertEquals(DataMode.OPTIONAL, access.column(2).getDataMode()); - assertEquals(DataMode.OPTIONAL, access.column(2).getType().getMode()); - assertTrue(access.column(2).isNullable()); - - // No maps: physical schema is the same as access schema. - - PhysicalSchema physical = schema.physical(); - assertEquals(3, physical.count()); - assertEquals("c", physical.column(0).field().getName()); - assertEquals("a", physical.column(1).field().getName()); - assertEquals("b", physical.column(2).field().getName()); - } + // Test the various ways to get at the scalar writer. - /** - * Validate that the actual column metadata is as expected by - * cross-checking: validate that the column at the index and - * the column at the column name are both correct. 
- * - * @param schema the schema for the row set - * @param index column index - * @param fullName expected column name - * @param type expected type - */ + writer.column("a").scalar().setInt(10); + writer.save(); + writer.scalar("a").setInt(20); + writer.save(); + writer.column(0).scalar().setInt(30); + writer.save(); + writer.scalar(0).setInt(40); + writer.save(); - public void crossCheck(TupleSchema schema, int index, String fullName, MinorType type) { - String name = null; - for (String part : Splitter.on(".").split(fullName)) { - name = part; - } - assertEquals(name, schema.column(index).getName()); - assertEquals(index, schema.columnIndex(fullName)); - assertSame(schema.column(index), schema.column(fullName)); - assertEquals(type, schema.column(index).getType().getMinorType()); - } + // Finish the row set and get a reader. - /** - * Verify that a nested map schema works as expected. - */ + SingleRowSet actual = writer.done(); + RowSetReader reader = actual.reader(); - @Test - public void testMapSchema() { - BatchSchema batchSchema = new SchemaBuilder() - .add("c", MinorType.INT) - .addMap("a") - .addNullable("b", MinorType.VARCHAR) - .add("d", MinorType.INT) - .addMap("e") - .add("f", MinorType.VARCHAR) - .buildMap() - .add("g", MinorType.INT) - .buildMap() - .add("h", MinorType.BIGINT) - .build(); + // Verify invariants - RowSetSchema schema = new RowSetSchema(batchSchema); - - // Access schema: flattened with maps removed - - FlattenedSchema access = schema.flatAccess(); - assertEquals(6, access.count()); - crossCheck(access, 0, "c", MinorType.INT); - crossCheck(access, 1, "a.b", MinorType.VARCHAR); - crossCheck(access, 2, "a.d", MinorType.INT); - crossCheck(access, 3, "a.e.f", MinorType.VARCHAR); - crossCheck(access, 4, "a.g", MinorType.INT); - crossCheck(access, 5, "h", MinorType.BIGINT); - - // Should have two maps. - - assertEquals(2, access.mapCount()); - assertEquals("a", access.map(0).getName()); - assertEquals("e", access.map(1).getName()); - assertEquals(0, access.mapIndex("a")); - assertEquals(1, access.mapIndex("a.e")); - - // Verify physical schema: should mirror the schema created above. 
- - PhysicalSchema physical = schema.physical(); - assertEquals(3, physical.count()); - assertEquals("c", physical.column(0).field().getName()); - assertEquals("c", physical.column(0).fullName()); - assertFalse(physical.column(0).isMap()); - assertNull(physical.column(0).mapSchema()); - - assertEquals("a", physical.column(1).field().getName()); - assertEquals("a", physical.column(1).fullName()); - assertTrue(physical.column(1).isMap()); - assertNotNull(physical.column(1).mapSchema()); - - assertEquals("h", physical.column(2).field().getName()); - assertEquals("h", physical.column(2).fullName()); - assertFalse(physical.column(2).isMap()); - assertNull(physical.column(2).mapSchema()); - - PhysicalSchema aSchema = physical.column(1).mapSchema(); - assertEquals(4, aSchema.count()); - assertEquals("b", aSchema.column(0).field().getName()); - assertEquals("a.b", aSchema.column(0).fullName()); - assertEquals("d", aSchema.column(1).field().getName()); - assertEquals("e", aSchema.column(2).field().getName()); - assertEquals("g", aSchema.column(3).field().getName()); - - PhysicalSchema eSchema = aSchema.column(2).mapSchema(); - assertEquals(1, eSchema.count()); - assertEquals("f", eSchema.column(0).field().getName()); - assertEquals("a.e.f", eSchema.column(0).fullName()); - } + assertEquals(ObjectType.SCALAR, reader.column(0).type()); + assertSame(reader.column("a"), reader.column(0)); + assertSame(reader.scalar("a"), reader.scalar(0)); + assertSame(reader.column("a").scalar(), reader.scalar("a")); + assertSame(reader.column(0).scalar(), reader.scalar(0)); + assertEquals(ValueType.INTEGER, reader.scalar(0).valueType()); - /** - * Verify that simple scalar (non-repeated) column readers - * and writers work as expected. This is for tiny ints. - */ + // Test various accessors: full and simple - @Test - public void testTinyIntRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.TINYINT) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0) - .add(Byte.MAX_VALUE) - .add(Byte.MIN_VALUE) - .build(); - assertEquals(3, rs.rowCount()); - RowSetReader reader = rs.reader(); assertTrue(reader.next()); - assertEquals(0, reader.column(0).getInt()); - assertTrue(reader.next()); - assertEquals(Byte.MAX_VALUE, reader.column(0).getInt()); - assertEquals((int) Byte.MAX_VALUE, reader.column(0).getObject()); - assertTrue(reader.next()); - assertEquals(Byte.MIN_VALUE, reader.column(0).getInt()); - assertFalse(reader.next()); - rs.clear(); - } - - @Test - public void testSmallIntRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.SMALLINT) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0) - .add(Short.MAX_VALUE) - .add(Short.MIN_VALUE) - .build(); - RowSetReader reader = rs.reader(); + assertEquals(10, reader.column("a").scalar().getInt()); assertTrue(reader.next()); - assertEquals(0, reader.column(0).getInt()); + assertEquals(20, reader.scalar("a").getInt()); assertTrue(reader.next()); - assertEquals(Short.MAX_VALUE, reader.column(0).getInt()); - assertEquals((int) Short.MAX_VALUE, reader.column(0).getObject()); + assertEquals(30, reader.column(0).scalar().getInt()); assertTrue(reader.next()); - assertEquals(Short.MIN_VALUE, reader.column(0).getInt()); + assertEquals(40, reader.scalar(0).getInt()); assertFalse(reader.next()); - rs.clear(); - } - @Test - public void testIntRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.INT) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) 
- .add(0) - .add(Integer.MAX_VALUE) - .add(Integer.MIN_VALUE) + // Test the above again via the writer and reader + // utility classes. + + SingleRowSet expected = fixture.rowSetBuilder(schema) + .add(10) + .add(20) + .add(30) + .add(40) .build(); - RowSetReader reader = rs.reader(); - assertTrue(reader.next()); - assertEquals(0, reader.column(0).getInt()); - assertTrue(reader.next()); - assertEquals(Integer.MAX_VALUE, reader.column(0).getInt()); - assertEquals(Integer.MAX_VALUE, reader.column(0).getObject()); - assertTrue(reader.next()); - assertEquals(Integer.MIN_VALUE, reader.column(0).getInt()); - assertFalse(reader.next()); - rs.clear(); + new RowSetComparison(expected).verifyAndClearAll(actual); } + /** + * Test a record with a top level array. The focus here is on the + * scalar array structure. + * + * @throws VectorOverflowException should never occur + */ + @Test - public void testLongRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.BIGINT) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0L) - .add(Long.MAX_VALUE) - .add(Long.MIN_VALUE) - .build(); - RowSetReader reader = rs.reader(); + public void testScalarArrayStructure() { + TupleMetadata schema = new SchemaBuilder() + .addArray("a", MinorType.INT) + .buildSchema(); + ExtendableRowSet rowSet = fixture.rowSet(schema); + RowSetWriter writer = rowSet.writer(); + + // Repeated Int + // Verify the invariants of the "full" and "simple" access paths + + assertEquals(ObjectType.ARRAY, writer.column("a").type()); + + assertSame(writer.column("a"), writer.column(0)); + assertSame(writer.array("a"), writer.array(0)); + assertSame(writer.column("a").array(), writer.array("a")); + assertSame(writer.column(0).array(), writer.array(0)); + + assertEquals(ObjectType.SCALAR, writer.column("a").array().entry().type()); + assertEquals(ObjectType.SCALAR, writer.column("a").array().entryType()); + assertSame(writer.array(0).entry().scalar(), writer.array(0).scalar()); + assertEquals(ValueType.INTEGER, writer.array(0).scalar().valueType()); + + // Sanity checks + + try { + writer.column(0).scalar(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + try { + writer.column(0).tuple(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + + // Write some data + + ScalarWriter intWriter = writer.array("a").scalar(); + intWriter.setInt(10); + intWriter.setInt(11); + writer.save(); + intWriter.setInt(20); + intWriter.setInt(21); + intWriter.setInt(22); + writer.save(); + intWriter.setInt(30); + writer.save(); + intWriter.setInt(40); + intWriter.setInt(41); + writer.save(); + + // Finish the row set and get a reader. 
+ + SingleRowSet actual = writer.done(); + RowSetReader reader = actual.reader(); + + // Verify the invariants of the "full" and "simple" access paths + + assertEquals(ObjectType.ARRAY, writer.column("a").type()); + + assertSame(reader.column("a"), reader.column(0)); + assertSame(reader.array("a"), reader.array(0)); + assertSame(reader.column("a").array(), reader.array("a")); + assertSame(reader.column(0).array(), reader.array(0)); + + assertEquals(ObjectType.SCALAR, reader.column("a").array().entryType()); + assertEquals(ValueType.INTEGER, reader.array(0).elements().valueType()); + + // Read and verify the rows + + ScalarElementReader intReader = reader.array(0).elements(); assertTrue(reader.next()); - assertEquals(0, reader.column(0).getLong()); + assertEquals(2, intReader.size()); + assertEquals(10, intReader.getInt(0)); + assertEquals(11, intReader.getInt(1)); assertTrue(reader.next()); - assertEquals(Long.MAX_VALUE, reader.column(0).getLong()); - assertEquals(Long.MAX_VALUE, reader.column(0).getObject()); + assertEquals(3, intReader.size()); + assertEquals(20, intReader.getInt(0)); + assertEquals(21, intReader.getInt(1)); + assertEquals(22, intReader.getInt(2)); assertTrue(reader.next()); - assertEquals(Long.MIN_VALUE, reader.column(0).getLong()); + assertEquals(1, intReader.size()); + assertEquals(30, intReader.getInt(0)); + assertTrue(reader.next()); + assertEquals(2, intReader.size()); + assertEquals(40, intReader.getInt(0)); + assertEquals(41, intReader.getInt(1)); assertFalse(reader.next()); - rs.clear(); - } - @Test - public void testFloatRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.FLOAT4) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0F) - .add(Float.MAX_VALUE) - .add(Float.MIN_VALUE) + // Test the above again via the writer and reader + // utility classes. + + SingleRowSet expected = fixture.rowSetBuilder(schema) + .addSingleCol(new int[] {10, 11}) + .addSingleCol(new int[] {20, 21, 22}) + .addSingleCol(new int[] {30}) + .addSingleCol(new int[] {40, 41}) .build(); - RowSetReader reader = rs.reader(); - assertTrue(reader.next()); - assertEquals(0, reader.column(0).getDouble(), 0.000001); - assertTrue(reader.next()); - assertEquals((double) Float.MAX_VALUE, reader.column(0).getDouble(), 0.000001); - assertEquals((double) Float.MAX_VALUE, (double) reader.column(0).getObject(), 0.000001); - assertTrue(reader.next()); - assertEquals((double) Float.MIN_VALUE, reader.column(0).getDouble(), 0.000001); - assertFalse(reader.next()); - rs.clear(); + new RowSetComparison(expected) + .verifyAndClearAll(actual); } + /** + * Test a simple map structure at the top level of a row. 
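+ * <p>
+ * The schema under test (as built below):
+ * <pre><code>
+ * new SchemaBuilder()
+ *   .add("a", MinorType.INT)
+ *   .addMap("m")
+ *     .addArray("b", MinorType.INT)
+ *     .buildMap()
+ *   .buildSchema();
+ * </code></pre>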
+ * + * @throws VectorOverflowException should never occur + */ + @Test - public void testDoubleRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.FLOAT8) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(0D) - .add(Double.MAX_VALUE) - .add(Double.MIN_VALUE) - .build(); - RowSetReader reader = rs.reader(); + public void testMapStructure() { + TupleMetadata schema = new SchemaBuilder() + .add("a", MinorType.INT) + .addMap("m") + .addArray("b", MinorType.INT) + .buildMap() + .buildSchema(); + ExtendableRowSet rowSet = fixture.rowSet(schema); + RowSetWriter writer = rowSet.writer(); + + // Map and Int + // Test Invariants + + assertEquals(ObjectType.SCALAR, writer.column("a").type()); + assertEquals(ObjectType.SCALAR, writer.column(0).type()); + assertEquals(ObjectType.TUPLE, writer.column("m").type()); + assertEquals(ObjectType.TUPLE, writer.column(1).type()); + assertSame(writer.column(1).tuple(), writer.tuple(1)); + + TupleWriter mapWriter = writer.column(1).tuple(); + assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entry().type()); + assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entryType()); + + ScalarWriter aWriter = writer.column("a").scalar(); + ScalarWriter bWriter = writer.column("m").tuple().column("b").array().entry().scalar(); + assertSame(bWriter, writer.tuple(1).array(0).scalar()); + assertEquals(ValueType.INTEGER, bWriter.valueType()); + + // Sanity checks + + try { + writer.column(1).scalar(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + try { + writer.column(1).array(); + fail(); + } catch (UnsupportedOperationException e) { + // Expected + } + + // Write data + + aWriter.setInt(10); + bWriter.setInt(11); + bWriter.setInt(12); + writer.save(); + aWriter.setInt(20); + bWriter.setInt(21); + bWriter.setInt(22); + writer.save(); + aWriter.setInt(30); + bWriter.setInt(31); + bWriter.setInt(32); + writer.save(); + + // Finish the row set and get a reader. 
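+    // done() finalizes the vectors and copies the final row count
+    // into the underlying container (see RowSetWriterImpl.done()).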
+ + SingleRowSet actual = writer.done(); + RowSetReader reader = actual.reader(); + + assertEquals(ObjectType.SCALAR, reader.column("a").type()); + assertEquals(ObjectType.SCALAR, reader.column(0).type()); + assertEquals(ObjectType.TUPLE, reader.column("m").type()); + assertEquals(ObjectType.TUPLE, reader.column(1).type()); + assertSame(reader.column(1).tuple(), reader.tuple(1)); + + ScalarReader aReader = reader.column(0).scalar(); + TupleReader mReader = reader.column(1).tuple(); + assertEquals(ObjectType.SCALAR, mReader.column("b").array().entryType()); + ScalarElementReader bReader = mReader.column(0).elements(); + assertEquals(ValueType.INTEGER, bReader.valueType()); + assertTrue(reader.next()); - assertEquals(0, reader.column(0).getDouble(), 0.000001); + assertEquals(10, aReader.getInt()); + assertEquals(11, bReader.getInt(0)); + assertEquals(12, bReader.getInt(1)); assertTrue(reader.next()); - assertEquals(Double.MAX_VALUE, reader.column(0).getDouble(), 0.000001); - assertEquals(Double.MAX_VALUE, (double) reader.column(0).getObject(), 0.000001); + assertEquals(20, aReader.getInt()); + assertEquals(21, bReader.getInt(0)); + assertEquals(22, bReader.getInt(1)); assertTrue(reader.next()); - assertEquals(Double.MIN_VALUE, reader.column(0).getDouble(), 0.000001); + assertEquals(30, aReader.getInt()); + assertEquals(31, bReader.getInt(0)); + assertEquals(32, bReader.getInt(1)); assertFalse(reader.next()); - rs.clear(); - } - @Test - public void testStringRW() { - BatchSchema batchSchema = new SchemaBuilder() - .add("col", MinorType.VARCHAR) - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add("") - .add("abcd") + // Verify that the map accessor's value count was set. + + @SuppressWarnings("resource") + MapVector mapVector = (MapVector) actual.container().getValueVector(1).getValueVector(); + assertEquals(actual.rowCount(), mapVector.getAccessor().getValueCount()); + + SingleRowSet expected = fixture.rowSetBuilder(schema) + .add(10, new Object[] {new int[] {11, 12}}) + .add(20, new Object[] {new int[] {21, 22}}) + .add(30, new Object[] {new int[] {31, 32}}) .build(); - RowSetReader reader = rs.reader(); - assertTrue(reader.next()); - assertEquals("", reader.column(0).getString()); - assertTrue(reader.next()); - assertEquals("abcd", reader.column(0).getString()); - assertEquals("abcd", reader.column(0).getObject()); - assertFalse(reader.next()); - rs.clear(); + new RowSetComparison(expected) + .verifyAndClearAll(actual); } - /** - * Test writing to and reading from a row set with nested maps. - * Map fields are flattened into a logical schema. - */ - @Test - public void testMap() { - BatchSchema batchSchema = new SchemaBuilder() + public void testRepeatedMapStructure() { + TupleMetadata schema = new SchemaBuilder() .add("a", MinorType.INT) - .addMap("b") + .addMapArray("m") + .add("b", MinorType.INT) .add("c", MinorType.INT) - .add("d", MinorType.INT) .buildMap() - .build(); - SingleRowSet rs = fixture.rowSetBuilder(batchSchema) - .add(10, 20, 30) - .add(40, 50, 60) - .build(); - RowSetReader reader = rs.reader(); + .buildSchema(); + ExtendableRowSet rowSet = fixture.rowSet(schema); + RowSetWriter writer = rowSet.writer(); + + // Map and Int + // Pick out components and lightly test. (Assumes structure + // tested earlier is still valid, so no need to exhaustively + // test again.) 
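+    // Note the two-level save() protocol used below: maWriter.save()
+    // closes one map entry within a row; writer.save() closes the row.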
+ + assertEquals(ObjectType.SCALAR, writer.column("a").type()); + assertEquals(ObjectType.ARRAY, writer.column("m").type()); + + ArrayWriter maWriter = writer.column(1).array(); + assertEquals(ObjectType.TUPLE, maWriter.entryType()); + + TupleWriter mapWriter = maWriter.tuple(); + assertEquals(ObjectType.SCALAR, mapWriter.column("b").type()); + assertEquals(ObjectType.SCALAR, mapWriter.column("c").type()); + + ScalarWriter aWriter = writer.column("a").scalar(); + ScalarWriter bWriter = mapWriter.scalar("b"); + ScalarWriter cWriter = mapWriter.scalar("c"); + assertEquals(ValueType.INTEGER, aWriter.valueType()); + assertEquals(ValueType.INTEGER, bWriter.valueType()); + assertEquals(ValueType.INTEGER, cWriter.valueType()); + + // Write data + + aWriter.setInt(10); + bWriter.setInt(101); + cWriter.setInt(102); + maWriter.save(); // Advance to next array position + bWriter.setInt(111); + cWriter.setInt(112); + maWriter.save(); + writer.save(); + + aWriter.setInt(20); + bWriter.setInt(201); + cWriter.setInt(202); + maWriter.save(); + bWriter.setInt(211); + cWriter.setInt(212); + maWriter.save(); + writer.save(); + + aWriter.setInt(30); + bWriter.setInt(301); + cWriter.setInt(302); + maWriter.save(); + bWriter.setInt(311); + cWriter.setInt(312); + maWriter.save(); + writer.save(); + + // Finish the row set and get a reader. + + SingleRowSet actual = writer.done(); + RowSetReader reader = actual.reader(); + + // Verify reader structure + + assertEquals(ObjectType.SCALAR, reader.column("a").type()); + assertEquals(ObjectType.ARRAY, reader.column("m").type()); + + ArrayReader maReader = reader.column(1).array(); + assertEquals(ObjectType.TUPLE, maReader.entryType()); + + TupleReader mapReader = maReader.tuple(); + assertEquals(ObjectType.SCALAR, mapReader.column("b").type()); + assertEquals(ObjectType.SCALAR, mapReader.column("c").type()); + + ScalarReader aReader = reader.column("a").scalar(); + ScalarReader bReader = mapReader.scalar("b"); + ScalarReader cReader = mapReader.scalar("c"); + assertEquals(ValueType.INTEGER, aReader.valueType()); + assertEquals(ValueType.INTEGER, bReader.valueType()); + assertEquals(ValueType.INTEGER, cReader.valueType()); + + // Row 1: use index accessors + + assertTrue(reader.next()); + assertEquals(10, aReader.getInt()); + TupleReader ixReader = maReader.tuple(0); + assertEquals(101, ixReader.scalar(0).getInt()); + assertEquals(102, ixReader.scalar(1).getInt()); + ixReader = maReader.tuple(1); + assertEquals(111, ixReader.scalar(0).getInt()); + assertEquals(112, ixReader.scalar(1).getInt()); + + // Row 2: use common accessor with explicit positioning, + // but access scalars through the map reader. 
+ assertTrue(reader.next()); - assertEquals(10, reader.column(0).getInt()); - assertEquals(20, reader.column(1).getInt()); - assertEquals(30, reader.column(2).getInt()); - assertEquals(10, reader.column("a").getInt()); - assertEquals(30, reader.column("b.d").getInt()); + assertEquals(20, aReader.getInt()); + maReader.setPosn(0); + assertEquals(201, mapReader.scalar(0).getInt()); + assertEquals(202, mapReader.scalar(1).getInt()); + maReader.setPosn(1); + assertEquals(211, mapReader.scalar(0).getInt()); + assertEquals(212, mapReader.scalar(1).getInt()); + + // Row 3: use common accessor for scalars + assertTrue(reader.next()); - assertEquals(40, reader.column(0).getInt()); - assertEquals(50, reader.column(1).getInt()); - assertEquals(60, reader.column(2).getInt()); + assertEquals(30, aReader.getInt()); + maReader.setPosn(0); + assertEquals(301, bReader.getInt()); + assertEquals(302, cReader.getInt()); + maReader.setPosn(1); + assertEquals(311, bReader.getInt()); + assertEquals(312, cReader.getInt()); + assertFalse(reader.next()); - rs.clear(); - } + // Verify that the map accessor's value count was set. + + @SuppressWarnings("resource") + RepeatedMapVector mapVector = (RepeatedMapVector) actual.container().getValueVector(1).getValueVector(); + assertEquals(actual.rowCount(), mapVector.getAccessor().getValueCount()); + + // Verify the readers and writers again using the testing tools. + + SingleRowSet expected = fixture.rowSetBuilder(schema) + .add(10, new Object[] {new Object[] {101, 102}, new Object[] {111, 112}}) + .add(20, new Object[] {new Object[] {201, 202}, new Object[] {211, 212}}) + .add(30, new Object[] {new Object[] {301, 302}, new Object[] {311, 312}}) + .build(); + new RowSetComparison(expected) + .verifyAndClearAll(actual); + } /** * Test an array of ints (as an example fixed-width type) * at the top level of a schema. 
@@ -390,38 +537,36 @@ public void TestTopFixedWidthArray() { ExtendableRowSet rs1 = fixture.rowSet(batchSchema); RowSetWriter writer = rs1.writer(); - writer.column(0).setInt(10); - ArrayWriter array = writer.column(1).array(); + writer.scalar(0).setInt(10); + ScalarWriter array = writer.array(1).scalar(); array.setInt(100); array.setInt(110); writer.save(); - writer.column(0).setInt(20); - array = writer.column(1).array(); + writer.scalar(0).setInt(20); array.setInt(200); array.setInt(120); array.setInt(220); writer.save(); - writer.column(0).setInt(30); + writer.scalar(0).setInt(30); writer.save(); - writer.done(); - RowSetReader reader = rs1.reader(); + SingleRowSet result = writer.done(); + + RowSetReader reader = result.reader(); assertTrue(reader.next()); - assertEquals(10, reader.column(0).getInt()); - ArrayReader arrayReader = reader.column(1).array(); + assertEquals(10, reader.scalar(0).getInt()); + ScalarElementReader arrayReader = reader.array(1).elements(); assertEquals(2, arrayReader.size()); assertEquals(100, arrayReader.getInt(0)); assertEquals(110, arrayReader.getInt(1)); assertTrue(reader.next()); - assertEquals(20, reader.column(0).getInt()); - arrayReader = reader.column(1).array(); + assertEquals(20, reader.scalar(0).getInt()); assertEquals(3, arrayReader.size()); assertEquals(200, arrayReader.getInt(0)); assertEquals(120, arrayReader.getInt(1)); assertEquals(220, arrayReader.getInt(2)); assertTrue(reader.next()); - assertEquals(30, reader.column(0).getInt()); - arrayReader = reader.column(1).array(); + assertEquals(30, reader.scalar(0).getInt()); assertEquals(0, arrayReader.size()); assertFalse(reader.next()); @@ -435,4 +580,88 @@ public void TestTopFixedWidthArray() { .verifyAndClearAll(rs2); } + /** + * Test filling a row set up to the maximum number of rows. + * Values are small enough to prevent filling to the + * maximum buffer size. + */ + + @Test + public void testRowBounds() { + BatchSchema batchSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .build(); + + ExtendableRowSet rs = fixture.rowSet(batchSchema); + RowSetWriter writer = rs.writer(); + int count = 0; + while (! writer.isFull()) { + writer.scalar(0).setInt(count++); + writer.save(); + } + writer.done(); + + assertEquals(ValueVector.MAX_ROW_COUNT, count); + // The writer index points past the writable area. + // But, this is fine, the valid() method says we can't + // write at this location. + assertEquals(ValueVector.MAX_ROW_COUNT, writer.rowIndex()); + assertEquals(ValueVector.MAX_ROW_COUNT, rs.rowCount()); + rs.clear(); + } + + /** + * Test filling a row set up to the maximum vector size. + * Values in the first column are small enough to prevent filling to the + * maximum buffer size, but values in the second column + * will reach maximum buffer size before maximum row size. + * The result should be the number of rows that fit, with the + * partial last row not counting. (A complete application would + * reload the partial row into a new row set.) + */ + + @Test + public void testBufferBounds() { + BatchSchema batchSchema = new SchemaBuilder() + .add("a", MinorType.INT) + .add("b", MinorType.VARCHAR) + .build(); + + String varCharValue; + try { + byte rawValue[] = new byte[512]; + Arrays.fill(rawValue, (byte) 'X'); + varCharValue = new String(rawValue, "UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new IllegalStateException(e); + } + + ExtendableRowSet rs = fixture.rowSet(batchSchema); + RowSetWriter writer = rs.writer(); + int count = 0; + try { + + // Test overflow. 
This is not a typical use case: don't want to + // hit overflow without overflow handling. In this case, we throw + // away the last row because the row set abstraction does not + // implement vector overflow other than throwing an exception. + + for (;;) { + writer.scalar(0).setInt(count); + writer.scalar(1).setString(varCharValue); + + // Won't get here on overflow. + writer.save(); + count++; + } + } catch (IndexOutOfBoundsException e) { + assertTrue(e.getMessage().contains("overflow")); + } + writer.done(); + + assertTrue(count < ValueVector.MAX_ROW_COUNT); + assertEquals(count, writer.rowIndex()); + assertEquals(count, rs.rowCount()); + rs.clear(); + } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java new file mode 100644 index 00000000000..147b713a8cd --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet.test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet; +import org.apache.drill.test.rowSet.RowSet.SingleRowSet; +import org.apache.drill.test.rowSet.RowSetReader; +import org.apache.drill.test.rowSet.RowSetUtilities; +import org.apache.drill.test.rowSet.RowSetWriter; +import org.apache.drill.test.rowSet.SchemaBuilder; +import org.junit.Test; + +/** + * Test the "fill empties" logic for all types for all modes. + * This test exploits the dynamic typing ability of the + * accessors. Creating an object per value is too slow for + * production code, but very handy for tests such as this. + *
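+ * <p>
+ * The write pattern exercised below, in sketch form:
+ * <pre><code>
+ * if (i % 5 == 0) { colWriter.setObject(value); }
+ * writer.save(); // skipped rows must be backfilled
+ * </code></pre>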

+ * Note that this test also has the handy side-effect of testing + * null handling in the accessor classes. + */ + +public class TestFillEmpties extends SubOperatorTest { + + public static final int ROW_COUNT = 1000; + + /** + * Test "fill empties" for required types. Here, the fill value + * is more of a convention: 0 (fixed-width) or an empty + * entry (variable width.) Some fill value is required to avoid + * the alternatives which are either 1) leave the value as + * garbage, or 2) raise an exception about the missing value. + */ + + @Test + public void testFillEmptiesRequired() { + testFillEmpties(DataMode.REQUIRED); + } + + /** + * Test "fill empties" for nullable types which are the most + * "natural" type for omitted values. + * Nullable vectors fill empties with nulls. + */ + + @Test + public void testFillEmptiesNullable() { + testFillEmpties(DataMode.OPTIONAL); + } + + /** + * Test "fill empties" for repeated types. + * Drill defines a null (omitted) array as the same thing as + * a zero-length array. + */ + + @Test + public void testFillEmptiesRepeated() { + testFillEmpties(DataMode.REPEATED); + } + + private void testFillEmpties(DataMode mode) { + for (MinorType type : MinorType.values()) { + switch (type) { + case DECIMAL28DENSE: + case DECIMAL38DENSE: + // Not yet supported + break; + case GENERIC_OBJECT: + case LATE: + case LIST: + case MAP: + case NULL: + case UNION: + // Writer N/A + break; + case BIT: + case FIXEDBINARY: + case FIXEDCHAR: + case FIXED16CHAR: + case MONEY: + case TIMESTAMPTZ: + case TIMETZ: + // Not supported in Drill + break; + case DECIMAL18: + case DECIMAL28SPARSE: + case DECIMAL9: + case DECIMAL38SPARSE: + doFillEmptiesTest(type, mode, 9, 2); + break; + default: + doFillEmptiesTest(type, mode); + } + } + } + + private void doFillEmptiesTest(MinorType type, DataMode mode, int prec, int scale) { + MajorType majorType = MajorType.newBuilder() + .setMinorType(type) + .setMode(mode) + .setPrecision(prec) + .setScale(scale) + .build(); + doFillEmptiesTest(majorType); + } + + private void doFillEmptiesTest(MinorType type, DataMode mode) { + MajorType majorType = MajorType.newBuilder() + .setMinorType(type) + .setMode(mode) + .build(); + doFillEmptiesTest(majorType); + } + + private void doFillEmptiesTest(MajorType majorType) { + if (majorType.getMode() == DataMode.REPEATED) { + dofillEmptiesRepeated(majorType); + } else { + doFillEmptiesScalar(majorType); + } + } + + private void doFillEmptiesScalar(MajorType majorType) { + TupleMetadata schema = new SchemaBuilder() + .add("a", majorType) + .buildSchema(); + ExtendableRowSet rs = fixture.rowSet(schema); + RowSetWriter writer = rs.writer(); + ScalarWriter colWriter = writer.scalar(0); + ValueType valueType = colWriter.valueType(); + boolean nullable = majorType.getMode() == DataMode.OPTIONAL; + for (int i = 0; i < ROW_COUNT; i++) { + if (i % 5 == 0) { + colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i)); + } + writer.save(); + } + SingleRowSet result = writer.done(); + RowSetReader reader = result.reader(); + ScalarReader colReader = reader.scalar(0); + MinorType type = majorType.getMinorType(); + boolean isVariable = (type == MinorType.VARCHAR || + type == MinorType.VAR16CHAR || + type == MinorType.VARBINARY); + for (int i = 0; i < ROW_COUNT; i++) { + assertTrue(reader.next()); + if (i % 5 != 0) { + if (nullable) { + // Nullable types fill with nulls. + + assertTrue(colReader.isNull()); + continue; + } + if (isVariable) { + // Variable width types fill with a zero-length value. 
+ + assertEquals(0, colReader.getBytes().length); + continue; + } + } + + // All other types fill with zero-bytes, interpreted as some form + // of zero for each type. + + Object actual = colReader.getObject(); + Object expected = RowSetUtilities.testDataFromInt(valueType, majorType, + i % 5 == 0 ? i : 0); + RowSetUtilities.assertEqualValues( + majorType.toString().replace('\n', ' ') + "[" + i + "]", + valueType, expected, actual); + } + result.clear(); + } + + private void dofillEmptiesRepeated(MajorType majorType) { + TupleMetadata schema = new SchemaBuilder() + .add("a", majorType) + .buildSchema(); + ExtendableRowSet rs = fixture.rowSet(schema); + RowSetWriter writer = rs.writer(); + ScalarWriter colWriter = writer.array(0).scalar(); + ValueType valueType = colWriter.valueType(); + for (int i = 0; i < ROW_COUNT; i++) { + if (i % 5 == 0) { + // Write two values so we can exercise a bit of the array logic. + + colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i)); + colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i+1)); + } + writer.save(); + } + SingleRowSet result = writer.done(); + RowSetReader reader = result.reader(); + ScalarElementReader colReader = reader.array(0).elements(); + for (int i = 0; i < ROW_COUNT; i++) { + assertTrue(reader.next()); + if (i % 5 != 0) { + // Empty arrays are defined to be the same as a zero-length array. + + assertEquals(0, colReader.size()); + } else { + for (int j = 0; j < 2; j++) { + Object actual = colReader.getObject(j); + Object expected = RowSetUtilities.testDataFromInt(valueType, majorType, i + j); + RowSetUtilities.assertEqualValues( + majorType.toString().replace('\n', ' ') + "[" + i + "][" + j + "]", + valueType, expected, actual); + } + } + } + result.clear(); + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java new file mode 100644 index 00000000000..38162cf31b0 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java @@ -0,0 +1,1266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.test.rowSet.test; + +import static org.junit.Assert.*; + +import java.math.BigDecimal; +import java.util.Arrays; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.RowSetReader; +import org.apache.drill.test.rowSet.SchemaBuilder; +import org.joda.time.Period; +import org.apache.drill.test.rowSet.RowSet.SingleRowSet; +import org.junit.Test; + +/** + * Verify that simple scalar (non-repeated) column readers + * and writers work as expected. The focus is on the generated + * and type-specific functions for each type. + */ + +// The following types are not fully supported in Drill +// TODO: Var16Char +// TODO: Bit +// TODO: Decimal28Sparse +// TODO: Decimal38Sparse + +public class TestScalarAccessors extends SubOperatorTest { + + @Test + public void testTinyIntRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.TINYINT) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(0) + .add(Byte.MAX_VALUE) + .add(Byte.MIN_VALUE) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.INTEGER, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, colReader.getInt()); + + assertTrue(reader.next()); + assertEquals(Byte.MAX_VALUE, colReader.getInt()); + assertEquals((int) Byte.MAX_VALUE, colReader.getObject()); + assertEquals(Byte.toString(Byte.MAX_VALUE), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(Byte.MIN_VALUE, colReader.getInt()); + + assertFalse(reader.next()); + rs.clear(); + } + + private void nullableIntTester(MinorType type) { + BatchSchema batchSchema = new SchemaBuilder() + .addNullable("col", type) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(10) + .addSingleCol(null) + .add(30) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(10, colReader.getInt()); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + // Data value is undefined, may be garbage + + assertTrue(reader.next()); + assertEquals(30, colReader.getInt()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableTinyInt() { + nullableIntTester(MinorType.TINYINT); + } + + private void intArrayTester(MinorType type) { + BatchSchema batchSchema = new SchemaBuilder() + .addArray("col", type) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new int[] {}) + .addSingleCol(new int[] {0, 20, 30}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.INTEGER, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + 
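+    // Second row: the three-element array written above, readable
+    // by element index.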
assertEquals(3, colReader.size()); + assertEquals(0, colReader.getInt(0)); + assertEquals(20, colReader.getInt(1)); + assertEquals(30, colReader.getInt(2)); + assertEquals(0, colReader.getObject(0)); + assertEquals(20, colReader.getObject(1)); + assertEquals(30, colReader.getObject(2)); + assertEquals("0", colReader.getAsString(0)); + assertEquals("20", colReader.getAsString(1)); + assertEquals("30", colReader.getAsString(2)); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testTinyIntArray() { + intArrayTester(MinorType.TINYINT); + } + + @Test + public void testSmallIntRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.SMALLINT) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(0) + .add(Short.MAX_VALUE) + .add(Short.MIN_VALUE) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.INTEGER, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, colReader.getInt()); + + assertTrue(reader.next()); + assertEquals(Short.MAX_VALUE, colReader.getInt()); + assertEquals((int) Short.MAX_VALUE, colReader.getObject()); + assertEquals(Short.toString(Short.MAX_VALUE), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(Short.MIN_VALUE, colReader.getInt()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableSmallInt() { + nullableIntTester(MinorType.SMALLINT); + } + + @Test + public void testSmallArray() { + intArrayTester(MinorType.SMALLINT); + } + + @Test + public void testIntRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.INT) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(0) + .add(Integer.MAX_VALUE) + .add(Integer.MIN_VALUE) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.INTEGER, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, reader.scalar(0).getInt()); + + assertTrue(reader.next()); + assertEquals(Integer.MAX_VALUE, colReader.getInt()); + assertEquals(Integer.MAX_VALUE, colReader.getObject()); + assertEquals(Integer.toString(Integer.MAX_VALUE), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(Integer.MIN_VALUE, colReader.getInt()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableInt() { + nullableIntTester(MinorType.INT); + } + + @Test + public void testIntArray() { + intArrayTester(MinorType.INT); + } + + private void longRWTester(MinorType type) { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", type) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(0L) + .add(Long.MAX_VALUE) + .add(Long.MIN_VALUE) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.LONG, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, colReader.getLong()); + + assertTrue(reader.next()); + assertEquals(Long.MAX_VALUE, colReader.getLong()); + assertEquals(Long.MAX_VALUE, colReader.getObject()); + assertEquals(Long.toString(Long.MAX_VALUE), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(Long.MIN_VALUE, colReader.getLong()); + + 
assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testLongRW() { + longRWTester(MinorType.BIGINT); + } + + private void nullableLongTester(MinorType type) { + BatchSchema batchSchema = new SchemaBuilder() + .addNullable("col", type) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(10L) + .addSingleCol(null) + .add(30L) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(10, colReader.getLong()); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + // Data value is undefined, may be garbage + + assertTrue(reader.next()); + assertEquals(30, colReader.getLong()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableLong() { + nullableLongTester(MinorType.BIGINT); + } + + private void longArrayTester(MinorType type) { + BatchSchema batchSchema = new SchemaBuilder() + .addArray("col", type) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new long[] {}) + .addSingleCol(new long[] {0, 20, 30}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.LONG, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + assertEquals(3, colReader.size()); + assertEquals(0, colReader.getLong(0)); + assertEquals(20, colReader.getLong(1)); + assertEquals(30, colReader.getLong(2)); + assertEquals(0L, colReader.getObject(0)); + assertEquals(20L, colReader.getObject(1)); + assertEquals(30L, colReader.getObject(2)); + assertEquals("0", colReader.getAsString(0)); + assertEquals("20", colReader.getAsString(1)); + assertEquals("30", colReader.getAsString(2)); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testLongArray() { + longArrayTester(MinorType.BIGINT); + } + + @Test + public void testFloatRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.FLOAT4) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(0F) + .add(Float.MAX_VALUE) + .add(Float.MIN_VALUE) + .add(100F) + .build(); + assertEquals(4, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.DOUBLE, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, colReader.getDouble(), 0.000001); + + assertTrue(reader.next()); + assertEquals(Float.MAX_VALUE, colReader.getDouble(), 0.000001); + assertEquals((double) Float.MAX_VALUE, (double) colReader.getObject(), 0.000001); + + assertTrue(reader.next()); + assertEquals(Float.MIN_VALUE, colReader.getDouble(), 0.000001); + + assertTrue(reader.next()); + assertEquals(100, colReader.getDouble(), 0.000001); + assertEquals("100.0", colReader.getAsString()); + + assertFalse(reader.next()); + rs.clear(); + } + + private void nullableDoubleTester(MinorType type) { + BatchSchema batchSchema = new SchemaBuilder() + .addNullable("col", type) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(10F) + .addSingleCol(null) + .add(30F) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + + 
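// Note: this tester serves both FLOAT4 and FLOAT8 columns; in both cases the + // reader reports ValueType.DOUBLE, as FLOAT4 values are widened to double on read. +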
assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(10, colReader.getDouble(), 0.000001); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + // Data value is undefined, may be garbage + + assertTrue(reader.next()); + assertEquals(30, colReader.getDouble(), 0.000001); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableFloat() { + nullableDoubleTester(MinorType.FLOAT4); + } + + private void doubleArrayTester(MinorType type) { + BatchSchema batchSchema = new SchemaBuilder() + .addArray("col", type) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new double[] {}) + .addSingleCol(new double[] {0, 20.5, 30.0}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.DOUBLE, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + assertEquals(3, colReader.size()); + assertEquals(0, colReader.getDouble(0), 0.00001); + assertEquals(20.5, colReader.getDouble(1), 0.00001); + assertEquals(30.0, colReader.getDouble(2), 0.00001); + assertEquals(0, (double) colReader.getObject(0), 0.00001); + assertEquals(20.5, (double) colReader.getObject(1), 0.00001); + assertEquals(30.0, (double) colReader.getObject(2), 0.00001); + assertEquals("0.0", colReader.getAsString(0)); + assertEquals("20.5", colReader.getAsString(1)); + assertEquals("30.0", colReader.getAsString(2)); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testFloatArray() { + doubleArrayTester(MinorType.FLOAT4); + } + + @Test + public void testDoubleRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.FLOAT8) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(0D) + .add(Double.MAX_VALUE) + .add(Double.MIN_VALUE) + .add(100D) + .build(); + assertEquals(4, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.DOUBLE, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, colReader.getDouble(), 0.000001); + + assertTrue(reader.next()); + assertEquals(Double.MAX_VALUE, colReader.getDouble(), 0.000001); + assertEquals(Double.MAX_VALUE, (double) colReader.getObject(), 0.000001); + + assertTrue(reader.next()); + assertEquals(Double.MIN_VALUE, colReader.getDouble(), 0.000001); + + assertTrue(reader.next()); + assertEquals(100, colReader.getDouble(), 0.000001); + assertEquals("100.0", colReader.getAsString()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableDouble() { + nullableDoubleTester(MinorType.FLOAT8); + } + + @Test + public void testDoubleArray() { + doubleArrayTester(MinorType.FLOAT8); + } + + @Test + public void testStringRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.VARCHAR) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add("") + .add("abcd") + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.STRING, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals("", colReader.getString()); + + assertTrue(reader.next()); + assertEquals("abcd", 
colReader.getString()); + assertEquals("abcd", colReader.getObject()); + assertEquals("\"abcd\"", colReader.getAsString()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableString() { + BatchSchema batchSchema = new SchemaBuilder() + .addNullable("col", MinorType.VARCHAR) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add("") + .addSingleCol(null) + .add("abcd") + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals("", colReader.getString()); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals("abcd", colReader.getString()); + assertEquals("abcd", colReader.getObject()); + assertEquals("\"abcd\"", colReader.getAsString()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testStringArray() { + BatchSchema batchSchema = new SchemaBuilder() + .addArray("col", MinorType.VARCHAR) + .build(); + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new String[] {}) + .addSingleCol(new String[] {"fred", "", "wilma"}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.STRING, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + assertEquals(3, colReader.size()); + assertEquals("fred", colReader.getString(0)); + assertEquals("", colReader.getString(1)); + assertEquals("wilma", colReader.getString(2)); + assertEquals("fred", colReader.getObject(0)); + assertEquals("", colReader.getObject(1)); + assertEquals("wilma", colReader.getObject(2)); + assertEquals("\"fred\"", colReader.getAsString(0)); + assertEquals("\"\"", colReader.getAsString(1)); + assertEquals("\"wilma\"", colReader.getAsString(2)); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testIntervalYearRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.INTERVALYEAR) + .build(); + + Period p1 = Period.years(0); + Period p2 = Period.years(2).plusMonths(3); + Period p3 = Period.years(1234).plusMonths(11); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(p1) + .add(p2) + .add(p3) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(p1, colReader.getPeriod()); + + assertTrue(reader.next()); + assertEquals(p2, colReader.getPeriod()); + assertEquals(p2, colReader.getObject()); + assertEquals(p2.toString(), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(p3, colReader.getPeriod()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableIntervalYear() { + BatchSchema batchSchema = new SchemaBuilder() + .addNullable("col", MinorType.INTERVALYEAR) + .build(); + + Period p1 = Period.years(0); + Period p2 = Period.years(2).plusMonths(3); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(p1) + .addSingleCol(null) + .add(p2) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = 
reader.scalar(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(p1, colReader.getPeriod()); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getPeriod()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(p2, colReader.getPeriod()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testIntervalYearArray() { + BatchSchema batchSchema = new SchemaBuilder() + .addArray("col", MinorType.INTERVALYEAR) + .build(); + + Period p1 = Period.years(0); + Period p2 = Period.years(2).plusMonths(3); + Period p3 = Period.years(1234).plusMonths(11); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new Period[] {}) + .addSingleCol(new Period[] {p1, p2, p3}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + assertEquals(3, colReader.size()); + assertEquals(p1, colReader.getPeriod(0)); + assertEquals(p2, colReader.getPeriod(1)); + assertEquals(p3, colReader.getPeriod(2)); + assertEquals(p2, colReader.getObject(1)); + assertEquals(p2.toString(), colReader.getAsString(1)); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testIntervalDayRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.INTERVALDAY) + .build(); + + Period p1 = Period.days(0); + Period p2 = Period.days(3).plusHours(4).plusMinutes(5).plusSeconds(23); + Period p3 = Period.days(999).plusHours(23).plusMinutes(59).plusSeconds(59); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(p1) + .add(p2) + .add(p3) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + // The normalizedStandard() call is a hack. See DRILL-5689. 
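+ // Joda-Time compares Periods field-by-field: Period.days(1) does not equal + // Period.hours(24). The INTERVALDAY vector stores a (days, millis) pair, so + // the Period read back carries its time portion as milliseconds; + // normalizedStandard() redistributes the fields so equal durations compare equal.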
+ assertEquals(p1, colReader.getPeriod().normalizedStandard()); + + assertTrue(reader.next()); + assertEquals(p2, colReader.getPeriod().normalizedStandard()); + assertEquals(p2, ((Period) colReader.getObject()).normalizedStandard()); + assertEquals(p2.toString(), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(p3.normalizedStandard(), colReader.getPeriod().normalizedStandard()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableIntervalDay() { + BatchSchema batchSchema = new SchemaBuilder() + .addNullable("col", MinorType.INTERVALDAY) + .build(); + + Period p1 = Period.years(0); + Period p2 = Period.days(3).plusHours(4).plusMinutes(5).plusSeconds(23); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(p1) + .addSingleCol(null) + .add(p2) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(p1, colReader.getPeriod().normalizedStandard()); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getPeriod()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(p2, colReader.getPeriod().normalizedStandard()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testIntervalDayArray() { + BatchSchema batchSchema = new SchemaBuilder() + .addArray("col", MinorType.INTERVALDAY) + .build(); + + Period p1 = Period.days(0); + Period p2 = Period.days(3).plusHours(4).plusMinutes(5).plusSeconds(23); + Period p3 = Period.days(999).plusHours(23).plusMinutes(59).plusSeconds(59); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new Period[] {}) + .addSingleCol(new Period[] {p1, p2, p3}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + assertEquals(3, colReader.size()); + assertEquals(p1, colReader.getPeriod(0).normalizedStandard()); + assertEquals(p2, colReader.getPeriod(1).normalizedStandard()); + assertEquals(p3.normalizedStandard(), colReader.getPeriod(2).normalizedStandard()); + assertEquals(p2, ((Period) colReader.getObject(1)).normalizedStandard()); + assertEquals(p2.toString(), colReader.getAsString(1)); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testIntervalRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.INTERVAL) + .build(); + + Period p1 = Period.days(0); + Period p2 = Period.years(7).plusMonths(8) + .plusDays(3).plusHours(4) + .plusMinutes(5).plusSeconds(23); + Period p3 = Period.years(9999).plusMonths(11) + .plusDays(365).plusHours(23) + .plusMinutes(59).plusSeconds(59); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(p1) + .add(p2) + .add(p3) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + // The normalizedStandard() call is a hack. See DRILL-5689. 
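+ // Same workaround as above: INTERVAL is stored as (months, days, millis), + // so the Period read back must be normalized before comparing it with one + // built from years, months, and time fields.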
+ assertEquals(p1, colReader.getPeriod().normalizedStandard()); + + assertTrue(reader.next()); + assertEquals(p2, colReader.getPeriod().normalizedStandard()); + assertEquals(p2, ((Period) colReader.getObject()).normalizedStandard()); + assertEquals(p2.toString(), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(p3.normalizedStandard(), colReader.getPeriod().normalizedStandard()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableInterval() { + BatchSchema batchSchema = new SchemaBuilder() + .addNullable("col", MinorType.INTERVAL) + .build(); + + Period p1 = Period.years(0); + Period p2 = Period.years(7).plusMonths(8) + .plusDays(3).plusHours(4) + .plusMinutes(5).plusSeconds(23); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(p1) + .addSingleCol(null) + .add(p2) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(p1, colReader.getPeriod().normalizedStandard()); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getPeriod()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(p2, colReader.getPeriod().normalizedStandard()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testIntervalArray() { + BatchSchema batchSchema = new SchemaBuilder() + .addArray("col", MinorType.INTERVAL) + .build(); + + Period p1 = Period.days(0); + Period p2 = Period.years(7).plusMonths(8) + .plusDays(3).plusHours(4) + .plusMinutes(5).plusSeconds(23); + Period p3 = Period.years(9999).plusMonths(11) + .plusDays(365).plusHours(23) + .plusMinutes(59).plusSeconds(59); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new Period[] {}) + .addSingleCol(new Period[] {p1, p2, p3}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.PERIOD, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + assertEquals(3, colReader.size()); + assertEquals(p1, colReader.getPeriod(0).normalizedStandard()); + assertEquals(p2, colReader.getPeriod(1).normalizedStandard()); + assertEquals(p3.normalizedStandard(), colReader.getPeriod(2).normalizedStandard()); + assertEquals(p2, ((Period) colReader.getObject(1)).normalizedStandard()); + assertEquals(p2.toString(), colReader.getAsString(1)); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testDecimal9RW() { + MajorType type = MajorType.newBuilder() + .setMinorType(MinorType.DECIMAL9) + .setScale(3) + .setPrecision(9) + .setMode(DataMode.REQUIRED) + .build(); + BatchSchema batchSchema = new SchemaBuilder() + .add("col", type) + .build(); + + BigDecimal v1 = BigDecimal.ZERO; + BigDecimal v2 = BigDecimal.valueOf(123_456_789, 3); + BigDecimal v3 = BigDecimal.valueOf(999_999_999, 3); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(v1) + .add(v2) + .add(v3) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.DECIMAL, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, 
v1.compareTo(colReader.getDecimal())); + + assertTrue(reader.next()); + assertEquals(0, v2.compareTo(colReader.getDecimal())); + assertEquals(0, v2.compareTo((BigDecimal) colReader.getObject())); + assertEquals(v2.toString(), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(0, v3.compareTo(colReader.getDecimal())); + + assertFalse(reader.next()); + rs.clear(); + } + + private void nullableDecimalTester(MinorType type, int precision) { + MajorType majorType = MajorType.newBuilder() + .setMinorType(type) + .setScale(3) + .setPrecision(precision) + .setMode(DataMode.OPTIONAL) + .build(); + BatchSchema batchSchema = new SchemaBuilder() + .add("col", majorType) + .build(); + + BigDecimal v1 = BigDecimal.ZERO; + BigDecimal v2 = BigDecimal.valueOf(123_456_789, 3); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(v1) + .addSingleCol(null) + .add(v2) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.DECIMAL, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, v1.compareTo(colReader.getDecimal())); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(0, v2.compareTo(colReader.getDecimal())); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableDecimal9() { + nullableDecimalTester(MinorType.DECIMAL9, 9); + } + + private void decimalArrayTester(MinorType type, int precision) { + MajorType majorType = MajorType.newBuilder() + .setMinorType(type) + .setScale(3) + .setPrecision(precision) + .setMode(DataMode.REPEATED) + .build(); + BatchSchema batchSchema = new SchemaBuilder() + .add("col", majorType) + .build(); + + BigDecimal v1 = BigDecimal.ZERO; + BigDecimal v2 = BigDecimal.valueOf(123_456_789, 3); + BigDecimal v3 = BigDecimal.TEN; + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new BigDecimal[] {}) + .addSingleCol(new BigDecimal[] {v1, v2, v3}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.DECIMAL, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + assertEquals(3, colReader.size()); + assertEquals(0, v1.compareTo(colReader.getDecimal(0))); + assertEquals(0, v2.compareTo(colReader.getDecimal(1))); + assertEquals(0, v3.compareTo(colReader.getDecimal(2))); + assertEquals(0, v2.compareTo((BigDecimal) colReader.getObject(1))); + assertEquals(v2.toString(), colReader.getAsString(1)); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testDecimal9Array() { + decimalArrayTester(MinorType.DECIMAL9, 9); + } + + @Test + public void testDecimal18RW() { + MajorType type = MajorType.newBuilder() + .setMinorType(MinorType.DECIMAL18) + .setScale(3) + .setPrecision(18) + .setMode(DataMode.REQUIRED) + .build(); + BatchSchema batchSchema = new SchemaBuilder() + .add("col", type) + .build(); + + BigDecimal v1 = BigDecimal.ZERO; + BigDecimal v2 = BigDecimal.valueOf(123_456_789_123_456_789L, 3); + BigDecimal v3 = BigDecimal.valueOf(999_999_999_999_999_999L, 3); + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(v1) + .add(v2) + .add(v3) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader
= rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.DECIMAL, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertEquals(0, v1.compareTo(colReader.getDecimal())); + + assertTrue(reader.next()); + assertEquals(0, v2.compareTo(colReader.getDecimal())); + assertEquals(0, v2.compareTo((BigDecimal) colReader.getObject())); + assertEquals(v2.toString(), colReader.getAsString()); + + assertTrue(reader.next()); + assertEquals(0, v3.compareTo(colReader.getDecimal())); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableDecimal18() { + nullableDecimalTester(MinorType.DECIMAL18, 9); + } + + @Test + public void testDecimal18Array() { + decimalArrayTester(MinorType.DECIMAL18, 9); + } + + // From the perspective of the vector, a date vector is just a long. + + @Test + public void testDateRW() { + longRWTester(MinorType.DATE); + } + + @Test + public void testNullableDate() { + nullableLongTester(MinorType.DATE); + } + + @Test + public void testDateArray() { + longArrayTester(MinorType.DATE); + } + + // From the perspective of the vector, a timestamp vector is just a long. + + @Test + public void testTimestampRW() { + longRWTester(MinorType.TIMESTAMP); + } + + @Test + public void testNullableTimestamp() { + nullableLongTester(MinorType.TIMESTAMP); + } + + @Test + public void testTimestampArray() { + longArrayTester(MinorType.TIMESTAMP); + } + + @Test + public void testVarBinaryRW() { + BatchSchema batchSchema = new SchemaBuilder() + .add("col", MinorType.VARBINARY) + .build(); + + byte v1[] = new byte[] {}; + byte v2[] = new byte[] { (byte) 0x00, (byte) 0x7f, (byte) 0x80, (byte) 0xFF}; + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(v1) + .add(v2) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.BYTES, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertTrue(Arrays.equals(v1, colReader.getBytes())); + + assertTrue(reader.next()); + assertTrue(Arrays.equals(v2, colReader.getBytes())); + assertTrue(Arrays.equals(v2, (byte[]) colReader.getObject())); + assertEquals("[00, 7f, 80, ff]", colReader.getAsString()); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testNullableVarBinary() { + BatchSchema batchSchema = new SchemaBuilder() + .addNullable("col", MinorType.VARBINARY) + .build(); + + byte v1[] = new byte[] {}; + byte v2[] = new byte[] { (byte) 0x00, (byte) 0x7f, (byte) 0x80, (byte) 0xFF}; + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .add(v1) + .addSingleCol(null) + .add(v2) + .build(); + assertEquals(3, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarReader colReader = reader.scalar(0); + assertEquals(ValueType.BYTES, colReader.valueType()); + + assertTrue(reader.next()); + assertFalse(colReader.isNull()); + assertTrue(Arrays.equals(v1, colReader.getBytes())); + + assertTrue(reader.next()); + assertTrue(colReader.isNull()); + assertNull(colReader.getObject()); + assertEquals("null", colReader.getAsString()); + + assertTrue(reader.next()); + assertTrue(Arrays.equals(v2, colReader.getBytes())); + + assertFalse(reader.next()); + rs.clear(); + } + + @Test + public void testVarBinaryArray() { + BatchSchema batchSchema = new SchemaBuilder() + .addArray("col", MinorType.VARBINARY) + .build(); + + byte v1[] = new byte[] {}; + byte v2[] = new byte[] { (byte) 0x00, (byte) 0x7f, (byte) 
0x80, (byte) 0xFF}; + byte v3[] = new byte[] { (byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xAF}; + + SingleRowSet rs = fixture.rowSetBuilder(batchSchema) + .addSingleCol(new byte[][] {}) + .addSingleCol(new byte[][] {v1, v2, v3}) + .build(); + assertEquals(2, rs.rowCount()); + + RowSetReader reader = rs.reader(); + ScalarElementReader colReader = reader.elements(0); + assertEquals(ValueType.BYTES, colReader.valueType()); + + assertTrue(reader.next()); + assertEquals(0, colReader.size()); + + assertTrue(reader.next()); + assertEquals(3, colReader.size()); + assertTrue(Arrays.equals(v1, colReader.getBytes(0))); + assertTrue(Arrays.equals(v2, colReader.getBytes(1))); + assertTrue(Arrays.equals(v3, colReader.getBytes(2))); + assertTrue(Arrays.equals(v2, (byte[]) colReader.getObject(1))); + assertEquals("[00, 7f, 80, ff]", colReader.getAsString(1)); + + assertFalse(reader.next()); + rs.clear(); + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestSchema.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestSchema.java new file mode 100644 index 00000000000..4736ae8e170 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestSchema.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.test.rowSet.test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.record.TupleMetadata.ColumnMetadata; +import org.apache.drill.exec.record.TupleMetadata.StructureType; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.SchemaBuilder; +import org.junit.Test; + +public class TestSchema extends SubOperatorTest { + + /** + * Test a simple physical schema with no maps. 
+ */ + + @Test + public void testSchema() { + TupleMetadata tupleSchema = new SchemaBuilder() + .add("c", MinorType.INT) + .add("a", MinorType.INT, DataMode.REPEATED) + .addNullable("b", MinorType.VARCHAR) + .buildSchema(); + + assertEquals(3, tupleSchema.size()); + assertFalse(tupleSchema.isEmpty()); + + assertEquals("c", tupleSchema.column(0).getName()); + assertEquals("a", tupleSchema.column(1).getName()); + assertEquals("b", tupleSchema.column(2).getName()); + + ColumnMetadata md0 = tupleSchema.metadata(0); + assertEquals(StructureType.PRIMITIVE, md0.structureType()); + assertNull(md0.mapSchema()); + assertEquals(0, md0.index()); + assertSame(md0.schema(), tupleSchema.column(0)); + assertEquals(md0.name(), tupleSchema.column(0).getName()); + assertEquals(MinorType.INT, md0.type()); + assertEquals(DataMode.REQUIRED, md0.mode()); + assertSame(tupleSchema, md0.parent()); + assertEquals(md0.name(), md0.fullName()); + assertTrue(md0.isEquivalent(md0)); + assertFalse(md0.isEquivalent(tupleSchema.metadata(1))); + + assertEquals(1, tupleSchema.metadata(1).index()); + assertEquals(DataMode.REPEATED, tupleSchema.metadata(1).mode()); + assertEquals(2, tupleSchema.metadata(2).index()); + assertEquals(DataMode.OPTIONAL, tupleSchema.metadata(2).mode()); + + assertSame(tupleSchema.column(0), tupleSchema.column("c")); + assertSame(tupleSchema.column(1), tupleSchema.column("a")); + assertSame(tupleSchema.column(2), tupleSchema.column("b")); + + assertSame(tupleSchema.metadata(0), tupleSchema.metadata("c")); + assertSame(tupleSchema.metadata(1), tupleSchema.metadata("a")); + assertSame(tupleSchema.metadata(2), tupleSchema.metadata("b")); + assertEquals(0, tupleSchema.index("c")); + assertEquals(1, tupleSchema.index("a")); + assertEquals(2, tupleSchema.index("b")); + + // Test undefined column + + assertEquals(-1, tupleSchema.index("x")); + assertNull(tupleSchema.metadata("x")); + assertNull(tupleSchema.column("x")); + + try { + tupleSchema.metadata(4); + fail(); + } catch (IndexOutOfBoundsException e) { + // Expected + } + + try { + tupleSchema.column(4); + fail(); + } catch (IndexOutOfBoundsException e) { + // Expected + } + + // No maps. Flat schema is the same as tuple schema. + +// TupleMetadata flatSchema = tupleSchema.flatten(); +// assertEquals(3, flatSchema.size()); +// +// crossCheck(flatSchema, 0, "c", MinorType.INT); +// assertEquals(DataMode.REQUIRED, flatSchema.column(0).getDataMode()); +// assertEquals(DataMode.REQUIRED, flatSchema.column(0).getType().getMode()); +// assertTrue(! flatSchema.column(0).isNullable()); +// +// crossCheck(flatSchema, 1, "a", MinorType.INT); +// assertEquals(DataMode.REPEATED, flatSchema.column(1).getDataMode()); +// assertEquals(DataMode.REPEATED, flatSchema.column(1).getType().getMode()); +// assertTrue(! 
flatSchema.column(1).isNullable()); +// +// crossCheck(flatSchema, 2, "b", MinorType.VARCHAR); +// assertEquals(MinorType.VARCHAR, flatSchema.column(2).getType().getMinorType()); +// assertEquals(DataMode.OPTIONAL, flatSchema.column(2).getDataMode()); +// assertEquals(DataMode.OPTIONAL, flatSchema.column(2).getType().getMode()); +// assertTrue(flatSchema.column(2).isNullable()); + + // Verify batch schema + // Tests toFieldList() internally + + BatchSchema batchSchema = new BatchSchema(SelectionVectorMode.NONE, tupleSchema.toFieldList()); + assertEquals(3, batchSchema.getFieldCount()); + assertSame(batchSchema.getColumn(0), tupleSchema.column(0)); + assertSame(batchSchema.getColumn(1), tupleSchema.column(1)); + assertSame(batchSchema.getColumn(2), tupleSchema.column(2)); + } + + @Test + public void testEmptySchema() { + TupleMetadata tupleSchema = new SchemaBuilder() + .buildSchema(); + + assertEquals(0, tupleSchema.size()); + assertTrue(tupleSchema.isEmpty()); + } + + @Test + public void testDuplicateName() { + try { + new SchemaBuilder() + .add("foo", MinorType.INT) + .add("a", MinorType.INT, DataMode.REPEATED) + .addNullable("foo", MinorType.VARCHAR) + .buildSchema(); + fail(); + } catch (IllegalArgumentException e) { + // Expected + assertTrue(e.getMessage().contains("foo")); + } + } + + @Test + public void testSVMode() { + BatchSchema batchSchema = new SchemaBuilder() + .add("c", MinorType.INT) + .add("a", MinorType.INT, DataMode.REPEATED) + .addNullable("b", MinorType.VARCHAR) + .withSVMode(SelectionVectorMode.TWO_BYTE) + .build(); + + assertEquals(3, batchSchema.getFieldCount()); + assertEquals(SelectionVectorMode.TWO_BYTE, batchSchema.getSelectionVectorMode()); + } + + /** + * Verify a nested map schema. The schema has non-repeated maps, + * so it can be flattened. + */ + + @Test + public void testMapSchema() { + TupleMetadata tupleSchema = new SchemaBuilder() + .add("c", MinorType.INT) + .addMap("a") + .addNullable("b", MinorType.VARCHAR) + .add("d", MinorType.INT) + .addMap("e") + .add("f", MinorType.VARCHAR) + .buildMap() + .add("g", MinorType.INT) + .buildMap() + .add("h", MinorType.BIGINT) + .buildSchema(); + + assertEquals(3, tupleSchema.size()); + assertEquals("c", tupleSchema.metadata(0).name()); + assertEquals("c", tupleSchema.metadata(0).fullName()); + assertEquals(StructureType.PRIMITIVE, tupleSchema.metadata(0).structureType()); + assertNull(tupleSchema.metadata(0).mapSchema()); + + assertEquals("a", tupleSchema.metadata(1).name()); + assertEquals("a", tupleSchema.metadata(1).fullName()); + assertEquals(StructureType.TUPLE, tupleSchema.metadata(1).structureType()); + assertNotNull(tupleSchema.metadata(1).mapSchema()); + + assertEquals("h", tupleSchema.metadata(2).name()); + assertEquals("h", tupleSchema.metadata(2).fullName()); + assertEquals(StructureType.PRIMITIVE, tupleSchema.metadata(2).structureType()); + assertNull(tupleSchema.metadata(2).mapSchema()); + + TupleMetadata aSchema = tupleSchema.metadata(1).mapSchema(); + assertEquals(4, aSchema.size()); + assertEquals("b", aSchema.metadata(0).name()); + assertEquals("a.b", aSchema.metadata(0).fullName()); + assertEquals("d", aSchema.metadata(1).name()); + assertEquals("e", aSchema.metadata(2).name()); + assertEquals("g", aSchema.metadata(3).name()); + + TupleMetadata eSchema = aSchema.metadata(2).mapSchema(); + assertEquals(1, eSchema.size()); + assertEquals("f", eSchema.metadata(0).name()); + assertEquals("a.e.f", eSchema.metadata(0).fullName()); + + // Flattened with maps removed.
This is for testing use only + // as it is ambiguous in production. + +// TupleMetadata flatSchema = tupleSchema.flatten(); +// assertEquals(6, flatSchema.size()); +// crossCheck(flatSchema, 0, "c", MinorType.INT); +// crossCheck(flatSchema, 1, "a.b", MinorType.VARCHAR); +// crossCheck(flatSchema, 2, "a.d", MinorType.INT); +// crossCheck(flatSchema, 3, "a.e.f", MinorType.VARCHAR); +// crossCheck(flatSchema, 4, "a.g", MinorType.INT); +// crossCheck(flatSchema, 5, "h", MinorType.BIGINT); + + // Verify batch schema: should mirror the schema created above. + + BatchSchema batchSchema = new BatchSchema(SelectionVectorMode.NONE, tupleSchema.toFieldList()); + assertEquals(3, batchSchema.getFieldCount()); + assertSame(tupleSchema.column(0), batchSchema.getColumn(0)); + assertSame(tupleSchema.column(2), batchSchema.getColumn(2)); + + assertEquals("a", batchSchema.getColumn(1).getName()); + assertEquals(MinorType.MAP, batchSchema.getColumn(1).getType().getMinorType()); + assertNotNull(batchSchema.getColumn(1).getChildren()); + + List aMap = new ArrayList<>(); + for (MaterializedField field : batchSchema.getColumn(1).getChildren()) { + aMap.add(field); + } + assertEquals(4, aMap.size()); + assertSame(aMap.get(0), aSchema.column(0)); + assertSame(aMap.get(1), aSchema.column(1)); + assertSame(aMap.get(2), aSchema.column(2)); + assertSame(aMap.get(3), aSchema.column(3)); + + List eMap = new ArrayList<>(); + for (MaterializedField field : aMap.get(2).getChildren()) { + eMap.add(field); + } + assertEquals(1, eMap.size()); + assertSame(eSchema.column(0), eMap.get(0)); + } + +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java b/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java index 266bff2c2fd..e42d91cb891 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java +++ b/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java @@ -19,56 +19,27 @@ package org.apache.drill.vector; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; import org.apache.drill.common.types.TypeProtos.DataMode; -import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.vector.BaseDataValueVector; import org.apache.drill.exec.vector.IntVector; import org.apache.drill.exec.vector.NullableVarCharVector; import org.apache.drill.exec.vector.RepeatedVarCharVector; import org.apache.drill.exec.vector.UInt4Vector; import org.apache.drill.exec.vector.VarCharVector; -import org.apache.drill.exec.vector.VectorOverflowException; -import org.apache.drill.test.DrillTest; -import org.apache.drill.test.OperatorFixture; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.apache.drill.test.SubOperatorTest; +import org.apache.drill.test.rowSet.SchemaBuilder; import org.junit.Test; import io.netty.buffer.DrillBuf; -public class TestFillEmpties extends DrillTest { - - public static OperatorFixture fixture; - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - fixture = OperatorFixture.builder().build(); - } - - @AfterClass - public static void tearDownAfterClass() throws Exception { - fixture.close(); - } - - // To be replaced by a test method in a separate commit. 
- - public static MaterializedField makeField(String name, MinorType dataType, DataMode mode) { - MajorType type = MajorType.newBuilder() - .setMinorType(dataType) - .setMode(mode) - .build(); - - return MaterializedField.create(name, type); - } +public class TestFillEmpties extends SubOperatorTest { @Test public void testNullableVarChar() { @SuppressWarnings("resource") - NullableVarCharVector vector = new NullableVarCharVector(makeField("a", MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator()); + NullableVarCharVector vector = new NullableVarCharVector(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator()); vector.allocateNew( ); // Create "foo", null, "bar", but omit the null. @@ -88,7 +59,7 @@ public void testNullableVarChar() { @Test public void testVarChar() { @SuppressWarnings("resource") - VarCharVector vector = new VarCharVector(makeField("a", MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator()); + VarCharVector vector = new VarCharVector(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator()); vector.allocateNew( ); // Create "foo", null, "bar", but omit the null. @@ -100,11 +71,7 @@ public void testVarChar() { // Work around: test fails without this. But, only the new column writers // call this method. - try { - mutator.fillEmptiesBounded(0, 2); - } catch (VectorOverflowException e) { - fail(); - } + mutator.fillEmpties(0, 2); value = makeValue("bar"); mutator.setSafe(2, value, 0, value.length); @@ -116,7 +83,7 @@ public void testVarChar() { @Test public void testInt() { @SuppressWarnings("resource") - IntVector vector = new IntVector(makeField("a", MinorType.INT, DataMode.REQUIRED), fixture.allocator()); + IntVector vector = new IntVector(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED), fixture.allocator()); vector.allocateNew( ); // Create 1, 0, 2, but omit the 0. @@ -133,7 +100,7 @@ public void testInt() { @Test public void testRepeatedVarChar() { @SuppressWarnings("resource") - RepeatedVarCharVector vector = new RepeatedVarCharVector(makeField("a", MinorType.VARCHAR, DataMode.REPEATED), fixture.allocator()); + RepeatedVarCharVector vector = new RepeatedVarCharVector(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REPEATED), fixture.allocator()); vector.allocateNew( ); // Create "foo", null, "bar", but omit the null. @@ -148,11 +115,7 @@ public void testRepeatedVarChar() { // Work around: test fails without this. But, only the new column writers // call this method. - try { - mutator.fillEmptiesBounded(0, 2); - } catch (VectorOverflowException e) { - fail(); - } + mutator.fillEmpties(0, 2); mutator.startNewValue(2); value = makeValue( "c" ); mutator.addSafe(2, value, 0, value.length); diff --git a/exec/java-exec/src/test/java/org/apache/drill/vector/TestVectorLimits.java b/exec/java-exec/src/test/java/org/apache/drill/vector/TestVectorLimits.java deleted file mode 100644 index 86bd2069968..00000000000 --- a/exec/java-exec/src/test/java/org/apache/drill/vector/TestVectorLimits.java +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.vector; - -import static org.junit.Assert.*; - -import org.apache.drill.common.types.TypeProtos.DataMode; -import org.apache.drill.common.types.TypeProtos.MajorType; -import org.apache.drill.common.types.TypeProtos.MinorType; -import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.vector.IntVector; -import org.apache.drill.exec.vector.NullableIntVector; -import org.apache.drill.exec.vector.NullableVarCharVector; -import org.apache.drill.exec.vector.RepeatedIntVector; -import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.exec.vector.VarCharVector; -import org.apache.drill.exec.vector.VectorOverflowException; -import org.apache.drill.test.DrillTest; -import org.apache.drill.test.OperatorFixture; -import org.bouncycastle.util.Arrays; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -import io.netty.buffer.DrillBuf; - -/** - * Test the setScalar() methods in the various generated vector - * classes. Rather than test all 100+ vectors, we sample a few and - * rely on the fact that code is generated from a common template. - */ - -public class TestVectorLimits extends DrillTest { - - public static OperatorFixture fixture; - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - fixture = OperatorFixture.builder().build(); - } - - @AfterClass - public static void tearDownAfterClass() throws Exception { - fixture.close(); - } - - /** - * Test a vector directly using the vector mutator to ensure - * that the setScalar method works for the maximum - * row count. - *

- * This test is a proxy for all the other fixed types, since all - * share the same code template. - */ - - @Test - public void testFixedVector() { - - // Create a non-nullable int vector: a typical fixed-size vector - - @SuppressWarnings("resource") - IntVector vector = new IntVector(makeField(MinorType.INT, DataMode.REQUIRED), fixture.allocator() ); - - // Sanity test of generated constants. - - assertTrue( IntVector.MAX_SCALAR_COUNT <= ValueVector.MAX_ROW_COUNT ); - assertEquals( 4, IntVector.VALUE_WIDTH ); - assertTrue( IntVector.NET_MAX_SCALAR_SIZE <= ValueVector.MAX_BUFFER_SIZE ); - - // Allocate a default size, small vector. Forces test of - // the auto-grow (setSafe()) aspect of setScalar(). - - vector.allocateNew( ); - - // Write to the vector until it complains. At that point, - // we should have written up to the static fixed value count - // (which is computed to stay below the capacity limit.) - - IntVector.Mutator mutator = vector.getMutator(); - for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) { - try { - mutator.setScalar(i, i); - } catch (VectorOverflowException e) { - assertEquals(IntVector.MAX_SCALAR_COUNT, i); - break; - } - } - - // The vector should be below the allocation limit. Since this - // is an int vector, in practice the size will be far below - // the overall limit (if the limit stays at 16 MB.) But, it should - // be at the type-specific limit since we filled up the vector. - - assertEquals(IntVector.NET_MAX_SCALAR_SIZE, vector.getBuffer().getActualMemoryConsumed()); - vector.close(); - } - - @Test - public void testNullableFixedVector() { - - @SuppressWarnings("resource") - NullableIntVector vector = new NullableIntVector(makeField(MinorType.INT, DataMode.OPTIONAL), fixture.allocator() ); - vector.allocateNew( ); - - NullableIntVector.Mutator mutator = vector.getMutator(); - for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) { - try { - mutator.setScalar(i, i); - } catch (VectorOverflowException e) { - assertEquals(IntVector.MAX_SCALAR_COUNT, i); - break; - } - } - - vector.close(); - } - - /** - * Repeated fixed vector. Using an int vector, each column array can hold - * 256 / 4 = 64 values. We write only 10. The vector becomes full when we - * exceed 64K items. - */ - - @Test - public void testRepeatedFixedVectorCountLimit() { - - @SuppressWarnings("resource") - RepeatedIntVector vector = new RepeatedIntVector(makeField(MinorType.INT, DataMode.REPEATED), fixture.allocator() ); - vector.allocateNew( ); - - RepeatedIntVector.Mutator mutator = vector.getMutator(); - top: - for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) { - if (! mutator.startNewValueBounded(i)) { - assertEquals(ValueVector.MAX_ROW_COUNT, i); - // Continue, let's check the addBounded method also - } - for (int j = 0; j < 10; j++) { - try { - mutator.addEntry(i, i * 100 + j); - } catch (VectorOverflowException e) { - assertEquals(ValueVector.MAX_ROW_COUNT, i); - mutator.setValueCount(i); - break top; - } - } - } - - vector.close(); - } - - /** - * Repeated fixed vector. Using an int vector, each column array can hold - * 256 / 4 = 64 values. We write 100. The vector becomes full when we - * exceed the 16 MB size limit. 
- */ - - @Test - public void testRepeatedFixedVectorBufferLimit() { - - @SuppressWarnings("resource") - RepeatedIntVector vector = new RepeatedIntVector(makeField(MinorType.INT, DataMode.REPEATED), fixture.allocator() ); - vector.allocateNew( ); - - RepeatedIntVector.Mutator mutator = vector.getMutator(); - top: - for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) { - // We'll never hit the value count limit - assertTrue(mutator.startNewValueBounded(i)); - for (int j = 0; j < 100; j++) { - try { - mutator.addEntry(i, i * 100 + j); - } catch (VectorOverflowException e) { - // We should have hit the buffer limit before the value limit. - assertTrue(i < ValueVector.MAX_ROW_COUNT); - mutator.setValueCount(i); - break top; - } - } - } - - vector.close(); - } - - // To be replaced by a test method in a separate commit. - - public static MaterializedField makeField(MinorType dataType, DataMode mode) { - MajorType type = MajorType.newBuilder() - .setMinorType(dataType) - .setMode(mode) - .build(); - - return MaterializedField.create("foo", type); - } - - /** - * Baseline test for a variable-width vector using setSafe and - * loading the vector up to the maximum size. Doing so will cause the vector - * to have a buffer that exceeds the maximum size, demonstrating the - * need for setScalar(). - */ - - @Test - public void variableVectorBaseline() { - - // Create a non-nullable VarChar vector: a typical variable-size vector - - @SuppressWarnings("resource") - VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator() ); - vector.allocateNew( ); - - // A 16 MB value can hold 64K values of up to 256 bytes each. - // To force a size overflow, write values much larger. - // Write the maximum number of values which will silently - // allow the vector to grow beyond the critical size of 16 MB. - // Doing this in production would lead to memory fragmentation. - // So, this is what the setScalar() method assures we don't do. - - byte dummyValue[] = new byte[512]; - Arrays.fill(dummyValue, (byte) 'X'); - VarCharVector.Mutator mutator = vector.getMutator(); - for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) { - mutator.setSafe(i, dummyValue, 0, dummyValue.length); - } - - // The vector should be above the allocation limit. - // This is why code must migrate to the setScalar() call - // away from the setSafe() call. - - assertTrue(ValueVector.MAX_BUFFER_SIZE < vector.getBuffer().getActualMemoryConsumed()); - vector.close(); - } - - /** - * Test a vector directly using the vector mutator to ensure - * that the setScalar method works for the maximum - * vector size. - */ - - @Test - public void testWideVariableVector() { - - @SuppressWarnings("resource") - VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator() ); - vector.allocateNew( ); - - // A 16 MB value can hold 64K values of up to 256 bytes each. - // To force a size overflow, write values much larger. - // Write to the vector until it complains. At that point, - // we should have written up to the maximum buffer size. - - byte dummyValue[] = makeVarCharValue(512); - VarCharVector.Mutator mutator = vector.getMutator(); - int count = 0; - for ( ; count < 2 * ValueVector.MAX_ROW_COUNT; count++) { - try { - mutator.setScalar(count, dummyValue, 0, dummyValue.length); - } catch (VectorOverflowException e) { - break; - } - } - - // The vector should be at the allocation limit. If it wasn't, we - // should have grown it to hold more data. 
The value count will - // be below the maximum. - - mutator.setValueCount(count); - assertEquals(ValueVector.MAX_BUFFER_SIZE, vector.getBuffer().getActualMemoryConsumed()); - assertTrue(count < ValueVector.MAX_ROW_COUNT); - vector.close(); - } - - private byte[] makeVarCharValue(int n) { - byte dummyValue[] = new byte[n]; - Arrays.fill(dummyValue, (byte) 'X'); - return dummyValue; - } - - @Test - public void testNullableWideVariableVector() { - - @SuppressWarnings("resource") - NullableVarCharVector vector = new NullableVarCharVector(makeField(MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator() ); - vector.allocateNew( ); - - byte dummyValue[] = makeVarCharValue(512); - NullableVarCharVector.Mutator mutator = vector.getMutator(); - int count = 0; - for ( ; count < 2 * ValueVector.MAX_ROW_COUNT; count++) { - try { - mutator.setScalar(count, dummyValue, 0, dummyValue.length); - } catch (VectorOverflowException e) { - break; - } - } - - mutator.setValueCount(count); - assertEquals(ValueVector.MAX_BUFFER_SIZE, vector.getValuesVector().getBuffer().getActualMemoryConsumed()); - assertTrue(count < ValueVector.MAX_ROW_COUNT); - vector.close(); - } - - /** - * Test a vector directly using the vector mutator to ensure - * that the setScalar method works for the maximum - * value count. - */ - - @Test - public void testNarrowVariableVector() { - - @SuppressWarnings("resource") - VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator() ); - vector.allocateNew( ); - - // Write small values that fit into 16 MB. We should stop writing - // when we reach the value count limit. - - byte dummyValue[] = makeVarCharValue(254); - VarCharVector.Mutator mutator = vector.getMutator(); - int count = 0; - for (; count < 2 * ValueVector.MAX_ROW_COUNT; count++) { - try { - mutator.setScalar(count, dummyValue, 0, dummyValue.length); - } catch (VectorOverflowException e) { - break; - } - } - - // Buffer size should be at or below the maximum, with count - // at the maximum. - - mutator.setValueCount(count); - assertTrue(vector.getBuffer().getActualMemoryConsumed() <= ValueVector.MAX_BUFFER_SIZE); - assertEquals(ValueVector.MAX_ROW_COUNT, count); - vector.close(); - } - - /** - * Test a vector directly using the vector mutator to ensure - * that the setScalar method works for the maximum - * value count. Uses a DrillBuf as input. - */ - - @Test - public void testDirectVariableVector() { - - @SuppressWarnings("resource") - VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator() ); - vector.allocateNew( ); - - // Repeat the big-value test, but with data coming from a DrillBuf - // (direct memory) rather than a heap array. - - @SuppressWarnings("resource") - DrillBuf drillBuf = makeVarCharValueDirect(260); - VarCharVector.Mutator mutator = vector.getMutator(); - int count = 0; - for (; count < 2 * ValueVector.MAX_ROW_COUNT; count++) { - try { - mutator.setScalar(count, drillBuf, 0, 260); - } catch (VectorOverflowException e) { - break; - } - } - drillBuf.close(); - - // Again, vector should be at the size limit, count below the - // value limit. 
- - mutator.setValueCount(count); - assertEquals(ValueVector.MAX_BUFFER_SIZE, vector.getBuffer().getActualMemoryConsumed()); - assertTrue(count < ValueVector.MAX_ROW_COUNT); - vector.close(); - } - - private DrillBuf makeVarCharValueDirect(int n) { - byte dummyValue[] = makeVarCharValue(n); - DrillBuf drillBuf = fixture.allocator().buffer(dummyValue.length); - drillBuf.setBytes(0, dummyValue); - return drillBuf; - } - - @Test - public void testDirectNullableVariableVector() { - - @SuppressWarnings("resource") - NullableVarCharVector vector = new NullableVarCharVector(makeField(MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator() ); - vector.allocateNew( ); - - @SuppressWarnings("resource") - DrillBuf drillBuf = makeVarCharValueDirect(260); - NullableVarCharVector.Mutator mutator = vector.getMutator(); - int count = 0; - for (; count < 2 * ValueVector.MAX_ROW_COUNT; count++) { - try { - mutator.setScalar(count, drillBuf, 0, 260); - } catch (VectorOverflowException e) { - break; - } - } - drillBuf.close(); - - mutator.setValueCount(count); - assertEquals(ValueVector.MAX_BUFFER_SIZE, vector.getValuesVector().getBuffer().getActualMemoryConsumed()); - assertTrue(count < ValueVector.MAX_ROW_COUNT); - vector.close(); - } - - public static void main(String args[]) { - try { - setUpBeforeClass(); - new TestVectorLimits().performanceTest(); - tearDownAfterClass(); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - private void performanceTest() { - @SuppressWarnings("resource") - VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator() ); - byte value[] = makeVarCharValue(1); - int warmCount = 100; - timeSetSafe(vector, value, warmCount); - runSetBounded(vector, value, warmCount); - int runCount = 1000; - timeSetSafe(vector, value, runCount); - runSetBounded(vector, value, runCount); - timeSetSafe(vector, value, runCount); - vector.close(); - } - - private void timeSetSafe(VarCharVector vector, byte[] value, int iterCount) { - long start = System.currentTimeMillis(); - for (int i = 0; i < iterCount; i++) { - vector.clear(); - vector.allocateNew( ); - - VarCharVector.Mutator mutator = vector.getMutator(); - for (int j = 0; j < ValueVector.MAX_ROW_COUNT; j++) { - mutator.setSafe(j, value, 0, value.length); - } - } - long elapsed = System.currentTimeMillis() - start; - System.out.println( iterCount + " runs of setSafe: " + elapsed + " ms." ); - } - - private void runSetBounded(VarCharVector vector, byte[] value, int iterCount) { - long start = System.currentTimeMillis(); - for (int i = 0; i < iterCount; i++) { - vector.clear(); - vector.allocateNew( ); - - VarCharVector.Mutator mutator = vector.getMutator(); - int posn = 0; - for (;;) { - try { - mutator.setScalar(posn++, value, 0, value.length); - } catch (VectorOverflowException e) { - break; - } - } - } - long elapsed = System.currentTimeMillis() - start; - System.out.println( iterCount + " runs of setScalar: " + elapsed + " ms." ); - } -} diff --git a/exec/java-exec/src/test/java/org/apache/drill/vector/package-info.java b/exec/java-exec/src/test/java/org/apache/drill/vector/package-info.java index c858814d7f0..89c489e34ff 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/vector/package-info.java +++ b/exec/java-exec/src/test/java/org/apache/drill/vector/package-info.java @@ -16,7 +16,7 @@ * limitations under the License. */ /** - * Tests for value vectors. 
Is in this module to allow use of - * the test tools which are available only in this module. + * Tests for value vectors. Is in this package to allow use of + * the test tools which are available only in this package. */ package org.apache.drill.vector; diff --git a/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java b/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java index 51390868a84..77b94e19c03 100644 --- a/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java +++ b/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -107,6 +107,8 @@ public int refCnt() { } } + public long addr() { return addr; } + private long addr(int index) { return addr + index; } diff --git a/exec/vector/src/main/codegen/templates/ColumnAccessors.java b/exec/vector/src/main/codegen/templates/ColumnAccessors.java index f1fbf2f057c..154213e5afd 100644 --- a/exec/vector/src/main/codegen/templates/ColumnAccessors.java +++ b/exec/vector/src/main/codegen/templates/ColumnAccessors.java @@ -19,145 +19,134 @@ <@pp.dropOutputFile /> <@pp.changeOutputFile name="/org/apache/drill/exec/vector/accessor/ColumnAccessors.java" /> <#include "/@includes/license.ftl" /> -<#macro getType label> +<#macro getType drillType label> @Override public ValueType valueType() { <#if label == "Int"> return ValueType.INTEGER; + <#elseif drillType == "VarChar" || drillType == "Var16Char"> + return ValueType.STRING; <#else> return ValueType.${label?upper_case}; } -<#macro bindReader prefix drillType> +<#macro bindReader vectorPrefix drillType isArray > <#if drillType = "Decimal9" || drillType == "Decimal18"> - private MaterializedField field; + private MajorType type; - private ${prefix}${drillType}Vector.Accessor accessor; + private ${vectorPrefix}${drillType}Vector.Accessor accessor; @Override - public void bind(RowIndex vectorIndex, ValueVector vector) { - bind(vectorIndex); + public void bindVector(ValueVector vector) { <#if drillType = "Decimal9" || drillType == "Decimal18"> - field = vector.getField(); + type = vector.getField().getType(); - accessor = ((${prefix}${drillType}Vector) vector).getAccessor(); + accessor = ((${vectorPrefix}${drillType}Vector) vector).getAccessor(); } <#if drillType = "Decimal9" || drillType == "Decimal18"> @Override - public void bind(RowIndex vectorIndex, MaterializedField field, VectorAccessor va) { - bind(vectorIndex, field, va); - this.field = field; + public void bindVector(MajorType type, VectorAccessor va) { + super.bindVector(type, va); + this.type = type; } - private ${prefix}${drillType}Vector.Accessor accessor() { + private ${vectorPrefix}${drillType}Vector.Accessor accessor() { if (vectorAccessor == null) { return accessor; } else { - return ((${prefix}${drillType}Vector) vectorAccessor.vector()).getAccessor(); + return ((${vectorPrefix}${drillType}Vector) vectorAccessor.vector()).getAccessor(); } } <#macro get drillType accessorType label isArray> @Override public ${accessorType} get${label}(<#if isArray>int index) { + <#assign getObject ="getObject"/> <#if isArray> - <#assign index=", index"/> - <#assign getObject="getSingleObject"> + <#assign indexVar = "index"/> <#else> - <#assign index=""/> - <#assign getObject="getObject"> + <#assign indexVar = ""/> - <#if drillType == "VarChar"> - return new String(accessor().get(vectorIndex.index()${index}), 
Charsets.UTF_8); - <#elseif drillType == "Var16Char"> - return new String(accessor().get(vectorIndex.index()${index}), Charsets.UTF_16); - <#elseif drillType == "VarBinary"> - return accessor().get(vectorIndex.index()${index}); + <#if drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary"> + return accessor().get(vectorIndex.vectorIndex(${indexVar})); <#elseif drillType == "Decimal9" || drillType == "Decimal18"> return DecimalUtility.getBigDecimalFromPrimitiveTypes( - accessor().get(vectorIndex.index()${index}), - field.getScale(), - field.getPrecision()); + accessor().get(vectorIndex.vectorIndex(${indexVar})), + type.getScale(), + type.getPrecision()); <#elseif accessorType == "BigDecimal" || accessorType == "Period"> - return accessor().${getObject}(vectorIndex.index()${index}); + return accessor().${getObject}(vectorIndex.vectorIndex(${indexVar})); + <#elseif drillType == "UInt1"> + return ((int) accessor().get(vectorIndex.vectorIndex(${indexVar}))) & 0xFF; <#else> - return accessor().get(vectorIndex.index()${index}); + return accessor().get(vectorIndex.vectorIndex(${indexVar})); } - -<#macro bindWriter prefix drillType> - <#if drillType = "Decimal9" || drillType == "Decimal18"> - private MaterializedField field; - - private ${prefix}${drillType}Vector.Mutator mutator; + <#if drillType == "VarChar"> @Override - public void bind(RowIndex vectorIndex, ValueVector vector) { - bind(vectorIndex); - <#if drillType = "Decimal9" || drillType == "Decimal18"> - field = vector.getField(); - - this.mutator = ((${prefix}${drillType}Vector) vector).getMutator(); + public String getString(<#if isArray>int index) { + return new String(getBytes(${indexVar}), Charsets.UTF_8); } - -<#macro set drillType accessorType label nullable verb> - @Override - public void set${label}(${accessorType} value) { - <#if drillType == "VarChar"> - byte bytes[] = value.getBytes(Charsets.UTF_8); - mutator.${verb}Safe(vectorIndex.index(), bytes, 0, bytes.length); <#elseif drillType == "Var16Char"> - byte bytes[] = value.getBytes(Charsets.UTF_16); - mutator.${verb}Safe(vectorIndex.index(), bytes, 0, bytes.length); - <#elseif drillType == "VarBinary"> - mutator.${verb}Safe(vectorIndex.index(), value, 0, value.length); - <#elseif drillType == "Decimal9"> - mutator.${verb}Safe(vectorIndex.index(), - DecimalUtility.getDecimal9FromBigDecimal(value, - field.getScale(), field.getPrecision())); - <#elseif drillType == "Decimal18"> - mutator.${verb}Safe(vectorIndex.index(), - DecimalUtility.getDecimal18FromBigDecimal(value, - field.getScale(), field.getPrecision())); - <#elseif drillType == "IntervalYear"> - mutator.${verb}Safe(vectorIndex.index(), value.getYears() * 12 + value.getMonths()); - <#elseif drillType == "IntervalDay"> - mutator.${verb}Safe(vectorIndex.index(),<#if nullable> 1, - value.getDays(), - ((value.getHours() * 60 + value.getMinutes()) * 60 + - value.getSeconds()) * 1000 + value.getMillis()); - <#elseif drillType == "Interval"> - mutator.${verb}Safe(vectorIndex.index(),<#if nullable> 1, - value.getYears() * 12 + value.getMonths(), - value.getDays(), - ((value.getHours() * 60 + value.getMinutes()) * 60 + - value.getSeconds()) * 1000 + value.getMillis()); + + @Override + public String getString(<#if isArray>int index) { + return new String(getBytes(${indexVar}), Charsets.UTF_16); + } + + +<#macro build types vectorType accessorType> + <#if vectorType == "Repeated"> + <#assign fnPrefix = "Array" /> + <#assign classType = "Element" /> <#else> - mutator.${verb}Safe(vectorIndex.index(), <#if 
cast=="set">(${javaType}) value); + <#assign fnPrefix = vectorType /> + <#assign classType = "Scalar" /> - } + <#if vectorType == "Required"> + <#assign vectorPrefix = "" /> + <#else> + <#assign vectorPrefix = vectorType /> + + public static void define${fnPrefix}${accessorType}s( + Class ${accessorType?lower_case}s[]) { + <#list types as type> + <#list type.minor as minor> + <#assign drillType=minor.class> + <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false> + <#if ! notyet> + <#assign typeEnum=drillType?upper_case> + ${accessorType?lower_case}s[MinorType.${typeEnum}.ordinal()] = ${vectorPrefix}${drillType}Column${accessorType}.class; + + + + } package org.apache.drill.exec.vector.accessor; import java.math.BigDecimal; -import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.memory.BaseAllocator; import org.apache.drill.exec.vector.*; -import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.util.DecimalUtility; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnWriter; -import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector; -import org.apache.drill.exec.vector.accessor.impl.AbstractArrayReader; -import org.apache.drill.exec.vector.accessor.impl.AbstractArrayWriter; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader.VectorAccessor; +import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader; +import org.apache.drill.exec.vector.accessor.reader.BaseElementReader; +import org.apache.drill.exec.vector.accessor.reader.VectorAccessor; +import org.apache.drill.exec.vector.accessor.writer.BaseScalarWriter; +import org.apache.drill.exec.vector.accessor.writer.OffsetVectorWriter; import com.google.common.base.Charsets; + +import io.netty.buffer.DrillBuf; +import io.netty.util.internal.PlatformDependent; + import org.joda.time.Period; /** @@ -179,6 +168,8 @@ public void bind(RowIndex vectorIndex, ValueVector vector) { public class ColumnAccessors { + public static final int MIN_BUFFER_SIZE = 4096; + <#list vv.types as type> <#list type.minor as minor> <#assign drillType=minor.class> @@ -191,141 +182,320 @@ public class ColumnAccessors { <#if accessorType=="BigDecimal"> <#assign label="Decimal"> + <#if drillType == "VarChar" || drillType == "Var16Char"> + <#assign accessorType = "byte[]"> + <#assign label = "Bytes"> + <#if ! 
notyet> //------------------------------------------------------------------------ // ${drillType} readers and writers - public static class ${drillType}ColumnReader extends AbstractColumnReader { + public static class ${drillType}ColumnReader extends BaseScalarReader { - <@bindReader "" drillType /> + <@bindReader "" drillType false /> - <@getType label /> + <@getType drillType label /> <@get drillType accessorType label false/> } - public static class Nullable${drillType}ColumnReader extends AbstractColumnReader { + public static class Nullable${drillType}ColumnReader extends BaseScalarReader { - <@bindReader "Nullable" drillType /> + <@bindReader "Nullable" drillType false /> - <@getType label /> + <@getType drillType label /> @Override public boolean isNull() { - return accessor().isNull(vectorIndex.index()); - } - - <@get drillType accessorType label false/> - } - - public static class Repeated${drillType}ColumnReader extends AbstractArrayReader { - - <@bindReader "Repeated" drillType /> - - <@getType label /> - - @Override - public int size() { - return accessor().getInnerValueCountAt(vectorIndex.index()); + return accessor().isNull(vectorIndex.vectorIndex()); } - <@get drillType accessorType label true/> + <@get drillType accessorType label false /> } - public static class ${drillType}ColumnWriter extends AbstractColumnWriter { + public static class Repeated${drillType}ColumnReader extends BaseElementReader { - <@bindWriter "" drillType /> + <@bindReader "" drillType true /> - <@getType label /> + <@getType drillType label /> - <@set drillType accessorType label false "set" /> + <@get drillType accessorType label true /> } - public static class Nullable${drillType}ColumnWriter extends AbstractColumnWriter { + public static class ${drillType}ColumnWriter extends BaseScalarWriter { + <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" /> + <#if drillType = "Decimal9" || drillType == "Decimal18" || + drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse"> + private MajorType type; + + <#if varWidth> + private OffsetVectorWriter offsetsWriter = new OffsetVectorWriter(); + <#else> + private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH; + + private ${drillType}Vector vector; - <@bindWriter "Nullable" drillType /> + @Override + public final void bindVector(final ValueVector vector) { + <#if drillType = "Decimal9" || drillType == "Decimal18" || + drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse"> + type = vector.getField().getType(); + + this.vector = (${drillType}Vector) vector; + setAddr(this.vector.getBuffer()); + <#if drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary"> + offsetsWriter.bindVector(this.vector.getOffsetVector()); + <#-- lastWriteIndex unused for variable width vectors. --> + <#else> + lastWriteIndex = -1; + + } - <@getType label /> + <#-- All change of buffer comes through this function to allow capturing + the buffer address and capacity. Only two ways to set the buffer: + by binding to a vector in bindVector(), or by resizing the vector + in writeIndex(). --> + private final void setAddr(final DrillBuf buf) { + bufAddr = buf.addr(); + <#if varWidth> + capacity = buf.capacity(); + <#else> + <#-- Turns out that keeping track of capacity as the count of + values simplifies the per-value code path. 
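+ For example (a sketch of the intent, not text from the original + patch): with capacity counted in values, the hot-path bounds check + in writeIndex() is the single compare + writeIndex < capacity + rather than the byte-based form + writeIndex * VALUE_WIDTH < buf.capacity() + which would add a multiply on every value written.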
--> + capacity = buf.capacity() / VALUE_WIDTH; + + } + <#if varWidth> @Override - public void setNull() { - mutator.setNull(vectorIndex.index()); + public void bindIndex(final ColumnWriterIndex index) { + offsetsWriter.bindIndex(index); + super.bindIndex(index); } - <@set drillType accessorType label true "set" /> - } - - public static class Repeated${drillType}ColumnWriter extends AbstractArrayWriter { + + <@getType drillType label /> + + <#if accessorType == "byte[]"> + <#assign args = ", int len"> + <#else> + <#assign args = ""> + + <#if javaType == "char"> + <#assign putType = "short" /> + <#assign doCast = true /> + <#else> + <#assign putType = javaType /> + <#assign doCast = (cast == "set") /> + + <#-- This is performance critical code; every operation counts. + Please be thoughtful when changing the code. + Generated per class in the belief that the JVM will optimize the + code path for each value width. Also, the reallocRaw() and + setFoo() methods are type specific. (reallocRaw() could be virtual, + but the PlatformDependent.setFoo() cannot be.) + This is a bit tricky. This method has side effects, by design. + The current vector buffer, and buffer address, will change in + this method when a vector grows or overflows. So, don't use this + method in inline calls of the form + vector.getBuffer().doSomething(writeIndex()); + The buffer obtained by getBuffer() can be different from the current + buffer after writeIndex(). + --> + <#if varWidth> + private final int writeIndex(final int width) { + int writeOffset = offsetsWriter.writeOffset(); + if (writeOffset + width < capacity) { + return writeOffset; + } + <#else> + private final int writeIndex() { + <#-- "Fast path" for the normal case of no fills, no overflow. + This is the only bounds check we want to do for the entire + set operation. --> + int writeIndex = vectorIndex.vectorIndex(); + if (lastWriteIndex + 1 == writeIndex && writeIndex < capacity) { + lastWriteIndex = writeIndex; + return writeIndex; + } + + <#-- Either empties must be filled or the vector is full. --> + <#if varWidth> + int size = writeOffset + width; + if (size > capacity) { + <#else> + if (writeIndex >= capacity) { + int size = (writeIndex + 1) * VALUE_WIDTH; + + <#-- Two cases: grow this vector or allocate a new one. --> + if (size > ValueVector.MAX_BUFFER_SIZE) { + <#-- Allocate a new vector, or throw an exception if overflow is not supported. + If overflow is supported, the callback will call finish(), which will + fill empties, so no need to do that here. The call to finish() will + also set the final writer index for the current vector. Then, bindVector() will + be called to provide the new vector. The write index changes with + the new vector. --> + vectorIndex.overflowed(); + <#if varWidth> + writeOffset = offsetsWriter.writeOffset(); + <#else> + writeIndex = vectorIndex.vectorIndex(); + + } else { + <#-- Optimized form of reAlloc() which does not zero memory, does not do bounds + checks (since they were already done above) and which returns + the new buffer to save a method call. The write index and offset + remain unchanged. Since some vectors start off as 0 length, set a + minimum size to avoid silly thrashing on early rows. --> + if (size < MIN_BUFFER_SIZE) { + size = MIN_BUFFER_SIZE; + } + setAddr(vector.reallocRaw(BaseAllocator.nextPowerOfTwo(size))); + } + } + <#-- Fill empties. This is required because the allocated memory is not + zero-filled. --> + <#if !
varWidth> + while (lastWriteIndex < writeIndex - 1) { + <#assign putAddr = "bufAddr + ++lastWriteIndex * VALUE_WIDTH" /> + <#if drillType == "Decimal9"> + PlatformDependent.putInt(${putAddr}, 0); + <#elseif drillType == "Decimal18"> + PlatformDependent.putLong(${putAddr}, 0); + <#elseif drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse"> + long addr = ${putAddr}; + for (int i = 0; i < VALUE_WIDTH / 4; i++, addr += 4) { + PlatformDependent.putInt(addr, 0); + } + <#elseif drillType == "IntervalYear"> + PlatformDependent.putInt(${putAddr}, 0); + <#elseif drillType == "IntervalDay"> + final long addr = ${putAddr}; + PlatformDependent.putInt(addr, 0); + PlatformDependent.putInt(addr + 4, 0); + <#elseif drillType == "Interval"> + final long addr = ${putAddr}; + PlatformDependent.putInt(addr, 0); + PlatformDependent.putInt(addr + 4, 0); + PlatformDependent.putInt(addr + 8, 0); + <#elseif drillType == "Float4"> + PlatformDependent.putInt(${putAddr}, 0); + <#elseif drillType == "Float8"> + PlatformDependent.putLong(${putAddr}, 0); + <#else> + PlatformDependent.put${putType?cap_first}(${putAddr}, <#if doCast>(${putType}) 0); + + } + <#-- Track the last write location for zero-fill use next time around. --> + lastWriteIndex = writeIndex; + return writeIndex; + <#else> + return writeOffset; + + } - <@bindWriter "Repeated" drillType /> + @Override + public final void set${label}(final ${accessorType} value${args}) { + <#-- Must compute the write offset first; can't be inline because the + writeIndex() function has a side effect of possibly changing the buffer + address (bufAddr). --> + <#if varWidth> + final int offset = writeIndex(len); + <#else> + final int writeIndex = writeIndex(); + <#assign putAddr = "bufAddr + writeIndex * VALUE_WIDTH"> + + <#if varWidth> + PlatformDependent.copyMemory(value, 0, bufAddr + offset, len); + offsetsWriter.setOffset(offset + len); + <#elseif drillType == "Decimal9"> + PlatformDependent.putInt(${putAddr}, + DecimalUtility.getDecimal9FromBigDecimal(value, + type.getScale(), type.getPrecision())); + <#elseif drillType == "Decimal18"> + PlatformDependent.putLong(${putAddr}, + DecimalUtility.getDecimal18FromBigDecimal(value, + type.getScale(), type.getPrecision())); + <#elseif drillType == "Decimal38Sparse"> + <#-- Hard to optimize this case. Just use the available tools. --> + DecimalUtility.getSparseFromBigDecimal(value, vector.getBuffer(), writeIndex * VALUE_WIDTH, + type.getScale(), type.getPrecision(), 6); + <#elseif drillType == "Decimal28Sparse"> + <#-- Hard to optimize this case. Just use the available tools.
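+ (Presumably because sparse decimals span VALUE_WIDTH / 4 separate + ints per value, as the zero-fill loop above shows, no single + PlatformDependent.putX() call can write one; the DecimalUtility + helper writes the digits through the DrillBuf instead.)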
--> + DecimalUtility.getSparseFromBigDecimal(value, vector.getBuffer(), writeIndex * VALUE_WIDTH, + type.getScale(), type.getPrecision(), 5); + <#elseif drillType == "IntervalYear"> + PlatformDependent.putInt(${putAddr}, + value.getYears() * 12 + value.getMonths()); + <#elseif drillType == "IntervalDay"> + final long addr = ${putAddr}; + PlatformDependent.putInt(addr, value.getDays()); + PlatformDependent.putInt(addr + 4, periodToMillis(value)); + <#elseif drillType == "Interval"> + final long addr = ${putAddr}; + PlatformDependent.putInt(addr, value.getYears() * 12 + value.getMonths()); + PlatformDependent.putInt(addr + 4, value.getDays()); + PlatformDependent.putInt(addr + 8, periodToMillis(value)); + <#elseif drillType == "Float4"> + PlatformDependent.putInt(${putAddr}, Float.floatToRawIntBits((float) value)); + <#elseif drillType == "Float8"> + PlatformDependent.putLong(${putAddr}, Double.doubleToRawLongBits(value)); + <#else> + PlatformDependent.put${putType?cap_first}(${putAddr}, <#if doCast>(${putType}) value); + + vectorIndex.nextElement(); + } + <#if drillType == "VarChar"> - <@getType label /> + @Override + public final void setString(String value) { + final byte bytes[] = value.getBytes(Charsets.UTF_8); + setBytes(bytes, bytes.length); + } + <#elseif drillType == "Var16Char"> - protected BaseRepeatedValueVector.BaseRepeatedMutator mutator() { - return mutator; + @Override + public final void setString(String value) { + final byte bytes[] = value.getBytes(Charsets.UTF_16); + setBytes(bytes, bytes.length); } + - <@set drillType accessorType label false "add" /> + @Override + public final void finish() { + <#if varWidth> + vector.getBuffer().writerIndex(offsetsWriter.writeOffset()); + offsetsWriter.finish(); + <#else> + <#-- Done this way to avoid another drill buf access in the value set path. + Though this calls writeIndex(), which handles vector overflow, + such overflow should never occur because here we are simply + finalizing a position already set. However, the vector size may + grow and the "missing" values may be zero-filled. Note that, in + odd cases, the call to writeIndex() might cause the vector to + resize (as part of filling empties), so grab the buffer AFTER + the call to writeIndex(). --> + final int finalIndex = writeIndex(<#if varWidth>0); + vector.getBuffer().writerIndex(finalIndex * VALUE_WIDTH); + + } } - public static void defineReaders( - Class readers[][]) { -<#list vv.types as type> - <#list type.minor as minor> - <#assign drillType=minor.class> - <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false> - <#if ! notyet> - <#assign typeEnum=drillType?upper_case> - readers[MinorType.${typeEnum}.ordinal()][DataMode.REQUIRED.ordinal()] = ${drillType}ColumnReader.class; - readers[MinorType.${typeEnum}.ordinal()][DataMode.OPTIONAL.ordinal()] = Nullable${drillType}ColumnReader.class; - - - + public static int periodToMillis(Period value) { + return ((value.getHours() * 60 + + value.getMinutes()) * 60 + + value.getSeconds()) * 1000 + + value.getMillis(); } - public static void defineWriters( - Class writers[][]) { -<#list vv.types as type> - <#list type.minor as minor> - <#assign drillType=minor.class> - <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false> - <#if !
notyet> - <#assign typeEnum=drillType?upper_case> - writers[MinorType.${typeEnum}.ordinal()][DataMode.REQUIRED.ordinal()] = ${drillType}ColumnWriter.class; - writers[MinorType.${typeEnum}.ordinal()][DataMode.OPTIONAL.ordinal()] = Nullable${drillType}ColumnWriter.class; - - - - } +<@build vv.types "Required" "Reader" /> - public static void defineArrayReaders( - Class readers[]) { -<#list vv.types as type> - <#list type.minor as minor> - <#assign drillType=minor.class> - <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false> - <#if ! notyet> - <#assign typeEnum=drillType?upper_case> - readers[MinorType.${typeEnum}.ordinal()] = Repeated${drillType}ColumnReader.class; - - - - } +<@build vv.types "Nullable" "Reader" /> - public static void defineArrayWriters( - Class writers[]) { -<#list vv.types as type> - <#list type.minor as minor> - <#assign drillType=minor.class> - <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false> - <#if ! notyet> - <#assign typeEnum=drillType?upper_case> - writers[MinorType.${typeEnum}.ordinal()] = Repeated${drillType}ColumnWriter.class; - - - - } +<@build vv.types "Repeated" "Reader" /> + +<@build vv.types "Required" "Writer" /> } diff --git a/exec/vector/src/main/codegen/templates/FixedValueVectors.java b/exec/vector/src/main/codegen/templates/FixedValueVectors.java index 1e83a4f6ccc..4718145807f 100644 --- a/exec/vector/src/main/codegen/templates/FixedValueVectors.java +++ b/exec/vector/src/main/codegen/templates/FixedValueVectors.java @@ -199,15 +199,27 @@ public void reAlloc() { throw new OversizedAllocationException("Unable to expand the buffer. Max allowed buffer size is reached."); } + reallocRaw((int) newAllocationSize); + final int halfNewCapacity = data.capacity() / 2; + data.setZero(halfNewCapacity, halfNewCapacity); + } + + /** + * Core of vector allocation. Given a new size (which must be a power of two), allocate + * the new buffer, copy the current values, and leave the unused parts garbage-filled. + * + * @param newAllocationSize new buffer size as a power of two + * @return the new buffer + */ + public DrillBuf reallocRaw(int newAllocationSize) { logger.debug("Reallocating vector [{}]. # of bytes: [{}] -> [{}]", field, allocationSizeInBytes, newAllocationSize); - final DrillBuf newBuf = allocator.buffer((int)newAllocationSize); + final DrillBuf newBuf = allocator.buffer(newAllocationSize); newBuf.setBytes(0, data, 0, data.capacity()); - final int halfNewCapacity = newBuf.capacity() / 2; - newBuf.setZero(halfNewCapacity, halfNewCapacity); newBuf.writerIndex(data.writerIndex()); data.release(1); data = newBuf; - allocationSizeInBytes = (int)newAllocationSize; + allocationSizeInBytes = newAllocationSize; + return newBuf; } /** @@ -401,7 +413,6 @@ public StringBuilder getAsStringBuilder(int index) { final String monthString = (Math.abs(months) == 1) ? " month " : " months "; final String dayString = (Math.abs(days) == 1) ? " day " : " days "; - return(new StringBuilder(). append(years).append(yearString). append(months).append(monthString). @@ -633,37 +644,6 @@ public void setSafe(int index, <#if (type.width > 4)>${minor.javaType!type.javaT data.setBytes(index * VALUE_WIDTH, value, 0, VALUE_WIDTH); } - /** - * Set the value of a required or nullable vector. Enforces the value - * and size limits. 
- * @param index item to write - * @param value value to set - * @throws VectorOverflowException if the item was written, false if the index would - * overfill the vector - */ - - public void setScalar(int index, <#if (type.width > 4)>${minor.javaType!type.javaType}<#else>int value) throws VectorOverflowException { - if (index >= MAX_SCALAR_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, value); - } - - /** - * Set the value of a repeated vector. Enforces only the size limit. - * @param index item to write - * @param value value to set - * @throws VectorOverflowException if the item was written, false if the index would - * overfill the vector - */ - - public void setArrayItem(int index, <#if (type.width > 4)>${minor.javaType!type.javaType}<#else>int value) throws VectorOverflowException { - if (index >= MAX_VALUE_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, value); - } - <#if minor.class == "Interval"> public void set(int index, int months, int days, int milliseconds) { final int offsetIndex = index * VALUE_WIDTH; @@ -679,20 +659,6 @@ public void setSafe(int index, int months, int days, int milliseconds) { set(index, months, days, milliseconds); } - public void setScalar(int index, int months, int days, int milliseconds) throws VectorOverflowException { - if (index >= MAX_SCALAR_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, months, days, milliseconds); - } - - public void setArrayItem(int index, int months, int days, int milliseconds) throws VectorOverflowException { - if (index >= MAX_VALUE_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, months, days, milliseconds); - } - protected void set(int index, ${minor.class}Holder holder) { set(index, holder.months, holder.days, holder.milliseconds); } @@ -701,14 +667,6 @@ public void setSafe(int index, ${minor.class}Holder holder) { setSafe(index, holder.months, holder.days, holder.milliseconds); } - public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException { - setScalar(index, holder.months, holder.days, holder.milliseconds); - } - - public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException { - setArrayItem(index, holder.months, holder.days, holder.milliseconds); - } - protected void set(int index, Nullable${minor.class}Holder holder) { set(index, holder.months, holder.days, holder.milliseconds); } @@ -717,14 +675,6 @@ public void setSafe(int index, Nullable${minor.class}Holder holder) { setSafe(index, holder.months, holder.days, holder.milliseconds); } - public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - setScalar(index, holder.months, holder.days, holder.milliseconds); - } - - public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - setArrayItem(index, holder.months, holder.days, holder.milliseconds); - } - <#elseif minor.class == "IntervalDay"> public void set(int index, int days, int milliseconds) { final int offsetIndex = index * VALUE_WIDTH; @@ -739,20 +689,6 @@ public void setSafe(int index, int days, int milliseconds) { set(index, days, milliseconds); } - public void setScalar(int index, int days, int milliseconds) throws VectorOverflowException { - if (index >= MAX_SCALAR_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, days, milliseconds); - } - - public void setArrayItem(int index, int days, int milliseconds) throws VectorOverflowException { - if 
(index >= MAX_VALUE_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, days, milliseconds); - } - protected void set(int index, ${minor.class}Holder holder) { set(index, holder.days, holder.milliseconds); } @@ -761,14 +697,6 @@ public void setSafe(int index, ${minor.class}Holder holder) { setSafe(index, holder.days, holder.milliseconds); } - public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException { - setScalar(index, holder.days, holder.milliseconds); - } - - public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException { - setArrayItem(index, holder.days, holder.milliseconds); - } - protected void set(int index, Nullable${minor.class}Holder holder) { set(index, holder.days, holder.milliseconds); } @@ -777,14 +705,6 @@ public void setSafe(int index, Nullable${minor.class}Holder holder){ setSafe(index, holder.days, holder.milliseconds); } - public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - setScalar(index, holder.days, holder.milliseconds); - } - - public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - setArrayItem(index, holder.days, holder.milliseconds); - } - <#elseif minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense"> public void setSafe(int index, int start, DrillBuf buffer) { while(index >= getValueCapacity()) { @@ -793,20 +713,6 @@ public void setSafe(int index, int start, DrillBuf buffer) { set(index, start, buffer); } - public void setScalar(int index, int start, DrillBuf buffer) throws VectorOverflowException { - if (index >= MAX_SCALAR_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, start, buffer); - } - - public void setArrayItem(int index, int start, DrillBuf buffer) throws VectorOverflowException { - if (index >= MAX_VALUE_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, start, buffer); - } - public void set(int index, ${minor.class}Holder holder) { set(index, holder.start, holder.buffer); } @@ -815,14 +721,6 @@ public void setSafe(int index, ${minor.class}Holder holder) { setSafe(index, holder.start, holder.buffer); } - public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException { - setScalar(index, holder.start, holder.buffer); - } - - public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException { - setArrayItem(index, holder.start, holder.buffer); - } - void set(int index, Nullable${minor.class}Holder holder) { set(index, holder.start, holder.buffer); } @@ -831,14 +729,6 @@ public void setSafe(int index, Nullable${minor.class}Holder holder) { setSafe(index, holder.start, holder.buffer); } - public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - setScalar(index, holder.start, holder.buffer); - } - - public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - setArrayItem(index, holder.start, holder.buffer); - } - <#if minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse"> public void set(int index, BigDecimal value) { DecimalUtility.getSparseFromBigDecimal(value, data, index * VALUE_WIDTH, @@ -852,20 +742,6 @@ public void setSafe(int index, BigDecimal value) { set(index, value); } - public void setScalar(int index, BigDecimal value) throws VectorOverflowException { - if (index 
>= MAX_SCALAR_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, value); - } - - public void setArrayItem(int index, BigDecimal value) throws VectorOverflowException { - if (index >= MAX_VALUE_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, value); - } - public void set(int index, int start, DrillBuf buffer){ data.setBytes(index * VALUE_WIDTH, buffer, start, VALUE_WIDTH); @@ -890,42 +766,18 @@ public void set(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType data.set${(minor.javaType!type.javaType)?cap_first}(index * VALUE_WIDTH, value); } - public void setSafe(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType}<#else>int value) { - while(index >= getValueCapacity()) { - reAlloc(); - } - set(index, value); - } - - /** - * Set the value of a required or nullable vector. Enforces the value - * and size limits. - * @param index item to write - * @param value value to set - * @throws VectorOverflowException if the item was written, false if the index would - * overfill the vector - */ - - public void setScalar(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType}<#else>int value) throws VectorOverflowException { - if (index >= MAX_SCALAR_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, value); - } - /** - * Set the value of a repeated vector. Enforces only the size limit. + * Set the value of a required or nullable vector. Grows the vector as needed. + * Does not enforce size limits; scalar fixed-width types can never overflow + * a vector. * @param index item to write - * @param value value to set - * @throws VectorOverflowException if the item was written, false if the index would - * overfill the vector */ - public void setArrayItem(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType}<#else>int value) throws VectorOverflowException { - if (index >= MAX_VALUE_COUNT) { - throw new VectorOverflowException(); + public void setSafe(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType}<#else>int value) { + while(index >= getValueCapacity()) { + reAlloc(); } - setSafe(index, value); + set(index, value); } protected void set(int index, ${minor.class}Holder holder) { @@ -939,20 +791,6 @@ public void setSafe(int index, ${minor.class}Holder holder) { set(index, holder); } - public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException { - if (index >= MAX_SCALAR_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, holder); - } - - public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException { - if (index >= MAX_VALUE_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, holder); - } - protected void set(int index, Nullable${minor.class}Holder holder) { data.set${(minor.javaType!type.javaType)?cap_first}(index * VALUE_WIDTH, holder.value); } @@ -964,20 +802,6 @@ public void setSafe(int index, Nullable${minor.class}Holder holder) { set(index, holder); } - public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - if (index >= MAX_SCALAR_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, holder); - } - - public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - if (index >= MAX_VALUE_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, holder); - } - @Override public void generateTestData(int size) { setValueCount(size); @@ -1006,30 +830,6 @@ public void 
generateTestDataAlt(int size) { } <#-- type.width --> - /** - * Backfill missing offsets from the given last written position to the - * given current write position. Used by the "new" size-safe column - * writers to allow skipping values. The set() and setSafe() - * do not fill empties. See DRILL-5529 and DRILL-5530. - * @param lastWrite the position of the last valid write: the offset - * to be copied forward - * @param index the current write position filling occurs up to, - * but not including, this position - * @throws VectorOverflowException if the item was written, false if the index would - * overfill the vector - */ - - public void fillEmptiesBounded(int lastWrite, int index) - throws VectorOverflowException { - <#if type.width <= 8> - for (int i = lastWrite + 1; i <= index; i++) { - setSafe(i, <#if (type.width >= 4)>(${minor.javaType!type.javaType}) 0); - } - <#else> - throw new UnsupportedOperationException("Cannot zero-fill ${minor.class} vectors."); - - } - @Override public void setValueCount(int valueCount) { final int currentValueCapacity = getValueCapacity(); diff --git a/exec/vector/src/main/codegen/templates/NullableValueVectors.java b/exec/vector/src/main/codegen/templates/NullableValueVectors.java index 0f8d90c01eb..d3d435274d2 100644 --- a/exec/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/exec/vector/src/main/codegen/templates/NullableValueVectors.java @@ -131,9 +131,10 @@ public DrillBuf getBuffer() { } @Override - public ${valuesName} getValuesVector() { - return values; - } + public ${valuesName} getValuesVector() { return values; } + + @Override + public UInt1Vector getBitsVector() { return bits; } @Override public void setInitialCapacity(int numRecords) { @@ -446,8 +447,8 @@ public void get(int index, Nullable${minor.class}Holder holder){ @Override public ${friendlyType} getObject(int index) { if (isNull(index)) { - return null; - }else{ + return null; + } else { return vAccessor.getObject(index); } } @@ -455,8 +456,8 @@ public void get(int index, Nullable${minor.class}Holder holder){ <#if minor.class == "Interval" || minor.class == "IntervalDay" || minor.class == "IntervalYear"> public StringBuilder getAsStringBuilder(int index) { if (isNull(index)) { - return null; - }else{ + return null; + } else { return vAccessor.getAsStringBuilder(index); } } @@ -535,16 +536,6 @@ public void setSafe(int index, byte[] value, int start, int length) { lastSet = index; } - public void setScalar(int index, byte[] value, int start, int length) throws VectorOverflowException { - if (index > lastSet + 1) { - fillEmpties(index); // Filling empties cannot overflow the vector - } - values.getMutator().setScalar(index, value, start, length); - bits.getMutator().setSafe(index, 1); - setCount++; - lastSet = index; - } - public void setSafe(int index, ByteBuffer value, int start, int length) { if (index > lastSet + 1) { fillEmpties(index); @@ -556,17 +547,6 @@ public void setSafe(int index, ByteBuffer value, int start, int length) { lastSet = index; } - public void setScalar(int index, DrillBuf value, int start, int length) throws VectorOverflowException { - if (index > lastSet + 1) { - fillEmpties(index); // Filling empties cannot overflow the vector - } - - values.getMutator().setScalar(index, value, start, length); - bits.getMutator().setSafe(index, 1); - setCount++; - lastSet = index; - } - public void setNull(int index) { bits.getMutator().setSafe(index, 0); @@ -580,10 +560,6 @@ public void setSkipNull(int index, Nullable${minor.class}Holder holder) { 
values.getMutator().set(index, holder); } - public void setNullBounded(int index) throws VectorOverflowException { - bits.getMutator().setScalar(index, 0); - } - public void set(int index, Nullable${minor.class}Holder holder) { final ${valuesName}.Mutator valuesMutator = values.getMutator(); <#if type.major == "VarLen"> @@ -637,17 +613,6 @@ public void setSafe(int index, int isSet<#list fields as field><#if field.includ <#if type.major == "VarLen">lastSet = index; } - public void setScalar(int index, int isSet<#list fields as field><#if field.include!true >, ${field.type} ${field.name}Field ) throws VectorOverflowException { - <#if type.major == "VarLen"> - if (index > lastSet + 1) { - fillEmpties(index); - } - - values.getMutator().setScalar(index<#list fields as field><#if field.include!true >, ${field.name}Field); - bits.getMutator().setSafe(index, isSet); - setCount++; - <#if type.major == "VarLen">lastSet = index; - } public void setSafe(int index, Nullable${minor.class}Holder value) { <#if type.major == "VarLen"> @@ -661,18 +626,6 @@ public void setSafe(int index, Nullable${minor.class}Holder value) { <#if type.major == "VarLen">lastSet = index; } - public void setScalar(int index, Nullable${minor.class}Holder value) throws VectorOverflowException { - <#if type.major == "VarLen"> - if (index > lastSet + 1) { - fillEmpties(index); - } - - values.getMutator().setScalar(index, value); - bits.getMutator().setSafe(index, value.isSet); - setCount++; - <#if type.major == "VarLen">lastSet = index; - } - public void setSafe(int index, ${minor.class}Holder value) { <#if type.major == "VarLen"> if (index > lastSet + 1) { @@ -685,18 +638,6 @@ public void setSafe(int index, ${minor.class}Holder value) { <#if type.major == "VarLen">lastSet = index; } - public void setScalar(int index, ${minor.class}Holder value) throws VectorOverflowException { - <#if type.major == "VarLen"> - if (index > lastSet + 1) { - fillEmpties(index); - } - - values.getMutator().setScalar(index, value); - bits.getMutator().setSafe(index, 1); - setCount++; - <#if type.major == "VarLen">lastSet = index; - } - <#if !(type.major == "VarLen" || minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense" || minor.class == "Interval" || minor.class == "IntervalDay")> public void setSafe(int index, ${minor.javaType!type.javaType} value) { <#if type.major == "VarLen"> @@ -709,17 +650,6 @@ public void setSafe(int index, ${minor.javaType!type.javaType} value) { setCount++; } - public void setScalar(int index, ${minor.javaType!type.javaType} value) throws VectorOverflowException { - <#if type.major == "VarLen"> - if (index > lastSet + 1) { - fillEmpties(index); - } - - values.getMutator().setScalar(index, value); - bits.getMutator().setSafe(index, 1); - setCount++; - } - <#if minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse"> public void set(int index, BigDecimal value) { @@ -734,12 +664,6 @@ public void setSafe(int index, BigDecimal value) { setCount++; } - public void setScalar(int index, BigDecimal value) throws VectorOverflowException { - values.getMutator().setScalar(index, value); - bits.getMutator().setSafe(index, 1); - setCount++; - } - @Override public void setValueCount(int valueCount) { diff --git a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java index 9780b7da5c8..d408c0b5eeb 100644 --- 
a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java +++ b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java @@ -18,6 +18,9 @@ import java.lang.Override; +import org.apache.drill.common.types.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.record.TransferPair; import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector; import org.mortbay.jetty.servlet.Holder; @@ -55,7 +58,10 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector public Repeated${minor.class}Vector(MaterializedField field, BufferAllocator allocator) { super(field, allocator); - addOrGetVector(VectorDescriptor.create(Types.required(field.getType().getMinorType()))); + MajorType majorType = field.getType(); + addOrGetVector(VectorDescriptor.create(Types.withScaleAndPrecision( + majorType.getMinorType(), DataMode.REQUIRED, + majorType.getScale(), majorType.getPrecision()))); } @Override @@ -338,25 +344,12 @@ public void addSafe(int index, byte[] bytes) { addSafe(index, bytes, 0, bytes.length); } - public void addEntry(int index, byte[] bytes) throws VectorOverflowException { - addEntry(index, bytes, 0, bytes.length); - } - public void addSafe(int index, byte[] bytes, int start, int length) { final int nextOffset = offsets.getAccessor().get(index+1); values.getMutator().setSafe(nextOffset, bytes, start, length); offsets.getMutator().setSafe(index+1, nextOffset+1); } - public void addEntry(int index, byte[] bytes, int start, int length) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - final int nextOffset = offsets.getAccessor().get(index+1); - values.getMutator().setArrayItem(nextOffset, bytes, start, length); - offsets.getMutator().setSafe(index+1, nextOffset+1); - } - <#else> public void addSafe(int index, ${minor.javaType!type.javaType} srcValue) { final int nextOffset = offsets.getAccessor().get(index+1); @@ -364,15 +357,6 @@ public void addSafe(int index, ${minor.javaType!type.javaType} srcValue) { offsets.getMutator().setSafe(index+1, nextOffset+1); } - public void addEntry(int index, ${minor.javaType!type.javaType} srcValue) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - final int nextOffset = offsets.getAccessor().get(index+1); - values.getMutator().setArrayItem(nextOffset, srcValue); - offsets.getMutator().setSafe(index+1, nextOffset+1); - } - public void setSafe(int index, Repeated${minor.class}Holder h) { final ${minor.class}Holder ih = new ${minor.class}Holder(); @@ -390,14 +374,6 @@ public void addSafe(int index, ${minor.class}Holder holder) { offsets.getMutator().setSafe(index+1, nextOffset+1); } - public void addEntry(int index, ${minor.class}Holder holder) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - final int nextOffset = offsets.getAccessor().get(index+1); - values.getMutator().setArrayItem(nextOffset, holder); - offsets.getMutator().setSafe(index+1, nextOffset+1); - } public void addSafe(int index, Nullable${minor.class}Holder holder) { final int nextOffset = offsets.getAccessor().get(index+1); @@ -405,15 +381,6 @@ public void addSafe(int index, Nullable${minor.class}Holder holder) { offsets.getMutator().setSafe(index+1, nextOffset+1); } - public void addEntry(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - if (index >= 
MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - final int nextOffset = offsets.getAccessor().get(index+1); - values.getMutator().setArrayItem(nextOffset, holder); - offsets.getMutator().setSafe(index+1, nextOffset+1); - } - /** * Backfill missing offsets from the given last written position to the * given current write position. Used by the "new" size-safe column @@ -424,11 +391,7 @@ public void addEntry(int index, Nullable${minor.class}Holder holder) throws Vect * @param index the current write position to be initialized */ - public void fillEmptiesBounded(int lastWrite, int index) - throws VectorOverflowException { - if (index >= UInt4Vector.MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } + public void fillEmpties(int lastWrite, int index) { // If last write was 2, offsets are [0, 3, 6] // If next write is 4, offsets must be: [0, 3, 6, 6, 6] // Remember the offsets are one more than row count. @@ -446,15 +409,6 @@ public void addSafe(int rowIndex, <#list fields as field>${field.type} ${field.n offsets.getMutator().setSafe(rowIndex+1, nextOffset+1); } - public void addEntry(int rowIndex, <#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) throws VectorOverflowException { - if (rowIndex >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - final int nextOffset = offsets.getAccessor().get(rowIndex+1); - values.getMutator().setArrayItem(nextOffset, <#list fields as field>${field.name}<#if field_has_next>, ); - offsets.getMutator().setSafe(rowIndex+1, nextOffset+1); - } - <#if minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse"> public void addSafe(int index, BigDecimal value) { @@ -463,15 +417,6 @@ public void addSafe(int index, BigDecimal value) { offsets.getMutator().setSafe(index+1, nextOffset+1); } - public void addEntry(int index, BigDecimal value) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - final int nextOffset = offsets.getAccessor().get(index+1); - values.getMutator().setArrayItem(nextOffset, value); - offsets.getMutator().setSafe(index+1, nextOffset+1); - } - protected void add(int index, ${minor.class}Holder holder) { final int nextOffset = offsets.getAccessor().get(index+1); @@ -480,7 +425,6 @@ protected void add(int index, ${minor.class}Holder holder) { } public void add(int index, Repeated${minor.class}Holder holder) { - final ${minor.class}Vector.Accessor accessor = holder.vector.getAccessor(); final ${minor.class}Holder innerHolder = new ${minor.class}Holder(); diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java index 9a9e178a375..33d35ec3bba 100644 --- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java +++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java @@ -385,12 +385,16 @@ public void reAlloc() { throw new OversizedAllocationException("Unable to expand the buffer. 
Max allowed buffer size is reached."); } - logger.trace("Reallocating VarChar, new size {}",newAllocationSize); - final DrillBuf newBuf = allocator.buffer((int)newAllocationSize); + reallocRaw((int) newAllocationSize); + } + + public DrillBuf reallocRaw(int newAllocationSize) { + final DrillBuf newBuf = allocator.buffer(newAllocationSize); newBuf.setBytes(0, data, 0, data.capacity()); data.release(); data = newBuf; - allocationSizeInBytes = (int)newAllocationSize; + allocationSizeInBytes = newAllocationSize; + return data; } public void decrementAllocationMonitor() { @@ -534,10 +538,6 @@ public void setSafe(int index, byte[] bytes) { } } - public void setScalar(int index, byte[] bytes) throws VectorOverflowException { - setScalar(index, bytes, 0, bytes.length); - } - /** * Set the variable length element at the specified index to the supplied byte array. * @@ -568,23 +568,6 @@ public void setSafe(int index, ByteBuffer bytes, int start, int length) { } } - public void setScalar(int index, DrillBuf bytes, int start, int length) throws VectorOverflowException { - assert index >= 0; - - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - int currentOffset = offsetVector.getAccessor().get(index); - final int newSize = currentOffset + length; - if (newSize > MAX_BUFFER_SIZE) { - throw new VectorOverflowException(); - } - while (! data.setBytesBounded(currentOffset, bytes, start, length)) { - reAlloc(); - } - offsetVector.getMutator().setSafe(index + 1, newSize); - } - public void setSafe(int index, byte[] bytes, int start, int length) { assert index >= 0; @@ -601,28 +584,6 @@ public void setSafe(int index, byte[] bytes, int start, int length) { } } - public void setScalar(int index, byte[] bytes, int start, int length) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - setArrayItem(index, bytes, start, length); - } - - public void setArrayItem(int index, byte[] bytes, int start, int length) throws VectorOverflowException { - assert index >= 0; - - final int currentOffset = offsetVector.getAccessor().get(index); - final int newSize = currentOffset + length; - if (newSize > MAX_BUFFER_SIZE) { - throw new VectorOverflowException(); - } - - while (! 
data.setBytesBounded(currentOffset, bytes, start, length)) { - reAlloc(); - } - offsetVector.getMutator().setSafe(index + 1, newSize); - } - @Override public void setValueLengthSafe(int index, int length) { final int offset = offsetVector.getAccessor().get(index); @@ -647,39 +608,12 @@ public void setSafe(int index, int start, int end, DrillBuf buffer) { } } - public void setScalar(int index, int start, int end, DrillBuf buffer) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - setArrayItem(index, start, end, buffer); - } - - public void setArrayItem(int index, int start, int end, DrillBuf buffer) throws VectorOverflowException { - final int len = end - start; - final int outputStart = offsetVector.data.get${(minor.javaType!type.javaType)?cap_first}(index * ${type.width}); - final int newSize = outputStart + len; - if (newSize > MAX_BUFFER_SIZE) { - throw new VectorOverflowException(); - } - - offsetVector.getMutator().setSafe(index+1, newSize); - try{ - buffer.getBytes(start, data, outputStart, len); - } catch (IndexOutOfBoundsException e) { - while (data.capacity() < newSize) { - reAlloc(); - } - buffer.getBytes(start, data, outputStart, len); - } - } - public void setSafe(int index, Nullable${minor.class}Holder holder) { assert holder.isSet == 1; final int start = holder.start; final int end = holder.end; final int len = end - start; - final int outputStart = offsetVector.data.get${(minor.javaType!type.javaType)?cap_first}(index * ${type.width}); try { @@ -693,37 +627,6 @@ public void setSafe(int index, Nullable${minor.class}Holder holder) { offsetVector.getMutator().setSafe(index+1, outputStart + len); } - public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - setArrayItem(index, holder); - } - - public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException { - assert holder.isSet == 1; - - final int start = holder.start; - final int end = holder.end; - final int len = end - start; - - final int outputStart = offsetVector.data.get${(minor.javaType!type.javaType)?cap_first}(index * ${type.width}); - final int newSize = outputStart + len; - if (newSize > MAX_BUFFER_SIZE) { - throw new VectorOverflowException(); - } - - try { - holder.buffer.getBytes(start, data, outputStart, len); - } catch (IndexOutOfBoundsException e) { - while (data.capacity() < newSize) { - reAlloc(); - } - holder.buffer.getBytes(start, data, outputStart, len); - } - offsetVector.getMutator().setSafe(index+1, newSize); - } - public void setSafe(int index, ${minor.class}Holder holder) { final int start = holder.start; final int end = holder.end; @@ -741,34 +644,6 @@ public void setSafe(int index, ${minor.class}Holder holder) { offsetVector.getMutator().setSafe( index+1, outputStart + len); } - public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - setArrayItem(index, holder); - } - - public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException { - final int start = holder.start; - final int end = holder.end; - final int len = end - start; - final int outputStart = offsetVector.data.get${(minor.javaType!type.javaType)?cap_first}(index * ${type.width}); - final int newSize = outputStart + len; - if (newSize > MAX_BUFFER_SIZE) { - throw new 
VectorOverflowException(); - } - - try { - holder.buffer.getBytes(start, data, outputStart, len); - } catch (IndexOutOfBoundsException e) { - while(data.capacity() < newSize) { - reAlloc(); - } - holder.buffer.getBytes(start, data, outputStart, len); - } - offsetVector.getMutator().setSafe( index+1, newSize); - } - /** * Backfill missing offsets from the given last written position to the * given current write position. Used by the "new" size-safe column @@ -778,20 +653,9 @@ public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOv * to be copied forward * @param index the current write position filling occurs up to, * but not including, this position - * @throws VectorOverflowException if the item was written, false if the index would - * overfill the vector */ - public void fillEmptiesBounded(int lastWrite, int index) - throws VectorOverflowException { - - // Index is the next write index, which might be "virtual", - // that is, past the last row at EOF. This check only protects - // the actual data written here, which is up to index-1. - - if (index > UInt4Vector.MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } + public void fillEmpties(int lastWrite, int index) { // If last write was 2, offsets are [0, 3, 6] // If next write is 4, offsets must be: [0, 3, 6, 6, 6] // Remember the offsets are one more than row count. @@ -803,7 +667,7 @@ public void fillEmptiesBounded(int lastWrite, int index) } } - protected void set(int index, int start, int length, DrillBuf buffer){ + protected void set(int index, int start, int length, DrillBuf buffer) { assert index >= 0; final int currentOffset = offsetVector.getAccessor().get(index); offsetVector.getMutator().set(index + 1, currentOffset + length); @@ -811,33 +675,20 @@ protected void set(int index, int start, int length, DrillBuf buffer){ data.setBytes(currentOffset, bb); } - protected void set(int index, Nullable${minor.class}Holder holder){ + protected void set(int index, Nullable${minor.class}Holder holder) { final int length = holder.end - holder.start; final int currentOffset = offsetVector.getAccessor().get(index); offsetVector.getMutator().set(index + 1, currentOffset + length); data.setBytes(currentOffset, holder.buffer, holder.start, length); } - protected void set(int index, ${minor.class}Holder holder){ + protected void set(int index, ${minor.class}Holder holder) { final int length = holder.end - holder.start; final int currentOffset = offsetVector.getAccessor().get(index); offsetVector.getMutator().set(index + 1, currentOffset + length); data.setBytes(currentOffset, holder.buffer, holder.start, length); } - <#if (minor.class == "VarChar")> - public void setScalar(int index, String value) throws VectorOverflowException { - if (index >= MAX_ROW_COUNT) { - throw new VectorOverflowException(); - } - // Treat a null string as an empty string. 
- if (value != null) { - byte encoded[] = value.getBytes(Charsets.UTF_8); - setScalar(index, encoded, 0, encoded.length); - } - } - - @Override public void setValueCount(int valueCount) { final int currentByteCapacity = getByteCapacity(); diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java index bc1ec3a5c58..1e91c3719a4 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java @@ -25,6 +25,7 @@ import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.expr.BasicTypeHelper; import org.apache.drill.exec.proto.UserBitShared.NamePart; import org.apache.drill.exec.proto.UserBitShared.SerializedField; @@ -199,8 +200,59 @@ public boolean equals(Object obj) { Objects.equals(this.type, other.type); } + public boolean isEquivalent(MaterializedField other) { + if (! name.equalsIgnoreCase(other.name)) { + return false; + } + + // Requires full type equality, including fields such as precision and scale. + // But, unset fields are equivalent to 0. Can't use the protobuf-provided + // isEquals(), that treats set and unset fields as different. + + if (type.getMinorType() != other.type.getMinorType()) { + return false; + } + if (type.getMode() != other.type.getMode()) { + return false; + } + if (type.getScale() != other.type.getScale()) { + return false; + } + if (type.getPrecision() != other.type.getPrecision()) { + return false; + } + + // Compare children -- but only for maps, not the internal children + // for Varchar, repeated or nullable types. + + if (type.getMinorType() != MinorType.MAP) { + return true; + } + + if (children == null || other.children == null) { + return children == other.children; + } + if (children.size() != other.children.size()) { + return false; + } + + // Maps are name-based, not position. But, for our + // purposes, we insist on identical ordering. + + Iterator thisIter = children.iterator(); + Iterator otherIter = other.children.iterator(); + while (thisIter.hasNext()) { + MaterializedField thisChild = thisIter.next(); + MaterializedField otherChild = otherIter.next(); + if (! thisChild.isEquivalent(otherChild)) { + return false; + } + } + return true; + } + /** - *
<p> - * Creates materialized field string representation. + * Creates materialized field string representation. * Includes field name, its type with precision and scale if any and data mode. * Nested fields if any are included. Number of nested fields to include is limited to 10. - * </p>
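The equivalence rules above are easier to see in use. A minimal sketch; the field names are made up, while MaterializedField.create() and the Types helper are existing Drill APIs:

    import org.apache.drill.common.types.TypeProtos.MinorType;
    import org.apache.drill.common.types.Types;
    import org.apache.drill.exec.record.MaterializedField;

    static void equivalenceDemo() {
      // Names compare case-insensitively; types compare field by field.
      MaterializedField a = MaterializedField.create("id", Types.required(MinorType.INT));
      MaterializedField b = MaterializedField.create("ID", Types.required(MinorType.INT));
      MaterializedField c = MaterializedField.create("id", Types.optional(MinorType.INT));

      assert a.isEquivalent(b);   // same name (ignoring case), same minor type and mode
      assert ! a.isEquivalent(c); // data mode differs: REQUIRED vs. OPTIONAL
    }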
* diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/TupleMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/TupleMetadata.java new file mode 100644 index 00000000000..901511dbaed --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/TupleMetadata.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record; + +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; + +/** + * Metadata description of the schema of a row or a map. + * In Drill, both rows and maps are + * tuples: both are an ordered collection of values, defined by a + * schema. Each tuple has a schema that defines the column ordering + * for indexed access. Each tuple also provides methods to get column + * accessors by name or index. + *
<p>
+ * Models the physical schema of a row set showing the logical hierarchy of fields + * with map fields as first-class fields. Map members appear as children + * under the map, much as they appear in the physical value-vector + * implementation. + *
<ul>
+ * <li>Provides fast lookup by name or index.</li>
+ * <li>Provides a nested schema, in this same form, for maps.</li>
+ * </ul>
+ * This form is useful when performing semantic analysis and when + * working with vectors. + *
<p>
+ * In the future, this structure will also gather metadata useful + * for vector processing such as expected widths and so on. + */ + +public interface TupleMetadata extends Iterable { + + public enum StructureType { + PRIMITIVE, LIST, TUPLE + } + + /** + * Metadata description of a column including names, types and structure + * information. + */ + + public interface ColumnMetadata { + StructureType structureType(); + TupleMetadata mapSchema(); + int index(); + MaterializedField schema(); + String name(); + MajorType majorType(); + MinorType type(); + DataMode mode(); + TupleMetadata parent(); + + /** + * Full name of the column. Note: this name cannot be used to look up + * the column because of ambiguity. The name "a.b.c" may mean a single + * column with that name, or may mean maps "a", and "b" with column "c", + * etc. + * + * @return full, dotted, column name + */ + + String fullName( ); + + /** + * Report whether one column is equivalent to another. Columns are equivalent + * if they have the same name, type and structure (ignoring internal structure + * such as offset vectors.) + */ + + boolean isEquivalent(ColumnMetadata other); + } + + void add(MaterializedField field); + int size(); + boolean isEmpty(); + int index(String name); + ColumnMetadata metadata(int index); + ColumnMetadata metadata(String name); + MaterializedField column(int index); + MaterializedField column(String name); + boolean isEquivalent(TupleMetadata other); + + /** + * Return the schema as a list of MaterializedField objects + * which can be used to create other schemas. Not valid for a + * flattened schema. + * + * @return a list of the top-level fields. Maps contain their child + * fields + */ + + List toFieldList(); +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/TupleNameSpace.java b/exec/vector/src/main/java/org/apache/drill/exec/record/TupleNameSpace.java new file mode 100644 index 00000000000..5853c9363ed --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/record/TupleNameSpace.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.record; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.drill.common.map.CaseInsensitiveMap; + +import com.google.common.collect.ImmutableList; + +/** + * Implementation of a tuple name space. Tuples allow both indexed and + * named access to their members. 
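As a rough usage sketch (the String payload here is arbitrary; the name space holds any per-column object):

    import org.apache.drill.exec.record.TupleNameSpace;

    static void nameSpaceDemo() {
      TupleNameSpace<String> cols = new TupleNameSpace<>();
      cols.add("id", "first");     // assigned index 0
      cols.add("name", "second");  // assigned index 1

      assert cols.indexOf("ID") == 0;        // lookups are case-insensitive
      assert cols.get("name").equals("second");
      assert cols.indexOf("missing") == -1;  // unknown names report -1
      // cols.add("Id", "third") would throw IllegalArgumentException: duplicate entry
    }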
+ * + * @param the type of object representing each column + */ + +public class TupleNameSpace implements Iterable { + private final Map nameSpace = CaseInsensitiveMap.newHashMap(); + private final List entries = new ArrayList<>(); + + public int add(String key, T value) { + if (indexOf(key) != -1) { + throw new IllegalArgumentException("Duplicate entry: " + key); + } + int index = entries.size(); + nameSpace.put(key, index); + entries.add(value); + return index; + } + + public T get(int index) { + return entries.get(index); + } + + public T get(String key) { + int index = indexOf(key); + if (index == -1) { + return null; + } + return get(index); + } + + public int indexOf(String key) { + Integer index = nameSpace.get(key); + if (index == null) { + return -1; + } + return index; + } + + public int count() { return entries.size(); } + + @Override + public Iterator iterator() { + return entries.iterator(); + } + + public boolean isEmpty() { + return entries.isEmpty(); + } + + public List entries() { + return ImmutableList.copyOf(entries); + } + + @Override + public String toString() { + return entries.toString(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java index 0062e7701bb..e551f858e6f 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java @@ -193,6 +193,16 @@ public void reAlloc() { allocationSizeInBytes = curSize; } + // This version uses the base version because this vector appears to not be + // used, so not worth the effort to avoid zero-fill. + + public DrillBuf reallocRaw(int newAllocationSize) { + while (allocationSizeInBytes < newAllocationSize) { + reAlloc(); + } + return data; + } + /** * {@inheritDoc} */ @@ -437,20 +447,6 @@ public void setSafe(int index, int value) { set(index, value); } - public void setScalar(int index, int value) throws VectorOverflowException { - if (index >= MAX_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, value); - } - - public void setArrayItem(int index, int value) throws VectorOverflowException { - if (index >= MAX_CAPACITY) { - throw new VectorOverflowException(); - } - setSafe(index, value); - } - public void setSafe(int index, BitHolder holder) { while(index >= getValueCapacity()) { reAlloc(); @@ -458,20 +454,6 @@ public void setSafe(int index, BitHolder holder) { set(index, holder.value); } - public void setScalar(int index, BitHolder holder) throws VectorOverflowException { - if (index >= MAX_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, holder); - } - - public void setArrayItem(int index, BitHolder holder) throws VectorOverflowException { - if (index >= MAX_CAPACITY) { - throw new VectorOverflowException(); - } - setSafe(index, holder); - } - public void setSafe(int index, NullableBitHolder holder) { while(index >= getValueCapacity()) { reAlloc(); @@ -479,20 +461,6 @@ public void setSafe(int index, NullableBitHolder holder) { set(index, holder.value); } - public void setScalar(int index, NullableBitHolder holder) throws VectorOverflowException { - if (index >= MAX_COUNT) { - throw new VectorOverflowException(); - } - setSafe(index, holder); - } - - public void setArrayItem(int index, NullableBitHolder holder) throws VectorOverflowException { - if (index >= MAX_CAPACITY) { - throw new VectorOverflowException(); - } - setSafe(index, holder); - } - @Override public final void 
setValueCount(int valueCount) { int currentValueCapacity = getValueCapacity(); diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/NullableVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/NullableVector.java index 8091c4c56ac..51b5e0c4803 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/NullableVector.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/NullableVector.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -19,5 +19,6 @@ public interface NullableVector extends ValueVector{ + ValueVector getBitsVector(); ValueVector getValuesVector(); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/AccessorUtilities.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/AccessorUtilities.java deleted file mode 100644 index 708d0db08bf..00000000000 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/AccessorUtilities.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.vector.accessor; - -import java.math.BigDecimal; - -import org.joda.time.Duration; -import org.joda.time.Period; - -public class AccessorUtilities { - - private AccessorUtilities() { } - - public static void setFromInt(ColumnWriter writer, int value) { - switch (writer.valueType()) { - case BYTES: - writer.setBytes(Integer.toHexString(value).getBytes()); - break; - case DOUBLE: - writer.setDouble(value); - break; - case INTEGER: - writer.setInt(value); - break; - case LONG: - writer.setLong(value); - break; - case STRING: - writer.setString(Integer.toString(value)); - break; - case DECIMAL: - writer.setDecimal(BigDecimal.valueOf(value)); - break; - case PERIOD: - writer.setPeriod(Duration.millis(value).toPeriod()); - break; - default: - throw new IllegalStateException("Unknown writer type: " + writer.valueType()); - } - } - - public static int sv4Batch(int sv4Index) { - return sv4Index >>> 16; - } - - public static int sv4Index(int sv4Index) { - return sv4Index & 0xFFFF; - } - - public static void setBooleanArray(ArrayWriter arrayWriter, boolean[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setInt(value[i] ? 
1 : 0); - } - } - - public static void setByteArray(ArrayWriter arrayWriter, byte[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setInt(value[i]); - } - } - - public static void setShortArray(ArrayWriter arrayWriter, short[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setInt(value[i]); - } - } - - public static void setIntArray(ArrayWriter arrayWriter, int[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setInt(value[i]); - } - } - - public static void setLongArray(ArrayWriter arrayWriter, long[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setLong(value[i]); - } - } - - public static void setFloatArray(ArrayWriter arrayWriter, float[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setDouble(value[i]); - } - } - - public static void setDoubleArray(ArrayWriter arrayWriter, double[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setDouble(value[i]); - } - } - - public static void setStringArray(ArrayWriter arrayWriter, String[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setString(value[i]); - } - } - - public static void setPeriodArray(ArrayWriter arrayWriter, Period[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setPeriod(value[i]); - } - } - - public static void setBigDecimalArray(ArrayWriter arrayWriter, - BigDecimal[] value) { - for (int i = 0; i < value.length; i++) { - arrayWriter.setDecimal(value[i]); - } - } -} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java index 040dcda120b..8f33f0ecf5b 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java @@ -17,36 +17,90 @@ */ package org.apache.drill.exec.vector.accessor; -import java.math.BigDecimal; - -import org.joda.time.Period; - /** - * Interface to access the values of an array column. In general, each - * vector implements just one of the get methods. Check the vector type - * to know which method to use. Though, generally, when writing test - * code, the type is known to the test writer. - *
<p>
- * Arrays allow random access to the values within the array. The index - * passed to each method is the index into the array for the current - * row and column. (This means that arrays are three dimensional: - * the usual (row, column) dimensions plus an array index dimension: - * (row, column, array index). - *
<p>
- * Note that the isNull() method is provided for completeness, - * but no Drill array allows null values at present. + * Generic array reader. An array is one of the following: + *
<ul>
+ * <li>Array of scalars. Read the values using {@link #elements()}, which provides
+ * an array-like access to the scalars.</li>
+ * <li>A repeated map. Use {@link #tuple(int)} to get a tuple reader for a
+ * specific array element. Use {@link #size()} to learn the number of maps in
+ * the array.</li>
+ * <li>List of lists. Use the {@link #array(int)} method to get the nested list
+ * at a given index. Use {@link #size()} to learn the number of lists in
+ * the array.</li>
+ * </ul>
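A sketch of how the first two cases read in practice; the row reader and the column names are hypothetical, while the accessor calls are the ones defined here:

    import org.apache.drill.exec.vector.accessor.ArrayReader;
    import org.apache.drill.exec.vector.accessor.ScalarElementReader;
    import org.apache.drill.exec.vector.accessor.TupleReader;

    static void readArrays(TupleReader rowReader) {
      // Scalar array, e.g. a repeated VARCHAR column "tags":
      ScalarElementReader tags = rowReader.array("tags").elements();
      for (int i = 0; i < tags.size(); i++) {
        String tag = tags.getString(i);
      }

      // Repeated map, e.g. "orders" with a scalar member "amount":
      ArrayReader orders = rowReader.array("orders");
      for (int i = 0; i < orders.size(); i++) {
        TupleReader order = orders.tuple(i);
        double amount = order.scalar("amount").getDouble();
      }
    }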
+ * {@see ArrayWriter} */ -public interface ArrayReader extends ColumnAccessor { +public interface ArrayReader { + + /** + * Number of elements in the array. + * @return the number of elements + */ + int size(); - boolean isNull(int index); - int getInt(int index); - long getLong(int index); - double getDouble(int index); - String getString(int index); - byte[] getBytes(int index); - BigDecimal getDecimal(int index); - Period getPeriod(int index); - TupleReader map(int index); + + /** + * The object type of the list entry. All entries have the same + * type. + * @return the object type of each entry + */ + + ObjectType entryType(); + + /** + * Return a reader for the elements of a scalar array. + * @return reader for scalar elements + */ + + ScalarElementReader elements(); + + /** + * Return a generic object reader for the array entry. Not available + * for scalar elements. Positions the reader to read the selected + * element. + * + * @param index array index + * @return generic object reader + */ + + ObjectReader entry(int index); + TupleReader tuple(int index); ArrayReader array(int index); + + /** + * Return the generic object reader for the array element. This + * version does not position the reader, the client must + * call {@link setPosn()} to set the position. This form allows + * up-front setup of the readers when convenient for the caller. + */ + + ObjectReader entry(); + TupleReader tuple(); + ArrayReader array(); + + /** + * Set the array reader to read a given array entry. Not used for + * scalars, only for maps and arrays when using the non-indexed + * methods {@link #entry()}, {@link #tuple()} and {@link #array()}. + */ + + void setPosn(int index); + + /** + * Return the entire array as an List of objects. + * Note, even if the array is scalar, the elements are still returned + * as a list. This method is primarily for testing. + * @return array as a List of objects + */ + + Object getObject(); + + /** + * Return the entire array as a string. Primarily for debugging. + * @return string representation of the array + */ + + String getAsString(); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java index 16ff89ed1be..49a1e7770ed 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java @@ -18,25 +18,65 @@ package org.apache.drill.exec.vector.accessor; /** - * Writer for values into an array. Array writes are write-once, - * sequential: each call to a setFoo() method writes a - * value and advances the array index. + * Writer for values into an array. Array writes are write-once, sequential: + * each call to a setFoo() method writes a value and advances the array + * index. *
<p>
* {@see ArrayReader} */ -public interface ArrayWriter extends ColumnAccessor, ScalarWriter { +public interface ArrayWriter { + + /** + * Number of elements written thus far to the array. + * @return the number of elements + */ int size(); /** - * Determine if the next position is valid for writing. Will be invalid - * if the writer hits a size or other limit. + * The object type of the list entry. All entries have the same + * type. + * @return the object type of each entry + */ + + ObjectWriter entry(); + + /** + * Return a generic object writer for the array entry. + * + * @return generic object reader + */ + + ObjectType entryType(); + ScalarWriter scalar(); + TupleWriter tuple(); + ArrayWriter array(); + + /** + * When the array contains a tuple or an array, call save() + * after each array value. Not necessary when writing scalars; each + * set operation calls save automatically. + */ + + void save(); + + /** + * Write the values of an array from a list of arguments. + * @param values values for each array element + * @throws VectorOverflowException + */ + void set(Object ...values); + + /** + * Write the array given an array of values. The type of array must match + * the type of element in the array. That is, if the value is an int, + * provide an int[] array. * - * @return true if another item is available and the reader is positioned - * at that item, false if no more items are available and the reader - * is no longer valid + * @param array array of values to write + * @throws VectorOverflowException */ - boolean valid(); + void setObject(Object array); +// void setList(List list); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java new file mode 100644 index 00000000000..1bbfe613159 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor; + +public interface ColumnReaderIndex { + int batchIndex(); + int vectorIndex(); +} \ No newline at end of file diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriter.java deleted file mode 100644 index 0cc691cefe5..00000000000 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriter.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.vector.accessor; - -/** - * Defines a writer to set values for value vectors using - * a simple, uniform interface. Vector values are mapped to - * their "natural" representations: the representation closest - * to the actual vector value. For date and time values, this - * generally means a numeric value. Applications can then map - * this value to Java objects as desired. Decimal types all - * map to BigDecimal as that is the only way in Java to - * represent large decimal values. - *
<p>
- * In general, a column maps to just one value. However, derived - * classes may choose to provide type conversions if convenient. - * An exception is thrown if a call is made to a method that - * is not supported by the column type. - *
<p>
- * Values of scalars are set directly, using the get method - * for the target type. Maps and arrays are structured types and - * require another level of writer abstraction to access each value - * in the structure. - */ - -public interface ColumnWriter extends ColumnAccessor, ScalarWriter { - void setNull(); - TupleWriter map(); - ArrayWriter array(); -} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriterIndex.java similarity index 58% rename from exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnAccessor.java rename to exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriterIndex.java index 44cd48aed19..7e6c8d67845 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnAccessor.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriterIndex.java @@ -17,24 +17,16 @@ */ package org.apache.drill.exec.vector.accessor; -/** - * Common base interface for columns readers and writers. Provides - * the access type for the column. Note that multiple Drill types and - * data modes map to the same access type. - */ - -public interface ColumnAccessor { - public enum ValueType { - INTEGER, LONG, DOUBLE, STRING, BYTES, DECIMAL, PERIOD, ARRAY, MAP - } +public interface ColumnWriterIndex { + int vectorIndex(); + void overflowed(); + boolean legal(); /** - * Describe the type of the value. This is a compression of the - * value vector type: it describes which method will return the - * vector value. - * @return the value type which indicates which get method - * is valid for the column + * Index for array elements that allows the caller to increment the + * index. For arrays, writing (or saving) one value automatically + * moves to the next value. Ignored for non-element indexes. */ - ColumnAccessor.ValueType valueType(); + void nextElement(); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java new file mode 100644 index 00000000000..039d1d29dee --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor; + +/** + * Defines a reader to get values for value vectors using + * a simple, uniform interface modeled after a JSON object. + * Every column value is an object of one of three types: + * scalar, array or tuple. Methods exist to "cast" this object + * to the proper type. 
This model allows a very simple representation: + * tuples (rows, maps) consist of objects. Arrays are lists of + * objects. + *
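A sketch of the resulting access pattern; rowReader is a hypothetical tuple reader that hands out an ObjectReader per column:

    import org.apache.drill.exec.vector.accessor.ArrayReader;
    import org.apache.drill.exec.vector.accessor.ObjectReader;
    import org.apache.drill.exec.vector.accessor.TupleReader;

    static void dispatch(TupleReader rowReader) {
      ObjectReader col = rowReader.column("customer");
      switch (col.type()) {
      case SCALAR:
        String s = col.scalar().getString();
        break;
      case TUPLE:
        TupleReader map = col.tuple();
        break;
      case ARRAY:
        ArrayReader items = col.array();
        break;
      }
    }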
<p>
+ * {@see ObjectWriter> + */ + +public interface ObjectReader { + + /** + * The type of this reader. + * + * @return type of reader + */ + + ObjectType type(); + ScalarReader scalar(); + ScalarElementReader elements(); + TupleReader tuple(); + ArrayReader array(); + + /** + * Return the value of the underlying data as a Java object. + * Primarily for testing + * @return Java object that represents the underlying value + */ + + Object getObject(); + + /** + * Return the entire object as a string. Primarily for debugging. + * @return string representation of the object + */ + + String getAsString(); +} \ No newline at end of file diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectType.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectType.java new file mode 100644 index 00000000000..26f1ca8c3c8 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectType.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor; + +public enum ObjectType { + SCALAR, TUPLE, ARRAY +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectWriter.java new file mode 100644 index 00000000000..aa8fdec1593 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectWriter.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor; + +/** + * Defines a writer to set values for value vectors using + * a simple, uniform interface modeled after a JSON object. + * Every column value is an object of one of three types: + * scalar, array or tuple. Methods exist to "cast" this object + * to the proper type. This model allows a very simple representation: + * tuples (rows, maps) consist of objects. Arrays are lists of + * objects. + *
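The writer side mirrors this. A minimal sketch; rowWriter is hypothetical:

    import org.apache.drill.exec.vector.accessor.ArrayWriter;
    import org.apache.drill.exec.vector.accessor.ObjectType;
    import org.apache.drill.exec.vector.accessor.ObjectWriter;
    import org.apache.drill.exec.vector.accessor.TupleWriter;

    static void writeTags(TupleWriter rowWriter) {
      ObjectWriter col = rowWriter.column("tags");
      if (col.type() == ObjectType.ARRAY) {
        ArrayWriter tags = col.array();
        // Scalar elements save automatically after each set.
        tags.scalar().setString("a");
        tags.scalar().setString("b");
      }
    }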
<p>
+ * {@see ObjectReader} + */ + +public interface ObjectWriter { + + /** + * Return the object (structure) type of this writer. + * @return type indicating if this is a scalar, tuple or array + */ + + ObjectType type(); + ScalarWriter scalar(); + TupleWriter tuple(); + ArrayWriter array(); + + /** + * For debugging, set the object to the proper form of Java object + * as defined by the underlying writer type. + * + * @param value Java object value to write + * @throws VectorOverflowException + */ + + void set(Object value); +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java new file mode 100644 index 00000000000..d1f31a82f24 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor; + +import java.math.BigDecimal; + +import org.joda.time.Period; + +/** + * Interface to access the values of an array column. In general, each + * vector implements just one of the get methods. Check the vector type + * to know which method to use. Though, generally, when writing test + * code, the type is known to the test writer. + *
<p>
+ * Arrays allow random access to the values within the array. The index + * passed to each method is the index into the array for the current + * row and column. (This means that arrays are three dimensional: + * the usual (row, column) dimensions plus an array index dimension: + * (row, column, array index). + *
<p>
+ * Note that the isNull() method is provided for completeness, + * but no Drill array allows null values at present. + *
<p>
+ * {@see ScalarWriter} + */ + +public interface ScalarElementReader { + /** + * Describe the type of the value. This is a compression of the + * value vector type: it describes which method will return the + * vector value. + * @return the value type which indicates which get method + * is valid for the column + */ + + ValueType valueType(); + int size(); + + boolean isNull(int index); + int getInt(int index); + long getLong(int index); + double getDouble(int index); + String getString(int index); + byte[] getBytes(int index); + BigDecimal getDecimal(int index); + Period getPeriod(int index); + + Object getObject(int index); + String getAsString(int index); +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java similarity index 85% rename from exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java rename to exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java index 4932567f262..e1c26bf29e9 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java @@ -40,9 +40,20 @@ * for the target type. Maps and arrays are structured types and * require another level of reader abstraction to access each value * in the structure. + *
<p>
+ * {@see ScalarWriter} */ -public interface ColumnReader extends ColumnAccessor { +public interface ScalarReader { + /** + * Describe the type of the value. This is a compression of the + * value vector type: it describes which method will return the + * vector value. + * @return the value type which indicates which get method + * is valid for the column + */ + + ValueType valueType(); /** * Report if the column is null. Non-nullable columns always @@ -58,7 +69,7 @@ public interface ColumnReader extends ColumnAccessor { byte[] getBytes(); BigDecimal getDecimal(); Period getPeriod(); + Object getObject(); - TupleReader map(); - ArrayReader array(); + String getAsString(); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java index 5cbe80a379a..2edced559f2 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java @@ -22,16 +22,44 @@ import org.joda.time.Period; /** - * Methods common to the {@link ColumnWriter} and - * {@link ArrayWriter} interfaces. + * Represents a scalar value: a required column, a nullable column, + * or one element within an array of scalars. + *
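For the reader interface just defined, valueType() tells the caller which getter applies. A sketch with a hypothetical rowReader:

    import java.math.BigDecimal;
    import org.apache.drill.exec.vector.accessor.ScalarReader;
    import org.apache.drill.exec.vector.accessor.TupleReader;

    static void readPrice(TupleReader rowReader) {
      ScalarReader price = rowReader.scalar("price");
      if (! price.isNull()) {
        switch (price.valueType()) {
        case DOUBLE:
          double d = price.getDouble();
          break;
        case DECIMAL:
          BigDecimal dec = price.getDecimal();
          break;
        default:
          String s = price.getAsString(); // string form is always available
        }
      }
    }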
<p>
+ * Vector values are mapped to + * their "natural" representations: the representation closest + * to the actual vector value. For date and time values, this + * generally means a numeric value. Applications can then map + * this value to Java objects as desired. Decimal types all + * map to BigDecimal as that is the only way in Java to + * represent large decimal values. + *
<p>
+ * In general, a column maps to just one value. However, derived + * classes may choose to provide type conversions if convenient. + * An exception is thrown if a call is made to a method that + * is not supported by the column type. + *
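One visible change from the old ColumnWriter: setBytes() now takes an explicit length, so a caller can reuse a scratch buffer larger than the value. A sketch; fillBuffer() is a hypothetical encoder that returns the number of bytes it wrote:

    import org.apache.drill.exec.vector.accessor.TupleWriter;

    static void writeRaw(TupleWriter rowWriter, byte[] scratch) {
      int len = fillBuffer(scratch);                  // hypothetical helper
      rowWriter.scalar("raw").setBytes(scratch, len); // writes only the first len bytes
      rowWriter.scalar("name").setString("fred");
    }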
<p>
+ * {@see ScalarReader} + * {@see ScalarElementReader} */ public interface ScalarWriter { + /** + * Describe the type of the value. This is a compression of the + * value vector type: it describes which method will return the + * vector value. + * @return the value type which indicates which get method + * is valid for the column + */ + + ValueType valueType(); + void setNull(); void setInt(int value); void setLong(long value); void setDouble(double value); void setString(String value); - void setBytes(byte[] value); + void setBytes(byte[] value, int len); void setDecimal(BigDecimal value); void setPeriod(Period value); + + void setObject(Object value); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleAccessor.java deleted file mode 100644 index 2ebb32ce652..00000000000 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleAccessor.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.vector.accessor; - -import org.apache.drill.exec.record.MaterializedField; - -/** - * Provides access to a "tuple". In Drill, both rows and maps are - * tuples: both are an ordered collection of values, defined by a - * schema. Each tuple has a schema that defines the column ordering - * for indexed access. Each tuple also provides methods to get column - * accessors by name or index. - */ - -public interface TupleAccessor { - - /** - * Flattened view of the schema as needed for row-based access of scalar - * members. The scalar view presents scalar fields: those that can be set - * or retrieved. A separate map view presents map vectors. The scalar - * view is the one used by row set readers and writers. Column indexes - * are into the flattened view, with maps removed and map members flattened - * into the top-level name space with compound names. - */ - - public interface TupleSchema { - /** - * Return a column schema given an indexed into the flattened row structure. 
- * - * @param index index of the row in the flattened structure - * @return schema of the column - */ - - MaterializedField column(int index); - - MaterializedField column(String name); - - int columnIndex(String name); - - int count(); - } - - TupleSchema schema(); -} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java index 57425afd4a3..908d6a07805 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java @@ -17,17 +17,38 @@ */ package org.apache.drill.exec.vector.accessor; +import org.apache.drill.exec.record.TupleMetadata; + /** * Interface for reading from tuples (rows or maps). Provides * a column reader for each column that can be obtained either * by name or column index (as defined in the tuple schema.) * Also provides two generic methods to get the value as a * Java object or as a string. + *
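A sketch of the convenience accessors, assuming a hypothetical reader positioned on a row of schema (id INT, address MAP(city VARCHAR)):

    import org.apache.drill.exec.vector.accessor.TupleReader;

    static void readRow(TupleReader rowReader) {
      int id = rowReader.scalar("id").getInt();
      String city = rowReader.tuple("address").scalar("city").getString();

      // Generic access when column types are not known statically:
      for (int i = 0; i < rowReader.columnCount(); i++) {
        Object value = rowReader.column(i).getObject();
      }
    }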
<p>
+ * {@see TupleWriter} */ -public interface TupleReader extends TupleAccessor { - ColumnReader column(int colIndex); - ColumnReader column(String colName); - Object get(int colIndex); - String getAsString(int colIndex); +public interface TupleReader { + TupleMetadata schema(); + int columnCount(); + + ObjectReader column(int colIndex); + ObjectReader column(String colName); + + // Convenience methods + + ObjectType type(int colIndex); + ObjectType type(String colName); + ScalarReader scalar(int colIndex); + ScalarReader scalar(String colName); + TupleReader tuple(int colIndex); + TupleReader tuple(String colName); + ArrayReader array(int colIndex); + ArrayReader array(String colName); + ScalarElementReader elements(int colIndex); + ScalarElementReader elements(String colName); + + Object getObject(); + String getAsString(); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java index 59eca794ec4..6d6e9ea9805 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java @@ -17,18 +17,78 @@ */ package org.apache.drill.exec.vector.accessor; +import org.apache.drill.exec.record.TupleMetadata; + /** - * Interface for writing to rows via a column writer. - * Column writers can be obtained by name or index. Column - * indexes are defined by the tuple schema. Also provides - * a convenience method to set the column value from a Java - * object. The caller is responsible for providing the - * correct object type for each column. (The object type - * must match the column accessor type.) + * Interface for writing to rows via a column writer. Column writers can be + * obtained by name or index. Column indexes are defined by the tuple schema. + * Also provides a convenience method to set the column value from a Java + * object. The caller is responsible for providing the correct object type for + * each column. (The object type must match the column accessor type.) + *
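A sketch of whole-row versus per-column writes, assuming a hypothetical rowWriter over (id INT, name VARCHAR, tags repeated VARCHAR):

    import org.apache.drill.exec.vector.accessor.TupleWriter;

    static void writeRows(TupleWriter rowWriter) {
      // Whole row at once; each argument must match its column type.
      rowWriter.setTuple(1, "fred", new String[] { "a", "b" });

      // The same row, column by column:
      rowWriter.scalar("id").setInt(2);
      rowWriter.scalar("name").setString("barney");
      rowWriter.array("tags").setObject(new String[] { "c" });
    }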
<p>
+ * A tuple is composed of columns with a fixed order and unique names: either + * can be used to reference columns. Columns are scalar (simple values), tuples + * (i.e. maps), or arrays (of scalars, tuples or arrays.) + *
<p>
+ * Convenience methods allow getting a column as a scalar, tuple or array. These + * methods throw an exception if the column is not of the requested type. */ -public interface TupleWriter extends TupleAccessor { - ColumnWriter column(int colIndex); - ColumnWriter column(String colName); +public interface TupleWriter { + TupleMetadata schema(); + int size(); + + // Return the column as a generic object + + ObjectWriter column(int colIndex); + ObjectWriter column(String colName); + + // Convenience methods + + ScalarWriter scalar(int colIndex); + ScalarWriter scalar(String colName); + TupleWriter tuple(int colIndex); + TupleWriter tuple(String colName); + ArrayWriter array(int colIndex); + ArrayWriter array(String colName); + ObjectType type(int colIndex); + ObjectType type(String colName); + + /** + * Set one column given a generic object value. Most helpful for testing, + * not performant for production code due to object creation and dynamic + * type checking. + * + * @param colIndex the index of the column to set + * @param value the value to set. The type of the object must be compatible + * with the type of the target column + * @throws VectorOverflowException if the vector overflows + */ + void set(int colIndex, Object value); + + /** + * Write a row or map of values, given by Java objects. Object type must + * match expected column type. + *
<p>
+ * Note that a single-column tuple is ambiguous if that column is an + * array. To avoid ambiguity, use set(0, value) in this case. + * + * @param values variable-length argument list of column values + * @return true if the row was written, false if any column + * caused vector overflow. + * @throws VectorOverflowException if the vector overflows + */ + + void setTuple(Object ...values); + + /** + * Set the tuple from an array of objects. Primarily for use in + * test tools. + * + * @param value + * @throws VectorOverflowException + */ + + void setObject(Object value); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java new file mode 100644 index 00000000000..e6687dcd311 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor; + +/** + * Represents the primitive types supported to read and write data + * from value vectors. Vectors support many data widths. For simplicity + * (and because of no difference in performance), the get/set methods + * use a reduced set of types. In general, each reader and writer + * supports just one type. Though some may provide more than one + * (such as access to bytes for a STRING value.) + */ + +public enum ValueType { + INTEGER, LONG, DOUBLE, STRING, BYTES, DECIMAL, PERIOD +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayReader.java deleted file mode 100644 index deea7f8865e..00000000000 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayReader.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.drill.exec.vector.accessor.impl; - -import java.math.BigDecimal; - -import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.exec.vector.accessor.ArrayReader; -import org.apache.drill.exec.vector.accessor.TupleReader; -import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader.VectorAccessor; -import org.joda.time.Period; - -/** - * Reader for an array-valued column. This reader provides access to specific - * array members via an array index. This is an abstract base class; - * subclasses are generated for each repeated value vector type. - */ - -public abstract class AbstractArrayReader extends AbstractColumnAccessor implements ArrayReader { - - /** - * Column reader that provides access to an array column by returning a - * separate reader specifically for that array. That is, reading a column - * is a two-part process:
<pre><code>
-   * tupleReader.column("arrayCol").array().getInt(2);</code></pre>
- * This pattern is used to avoid overloading the column reader with - * both scalar and array access. Also, this pattern mimics the way - * that nested tuples (Drill maps) are handled. - */ - - public static class ArrayColumnReader extends AbstractColumnReader { - - private final AbstractArrayReader arrayReader; - - public ArrayColumnReader(AbstractArrayReader arrayReader) { - this.arrayReader = arrayReader; - } - - @Override - public ValueType valueType() { - return ValueType.ARRAY; - } - - @Override - public void bind(RowIndex rowIndex, ValueVector vector) { - arrayReader.bind(rowIndex, vector); - vectorIndex = rowIndex; - } - - @Override - public ArrayReader array() { - return arrayReader; - } - } - - protected VectorAccessor vectorAccessor; - - public void bind(RowIndex rowIndex, MaterializedField field, VectorAccessor va) { - bind(rowIndex); - vectorAccessor = va; - } - - @Override - public boolean isNull(int index) { - return false; - } - - @Override - public int getInt(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public long getLong(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public double getDouble(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public String getString(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public byte[] getBytes(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public BigDecimal getDecimal(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public Period getPeriod(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public TupleReader map(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public ArrayReader array(int index) { - throw new UnsupportedOperationException(); - } -} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayWriter.java deleted file mode 100644 index d1d126333db..00000000000 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayWriter.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.vector.accessor.impl; - -import java.math.BigDecimal; - -import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.exec.vector.accessor.ArrayWriter; -import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector; -import org.joda.time.Period; - -/** - * Writer for an array-valued column. This writer appends values: once a value - * is written, it cannot be changed. 
As a result, writer methods have no item index; - * each set advances the array to the next position. This is an abstract base class; - * subclasses are generated for each repeated value vector type. - */ - -public abstract class AbstractArrayWriter extends AbstractColumnAccessor implements ArrayWriter { - - /** - * Column writer that provides access to an array column by returning a - * separate writer specifically for that array. That is, writing an array - * is a two-part process:

-   * tupleWriter.column("arrayCol").array().setInt(2);
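A hypothetical expansion of the write-side call above: per the class javadoc, these old array writers append rather than take an index, so each set call advances to the next array position (column name is illustrative only):

    ArrayWriter arrayWriter = tupleWriter.column("arrayCol").array();
    arrayWriter.setInt(10);   // writes element 0
    arrayWriter.setInt(20);   // writes element 1
    arrayWriter.setInt(30);   // writes element 2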
- * This pattern is used to avoid overloading the column reader with - * both scalar and array access. Also, this pattern mimics the way - * that nested tuples (Drill maps) are handled. - */ - - public static class ArrayColumnWriter extends AbstractColumnWriter { - - private final AbstractArrayWriter arrayWriter; - - public ArrayColumnWriter(AbstractArrayWriter arrayWriter) { - this.arrayWriter = arrayWriter; - } - - @Override - public ValueType valueType() { - return ValueType.ARRAY; - } - - @Override - public void bind(RowIndex rowIndex, ValueVector vector) { - arrayWriter.bind(rowIndex, vector); - vectorIndex = rowIndex; - } - - @Override - public ArrayWriter array() { - return arrayWriter; - } - - /** - * Arrays require a start step for each row, regardless of - * whether any values are written for that row. - */ - - public void start() { - arrayWriter.mutator().startNewValue(vectorIndex.index()); - } - } - - protected abstract BaseRepeatedValueVector.BaseRepeatedMutator mutator(); - - @Override - public int size() { - return mutator().getInnerValueCountAt(vectorIndex.index()); - } - - @Override - public boolean valid() { - // Not implemented yet - return true; - } - - @Override - public void setInt(int value) { - throw new UnsupportedOperationException(); - } - - @Override - public void setLong(long value) { - throw new UnsupportedOperationException(); - } - - @Override - public void setDouble(double value) { - throw new UnsupportedOperationException(); - } - - @Override - public void setString(String value) { - throw new UnsupportedOperationException(); - } - - @Override - public void setBytes(byte[] value) { - throw new UnsupportedOperationException(); - } - - @Override - public void setDecimal(BigDecimal value) { - throw new UnsupportedOperationException(); - } - - @Override - public void setPeriod(Period value) { - throw new UnsupportedOperationException(); - } -} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnReader.java deleted file mode 100644 index b88b08bc8cf..00000000000 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnReader.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.drill.exec.vector.accessor.impl; - -import java.math.BigDecimal; - -import org.apache.drill.exec.record.MaterializedField; -import org.apache.drill.exec.vector.ValueVector; -import org.apache.drill.exec.vector.accessor.ArrayReader; -import org.apache.drill.exec.vector.accessor.ColumnReader; -import org.apache.drill.exec.vector.accessor.TupleReader; -import org.joda.time.Period; - -/** - * Column reader implementation that acts as the basis for the - * generated, vector-specific implementations. All set methods - * throw an exception; subclasses simply override the supported - * method(s). - */ - -public abstract class AbstractColumnReader extends AbstractColumnAccessor implements ColumnReader { - - public interface VectorAccessor { - ValueVector vector(); - } - - protected VectorAccessor vectorAccessor; - - public void bind(RowIndex rowIndex, MaterializedField field, VectorAccessor va) { - bind(rowIndex); - vectorAccessor = va; - } - - @Override - public Object getObject() { - switch (valueType()) { - case ARRAY: - // TODO: build an array. Just a bit tedious... - throw new UnsupportedOperationException(); - case BYTES: - return getBytes(); - case DECIMAL: - return getDecimal(); - case DOUBLE: - return getDouble(); - case INTEGER: - return getInt(); - case LONG: - return getLong(); - case MAP: - // TODO: build an array. Just a bit tedious... - throw new UnsupportedOperationException(); - case PERIOD: - return getPeriod(); - case STRING: - return getString(); - default: - throw new IllegalStateException("Unexpected type: " + valueType()); - } - } - - @Override - public boolean isNull() { - return false; - } - - @Override - public int getInt() { - throw new UnsupportedOperationException(); - } - - @Override - public long getLong() { - throw new UnsupportedOperationException(); - } - - @Override - public double getDouble() { - throw new UnsupportedOperationException(); - } - - @Override - public String getString() { - throw new UnsupportedOperationException(); - } - - @Override - public byte[] getBytes() { - throw new UnsupportedOperationException(); - } - - @Override - public BigDecimal getDecimal() { - throw new UnsupportedOperationException(); - } - - @Override - public Period getPeriod() { - throw new UnsupportedOperationException(); - } - - @Override - public TupleReader map() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrayReader array() { - throw new UnsupportedOperationException(); - } -} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AccessorUtilities.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AccessorUtilities.java new file mode 100644 index 00000000000..8b5d5df3834 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AccessorUtilities.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.impl; + +import java.math.BigDecimal; + +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.joda.time.Duration; + +public class AccessorUtilities { + + private AccessorUtilities() { } + + public static int sv4Batch(int sv4Index) { + return sv4Index >>> 16; + } + + public static int sv4Index(int sv4Index) { + return sv4Index & 0xFFFF; + } + + public static String bytesToString(byte[] value) { + StringBuilder buf = new StringBuilder() + .append("["); + int len = Math.min(value.length, 20); + for (int i = 0; i < len; i++) { + if (i > 0) { + buf.append(", "); + } + String str = Integer.toHexString(value[i] & 0xFF); + if (str.length() < 2) { + buf.append("0"); + } + buf.append(str); + } + if (value.length > len) { + buf.append("..."); + } + buf.append("]"); + return buf.toString(); + } + +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/ColumnAccessorFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/ColumnAccessorFactory.java index 019d3bed129..3f2e8b03dc7 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/ColumnAccessorFactory.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/ColumnAccessorFactory.java @@ -20,101 +20,125 @@ import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.vector.ValueVector; import org.apache.drill.exec.vector.accessor.ColumnAccessors; -import org.apache.drill.exec.vector.accessor.impl.AbstractArrayReader.ArrayColumnReader; -import org.apache.drill.exec.vector.accessor.impl.AbstractArrayWriter.ArrayColumnWriter; +import org.apache.drill.exec.vector.accessor.reader.AbstractObjectReader; +import org.apache.drill.exec.vector.accessor.reader.BaseElementReader; +import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader; +import org.apache.drill.exec.vector.accessor.reader.ScalarArrayReader; +import org.apache.drill.exec.vector.accessor.reader.VectorAccessor; +import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter; +import org.apache.drill.exec.vector.accessor.writer.BaseScalarWriter; +import org.apache.drill.exec.vector.accessor.writer.NullableScalarWriter; +import org.apache.drill.exec.vector.accessor.writer.ScalarArrayWriter; +import org.apache.drill.exec.vector.complex.RepeatedValueVector; /** - * Gather generated accessor classes into a set of class - * tables to allow rapid run-time creation of accessors. - * The caller is responsible for binding the accessor to - * a vector and a row index. + * Gather generated accessor classes into a set of class tables to allow rapid + * run-time creation of accessors. Builds the accessor and its object reader/writer + * wrapper which binds the vector to the accessor. 
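A minimal usage sketch of the two factory entry points defined just below; the vector variable stands for an existing scalar ValueVector, and the sketch itself is not part of the patch:

    // Hypothetical caller: build the bound object accessors for one vector.
    AbstractObjectWriter colWriter = ColumnAccessorFactory.buildColumnWriter(vector);
    AbstractObjectReader colReader = ColumnAccessorFactory.buildColumnReader(vector);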
*/ +@SuppressWarnings("unchecked") public class ColumnAccessorFactory { - private static Class columnWriters[][] = buildColumnWriters(); - private static Class columnReaders[][] = buildColumnReaders(); - private static Class arrayWriters[] = buildArrayWriters(); - private static Class arrayReaders[] = buildArrayReaders(); + private static final int typeCount = MinorType.values().length; + private static final Class requiredReaders[] = new Class[typeCount]; + private static final Class nullableReaders[] = new Class[typeCount]; + private static final Class elementReaders[] = new Class[typeCount]; + private static final Class requiredWriters[] = new Class[typeCount]; - @SuppressWarnings("unchecked") - private static Class[][] buildColumnWriters() { - int typeCount = MinorType.values().length; - int modeCount = DataMode.values().length; - Class writers[][] = new Class[typeCount][]; - for (int i = 0; i < typeCount; i++) { - writers[i] = new Class[modeCount]; - } - - ColumnAccessors.defineWriters(writers); - return writers; + static { + ColumnAccessors.defineRequiredReaders(requiredReaders); + ColumnAccessors.defineNullableReaders(nullableReaders); + ColumnAccessors.defineArrayReaders(elementReaders); + ColumnAccessors.defineRequiredWriters(requiredWriters); } - @SuppressWarnings("unchecked") - private static Class[][] buildColumnReaders() { - int typeCount = MinorType.values().length; - int modeCount = DataMode.values().length; - Class readers[][] = new Class[typeCount][]; - for (int i = 0; i < typeCount; i++) { - readers[i] = new Class[modeCount]; - } + public static AbstractObjectWriter buildColumnWriter(ValueVector vector) { + MajorType major = vector.getField().getType(); + MinorType type = major.getMinorType(); + DataMode mode = major.getMode(); - ColumnAccessors.defineReaders(readers); - return readers; + switch (type) { + case GENERIC_OBJECT: + case LATE: + case NULL: + case LIST: + case MAP: + throw new UnsupportedOperationException(type.toString()); + default: + switch (mode) { + case OPTIONAL: + return NullableScalarWriter.build(vector, newAccessor(type, requiredWriters)); + case REQUIRED: + return BaseScalarWriter.build(vector, newAccessor(type, requiredWriters)); + case REPEATED: + return ScalarArrayWriter.build((RepeatedValueVector) vector, newAccessor(type, requiredWriters)); + default: + throw new UnsupportedOperationException(mode.toString()); + } + } } - @SuppressWarnings("unchecked") - private static Class[] buildArrayWriters() { - int typeCount = MinorType.values().length; - Class writers[] = new Class[typeCount]; - ColumnAccessors.defineArrayWriters(writers); - return writers; - } + public static AbstractObjectReader buildColumnReader(ValueVector vector) { + MajorType major = vector.getField().getType(); + MinorType type = major.getMinorType(); + DataMode mode = major.getMode(); - @SuppressWarnings("unchecked") - private static Class[] buildArrayReaders() { - int typeCount = MinorType.values().length; - Class readers[] = new Class[typeCount]; - ColumnAccessors.defineArrayReaders(readers); - return readers; + switch (type) { + case GENERIC_OBJECT: + case LATE: + case NULL: + case LIST: + case MAP: + throw new UnsupportedOperationException(type.toString()); + default: + switch (mode) { + case OPTIONAL: + return BaseScalarReader.build(vector, newAccessor(type, nullableReaders)); + case REQUIRED: + return BaseScalarReader.build(vector, newAccessor(type, requiredReaders)); + case REPEATED: + return ScalarArrayReader.build((RepeatedValueVector) vector, newAccessor(type, 
elementReaders)); + default: + throw new UnsupportedOperationException(mode.toString()); + } + } } - public static AbstractColumnWriter newWriter(MajorType type) { - try { - if (type.getMode() == DataMode.REPEATED) { - Class writerClass = arrayWriters[type.getMinorType().ordinal()]; - if (writerClass == null) { - throw new UnsupportedOperationException(); - } - return new ArrayColumnWriter(writerClass.newInstance()); - } else { - Class writerClass = columnWriters[type.getMinorType().ordinal()][type.getMode().ordinal()]; - if (writerClass == null) { - throw new UnsupportedOperationException(); - } - return writerClass.newInstance(); + public static AbstractObjectReader buildColumnReader(MajorType majorType, VectorAccessor va) { + MinorType type = majorType.getMinorType(); + DataMode mode = majorType.getMode(); + + switch (type) { + case GENERIC_OBJECT: + case LATE: + case NULL: + case LIST: + case MAP: + throw new UnsupportedOperationException(type.toString()); + default: + switch (mode) { + case OPTIONAL: + return BaseScalarReader.build(majorType, va, newAccessor(type, nullableReaders)); + case REQUIRED: + return BaseScalarReader.build(majorType, va, newAccessor(type, requiredReaders)); + case REPEATED: + return ScalarArrayReader.build(majorType, va, newAccessor(type, elementReaders)); + default: + throw new UnsupportedOperationException(mode.toString()); + } + } } - public static AbstractColumnReader newReader(MajorType type) { + public static <T> T newAccessor(MinorType type, Class<? extends T> accessors[]) { try { - if (type.getMode() == DataMode.REPEATED) { - Class readerClass = arrayReaders[type.getMinorType().ordinal()]; - if (readerClass == null) { - throw new UnsupportedOperationException(); - } - return new ArrayColumnReader(readerClass.newInstance()); - } else { - Class readerClass = columnReaders[type.getMinorType().ordinal()][type.getMode().ordinal()]; - if (readerClass == null) { - throw new UnsupportedOperationException(); - } - return readerClass.newInstance(); + Class<? extends T> accessorClass = accessors[type.ordinal()]; + if (accessorClass == null) { + throw new UnsupportedOperationException(type.toString()); } + return accessorClass.newInstance(); } catch (InstantiationException | IllegalAccessException e) { throw new IllegalStateException(e); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleReaderImpl.java deleted file mode 100644 index 97a6e3c3013..00000000000 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleReaderImpl.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.vector.accessor.impl; - -import org.apache.drill.exec.vector.accessor.ArrayReader; -import org.apache.drill.exec.vector.accessor.ColumnReader; -import org.apache.drill.exec.vector.accessor.TupleReader; - -/** - * Reader for a tuple (a row or a map.) Provides access to each - * column using either a name or a numeric index. - */ - -public class TupleReaderImpl extends AbstractTupleAccessor implements TupleReader { - - private final AbstractColumnReader readers[]; - - public TupleReaderImpl(TupleSchema schema, AbstractColumnReader readers[]) { - super(schema); - this.readers = readers; - } - - @Override - public ColumnReader column(int colIndex) { - return readers[colIndex]; - } - - @Override - public ColumnReader column(String colName) { - int index = schema.columnIndex(colName); - if (index == -1) { - return null; } - return readers[index]; - } - - @Override - public Object get(int colIndex) { - ColumnReader colReader = column(colIndex); - if (colReader.isNull()) { - return null; } - switch (colReader.valueType()) { - case BYTES: - return colReader.getBytes(); - case DOUBLE: - return colReader.getDouble(); - case INTEGER: - return colReader.getInt(); - case LONG: - return colReader.getLong(); - case STRING: - return colReader.getString(); - default: - throw new IllegalArgumentException("Unsupported type " + colReader.valueType()); - } - } - - @Override - public String getAsString(int colIndex) { - ColumnReader colReader = column(colIndex); - if (colReader.isNull()) { - return "null"; - } - switch (colReader.valueType()) { - case BYTES: - return bytesToString(colReader.getBytes()); - case DOUBLE: - return Double.toString(colReader.getDouble()); - case INTEGER: - return Integer.toString(colReader.getInt()); - case LONG: - return Long.toString(colReader.getLong()); - case STRING: - return "\"" + colReader.getString() + "\""; - case DECIMAL: - return colReader.getDecimal().toPlainString(); - case ARRAY: - return getArrayAsString(colReader.array()); - default: - throw new IllegalArgumentException("Unsupported type " + colReader.valueType()); - } - } - - private String bytesToString(byte[] value) { - StringBuilder buf = new StringBuilder() - .append("["); - int len = Math.min(value.length, 20); - for (int i = 0; i < len; i++) { - if (i > 0) { - buf.append(", "); - } - buf.append((int) value[i]); - } - if (value.length > len) { - buf.append("..."); - } - buf.append("]"); - return buf.toString(); - } - - private String getArrayAsString(ArrayReader array) { - StringBuilder buf = new StringBuilder(); - buf.append("["); - for (int i = 0; i < array.size(); i++) { - if (i > 0) { - buf.append( ", " ); - } - switch (array.valueType()) { - case BYTES: - buf.append(bytesToString(array.getBytes(i))); - break; - case DOUBLE: - buf.append(Double.toString(array.getDouble(i))); - break; - case INTEGER: - buf.append(Integer.toString(array.getInt(i))); - break; - case LONG: - buf.append(Long.toString(array.getLong(i))); - break; - case STRING: - buf.append("\"" + array.getString(i) + "\""); - break; - case DECIMAL: - buf.append(array.getDecimal(i).toPlainString()); - break; - case MAP: - case ARRAY: - throw new UnsupportedOperationException("Unsupported type " + array.valueType()); - default: - throw new IllegalArgumentException("Unexpected type " + array.valueType()); - } - } - buf.append("]"); - return buf.toString(); - } -} diff --git 
a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleWriterImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleWriterImpl.java deleted file mode 100644 index 015b099668c..00000000000 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleWriterImpl.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.drill.exec.vector.accessor.impl; - -import java.math.BigDecimal; - -import org.apache.drill.exec.vector.accessor.AccessorUtilities; -import org.apache.drill.exec.vector.accessor.ArrayWriter; -import org.apache.drill.exec.vector.accessor.ColumnAccessor.ValueType; -import org.apache.drill.exec.vector.accessor.ColumnWriter; -import org.apache.drill.exec.vector.accessor.TupleWriter; -import org.joda.time.Period; - -/** - * Implementation for a writer for a tuple (a row or a map.) Provides access to each - * column using either a name or a numeric index. - */ - -public class TupleWriterImpl extends AbstractTupleAccessor implements TupleWriter { - - private final AbstractColumnWriter writers[]; - - public TupleWriterImpl(TupleSchema schema, AbstractColumnWriter writers[]) { - super(schema); - this.writers = writers; - } - - public void start() { - for (int i = 0; i < writers.length; i++) { - writers[i].start(); - } - } - - @Override - public ColumnWriter column(int colIndex) { - return writers[colIndex]; - } - - @Override - public ColumnWriter column(String colName) { - int index = schema.columnIndex(colName); - if (index == -1) { - return null; } - return writers[index]; - } - - @Override - public void set(int colIndex, Object value) { - ColumnWriter colWriter = column(colIndex); - if (value == null) { - // Arrays have no null concept, just an empty array. 
- if (colWriter.valueType() != ValueType.ARRAY) { - colWriter.setNull(); - } - } else if (value instanceof Integer) { - colWriter.setInt((Integer) value); - } else if (value instanceof Long) { - colWriter.setLong((Long) value); - } else if (value instanceof String) { - colWriter.setString((String) value); - } else if (value instanceof BigDecimal) { - colWriter.setDecimal((BigDecimal) value); - } else if (value instanceof Period) { - colWriter.setPeriod((Period) value); - } else if (value instanceof byte[]) { - colWriter.setBytes((byte[]) value); - } else if (value instanceof Byte) { - colWriter.setInt((Byte) value); - } else if (value instanceof Short) { - colWriter.setInt((Short) value); - } else if (value instanceof Double) { - colWriter.setDouble((Double) value); - } else if (value instanceof Float) { - colWriter.setDouble((Float) value); - } else if (value.getClass().getName().startsWith("[")) { - setArray(colIndex, value); - } else { - throw new IllegalArgumentException("Unsupported type " + - value.getClass().getSimpleName() + " for column " + colIndex); - } - } - - public void setArray(int colIndex, Object value) { - if (value == null) { - // Assume null means a 0-element array since Drill does - // not support null for the whole array. - - return; - } - String objClass = value.getClass().getName(); - if (!objClass.startsWith("[")) { - throw new IllegalArgumentException("Argument is not an array"); - } - - ColumnWriter colWriter = column(colIndex); - if (colWriter.valueType() != ValueType.ARRAY) { - throw new IllegalArgumentException("Column is not an array"); - } - - ArrayWriter arrayWriter = colWriter.array(); - - // Figure out type - - char second = objClass.charAt( 1 ); - switch ( second ) { - case 'B': - AccessorUtilities.setByteArray(arrayWriter, (byte[]) value ); - break; - case 'S': - AccessorUtilities.setShortArray(arrayWriter, (short[]) value ); - break; - case 'I': - AccessorUtilities.setIntArray(arrayWriter, (int[]) value ); - break; - case 'J': - AccessorUtilities.setLongArray(arrayWriter, (long[]) value ); - break; - case 'F': - AccessorUtilities.setFloatArray(arrayWriter, (float[]) value ); - break; - case 'D': - AccessorUtilities.setDoubleArray(arrayWriter, (double[]) value ); - break; - case 'Z': - AccessorUtilities.setBooleanArray(arrayWriter, (boolean[]) value ); - break; - case 'L': - int posn = objClass.indexOf(';'); - - // If the array is of type Object, then we have no type info. 
- - String memberClassName = objClass.substring( 2, posn ); - if (memberClassName.equals(String.class.getName())) { - AccessorUtilities.setStringArray(arrayWriter, (String[]) value ); - } else if (memberClassName.equals(Period.class.getName())) { - AccessorUtilities.setPeriodArray(arrayWriter, (Period[]) value ); - } else if (memberClassName.equals(BigDecimal.class.getName())) { - AccessorUtilities.setBigDecimalArray(arrayWriter, (BigDecimal[]) value ); - } else { - throw new IllegalArgumentException( "Unknown Java array type: " + memberClassName ); - } - break; - default: - throw new IllegalArgumentException( "Unknown Java array type: " + second ); - } - } -} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java index f51c1a998c3..c90a7342eee 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java @@ -24,8 +24,40 @@ * framework for the java-exec project. That one implementation is specific to * unit tests, but the accessor framework could easily be used for other * purposes as well. + * + *

<h4>Vector Overflow Handling</h4>

+ * + * The writers provide integrated support for detecting and handling vector + * overflow. Overflow occurs when a value exceeds some maximum, such as the + * 16MB block size in Netty. Overflow handling consists of replacing the + * "full" vector with a new, empty vector as part of a new batch. Overflow + * handling code must copy partially written values from the "overflow" row + * to the new vectors. The classes here do not provide overflow handling; + * rather, they provide the framework on top of which overflow handling can be + * built by a higher level of abstraction. + * + *

<h4>JSON-Like Model</h4>

+ * + * The object reader and writer provide a generic, JSON-like interface + * to allow any valid combination of readers or writers (generically, + * accessors):

+ * row : tuple
+ * tuple : (name column) *
+ * column : scalar obj | array obj | tuple obj
+ * scalar obj : scalar accessor
+ * array obj : array accessor
+ * array accessor : element accessor
+ * tuple obj : tuple
*

- * Drill provides a set of column readers and writers. Compared to those, this + * As seen above, the accessor tree starts with a tuple (a row in the form of + * a class provided by the consumer.) Each column in the tuple is represented + * by an object accessor. That object accessor contains a scalar, tuple or array + * accessor. This models Drill's JSON structure: a row can have a list of lists + * of tuples that contain lists of ints, say. + * + *
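As an illustration of this model, a consumer might walk the tree as below. This is a sketch only: the column names are hypothetical, and the row reader would come from a higher-level row-set abstraction not shown in this patch.

    TupleReader row = rowSetReader;                    // row : tuple
    int id = row.scalar("id").getInt();                // scalar obj
    String city = row.tuple("address").scalar("city").getString(); // tuple obj
    ScalarElementReader tags = row.elements("tags");   // array of scalar elements
    for (int i = 0; i < tags.size(); i++) {
      System.out.println(tags.getString(i));           // element accessor
    }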

<h4>Comparison with Previous Vector Readers and Writers</h4>

+ * + * Drill provides a set of vector readers and writers. Compared to those, this * set: *
    *
  • Works with all Drill data types. The other set works only with repeated @@ -36,23 +68,24 @@ * other set has accessors specific to each of the ~30 data types which Drill * supports.
  • *
- * The key difference is that this set is designed for developer ease-of-use, a - * primary requirement for unit tests. The other set is designed to be used in + * The key difference is that this set is designed for both developer ease-of-use + * and performance. Developer ease-of-use is a + * primary requirement for unit tests. Performance is critical for production + * code. The other set is designed to be used in * machine-generated or write-once code and so can be much more complex. - *

- * That is, the accessors here are optimized for test code: they trade - * convenience for a slight decrease in speed (the performance hit comes from - * the extra level of indirection which hides the complex, type-specific code - * otherwise required.) - *

- * {@link ColumnReader} and {@link ColumnWriter} are the core abstractions: they + * + *

<h4>Overview of the Code Structure</h4>

+ * + * {@link ScalarReader} and {@link ColumnWriter} are the core abstractions: they * provide simplified access to the myriad of Drill column types via a * simplified, uniform API. {@link TupleReader} and {@link TupleWriter} provide * a simplified API to rows or maps (both of which are tuples in Drill.) * {@link AccessorUtilities} provides a number of data conversion tools. - *

- * Overview of the code structure: *

+ *
<dt>ObjectWriter, ObjectReader</dt>
+ *
Drill follows a JSON data model. A row is a tuple (AKA structure). Each + * column is a scalar, a map (AKA tuple, structure) or an array (AKA a repeated + * value.)
*
<dt>TupleWriter, TupleReader</dt>
*
In relational terms, a tuple is an ordered collection of values, where * the meaning of the order is provided by a schema (usually a name/type pair.) @@ -62,12 +95,8 @@ * But, doing so is slower than access by position (index). To provide efficient * code, the tuple classes assume that the implementation imposes a column * ordering which can be exposed via the indexes.
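A sketch of that trade-off (hypothetical column name and row loop; the tuple reader comes from elsewhere): resolve the name to an index once, then use positional access in the per-row loop:

    int amountIdx = rowReader.schema().index("amount"); // name lookup, once
    double total = 0;
    while (nextRow()) {                                 // hypothetical row iteration
      total += rowReader.scalar(amountIdx).getDouble(); // fast positional access
    }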
- *
<dt>ColumnAccessor</dt>
- *
A generic base class for column readers and writers that provides the - * column data type.
- *
<dt>ColumnWriter, ColumnReader</dt>
- *
A uniform interface implemented for each column type ("major type" in - * Drill terminology). The scalar types: Nullable (Drill optional) and + *
<dt>ScalarWriter, ScalarReader</dt>
+ *
A uniform interface for the scalar types: Nullable (Drill optional) and * non-nullable (Drill required) fields use the same interface. Arrays (Drill * repeated) are special. To handle the array aspect, even array fields use the * same interface, but the getArray method returns another layer of @@ -98,11 +127,11 @@ *
The generated accessors: one for each combination of write/read, data * (minor) type and cardinality (data model). *
- *
<dt>RowIndex</dt>
+ *
<dt>ColumnReaderIndex, ColumnWriterIndex</dt>
*
This nested class binds the accessor to the current row position for the * entire record batch. That is, you don't ask for the value of column a for row * 5, then the value of column b for row 5, etc. as with the "raw" vectors. - * Instead, the implementation sets the row position (with, say an interator.) + * Instead, the implementation sets the row position (with, say an iterator.) * Then, all columns implicitly return values for the current row. *
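For example, a minimal direct (no selection vector) index might look like the sketch below, assuming ColumnReaderIndex declares just the batchIndex() and vectorIndex() methods used elsewhere in this patch; the advance method is hypothetical:

    class DirectRowIndex implements ColumnReaderIndex {
      private int row = -1;
      @Override public int batchIndex() { return 0; }    // single batch in this sketch
      @Override public int vectorIndex() { return row; } // row N maps straight to position N
      public boolean advance(int rowCount) { return ++row < rowCount; }
    }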

* Different implementations of the row index handle the case of no selection @@ -122,6 +151,16 @@ * The column index picks out the x coordinate (horizontal position along the * columns.) *

+ *

<h4>Column Writer Optimizations</h4>

+ * The writer classes here started as a simple abstraction on top of the existing + * vector mutators. The classes were then recruited for use in a new writer + * abstraction for Drill's record readers. At that point, performance became + * critical. The key to performance is to bypass the vector and the mutator and + * instead work with the Netty direct memory functions. This seems a risky + * approach until we realize that the writers form a very clear interface: + * the same interface supported the original mutator-based implementation and + * the revised Netty-based implementation. The benefit, however, is stark; + * the direct-to-Netty version is up to 4x faster (for repeated types). */ package org.apache.drill.exec.vector.accessor; diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java new file mode 100644 index 00000000000..dd78463d580 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.reader; + +import org.apache.drill.exec.vector.UInt4Vector.Accessor; +import org.apache.drill.exec.vector.accessor.ArrayReader; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.ObjectReader; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.TupleReader; +import org.apache.drill.exec.vector.complex.RepeatedValueVector; + +/** + * Reader for an array-valued column. This reader provides access to specific + * array members via an array index. This is an abstract base class; + * subclasses are generated for each repeated value vector type. + */ + +public abstract class AbstractArrayReader implements ArrayReader { + + /** + * Object representation of an array reader. 
+ */ + + public static class ArrayObjectReader extends AbstractObjectReader { + + private AbstractArrayReader arrayReader; + + public ArrayObjectReader(AbstractArrayReader arrayReader) { + this.arrayReader = arrayReader; + } + + @Override + public void bindIndex(ColumnReaderIndex index) { + arrayReader.bindIndex(index); + } + + @Override + public ObjectType type() { + return ObjectType.ARRAY; + } + + @Override + public ArrayReader array() { + return arrayReader; + } + + @Override + public ScalarElementReader elements() { + return arrayReader.elements(); + } + + @Override + public Object getObject() { + return arrayReader.getObject(); + } + + @Override + public String getAsString() { + return arrayReader.getAsString(); + } + + @Override + public void reposition() { + arrayReader.reposition(); + } + } + + public static class BaseElementIndex { + private final ColumnReaderIndex base; + protected int startOffset; + protected int length; + + public BaseElementIndex(ColumnReaderIndex base) { + this.base = base; + } + + public int batchIndex() { + return base.batchIndex(); + } + + public void reset(int startOffset, int length) { + this.startOffset = startOffset; + this.length = length; + } + + public int size() { return length; } + + public int elementIndex(int index) { + if (index < 0 || length <= index) { + throw new IndexOutOfBoundsException("Index = " + index + ", length = " + length); + } + return startOffset + index; + } + } + + private final Accessor accessor; + private final VectorAccessor vectorAccessor; + protected ColumnReaderIndex baseIndex; + protected BaseElementIndex elementIndex; + + public AbstractArrayReader(RepeatedValueVector vector) { + accessor = vector.getOffsetVector().getAccessor(); + vectorAccessor = null; + } + + public AbstractArrayReader(VectorAccessor vectorAccessor) { + accessor = null; + this.vectorAccessor = vectorAccessor; + } + + public void bindIndex(ColumnReaderIndex index) { + baseIndex = index; + if (vectorAccessor != null) { + vectorAccessor.bind(index); + } + } + + private Accessor accessor() { + if (accessor != null) { + return accessor; + } + return ((RepeatedValueVector) (vectorAccessor.vector())).getOffsetVector().getAccessor(); + } + + public void reposition() { + final int index = baseIndex.vectorIndex(); + Accessor curAccesssor = accessor(); + final int startPosn = curAccesssor.get(index); + elementIndex.reset(startPosn, curAccesssor.get(index + 1) - startPosn); + } + + @Override + public int size() { return elementIndex.size(); } + + @Override + public ScalarElementReader elements() { + throw new UnsupportedOperationException(); + } + + @Override + public ObjectReader entry(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public TupleReader tuple(int index) { + return entry(index).tuple(); + } + + @Override + public ArrayReader array(int index) { + return entry(index).array(); + } + + @Override + public ObjectReader entry() { + throw new UnsupportedOperationException(); + } + + @Override + public TupleReader tuple() { + return entry().tuple(); + } + + @Override + public ArrayReader array() { + return entry().array(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java new file mode 100644 index 00000000000..59a066e05ec --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java @@ -0,0 +1,52 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.reader; + +import org.apache.drill.exec.vector.accessor.ArrayReader; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.ObjectReader; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.TupleReader; + +public abstract class AbstractObjectReader implements ObjectReader { + + public abstract void bindIndex(ColumnReaderIndex index); + + public void reposition() { } + + @Override + public ScalarReader scalar() { + throw new UnsupportedOperationException(); + } + + @Override + public TupleReader tuple() { + throw new UnsupportedOperationException(); + } + + @Override + public ArrayReader array() { + throw new UnsupportedOperationException(); + } + + @Override + public ScalarElementReader elements() { + throw new UnsupportedOperationException(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java new file mode 100644 index 00000000000..afa0cb727e9 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.vector.accessor.reader; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.accessor.ArrayReader; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.ObjectReader; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.TupleReader; + +/** + * Reader for a tuple (a row or a map.) Provides access to each + * column using either a name or a numeric index. + */ + +public abstract class AbstractTupleReader implements TupleReader { + + public static class TupleObjectReader extends AbstractObjectReader { + + private AbstractTupleReader tupleReader; + + public TupleObjectReader(AbstractTupleReader tupleReader) { + this.tupleReader = tupleReader; + } + + @Override + public void bindIndex(ColumnReaderIndex index) { + tupleReader.bindIndex(index); + } + + @Override + public ObjectType type() { + return ObjectType.TUPLE; + } + + @Override + public TupleReader tuple() { + return tupleReader; + } + + @Override + public Object getObject() { + return tupleReader.getObject(); + } + + @Override + public String getAsString() { + return tupleReader.getAsString(); + } + + @Override + public void reposition() { + tupleReader.reposition(); + } + } + + protected final TupleMetadata schema; + private final AbstractObjectReader readers[]; + + protected AbstractTupleReader(TupleMetadata schema, AbstractObjectReader readers[]) { + this.schema = schema; + this.readers = readers; + } + + public void bindIndex(ColumnReaderIndex index) { + for (int i = 0; i < readers.length; i++) { + readers[i].bindIndex(index); + } + } + + @Override + public TupleMetadata schema() { return schema; } + + @Override + public int columnCount() { return schema().size(); } + + @Override + public ObjectReader column(int colIndex) { + return readers[colIndex]; + } + + @Override + public ObjectReader column(String colName) { + int index = schema.index(colName); + if (index == -1) { + return null; } + return readers[index]; + } + + @Override + public ScalarReader scalar(int colIndex) { + return column(colIndex).scalar(); + } + + @Override + public ScalarReader scalar(String colName) { + return column(colName).scalar(); + } + + @Override + public TupleReader tuple(int colIndex) { + return column(colIndex).tuple(); + } + + @Override + public TupleReader tuple(String colName) { + return column(colName).tuple(); + } + + @Override + public ArrayReader array(int colIndex) { + return column(colIndex).array(); + } + + @Override + public ArrayReader array(String colName) { + return column(colName).array(); + } + + @Override + public ObjectType type(int colIndex) { + return column(colIndex).type(); + } + + @Override + public ObjectType type(String colName) { + return column(colName).type(); + } + + @Override + public ScalarElementReader elements(int colIndex) { + return column(colIndex).elements(); + } + + @Override + public ScalarElementReader elements(String colName) { + return column(colName).elements(); + } + + public void reposition() { + for (int i = 0; i < columnCount(); i++) { + readers[i].reposition(); + } + } + + @Override + public Object getObject() { + List elements = new ArrayList<>(); + for (int i = 0; i < columnCount(); i++) { + elements.add(readers[i].getObject()); + } + return 
elements; + } + + @Override + public String getAsString() { + StringBuilder buf = new StringBuilder(); + buf.append("("); + for (int i = 0; i < columnCount(); i++) { + if (i > 0) { + buf.append( ", " ); + } + buf.append(readers[i].getAsString()); + } + buf.append(")"); + return buf.toString(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java new file mode 100644 index 00000000000..f32c101c26f --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.reader; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities; +import org.joda.time.Period; + +public abstract class BaseElementReader implements ScalarElementReader { + + public static class ScalarElementObjectReader extends AbstractObjectReader { + + private BaseElementReader elementReader; + + public ScalarElementObjectReader(BaseElementReader elementReader) { + this.elementReader = elementReader; + } + + @Override + public void bindIndex(ColumnReaderIndex index) { + elementReader.bindIndex((ElementReaderIndex) index); + } + + @Override + public ObjectType type() { + return ObjectType.SCALAR; + } + + @Override + public ScalarElementReader elements() { + return elementReader; + } + + @Override + public Object getObject() { + // Simple: return elements as an object list. + // If really needed, could return as a typed array, but that + // is a bit of a hassle. 
+ + List elements = new ArrayList<>(); + for (int i = 0; i < elementReader.size(); i++) { + elements.add(elementReader.getObject(i)); + } + return elements; + } + + @Override + public String getAsString() { + StringBuilder buf = new StringBuilder(); + buf.append("["); + for (int i = 0; i < elementReader.size(); i++) { + if (i > 0) { + buf.append( ", " ); + } + buf.append(elementReader.getAsString(i)); + } + buf.append("]"); + return buf.toString(); + } + } + + protected ElementReaderIndex vectorIndex; + protected VectorAccessor vectorAccessor; + + public abstract void bindVector(ValueVector vector); + + public void bindVector(MajorType majorType, VectorAccessor va) { + vectorAccessor = va; + } + + protected void bindIndex(ElementReaderIndex rowIndex) { + this.vectorIndex = rowIndex; + } + + @Override + public int size() { return vectorIndex.size(); } + + @Override + public Object getObject(int index) { + if (isNull(index)) { + return "null"; + } + switch (valueType()) { + case BYTES: + return getBytes(index); + case DECIMAL: + return getDecimal(index); + case DOUBLE: + return getDouble(index); + case INTEGER: + return getInt(index); + case LONG: + return getLong(index); + case PERIOD: + return getPeriod(index); + case STRING: + return getString(index); + default: + throw new IllegalStateException("Unexpected type: " + valueType()); + } + } + + @Override + public String getAsString(int index) { + switch (valueType()) { + case BYTES: + return AccessorUtilities.bytesToString(getBytes(index)); + case DOUBLE: + return Double.toString(getDouble(index)); + case INTEGER: + return Integer.toString(getInt(index)); + case LONG: + return Long.toString(getLong(index)); + case STRING: + return "\"" + getString(index) + "\""; + case DECIMAL: + return getDecimal(index).toPlainString(); + case PERIOD: + return getPeriod(index).normalizedStandard().toString(); + default: + throw new IllegalArgumentException("Unsupported type " + valueType()); + } + } + + @Override + public boolean isNull(int index) { + return false; + } + + @Override + public int getInt(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public long getLong(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public double getDouble(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public String getString(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public byte[] getBytes(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public BigDecimal getDecimal(int index) { + throw new UnsupportedOperationException(); + } + + @Override + public Period getPeriod(int index) { + throw new UnsupportedOperationException(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java new file mode 100644 index 00000000000..fb9a71160ff --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.reader; + +import java.math.BigDecimal; + +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarReader; +import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities; +import org.joda.time.Period; + +/** + * Column reader implementation that acts as the basis for the + * generated, vector-specific implementations. All set methods + * throw an exception; subclasses simply override the supported + * method(s). + */ + +public abstract class BaseScalarReader implements ScalarReader { + + public static class ScalarObjectReader extends AbstractObjectReader { + + private BaseScalarReader scalarReader; + + public ScalarObjectReader(BaseScalarReader scalarReader) { + this.scalarReader = scalarReader; + } + + @Override + public void bindIndex(ColumnReaderIndex index) { + scalarReader.bindIndex(index); + } + + @Override + public ObjectType type() { + return ObjectType.SCALAR; + } + + @Override + public ScalarReader scalar() { + return scalarReader; + } + + @Override + public Object getObject() { + return scalarReader.getObject(); + } + + @Override + public String getAsString() { + return scalarReader.getAsString(); + } + } + + protected ColumnReaderIndex vectorIndex; + protected VectorAccessor vectorAccessor; + + public static ScalarObjectReader build(ValueVector vector, BaseScalarReader reader) { + reader.bindVector(vector); + return new ScalarObjectReader(reader); + } + + public static AbstractObjectReader build(MajorType majorType, VectorAccessor va, + BaseScalarReader reader) { + reader.bindVector(majorType, va); + return new ScalarObjectReader(reader); + } + + public abstract void bindVector(ValueVector vector); + + protected void bindIndex(ColumnReaderIndex rowIndex) { + this.vectorIndex = rowIndex; + if (vectorAccessor != null) { + vectorAccessor.bind(rowIndex); + } + } + + public void bindVector(MajorType majorType, VectorAccessor va) { + vectorAccessor = va; + } + + @Override + public Object getObject() { + if (isNull()) { + return null; + } + switch (valueType()) { + case BYTES: + return getBytes(); + case DECIMAL: + return getDecimal(); + case DOUBLE: + return getDouble(); + case INTEGER: + return getInt(); + case LONG: + return getLong(); + case PERIOD: + return getPeriod(); + case STRING: + return getString(); + default: + throw new IllegalStateException("Unexpected type: " + valueType()); + } + } + + @Override + public String getAsString() { + if (isNull()) { + return "null"; + } + switch (valueType()) { + case BYTES: + return AccessorUtilities.bytesToString(getBytes()); + case DOUBLE: + return Double.toString(getDouble()); + case INTEGER: + return Integer.toString(getInt()); + case LONG: + return Long.toString(getLong()); + case STRING: + return "\"" + getString() + "\""; + case DECIMAL: + return getDecimal().toPlainString(); + case PERIOD: + return 
getPeriod().normalizedStandard().toString(); + default: + throw new IllegalArgumentException("Unsupported type " + valueType()); + } + } + + @Override + public boolean isNull() { + return false; + } + + @Override + public int getInt() { + throw new UnsupportedOperationException(); + } + + @Override + public long getLong() { + throw new UnsupportedOperationException(); + } + + @Override + public double getDouble() { + throw new UnsupportedOperationException(); + } + + @Override + public String getString() { + throw new UnsupportedOperationException(); + } + + @Override + public byte[] getBytes() { + throw new UnsupportedOperationException(); + } + + @Override + public BigDecimal getDecimal() { + throw new UnsupportedOperationException(); + } + + @Override + public Period getPeriod() { + throw new UnsupportedOperationException(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ElementReaderIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ElementReaderIndex.java new file mode 100644 index 00000000000..9985edc868e --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ElementReaderIndex.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.reader; + +public interface ElementReaderIndex { + int batchIndex(); + int size(); + int vectorIndex(int posn); +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java new file mode 100644 index 00000000000..4f3aeeb9150 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.vector.accessor.reader; + +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.reader.AbstractArrayReader.BaseElementIndex; + +/** + * Index into the vector of elements for a repeated vector. + * Keeps track of the current offset in terms of value positions. + */ + +public class FixedWidthElementReaderIndex extends BaseElementIndex implements ElementReaderIndex { + + public FixedWidthElementReaderIndex(ColumnReaderIndex base) { + super(base); + } + + @Override + public int vectorIndex(int posn) { + return elementIndex(posn); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java new file mode 100644 index 00000000000..f921a2ccdf9 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.reader; + +import org.apache.drill.exec.record.TupleMetadata.ColumnMetadata; + +/** + * Reader for a Drill Map type. Maps are actually tuples, just like rows. + */ + +public class MapReader extends AbstractTupleReader { + + protected MapReader(ColumnMetadata schema, AbstractObjectReader readers[]) { + super(schema.mapSchema(), readers); + } + + public static TupleObjectReader build(ColumnMetadata schema, AbstractObjectReader readers[]) { + return new TupleObjectReader(new MapReader(schema, readers)); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java new file mode 100644 index 00000000000..9ed89f1c728 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.reader; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.ObjectReader; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.complex.RepeatedValueVector; + +/** + * Reader for an array of either tuples or other arrays. + */ + +public class ObjectArrayReader extends AbstractArrayReader { + + /** + * Index into the vector of elements for a repeated vector. + * Keeps track of the current offset in terms of value positions. + * This is a derived index. The base index points to an entry + * in the offset vector for the array. This inner index picks + * off elements within the range of offsets for that one entry. + * For example:
<pre><code>
+   * [ ... 100 105 ...]
+   * </code></pre>
+   * In the above the value 100 might be at outer + * offset 5. The inner array will pick off the five values + * 100...104. + * <p>
+ * Because arrays allow random access on read, the inner offset + * is reset on each access to the array. + */ + + public static class ObjectElementReaderIndex extends BaseElementIndex implements ColumnReaderIndex { + + private int posn; + + public ObjectElementReaderIndex(ColumnReaderIndex base) { + super(base); + } + + @Override + public int vectorIndex() { + return startOffset + posn; + } + + public void set(int index) { + if (index < 0 || length <= index) { + throw new IndexOutOfBoundsException("Index = " + index + ", length = " + length); + } + posn = index; + } + + public int posn() { return posn; } + } + + /** + * Reader for each element. + */ + + private final AbstractObjectReader elementReader; + + /** + * Index used to access elements. + */ + + private ObjectElementReaderIndex objElementIndex; + + private ObjectArrayReader(RepeatedValueVector vector, AbstractObjectReader elementReader) { + super(vector); + this.elementReader = elementReader; + } + + private ObjectArrayReader(VectorAccessor vectorAccessor, AbstractObjectReader elementReader) { + super(vectorAccessor); + this.elementReader = elementReader; + } + + public static ArrayObjectReader build(RepeatedValueVector vector, + AbstractObjectReader elementReader) { + return new ArrayObjectReader( + new ObjectArrayReader(vector, elementReader)); + } + + public static AbstractObjectReader build(VectorAccessor vectorAccessor, + AbstractObjectReader elementReader) { + return new ArrayObjectReader( + new ObjectArrayReader(vectorAccessor, elementReader)); + } + + @Override + public void bindIndex(ColumnReaderIndex index) { + super.bindIndex(index); + objElementIndex = new ObjectElementReaderIndex(baseIndex); + elementIndex = objElementIndex; + elementReader.bindIndex(objElementIndex); + } + + @Override + public ObjectType entryType() { + return elementReader.type(); + } + + @Override + public void setPosn(int index) { + objElementIndex.set(index); + elementReader.reposition(); + } + + @Override + public ObjectReader entry() { + return elementReader; + } + + @Override + public ObjectReader entry(int index) { + setPosn(index); + return entry(); + } + + @Override + public Object getObject() { + List array = new ArrayList<>(); + for (int i = 0; i < objElementIndex.size(); i++) { + array.add(entry(i).getObject()); + } + return array; + } + + @Override + public String getAsString() { + StringBuilder buf = new StringBuilder(); + buf.append("["); + for (int i = 0; i < size(); i++) { + if (i > 0) { + buf.append( ", " ); + } + buf.append(entry(i).getAsString()); + } + buf.append("]"); + return buf.toString(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java new file mode 100644 index 00000000000..d93e4a59920 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
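// Editor's note: hypothetical usage sketch, not part of this patch. It
// assumes the public ArrayReader interface exposes size() and entry(int),
// as the @Override markers above indicate (imports of java.util.List,
// java.util.ArrayList and ArrayReader assumed), and shows the
// random-access protocol that getObject() relies on:
static List<Object> readAll(ArrayReader array) {
  List<Object> out = new ArrayList<>();
  for (int i = 0; i < array.size(); i++) {
    out.add(array.entry(i).getObject());  // entry(i) repositions, then reads
  }
  return out;
}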
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.reader; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarElementReader; +import org.apache.drill.exec.vector.complex.RepeatedValueVector; + +public class ScalarArrayReader extends AbstractArrayReader { + + private final BaseElementReader elementReader; + + private ScalarArrayReader(RepeatedValueVector vector, + BaseElementReader elementReader) { + super(vector); + this.elementReader = elementReader; + } + + private ScalarArrayReader(VectorAccessor va, + BaseElementReader elementReader) { + super(va); + this.elementReader = elementReader; + } + + public static ArrayObjectReader build(RepeatedValueVector vector, + BaseElementReader elementReader) { + elementReader.bindVector(vector.getDataVector()); + return new ArrayObjectReader(new ScalarArrayReader(vector, elementReader)); + } + + public static ArrayObjectReader build(MajorType majorType, VectorAccessor va, + BaseElementReader elementReader) { + elementReader.bindVector(majorType, va); + return new ArrayObjectReader(new ScalarArrayReader(va, elementReader)); + } + + @Override + public void bindIndex(ColumnReaderIndex index) { + super.bindIndex(index); + FixedWidthElementReaderIndex fwElementIndex = new FixedWidthElementReaderIndex(baseIndex); + elementIndex = fwElementIndex; + elementReader.bindIndex(fwElementIndex); + } + + @Override + public ObjectType entryType() { + return ObjectType.SCALAR; + } + + @Override + public ScalarElementReader elements() { + return elementReader; + } + + @Override + public void setPosn(int index) { + throw new IllegalStateException("setPosn() not supported for scalar arrays"); + } + + @Override + public Object getObject() { + List elements = new ArrayList<>(); + for (int i = 0; i < size(); i++) { + elements.add(elementReader.getObject(i)); + } + return elements; + } + + @Override + public String getAsString() { + StringBuilder buf = new StringBuilder(); + buf.append("["); + for (int i = 0; i < size(); i++) { + if (i > 0) { + buf.append( ", " ); + } + buf.append(elementReader.getAsString(i)); + } + buf.append("]"); + return buf.toString(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java similarity index 59% rename from exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnAccessor.java rename to exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java index 5b751c52ba9..1cf2a196560 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnAccessor.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java @@ -15,29 +15,12 @@ * See the License for the specific language governing permissions and * 
limitations under the License. */ -package org.apache.drill.exec.vector.accessor.impl; +package org.apache.drill.exec.vector.accessor.reader; import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ColumnReaderIndex; -/** - * Abstract base class for column readers and writers that - * implements the mechanism for binding accessors to a row - * index. The row index is implicit: index a row, then - * column accessors pull out columns from that row. - */ - -public abstract class AbstractColumnAccessor { - - public interface RowIndex { - int batch(); - int index(); - } - - protected RowIndex vectorIndex; - - protected void bind(RowIndex rowIndex) { - this.vectorIndex = rowIndex; - } - - public abstract void bind(RowIndex rowIndex, ValueVector vector); +public interface VectorAccessor { + void bind(ColumnReaderIndex index); + ValueVector vector(); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java new file mode 100644 index 00000000000..a94d2e844d3 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Provides the reader hierarchy as explained in the API package. + * The only caveat is that a simplification is provided for arrays of + * scalar values: rather than a scalar reader for each value, the + * {@link ScalarElementReader} class provides access to the entire array + * via indexed get methods. + */ + +package org.apache.drill.exec.vector.accessor.reader; \ No newline at end of file diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractArrayWriter.java new file mode 100644 index 00000000000..4a986261137 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractArrayWriter.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
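// Editor's note: a minimal, hypothetical implementation of the
// VectorAccessor interface above for the single-batch case; real
// implementations would resolve a vector per batch (e.g. for
// hyper-vectors):
class SingleBatchVectorAccessor implements VectorAccessor {
  private final ValueVector vector;

  SingleBatchVectorAccessor(ValueVector vector) { this.vector = vector; }

  @Override public void bind(ColumnReaderIndex index) { }  // nothing to select
  @Override public ValueVector vector() { return vector; }
}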
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import org.apache.drill.exec.vector.accessor.ArrayWriter; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ObjectWriter; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.exec.vector.complex.RepeatedValueVector; + +/** + * Writer for an array-valued column. This writer appends values: once a value + * is written, it cannot be changed. As a result, writer methods have no item + * index; each set advances the array to the next position. + *
<p>
+ * This class represents the array as a whole. In practice that means building + * the offset vector. The array is associated with an element object that + * manages writing to the scalar, array or tuple that is the array element. Note + * that this representation makes little use of the methods in the "Repeated" + * vector class: instead it works directly with the offset and element vectors. + */ + +public abstract class AbstractArrayWriter implements ArrayWriter, WriterEvents { + + /** + * Object representation of an array writer. + */ + + public static class ArrayObjectWriter extends AbstractObjectWriter { + + private AbstractArrayWriter arrayWriter; + + public ArrayObjectWriter(AbstractArrayWriter arrayWriter) { + this.arrayWriter = arrayWriter; + } + + @Override + public void bindIndex(ColumnWriterIndex index) { + arrayWriter.bindIndex(index); + } + + @Override + public ObjectType type() { + return ObjectType.ARRAY; + } + + @Override + public void set(Object value) { + arrayWriter.setObject(value); + } + + public void start() { + arrayWriter.startWrite(); + } + + @Override + public ArrayWriter array() { + return arrayWriter; + } + + @Override + public void startWrite() { + arrayWriter.startWrite(); + } + + @Override + public void startValue() { + arrayWriter.startValue(); + } + + @Override + public void endValue() { + arrayWriter.endValue(); + } + + @Override + public void endWrite() { + arrayWriter.endWrite(); + } + } + + /** + * Index into the vector of elements for a repeated vector. + * Keeps track of the current offset in terms of value positions. + * Forwards overflow events to the base index. + */ + + public class ArrayElementWriterIndex implements ColumnWriterIndex { + + private final ColumnWriterIndex baseIndex; + private int startOffset = 0; + private int offset = 0; + + public ArrayElementWriterIndex(ColumnWriterIndex baseIndex) { + this.baseIndex = baseIndex; + } + + public ColumnWriterIndex baseIndex() { return baseIndex; } + + public void reset() { + offset = 0; + startOffset = 0; + } + + public int endValue() { + startOffset = offset; + return offset; + } + + @Override + public int vectorIndex() { return offset; } + + @Override + public void overflowed() { + baseIndex.overflowed(); + } + + public int arraySize() { + return offset - startOffset; + } + + @Override + public void nextElement() { offset++; } + + @Override + public boolean legal() { + return true; + } + } + + protected final AbstractObjectWriter elementObjWriter; + private final OffsetVectorWriter offsetsWriter = new OffsetVectorWriter(); + private ColumnWriterIndex baseIndex; + protected ArrayElementWriterIndex elementIndex; + + public AbstractArrayWriter(RepeatedValueVector vector, AbstractObjectWriter elementObjWriter) { + this.elementObjWriter = elementObjWriter; + offsetsWriter.bindVector(vector.getOffsetVector()); + } + + public void bindIndex(ColumnWriterIndex index) { + baseIndex = index; + offsetsWriter.bindIndex(index); + elementIndex = new ArrayElementWriterIndex(baseIndex); + elementObjWriter.bindIndex(elementIndex); + } + + protected ColumnWriterIndex elementIndex() { return elementIndex; } + + @Override + public int size() { + return elementIndex.arraySize(); + } + + @Override + public ObjectWriter entry() { + return elementObjWriter; + } + + @Override + public void startWrite() { + elementIndex.reset(); + elementObjWriter.startWrite(); + } + + @Override + public void startValue() { } + + @Override + public void endValue() { + offsetsWriter.setOffset(elementIndex.endValue()); + } + + @Override 
+ public void endWrite() { + offsetsWriter.finish(); + elementObjWriter.endWrite(); + } + + @Override + public ObjectType entryType() { + return elementObjWriter.type(); + } + + @Override + public ScalarWriter scalar() { + return elementObjWriter.scalar(); + } + + @Override + public TupleWriter tuple() { + return elementObjWriter.tuple(); + } + + @Override + public ArrayWriter array() { + return elementObjWriter.array(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractObjectWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractObjectWriter.java new file mode 100644 index 00000000000..d78d3321072 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractObjectWriter.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import org.apache.drill.exec.vector.accessor.ArrayWriter; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ObjectWriter; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.TupleWriter; + +public abstract class AbstractObjectWriter implements ObjectWriter, WriterEvents { + + public abstract void bindIndex(ColumnWriterIndex index); + + @Override + public ScalarWriter scalar() { + throw new UnsupportedOperationException(); + } + + @Override + public TupleWriter tuple() { + throw new UnsupportedOperationException(); + } + + @Override + public ArrayWriter array() { + throw new UnsupportedOperationException(); + } + + @Override public void startWrite() { } + @Override public void startValue() { } + @Override public void endValue() { } + @Override public void endWrite() { } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java new file mode 100644 index 00000000000..9bb6be0ffe1 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
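// Editor's note: sketch, not part of this patch, of the bookkeeping in
// ArrayElementWriterIndex above, using plain ints in place of vectors:
// nextElement() advances the element offset, endValue() closes the row's
// array and yields the end offset written to the offset vector.
class OffsetBookkeepingDemo {
  int startOffset, offset;

  void nextElement() { offset++; }
  int arraySize() { return offset - startOffset; }
  int endValue() { startOffset = offset; return offset; }

  public static void main(String[] args) {
    OffsetBookkeepingDemo ix = new OffsetBookkeepingDemo();
    ix.nextElement(); ix.nextElement(); ix.nextElement();       // row 0: 3 elements
    System.out.println(ix.arraySize() + ", " + ix.endValue());  // 3, 3
    ix.nextElement();                                           // row 1: 1 element
    System.out.println(ix.arraySize() + ", " + ix.endValue());  // 1, 4
  }
}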
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import java.math.BigDecimal; + +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.joda.time.Period; + +/** + * Column writer implementation that acts as the basis for the + * generated, vector-specific implementations. All set methods + * throw an exception; subclasses simply override the supported + * method(s). + */ + +public abstract class AbstractScalarWriter implements ScalarWriter, WriterEvents { + + public static class ScalarObjectWriter extends AbstractObjectWriter { + + private AbstractScalarWriter scalarWriter; + + public ScalarObjectWriter(AbstractScalarWriter scalarWriter) { + this.scalarWriter = scalarWriter; + } + + @Override + public void bindIndex(ColumnWriterIndex index) { + scalarWriter.bindIndex(index); + } + + @Override + public ObjectType type() { + return ObjectType.SCALAR; + } + + @Override + public void set(Object value) { + scalarWriter.setObject(value); + } + + public void start() { + scalarWriter.startWrite(); + } + + @Override + public ScalarWriter scalar() { + return scalarWriter; + } + + @Override + public void startWrite() { + scalarWriter.startWrite(); + } + + @Override + public void startValue() { + scalarWriter.startValue(); + } + + @Override + public void endValue() { + scalarWriter.endValue(); + } + + @Override + public void endWrite() { + scalarWriter.endWrite(); + } + } + + public abstract void bindIndex(ColumnWriterIndex index); + + public abstract void bindVector(ValueVector vector); + + @Override + public void setObject(Object value) { + if (value == null) { + setNull(); + } else if (value instanceof Integer) { + setInt((Integer) value); + } else if (value instanceof Long) { + setLong((Long) value); + } else if (value instanceof String) { + setString((String) value); + } else if (value instanceof BigDecimal) { + setDecimal((BigDecimal) value); + } else if (value instanceof Period) { + setPeriod((Period) value); + } else if (value instanceof byte[]) { + byte[] bytes = (byte[]) value; + setBytes(bytes, bytes.length); + } else if (value instanceof Byte) { + setInt((Byte) value); + } else if (value instanceof Short) { + setInt((Short) value); + } else if (value instanceof Double) { + setDouble((Double) value); + } else if (value instanceof Float) { + setDouble((Float) value); + } else { + throw new IllegalArgumentException("Unsupported type " + + value.getClass().getSimpleName()); + } + } + + @Override + public void startWrite() { } + + @Override + public void startValue() { } + + @Override + public void endValue() { } + + @Override + public void endWrite() { + finish(); + } + + /** + * Overridden by generated classes to finish up writing. Such as + * setting the final element count. 
+ * + * @throws VectorOverflowException should not actually occur + */ + + public void finish() { } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractTupleWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractTupleWriter.java new file mode 100644 index 00000000000..775ceaee8ea --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractTupleWriter.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import org.apache.drill.exec.record.TupleMetadata; +import org.apache.drill.exec.vector.accessor.ArrayWriter; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ObjectType; +import org.apache.drill.exec.vector.accessor.ObjectWriter; +import org.apache.drill.exec.vector.accessor.ScalarWriter; +import org.apache.drill.exec.vector.accessor.TupleWriter; + +/** + * Implementation for a writer for a tuple (a row or a map.) Provides access to each + * column using either a name or a numeric index. + */ + +public abstract class AbstractTupleWriter implements TupleWriter, WriterEvents { + + /** + * Generic object wrapper for the tuple writer. 
+ */ + + public static class TupleObjectWriter extends AbstractObjectWriter { + + private AbstractTupleWriter tupleWriter; + + public TupleObjectWriter(AbstractTupleWriter tupleWriter) { + this.tupleWriter = tupleWriter; + } + + @Override + public void bindIndex(ColumnWriterIndex index) { + tupleWriter.bindIndex(index); + } + + @Override + public ObjectType type() { + return ObjectType.TUPLE; + } + + @Override + public void set(Object value) { + tupleWriter.setObject(value); + } + + @Override + public void startWrite() { + tupleWriter.startWrite(); + } + + @Override + public void startValue() { + tupleWriter.startValue(); + } + + @Override + public void endValue() { + tupleWriter.endValue(); + } + + @Override + public void endWrite() { + tupleWriter.endWrite(); + } + + @Override + public TupleWriter tuple() { + return tupleWriter; + } + } + + protected ColumnWriterIndex vectorIndex; + protected final TupleMetadata schema; + protected final AbstractObjectWriter writers[]; + + protected AbstractTupleWriter(TupleMetadata schema, AbstractObjectWriter writers[]) { + this.schema = schema; + this.writers = writers; + } + + public void bindIndex(ColumnWriterIndex index) { + vectorIndex = index; + for (int i = 0; i < writers.length; i++) { + writers[i].bindIndex(index); + } + } + + @Override + public TupleMetadata schema() { return schema; } + + @Override + public int size() { return schema().size(); } + + @Override + public void startWrite() { + for (int i = 0; i < writers.length; i++) { + writers[i].startWrite(); + } + } + + @Override + public void startValue() { + for (int i = 0; i < writers.length; i++) { + writers[i].startValue(); + } + } + + @Override + public void endValue() { + for (int i = 0; i < writers.length; i++) { + writers[i].endValue(); + } + } + + @Override + public void endWrite() { + for (int i = 0; i < writers.length; i++) { + writers[i].endWrite(); + } + } + + @Override + public ObjectWriter column(int colIndex) { + return writers[colIndex]; + } + + @Override + public ObjectWriter column(String colName) { + int index = schema.index(colName); + if (index == -1) { + return null; } + return writers[index]; + } + + @Override + public void set(int colIndex, Object value) { + ObjectWriter colWriter = column(colIndex); + switch (colWriter.type()) { + case ARRAY: + colWriter.array().setObject(value); + break; + case SCALAR: + colWriter.scalar().setObject(value); + break; + case TUPLE: + colWriter.tuple().setObject(value); + break; + default: + throw new IllegalStateException("Unexpected object type: " + colWriter.type()); + } + } + + @Override + public void setTuple(Object ...values) { + setObject(values); + } + + @Override + public void setObject(Object value) { + Object values[] = (Object[]) value; + int count = Math.min(values.length, schema().size()); + for (int i = 0; i < count; i++) { + set(i, values[i]); + } + } + + @Override + public ScalarWriter scalar(int colIndex) { + return column(colIndex).scalar(); + } + + @Override + public ScalarWriter scalar(String colName) { + return column(colName).scalar(); + } + + @Override + public TupleWriter tuple(int colIndex) { + return column(colIndex).tuple(); + } + + @Override + public TupleWriter tuple(String colName) { + return column(colName).tuple(); + } + + @Override + public ArrayWriter array(int colIndex) { + return column(colIndex).array(); + } + + @Override + public ArrayWriter array(String colName) { + return column(colName).array(); + } + + @Override + public ObjectType type(int colIndex) { + return column(colIndex).type(); + } + 
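  // Editor's note: hypothetical example, not part of the patch, showing
  // the generic protocol above for a (INT, VARCHAR, repeated INT) row
  // schema: setTuple() forwards to setObject(), which routes each member
  // through set(i, value) according to the column's object type.
  static void exampleRow(TupleWriter rowWriter) {
    rowWriter.setTuple(10, "fred", new int[] { 10, 11 });
  }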
+ @Override + public ObjectType type(String colName) { + return column(colName).type(); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseScalarWriter.java similarity index 66% rename from exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnWriter.java rename to exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseScalarWriter.java index 5071e033a3d..72b73c1cde7 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnWriter.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseScalarWriter.java @@ -15,13 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.drill.exec.vector.accessor.impl; +package org.apache.drill.exec.vector.accessor.writer; import java.math.BigDecimal; -import org.apache.drill.exec.vector.accessor.ArrayWriter; -import org.apache.drill.exec.vector.accessor.ColumnWriter; -import org.apache.drill.exec.vector.accessor.TupleWriter; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; import org.joda.time.Period; /** @@ -31,13 +30,31 @@ * method(s). */ -public abstract class AbstractColumnWriter extends AbstractColumnAccessor implements ColumnWriter { +public abstract class BaseScalarWriter extends AbstractScalarWriter { - public void start() { } + protected ColumnWriterIndex vectorIndex; + protected int lastWriteIndex; + protected long bufAddr; + protected int capacity; + + public static ScalarObjectWriter build(ValueVector vector, BaseScalarWriter writer) { + writer.bindVector(vector); + return new ScalarObjectWriter(writer); + } + + @Override + public void bindIndex(ColumnWriterIndex vectorIndex) { + this.vectorIndex = vectorIndex; + } + + @Override + public void startWrite() { lastWriteIndex = -1; } + public int lastWriteIndex() { return lastWriteIndex; } + public void setLastWriteIndex(int index) { lastWriteIndex = index; } @Override public void setNull() { - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("Vector is not nullable"); } @Override @@ -61,7 +78,7 @@ public void setString(String value) { } @Override - public void setBytes(byte[] value) { + public void setBytes(byte[] value, int len) { throw new UnsupportedOperationException(); } @@ -74,14 +91,4 @@ public void setDecimal(BigDecimal value) { public void setPeriod(Period value) { throw new UnsupportedOperationException(); } - - @Override - public TupleWriter map() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrayWriter array() { - throw new UnsupportedOperationException(); - } } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/MapWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/MapWriter.java new file mode 100644 index 00000000000..1dd077854ca --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/MapWriter.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
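// Editor's note: sketch, with hypothetical names, of how a generated
// writer typically uses the fields added above (bufAddr, capacity,
// lastWriteIndex); PlatformDependent is Netty's unsafe wrapper, used the
// same way by OffsetVectorWriter later in this patch.
void setIntAt(int index, int value) {
  if (index >= capacity) {
    // realloc the backing vector here, then refresh bufAddr and capacity
  }
  PlatformDependent.putInt(bufAddr + index * 4, value);  // 4 bytes per int
  lastWriteIndex = index;
}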
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import org.apache.drill.common.types.TypeProtos.DataMode; +import org.apache.drill.exec.record.TupleMetadata.ColumnMetadata; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter.ArrayElementWriterIndex; +import org.apache.drill.exec.vector.complex.AbstractMapVector; +import org.apache.drill.exec.vector.complex.MapVector; +import org.apache.drill.exec.vector.complex.RepeatedMapVector; + +/** + * Writer for a Drill Map type. Maps are actually tuples, just like rows. + */ + +public abstract class MapWriter extends AbstractTupleWriter { + + private static class MemberWriterIndex implements ColumnWriterIndex { + private ColumnWriterIndex baseIndex; + + private MemberWriterIndex(ColumnWriterIndex baseIndex) { + this.baseIndex = baseIndex; + } + + @Override public int vectorIndex() { return baseIndex.vectorIndex(); } + @Override public void overflowed() { baseIndex.overflowed(); } + @Override public boolean legal() { return baseIndex.legal(); } + @Override public void nextElement() { } + } + + private static class SingleMapWriter extends MapWriter { + private final MapVector mapVector; + + private SingleMapWriter(ColumnMetadata schema, MapVector vector, AbstractObjectWriter[] writers) { + super(schema, writers); + mapVector = vector; + } + + @Override + public void endWrite() { + super.endWrite(); + mapVector.getMutator().setValueCount(vectorIndex.vectorIndex()); + } + + @Override + public void bindIndex(ColumnWriterIndex index) { + bindIndex(index, index); + } + } + + private static class ArrayMapWriter extends MapWriter { + private final RepeatedMapVector mapVector; + + private ArrayMapWriter(ColumnMetadata schema, RepeatedMapVector vector, AbstractObjectWriter[] writers) { + super(schema, writers); + mapVector = vector; + } + + @Override + public void bindIndex(ColumnWriterIndex index) { + + // This is a repeated map, so the provided index is an array element + // index. Convert this to an index that will not increment the element + // index on each write so that a map with three members, say, won't + // increment the index for each member. Rather, the index must be + // incremented at the array level. + + final ColumnWriterIndex childIndex = new MemberWriterIndex(index); + bindIndex(index, childIndex); + } + + @Override + public void endWrite() { + super.endWrite(); + + // A bit of a hack. This writer sees the element index. But, + // the vector wants the base element count, provided by the + // parent index.
+ + ColumnWriterIndex baseIndex = ((ArrayElementWriterIndex) vectorIndex).baseIndex(); + mapVector.getMutator().setValueCount(baseIndex.vectorIndex()); + } + } + + protected final ColumnMetadata mapColumnSchema; + + private MapWriter(ColumnMetadata schema, AbstractObjectWriter[] writers) { + super(schema.mapSchema(), writers); + mapColumnSchema = schema; + } + + public static TupleObjectWriter build(ColumnMetadata schema, MapVector vector, + AbstractObjectWriter[] writers) { + return new TupleObjectWriter(new SingleMapWriter(schema, vector, writers)); + } + + public static TupleObjectWriter build(ColumnMetadata schema, RepeatedMapVector vector, + AbstractObjectWriter[] writers) { + return new TupleObjectWriter(new ArrayMapWriter(schema, vector, writers)); + } + + protected void bindIndex(ColumnWriterIndex index, ColumnWriterIndex childIndex) { + vectorIndex = index; + + for (int i = 0; i < writers.length; i++) { + writers[i].bindIndex(childIndex); + } + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java new file mode 100644 index 00000000000..8a23c82396d --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.vector.accessor.writer; + +import java.math.BigDecimal; + +import org.apache.drill.exec.vector.NullableVector; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ColumnAccessors.UInt1ColumnWriter; +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.ValueType; +import org.joda.time.Period; + +public class NullableScalarWriter extends AbstractScalarWriter { + + private final UInt1ColumnWriter isSetWriter = new UInt1ColumnWriter(); + private final BaseScalarWriter baseWriter; + + public NullableScalarWriter(BaseScalarWriter baseWriter) { + this.baseWriter = baseWriter; + } + + public static ScalarObjectWriter build(ValueVector vector, BaseScalarWriter baseWriter) { + NullableScalarWriter writer = new NullableScalarWriter(baseWriter); + writer.bindVector(vector); + return new ScalarObjectWriter(writer); + } + + @Override + public void bindVector(ValueVector vector) { + NullableVector nullableVector = (NullableVector) vector; + baseWriter.bindVector(nullableVector.getValuesVector()); + isSetWriter.bindVector(nullableVector.getBitsVector()); + } + + @Override + public void bindIndex(ColumnWriterIndex index) { + isSetWriter.bindIndex(index); + baseWriter.bindIndex(index); + } + + @Override + public void finish() { + isSetWriter.finish(); + baseWriter.setLastWriteIndex(isSetWriter.lastWriteIndex()); + baseWriter.finish(); + } + + @Override + public ValueType valueType() { + return baseWriter.valueType(); + } + + @Override + public void setNull() { + isSetWriter.setInt(0); + baseWriter.setLastWriteIndex(isSetWriter.lastWriteIndex()); + } + + @Override + public void setInt(int value) { + isSetWriter.setInt(1); + baseWriter.setInt(value); + } + + @Override + public void setLong(long value) { + isSetWriter.setInt(1); + baseWriter.setLong(value); + } + + @Override + public void setDouble(double value) { + isSetWriter.setInt(1); + baseWriter.setDouble(value); + } + + @Override + public void setString(String value) { + isSetWriter.setInt(1); + baseWriter.setString(value); + } + + @Override + public void setBytes(byte[] value, int len) { + isSetWriter.setInt(1); + baseWriter.setBytes(value, len); + } + + @Override + public void setDecimal(BigDecimal value) { + isSetWriter.setInt(1); + baseWriter.setDecimal(value); + } + + @Override + public void setPeriod(Period value) { + isSetWriter.setInt(1); + baseWriter.setPeriod(value); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ObjectArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ObjectArrayWriter.java new file mode 100644 index 00000000000..1dcb9f68977 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ObjectArrayWriter.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
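// Editor's note: conceptual sketch, not part of this patch, of the
// two-vector layout NullableScalarWriter drives above, with plain arrays
// standing in for the bits and values vectors: each value write sets the
// is-set flag to 1, while setNull() writes 0 and leaves the value slot
// untouched.
class NullableIntSketch {
  final byte[] isSet = new byte[16];
  final int[] values = new int[16];

  void setInt(int row, int v) { isSet[row] = 1; values[row] = v; }
  void setNull(int row) { isSet[row] = 0; }
  boolean isNull(int row) { return isSet[row] == 0; }
}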
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import org.apache.drill.exec.vector.complex.RepeatedValueVector; + +/** + * Writer for an array of either a map or another array. + */ + +public class ObjectArrayWriter extends AbstractArrayWriter { + + private ObjectArrayWriter(RepeatedValueVector vector, AbstractObjectWriter elementWriter) { + super(vector, elementWriter); + } + + public static ArrayObjectWriter build(RepeatedValueVector vector, + AbstractObjectWriter elementWriter) { + return new ArrayObjectWriter( + new ObjectArrayWriter(vector, elementWriter)); + } + + @Override + public void save() { + elementIndex.nextElement(); + endValue(); + } + + @Override + public void set(Object... values) { + setObject(values); + } + + @Override + public void setObject(Object array) { + Object values[] = (Object[]) array; + for (int i = 0; i < values.length; i++) { + elementObjWriter.set(values[i]); + save(); + } + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/OffsetVectorWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/OffsetVectorWriter.java new file mode 100644 index 00000000000..cb1fb9fd248 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/OffsetVectorWriter.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import org.apache.drill.exec.memory.BaseAllocator; +import org.apache.drill.exec.vector.UInt4Vector; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.accessor.ValueType; + +import io.netty.buffer.DrillBuf; +import io.netty.util.internal.PlatformDependent; + +/** + * Specialized column writer for the (hidden) offset vector used + * with variable-length or repeated vectors. + */ + +public class OffsetVectorWriter extends BaseScalarWriter { + private static final int VALUE_WIDTH = UInt4Vector.VALUE_WIDTH; + private static final int MIN_VECTOR_SIZE = VALUE_WIDTH * 1024; + private UInt4Vector vector; + private int writeOffset; + + @Override + public final void bindVector(final ValueVector vector) { + this.vector = (UInt4Vector) vector; + setAddr(this.vector.getBuffer()); + + // Special handling for first value. Alloc vector if needed. + // Offset vectors require a 0 at position 0. 
The (end) offset + // for row 0 starts at position 1, which is handled in + // writeOffset() below. + + writeOffset = 0; + lastWriteIndex = 0; + if (capacity < MIN_VECTOR_SIZE) { + setAddr(this.vector.reallocRaw(MIN_VECTOR_SIZE)); + } + PlatformDependent.putInt(bufAddr, writeOffset); + } + + private final void setAddr(final DrillBuf buf) { + bufAddr = buf.addr(); + capacity = buf.capacity() / VALUE_WIDTH; + } + + public int writeOffset() { return writeOffset; } + + @Override + public ValueType valueType() { + return ValueType.INTEGER; + } + + private final int writeIndex() { + int writeIndex = vectorIndex.vectorIndex() + 1; + if (lastWriteIndex + 1 == writeIndex && writeIndex < capacity) { + lastWriteIndex = writeIndex; + return writeIndex; + } + if (writeIndex >= capacity) { + int size = (writeIndex + 1) * VALUE_WIDTH; + if (size > ValueVector.MAX_BUFFER_SIZE) { + throw new IllegalStateException("Offset vectors should not overflow"); + } else { + setAddr(vector.reallocRaw(BaseAllocator.nextPowerOfTwo(size))); + } + } + while (lastWriteIndex < writeIndex - 1) { + PlatformDependent.putInt(bufAddr + ++lastWriteIndex * VALUE_WIDTH, writeOffset); + } + lastWriteIndex = writeIndex; + return writeIndex; + } + + public final void setOffset(final int curOffset) { + final int writeIndex = writeIndex(); + PlatformDependent.putInt(bufAddr + writeIndex * VALUE_WIDTH, curOffset); + writeOffset = curOffset; + } + + @Override + public final void finish() { + final int finalIndex = writeIndex(); + vector.getBuffer().writerIndex(finalIndex * VALUE_WIDTH); + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java new file mode 100644 index 00000000000..29ac00feea1 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +import java.math.BigDecimal; + +import org.apache.drill.exec.vector.accessor.ColumnWriterIndex; +import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter.ScalarObjectWriter; +import org.apache.drill.exec.vector.complex.RepeatedValueVector; +import org.joda.time.Period; + +/** + * Writer for a column that holds an array of scalars. This writer manages + * the array itself. A type-specific child writer manages the elements within + * the array. The overall row index (usually) provides the index into + * the offset vector. An array-specific element index provides the index + * into elements. + *
<p>
+ * This class manages the offset vector directly. Doing so saves one read and + * one write to direct memory per element value. + *
<p>
+ * Provides generic write methods for testing and other times when + * convenience is more important than speed. + *
<p>
+ * The scalar writer for array-valued columns appends values: once a value + * is written, it cannot be changed. As a result, writer methods have no item index; + * each set advances the array to the next position. This is an abstract base class; + * subclasses are generated for each repeated value vector type. + */ + +public class ScalarArrayWriter extends AbstractArrayWriter { + + private final BaseScalarWriter elementWriter; + + private ScalarArrayWriter(RepeatedValueVector vector, BaseScalarWriter elementWriter) { + super(vector, new ScalarObjectWriter(elementWriter)); + this.elementWriter = elementWriter; + elementWriter.bindVector(vector.getDataVector()); + } + + public static ArrayObjectWriter build(RepeatedValueVector vector, + BaseScalarWriter elementWriter) { + return new ArrayObjectWriter( + new ScalarArrayWriter((RepeatedValueVector) vector, + elementWriter)); + } + + @Override + public void bindIndex(ColumnWriterIndex index) { + super.bindIndex(index); + elementWriter.bindIndex(elementIndex()); + } + + @Override + public void save() { + // No-op: done when writing each scalar value + } + + @Override + public void set(Object... values) { + for (Object value : values) { + entry().set(value); + } + } + + @Override + public void setObject(Object array) { + if (array == null) { + // Assume null means a 0-element array since Drill does + // not support null for the whole array. + + return; + } + String objClass = array.getClass().getName(); + if (! objClass.startsWith("[")) { + throw new IllegalArgumentException("Argument must be an array"); + } + + // Figure out type + + char second = objClass.charAt(1); + switch ( second ) { + case '[': + // bytes is represented as an array of byte arrays. + + char third = objClass.charAt(2); + switch (third) { + case 'B': + setBytesArray((byte[][]) array); + break; + default: + throw new IllegalArgumentException( "Unknown Java array type: " + objClass ); + } + break; + case 'S': + setShortArray((short[]) array ); + break; + case 'I': + setIntArray((int[]) array ); + break; + case 'J': + setLongArray((long[]) array ); + break; + case 'F': + setFloatArray((float[]) array ); + break; + case 'D': + setDoubleArray((double[]) array ); + break; + case 'Z': + setBooleanArray((boolean[]) array ); + break; + case 'L': + int posn = objClass.indexOf(';'); + + // If the array is of type Object, then we have no type info. + + String memberClassName = objClass.substring( 2, posn ); + if (memberClassName.equals(String.class.getName())) { + setStringArray((String[]) array ); + } else if (memberClassName.equals(Period.class.getName())) { + setPeriodArray((Period[]) array ); + } else if (memberClassName.equals(BigDecimal.class.getName())) { + setBigDecimalArray((BigDecimal[]) array ); + } else { + throw new IllegalArgumentException( "Unknown Java array type: " + memberClassName ); + } + break; + default: + throw new IllegalArgumentException( "Unknown Java array type: " + objClass ); + } + } + + public void setBooleanArray(boolean[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setInt(value[i] ? 
1 : 0); + } + } + + public void setBytesArray(byte[][] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setBytes(value[i], value[i].length); + } + } + + public void setShortArray(short[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setInt(value[i]); + } + } + + public void setIntArray(int[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setInt(value[i]); + } + } + + public void setLongArray(long[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setLong(value[i]); + } + } + + public void setFloatArray(float[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setDouble(value[i]); + } + } + + public void setDoubleArray(double[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setDouble(value[i]); + } + } + + public void setStringArray(String[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setString(value[i]); + } + } + + public void setPeriodArray(Period[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setPeriod(value[i]); + } + } + + public void setBigDecimalArray(BigDecimal[] value) { + for (int i = 0; i < value.length; i++) { + elementWriter.setDecimal(value[i]); + } + } +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/WriterEvents.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/WriterEvents.java new file mode 100644 index 00000000000..cf1cceee906 --- /dev/null +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/WriterEvents.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.accessor.writer; + +public interface WriterEvents { + void startWrite(); + void startValue(); + void endValue(); + void endWrite(); +} diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractTupleAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/package-info.java similarity index 65% rename from exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractTupleAccessor.java rename to exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/package-info.java index 98ea6ac338f..2526632a330 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractTupleAccessor.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/package-info.java @@ -15,24 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
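// Editor's note: the setObject() dispatch in ScalarArrayWriter above keys
// off standard JVM array class names; this self-contained demo prints the
// descriptors that switch matches:
class ArrayNameDemo {
  public static void main(String[] args) {
    System.out.println(new int[0].getClass().getName());     // [I
    System.out.println(new long[0].getClass().getName());    // [J
    System.out.println(new byte[0][].getClass().getName());  // [[B
    System.out.println(new String[0].getClass().getName());  // [Ljava.lang.String;
  }
}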
*/ -package org.apache.drill.exec.vector.accessor.impl; - -import org.apache.drill.exec.vector.accessor.TupleAccessor; /** - * Common base class for tuple readers and writers. + * Implementation of the vector writers. Writers are size-aware and will + * throw a {@link VectorOverflowException} if any vector exceeds the maximum + * size (16 MB.) */ -public abstract class AbstractTupleAccessor implements TupleAccessor { - - protected final TupleSchema schema; - - public AbstractTupleAccessor(TupleSchema schema) { - this.schema = schema; - } - - @Override - public TupleSchema schema() { - return schema; - } -} +package org.apache.drill.exec.vector.accessor.writer; \ No newline at end of file diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java index baba0865d89..7abe60d2513 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -130,6 +130,7 @@ public T addOrGet(String name, TypeProtos.MajorType type create = true; } if (create) { + @SuppressWarnings("unchecked") final T vector = (T) BasicTypeHelper.getNewVector(name, allocator, type, callBack); putChild(name, vector); if (callBack!=null) { @@ -175,7 +176,7 @@ public T getChild(String name, Class clazz) { * * Note that this method does not enforce any vector type check nor throws a schema change exception. */ - protected void putChild(String name, ValueVector vector) { + public void putChild(String name, ValueVector vector) { putVector(name, vector); field.addChild(vector.getField()); } diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java index 5b8f44d4ed0..0077024976f 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java @@ -265,14 +265,6 @@ public void startNewValue(int index) { setValueCount(index+1); } - public boolean startNewValueBounded(int index) { - if (index >= MAX_ROW_COUNT) { - return false; - } - startNewValue(index); - return true; - } - @Override public void setValueCount(int valueCount) { // TODO: populate offset end points
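// Editor's note: end-to-end sketch, with hypothetical driver code, of the
// WriterEvents protocol the writer classes in this patch implement;
// "root" is assumed to be an already-built, already-bound row writer.
static void writeBatch(AbstractObjectWriter root, int rowCount) {
  root.startWrite();                  // once per batch
  for (int i = 0; i < rowCount; i++) {
    root.startValue();                // open row i
    // set column values for row i here
    root.endValue();                  // close row i: fills offsets, counts
  }
  root.endWrite();                    // finalize value counts, buffer sizes
}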