From c509e932b465e6966cc303d0423ae01ceb0c44e2 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Sat, 25 Sep 2021 20:15:28 -0700 Subject: [PATCH] ARROW-13847: [Java] Avoid unnecessary collection copies There are several occurences in Arrow Java codebase where temporary collections are being created and immediately discarded like when going over a sequence of elements, or when making a copy of a collection provided as an argument to a constructor. This change adds a couple of methods to Collections2 and AutoCloseables, along with some changes to the codebase to remove those extra copies. Closes #11063 from laurentgo/laurentgo/avoid-copies Lead-authored-by: Laurent Goujon Co-authored-by: emkornfield Signed-off-by: Micah Kornfield --- .../org/apache/arrow/util/AutoCloseables.java | 23 ++++- .../org/apache/arrow/util/Collections2.java | 36 ++++++-- .../apache/arrow/util/TestCollections2.java | 83 +++++++++++++++++++ .../codegen/templates/DenseUnionVector.java | 3 +- .../main/codegen/templates/UnionVector.java | 3 +- .../org/apache/arrow/vector/VectorLoader.java | 2 +- .../apache/arrow/vector/types/pojo/Field.java | 3 +- .../arrow/vector/types/pojo/FieldType.java | 7 +- .../arrow/vector/types/pojo/Schema.java | 31 +++---- 9 files changed, 157 insertions(+), 34 deletions(-) create mode 100644 java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java index cffec557e9615..d965f2aed4120 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java @@ -21,8 +21,9 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Iterator; import java.util.List; -import java.util.stream.Collectors; +import java.util.stream.StreamSupport; /** * Utilities for AutoCloseable classes. @@ -113,9 +114,23 @@ public static void close(Iterable ac) throws Exception */ @SafeVarargs public static void close(Iterable...closeables) throws Exception { - close(Arrays.asList(closeables).stream() - .flatMap((Iterable t) -> Collections2.toList(t).stream()) - .collect(Collectors.toList())); + close(flatten(closeables)); + } + + @SafeVarargs + private static Iterable flatten(Iterable... closeables) { + return new Iterable() { + // Cast from Iterable to Iterable is safe in this context + // since there's no modification of the original collection + @SuppressWarnings("unchecked") + @Override + public Iterator iterator() { + return Arrays.stream(closeables) + .flatMap((Iterable i) + -> StreamSupport.stream(((Iterable) i).spliterator(), /*parallel=*/false)) + .iterator(); + } + }; } /** diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java index 7605db9af1000..6b01a61ebca39 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java @@ -19,18 +19,21 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Spliterator; +import java.util.Spliterators; import java.util.stream.Collectors; import java.util.stream.StreamSupport; /** * Utility methods for manipulating {@link java.util.Collections} and their subclasses/implementations. */ -public class Collections2 { +public final class Collections2 { private Collections2() {} /** @@ -46,23 +49,44 @@ public static List toList(Iterator iterator) { * Converts the iterable into a new {@link List}. */ public static List toList(Iterable iterable) { - return StreamSupport.stream(iterable.spliterator(), false).collect(Collectors.toList()); + if (iterable instanceof Collection) { + // If iterable is a collection, take advantage of it for a more efficient copy + return new ArrayList((Collection) iterable); + } + return toList(iterable.iterator()); } + /** + * Converts the iterable into a new immutable {@link List}. + */ + public static List toImmutableList(Iterable iterable) { + return Collections.unmodifiableList(toList(iterable)); + } + + /** Copies the elements of map to a new unmodifiable map. */ public static Map immutableMapCopy(Map map) { - Map newMap = new HashMap<>(); - newMap.putAll(map); - return java.util.Collections.unmodifiableMap(newMap); + return Collections.unmodifiableMap(new HashMap<>(map)); } /** Copies the elements of list to a new unmodifiable list. */ public static List immutableListCopy(List list) { - return Collections.unmodifiableList(list.stream().collect(Collectors.toList())); + return Collections.unmodifiableList(new ArrayList<>(list)); } /** Copies the values to a new unmodifiable list. */ public static List asImmutableList(V...values) { return Collections.unmodifiableList(Arrays.asList(values)); } + + /** + * Creates a human readable string from the remaining elements in iterator. + * + * The output should be similar to {@code Arrays#toString(Object[])} + */ + public static String toString(Iterator iterator) { + return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(String::valueOf) + .collect(Collectors.joining(", ", "[", "]")); + } } diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java b/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java new file mode 100644 index 0000000000000..c858ebe62a90f --- /dev/null +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.util; + +import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import org.junit.Test; + +/** + * Tests for {@code Collections2} class. + */ +public class TestCollections2 { + + + @Test + public void testToImmutableListFromIterable() { + final List source = new ArrayList<>(Arrays.asList("foo", "bar", "baz")); + + final List copy = Collections2.toImmutableList(source); + assertEquals(source, copy); + + try { + copy.add("unexpected"); + fail("add operation should not be supported"); + } catch (UnsupportedOperationException ignored) { + } + + try { + copy.set(0, "unexpected"); + fail("set operation should not be supported"); + } catch (UnsupportedOperationException ignored) { + } + + try { + copy.remove(0); + fail("remove operation should not be supported"); + } catch (UnsupportedOperationException ignored) { + } + + source.set(1, "newvalue"); + source.add("anothervalue"); + + assertEquals("bar", copy.get(1)); + assertEquals(3, copy.size()); + } + + + @Test + public void testStringFromEmptyIterator() { + assertEquals("[]", Collections2.toString(Collections.emptyIterator())); + } + + @Test + public void testStringFromIterator() { + Iterator iterator = Arrays.asList("foo", "bar", "baz").iterator(); + iterator.next(); + + assertEquals("[bar, baz]", Collections2.toString(iterator)); + assertEquals(false, iterator.hasNext()); + } +} diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java index c1991f65b92d8..440fe32635414 100644 --- a/java/vector/src/main/codegen/templates/DenseUnionVector.java +++ b/java/vector/src/main/codegen/templates/DenseUnionVector.java @@ -734,8 +734,7 @@ public ArrowBuf[] getBuffers(boolean clear) { @Override public Iterator iterator() { - List vectors = org.apache.arrow.util.Collections2.toList(internalStruct.iterator()); - return vectors.iterator(); + return internalStruct.iterator(); } private ValueVector getVector(int index) { diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 8e5d76f39b83f..53fa070ec964a 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -617,8 +617,7 @@ public ArrowBuf[] getBuffers(boolean clear) { @Override public Iterator iterator() { - List vectors = org.apache.arrow.util.Collections2.toList(internalStruct.iterator()); - return vectors.iterator(); + return internalStruct.iterator(); } public ValueVector getVector(int index) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 24be349c2820d..ed5f3aef17397 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -86,7 +86,7 @@ public void load(ArrowRecordBatch recordBatch) { root.setRowCount(recordBatch.getLength()); if (nodes.hasNext() || buffers.hasNext()) { throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " + - Collections2.toList(nodes).toString() + " buffers: " + Collections2.toList(buffers).toString()); + Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers)); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 2eeb3bea449e6..3a5ef11537abc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -35,6 +35,7 @@ import org.apache.arrow.flatbuf.KeyValue; import org.apache.arrow.flatbuf.Type; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Collections2; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; @@ -91,7 +92,7 @@ private Field( private Field(String name, FieldType fieldType, List children, TypeLayout typeLayout) { this.name = name; this.fieldType = checkNotNull(fieldType); - this.children = children == null ? Collections.emptyList() : children.stream().collect(Collectors.toList()); + this.children = children == null ? Collections.emptyList() : Collections2.toImmutableList(children); } public Field(String name, FieldType fieldType, List children) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java index 9ef849d86edf1..bb3250ef1021b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java @@ -17,6 +17,7 @@ package org.apache.arrow.vector.types.pojo; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Objects; @@ -68,11 +69,9 @@ public FieldType(boolean nullable, ArrowType type, DictionaryEncoding dictionary extensionMetadata.put(ExtensionType.EXTENSION_METADATA_KEY_NAME, ((ExtensionType) type).extensionName()); extensionMetadata.put(ExtensionType.EXTENSION_METADATA_KEY_METADATA, ((ExtensionType) type).serialize()); if (metadata != null) { - for (Map.Entry entry : metadata.entrySet()) { - extensionMetadata.put(entry.getKey(), entry.getValue()); - } + extensionMetadata.putAll(metadata); } - this.metadata = Collections2.immutableMapCopy(extensionMetadata); + this.metadata = Collections.unmodifiableMap(extensionMetadata); } else { this.metadata = metadata == null ? java.util.Collections.emptyMap() : Collections2.immutableMapCopy(metadata); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java index e0ce1fe99d112..d377b395c04f4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java @@ -97,7 +97,7 @@ public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) { String value = kv.value(); metadata.put(key == null ? "" : key, value == null ? "" : value); } - return new Schema(Collections2.immutableListCopy(fields), Collections2.immutableMapCopy(metadata)); + return new Schema(true, Collections.unmodifiableList(fields), Collections.unmodifiableMap(metadata)); } private final List fields; @@ -112,27 +112,30 @@ public Schema(Iterable fields) { */ public Schema(Iterable fields, Map metadata) { - List fieldList = new ArrayList<>(); - for (Field field : fields) { - fieldList.add(field); - } - this.fields = Collections2.immutableListCopy(fieldList); - this.metadata = metadata == null ? Collections.emptyMap() : Collections2.immutableMapCopy(metadata); + this(true, + Collections2.toImmutableList(fields), + metadata == null ? Collections.emptyMap() : Collections2.immutableMapCopy(metadata)); } + /** * Constructor used for JSON deserialization. */ @JsonCreator private Schema(@JsonProperty("fields") Iterable fields, @JsonProperty("metadata") List> metadata) { - List fieldList = new ArrayList<>(); - for (Field field : fields) { - fieldList.add(field); - } - this.fields = Collections2.immutableListCopy(fieldList); - this.metadata = metadata == null ? - Collections.emptyMap() : Collections2.immutableMapCopy(convertMetadata(metadata)); + this(fields, convertMetadata(metadata)); + } + + + /** + * Private constructor to bypass automatic collection copy. + * @param unsafe a ignored argument. Its only purpose is to prevent using the constructor + * by accident because of type collisions (List vs Iterable). + */ + private Schema(boolean unsafe, List fields, Map metadata) { + this.fields = fields; + this.metadata = metadata; } static Map convertMetadata(List> metadata) {