Skip to content

Commit

Permalink
ARROW-13847: [Java] Avoid unnecessary collection copies
Browse files Browse the repository at this point in the history
There are several occurences in Arrow Java codebase where temporary
collections are being created and immediately discarded like when going
over a sequence of elements, or when making a copy of a collection
provided as an argument to a constructor.

This change adds a couple of methods to Collections2 and AutoCloseables,
along with some changes to the codebase to remove those extra copies.

Closes apache#11063 from laurentgo/laurentgo/avoid-copies

Lead-authored-by: Laurent Goujon <laurent@apache.org>
Co-authored-by: emkornfield <emkornfield@gmail.com>
Signed-off-by: Micah Kornfield <emkornfield@gmail.com>
  • Loading branch information
2 people authored and ViniciusSouzaRoque committed Oct 20, 2021
1 parent 3d96177 commit c509e93
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

/**
* Utilities for AutoCloseable classes.
Expand Down Expand Up @@ -113,9 +114,23 @@ public static void close(Iterable<? extends AutoCloseable> ac) throws Exception
*/
@SafeVarargs
public static void close(Iterable<? extends AutoCloseable>...closeables) throws Exception {
close(Arrays.asList(closeables).stream()
.flatMap((Iterable<? extends AutoCloseable> t) -> Collections2.toList(t).stream())
.collect(Collectors.toList()));
close(flatten(closeables));
}

@SafeVarargs
private static Iterable<AutoCloseable> flatten(Iterable<? extends AutoCloseable>... closeables) {
return new Iterable<AutoCloseable>() {
// Cast from Iterable<? extends AutoCloseable> to Iterable<AutoCloseable> is safe in this context
// since there's no modification of the original collection
@SuppressWarnings("unchecked")
@Override
public Iterator<AutoCloseable> iterator() {
return Arrays.stream(closeables)
.flatMap((Iterable<? extends AutoCloseable> i)
-> StreamSupport.stream(((Iterable<AutoCloseable>) i).spliterator(), /*parallel=*/false))
.iterator();
}
};
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,21 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

/**
* Utility methods for manipulating {@link java.util.Collections} and their subclasses/implementations.
*/
public class Collections2 {
public final class Collections2 {
private Collections2() {}

/**
Expand All @@ -46,23 +49,44 @@ public static <T> List<T> toList(Iterator<T> iterator) {
* Converts the iterable into a new {@link List}.
*/
public static <T> List<T> toList(Iterable<T> iterable) {
return StreamSupport.stream(iterable.spliterator(), false).collect(Collectors.toList());
if (iterable instanceof Collection<?>) {
// If iterable is a collection, take advantage of it for a more efficient copy
return new ArrayList<T>((Collection<T>) iterable);
}
return toList(iterable.iterator());
}

/**
* Converts the iterable into a new immutable {@link List}.
*/
public static <T> List<T> toImmutableList(Iterable<T> iterable) {
return Collections.unmodifiableList(toList(iterable));
}


/** Copies the elements of <code>map</code> to a new unmodifiable map. */
public static <K, V> Map<K, V> immutableMapCopy(Map<K, V> map) {
Map<K, V> newMap = new HashMap<>();
newMap.putAll(map);
return java.util.Collections.unmodifiableMap(newMap);
return Collections.unmodifiableMap(new HashMap<>(map));
}

/** Copies the elements of list to a new unmodifiable list. */
public static <V> List<V> immutableListCopy(List<V> list) {
return Collections.unmodifiableList(list.stream().collect(Collectors.toList()));
return Collections.unmodifiableList(new ArrayList<>(list));
}

/** Copies the values to a new unmodifiable list. */
public static <V> List<V> asImmutableList(V...values) {
return Collections.unmodifiableList(Arrays.asList(values));
}

/**
* Creates a human readable string from the remaining elements in iterator.
*
* The output should be similar to {@code Arrays#toString(Object[])}
*/
public static String toString(Iterator<?> iterator) {
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
.map(String::valueOf)
.collect(Collectors.joining(", ", "[", "]"));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.util;

import static org.junit.Assert.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.junit.Test;

/**
* Tests for {@code Collections2} class.
*/
public class TestCollections2 {


@Test
public void testToImmutableListFromIterable() {
final List<String> source = new ArrayList<>(Arrays.asList("foo", "bar", "baz"));

final List<String> copy = Collections2.toImmutableList(source);
assertEquals(source, copy);

try {
copy.add("unexpected");
fail("add operation should not be supported");
} catch (UnsupportedOperationException ignored) {
}

try {
copy.set(0, "unexpected");
fail("set operation should not be supported");
} catch (UnsupportedOperationException ignored) {
}

try {
copy.remove(0);
fail("remove operation should not be supported");
} catch (UnsupportedOperationException ignored) {
}

source.set(1, "newvalue");
source.add("anothervalue");

assertEquals("bar", copy.get(1));
assertEquals(3, copy.size());
}


@Test
public void testStringFromEmptyIterator() {
assertEquals("[]", Collections2.toString(Collections.emptyIterator()));
}

@Test
public void testStringFromIterator() {
Iterator<String> iterator = Arrays.asList("foo", "bar", "baz").iterator();
iterator.next();

assertEquals("[bar, baz]", Collections2.toString(iterator));
assertEquals(false, iterator.hasNext());
}
}
3 changes: 1 addition & 2 deletions java/vector/src/main/codegen/templates/DenseUnionVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -734,8 +734,7 @@ public ArrowBuf[] getBuffers(boolean clear) {
@Override
public Iterator<ValueVector> iterator() {
List<ValueVector> vectors = org.apache.arrow.util.Collections2.toList(internalStruct.iterator());
return vectors.iterator();
return internalStruct.iterator();
}
private ValueVector getVector(int index) {
Expand Down
3 changes: 1 addition & 2 deletions java/vector/src/main/codegen/templates/UnionVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -617,8 +617,7 @@ public ArrowBuf[] getBuffers(boolean clear) {
@Override
public Iterator<ValueVector> iterator() {
List<ValueVector> vectors = org.apache.arrow.util.Collections2.toList(internalStruct.iterator());
return vectors.iterator();
return internalStruct.iterator();
}
public ValueVector getVector(int index) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public void load(ArrowRecordBatch recordBatch) {
root.setRowCount(recordBatch.getLength());
if (nodes.hasNext() || buffers.hasNext()) {
throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " +
Collections2.toList(nodes).toString() + " buffers: " + Collections2.toList(buffers).toString());
Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.arrow.flatbuf.KeyValue;
import org.apache.arrow.flatbuf.Type;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Collections2;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.TypeLayout;
import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
Expand Down Expand Up @@ -91,7 +92,7 @@ private Field(
private Field(String name, FieldType fieldType, List<Field> children, TypeLayout typeLayout) {
this.name = name;
this.fieldType = checkNotNull(fieldType);
this.children = children == null ? Collections.emptyList() : children.stream().collect(Collectors.toList());
this.children = children == null ? Collections.emptyList() : Collections2.toImmutableList(children);
}

public Field(String name, FieldType fieldType, List<Field> children) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.arrow.vector.types.pojo;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -68,11 +69,9 @@ public FieldType(boolean nullable, ArrowType type, DictionaryEncoding dictionary
extensionMetadata.put(ExtensionType.EXTENSION_METADATA_KEY_NAME, ((ExtensionType) type).extensionName());
extensionMetadata.put(ExtensionType.EXTENSION_METADATA_KEY_METADATA, ((ExtensionType) type).serialize());
if (metadata != null) {
for (Map.Entry<String, String> entry : metadata.entrySet()) {
extensionMetadata.put(entry.getKey(), entry.getValue());
}
extensionMetadata.putAll(metadata);
}
this.metadata = Collections2.immutableMapCopy(extensionMetadata);
this.metadata = Collections.unmodifiableMap(extensionMetadata);
} else {
this.metadata = metadata == null ? java.util.Collections.emptyMap() : Collections2.immutableMapCopy(metadata);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) {
String value = kv.value();
metadata.put(key == null ? "" : key, value == null ? "" : value);
}
return new Schema(Collections2.immutableListCopy(fields), Collections2.immutableMapCopy(metadata));
return new Schema(true, Collections.unmodifiableList(fields), Collections.unmodifiableMap(metadata));
}

private final List<Field> fields;
Expand All @@ -112,27 +112,30 @@ public Schema(Iterable<Field> fields) {
*/
public Schema(Iterable<Field> fields,
Map<String, String> metadata) {
List<Field> fieldList = new ArrayList<>();
for (Field field : fields) {
fieldList.add(field);
}
this.fields = Collections2.immutableListCopy(fieldList);
this.metadata = metadata == null ? Collections.emptyMap() : Collections2.immutableMapCopy(metadata);
this(true,
Collections2.toImmutableList(fields),
metadata == null ? Collections.emptyMap() : Collections2.immutableMapCopy(metadata));
}


/**
* Constructor used for JSON deserialization.
*/
@JsonCreator
private Schema(@JsonProperty("fields") Iterable<Field> fields,
@JsonProperty("metadata") List<Map<String, String>> metadata) {
List<Field> fieldList = new ArrayList<>();
for (Field field : fields) {
fieldList.add(field);
}
this.fields = Collections2.immutableListCopy(fieldList);
this.metadata = metadata == null ?
Collections.emptyMap() : Collections2.immutableMapCopy(convertMetadata(metadata));
this(fields, convertMetadata(metadata));
}


/**
* Private constructor to bypass automatic collection copy.
* @param unsafe a ignored argument. Its only purpose is to prevent using the constructor
* by accident because of type collisions (List vs Iterable).
*/
private Schema(boolean unsafe, List<Field> fields, Map<String, String> metadata) {
this.fields = fields;
this.metadata = metadata;
}

static Map<String, String> convertMetadata(List<Map<String, String>> metadata) {
Expand Down

0 comments on commit c509e93

Please sign in to comment.