-
Notifications
You must be signed in to change notification settings - Fork 64
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NEMO-350] Implement Off-heap SerializedMemoryStore & [NEMO-384] Implement DirectByteBufferInputStream for Off-heap SerializedMemoryStore #222
Changes from 63 commits
26f364f
4ded359
8c557d9
3ec9c8e
1dc1dd8
666cd01
05e7033
ceba288
0ab5c93
940bef7
9789d7e
34848af
d01c0f0
87409c7
6a296a6
4d349df
5766022
e6b271e
767c182
e40064b
c75ec2c
a1bc8d6
64b9df2
47c9832
8f77496
19bad77
e1be1c4
597c2f1
5339c3a
b1b7eb0
984652b
fbb577e
cb8b9e6
930cea0
dcd571c
4214da3
c4420b6
e0e7c8b
fc69549
673c98d
e9fc080
c5efdcf
a2713f4
7253e6f
93ecf76
edd1975
53faa33
58925d3
d56494e
39412ad
fa49ca3
fbf97e2
48c6b98
6f4dae5
e5ef592
d0e5478
0cb1f1e
17cd1d7
7b4a6bc
617dfd3
1b512c4
449649d
38578df
c55d54d
bf689d2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.nemo.common; | ||
|
||
import java.io.EOFException; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.nio.ByteBuffer; | ||
import java.util.List; | ||
|
||
/** | ||
* This class is a customized input stream implementation which reads data from | ||
* list of {@link ByteBuffer}. If the {@link ByteBuffer} is direct, it may reside outside | ||
* the normal garbage-collected heap memory. | ||
*/ | ||
public class DirectByteBufferInputStream extends InputStream { | ||
private List<ByteBuffer> bufList; | ||
private int current = 0; | ||
private static final int BITMASK = 0xff; | ||
|
||
/** | ||
* Default Constructor. | ||
* | ||
* @param bufList is the target data to read. | ||
*/ | ||
public DirectByteBufferInputStream(final List<ByteBuffer> bufList) { | ||
this.bufList = bufList; | ||
} | ||
|
||
/** | ||
* Reads data from the list of {@code ByteBuffer}s. | ||
* | ||
* @return integer. | ||
* @throws IOException | ||
*/ | ||
@Override | ||
public int read() throws IOException { | ||
// Since java's byte is signed type, we have to mask it to make byte | ||
// become unsigned type to properly retrieve `int` from sequence of bytes. | ||
return getBuffer().get() & BITMASK; | ||
} | ||
|
||
/** | ||
* Return next non-empty @code{ByteBuffer}. | ||
* | ||
* @return @code{ByteBuffer} to write the data | ||
* @throws IOException when fail to retrieve buffer. | ||
*/ | ||
public ByteBuffer getBuffer() throws IOException { | ||
while (current < bufList.size()) { | ||
ByteBuffer buffer = bufList.get(current); | ||
if (buffer.hasRemaining()) { | ||
return buffer; | ||
} | ||
current += 1; | ||
} | ||
throw new EOFException(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,24 +18,29 @@ | |
*/ | ||
package org.apache.nemo.common; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
|
||
import java.io.OutputStream; | ||
import java.nio.ByteBuffer; | ||
import java.util.ArrayList; | ||
import java.util.LinkedList; | ||
import java.util.List; | ||
|
||
/** | ||
* This class is a customized output stream implementation backed by | ||
* {@link ByteBuffer}, which utilizes off heap memory when writing the data. | ||
* Memory is allocated when needed by the specified {@code pageSize}. | ||
* Deletion of {@code dataList}, which is the memory this outputstream holds, occurs | ||
* when the corresponding block is deleted. | ||
* TODO #388: Off-heap memory management (reuse ByteBuffer) - implement reuse. | ||
*/ | ||
public final class DirectByteBufferOutputStream extends OutputStream { | ||
hy00nc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
private LinkedList<ByteBuffer> dataList = new LinkedList<>(); | ||
private static final int DEFAULT_PAGE_SIZE = 4096; | ||
private static final int DEFAULT_PAGE_SIZE = 32768; //32KB | ||
hy00nc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
private final int pageSize; | ||
private ByteBuffer currentBuf; | ||
|
||
|
||
/** | ||
* Default constructor. | ||
* Sets the {@code pageSize} as default size of 4096 bytes. | ||
|
@@ -45,8 +50,9 @@ public DirectByteBufferOutputStream() { | |
} | ||
|
||
/** | ||
* Constructor specifying the {@code size}. | ||
* Sets the {@code pageSize} as {@code size}. | ||
* Constructor which sets {@code pageSize} as specified {@code size}. | ||
* Note that the {@code pageSize} has trade-off between memory fragmentation and | ||
* native memory (de)allocation overhead. | ||
* | ||
* @param size should be a power of 2 and greater than or equal to 4096. | ||
*/ | ||
|
@@ -62,6 +68,7 @@ public DirectByteBufferOutputStream(final int size) { | |
/** | ||
* Allocates new {@link ByteBuffer} with the capacity equal to {@code pageSize}. | ||
*/ | ||
// TODO #388: Off-heap memory management (reuse ByteBuffer) | ||
private void newLastBuffer() { | ||
dataList.addLast(ByteBuffer.allocateDirect(pageSize)); | ||
} | ||
|
@@ -120,14 +127,15 @@ public void write(final byte[] b, final int off, final int len) { | |
} | ||
} | ||
|
||
|
||
/** | ||
* Creates a byte array that contains the whole content currently written in this output stream. | ||
* Note that this method causes array copy which could degrade performance. | ||
* TODO #384: For performance issue, implement an input stream so that we do not have to use this method. | ||
* | ||
* USED BY TESTS ONLY. | ||
* @return the current contents of this output stream, as byte array. | ||
*/ | ||
public byte[] toByteArray() { | ||
@VisibleForTesting | ||
byte[] toByteArray() { | ||
if (dataList.isEmpty()) { | ||
final byte[] byteArray = new byte[0]; | ||
return byteArray; | ||
|
@@ -140,38 +148,48 @@ public byte[] toByteArray() { | |
final int arraySize = pageSize * (dataList.size() - 1) + lastBuf.position(); | ||
final byte[] byteArray = new byte[arraySize]; | ||
int start = 0; | ||
int byteToWrite; | ||
|
||
for (final ByteBuffer temp : dataList) { | ||
// ByteBuffer has to be shifted to read mode by calling ByteBuffer.flip(), | ||
// which sets limit to the current position and sets the position to 0. | ||
// Note that capacity remains unchanged. | ||
temp.flip(); | ||
byteToWrite = temp.remaining(); | ||
temp.get(byteArray, start, byteToWrite); | ||
|
||
for (final ByteBuffer buffer : dataList) { | ||
// We use duplicated buffer to read the data so that there is complicated | ||
hy00nc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// alteration of position and limit when switching between read and write mode. | ||
final ByteBuffer dupBuffer = buffer.duplicate(); | ||
hy00nc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
dupBuffer.flip(); | ||
final int byteToWrite = dupBuffer.remaining(); | ||
dupBuffer.get(byteArray, start, byteToWrite); | ||
hy00nc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
start += byteToWrite; | ||
} | ||
// The limit of the last buffer has to be set to the capacity for additional write. | ||
lastBuf.limit(lastBuf.capacity()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this no longer needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We changed it to read from duplicated buffer so the original one maintains its limit. |
||
|
||
return byteArray; | ||
} | ||
|
||
/** | ||
* Returns the list of {@code ByteBuffer}s that contains the written data. | ||
* Note that by calling this method, the existing list of {@code ByteBuffer}s is cleared. | ||
* List of flipped and duplicated {@link ByteBuffer}s are returned which has independent | ||
* position and limit, to reduce erroneous data read/write. | ||
* This function has to be called when intended to read from the start of the list of | ||
* {@link ByteBuffer}s, not for additional write. | ||
* | ||
* @return the {@code LinkedList} of {@code ByteBuffer}s. | ||
*/ | ||
public List<ByteBuffer> getBufferListAndClear() { | ||
List<ByteBuffer> result = dataList; | ||
dataList = new LinkedList<>(); | ||
for (final ByteBuffer buffer : result) { | ||
buffer.flip(); | ||
public List<ByteBuffer> getDirectByteBufferList() { | ||
List<ByteBuffer> result = new ArrayList<>(dataList.size()); | ||
for (final ByteBuffer buffer : dataList) { | ||
final ByteBuffer dupBuffer = buffer.duplicate(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto on duplicate(). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please refer to the comment above 👍 |
||
dupBuffer.flip(); | ||
result.add(dupBuffer); | ||
} | ||
return result; | ||
} | ||
|
||
/** | ||
* Returns the size of the data written in this output stream. | ||
* | ||
* @return the size of the data | ||
*/ | ||
public int size() { | ||
return pageSize * (dataList.size() - 1) + dataList.getLast().position(); | ||
} | ||
|
||
/** | ||
* Closing this output stream has no effect. | ||
*/ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a comment something like:
// The contents of direct buffers may reside outside of the normal garbage-collected heap
I found the above in the
ByteBuffer
documentation. Is there a way to "guarantee" that the contents reside outside of the heap?My understanding is that it can be done by using
DirectByteBuffer
, rather than the abstract classByteBuffer
. It'd be good to make this change, also since the name of this class isDirectByteBufferInputStream
.https://www.javacodegeeks.com/2013/08/which-memory-is-faster-heap-or-bytebuffer-or-direct.html
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you're right, the name of the class implies that this is only for
DirectByteBuffer
. I guess it is better to change the name of the class toByteBufferInputStream
becauseDirectByteBuffer
class is not public and it can only be constructed by callingallocateDirect()
method inByteBuffer
. It is possible to check whether theByteBuffer
is direct or not, but I am afraid it has no much meaning in checking it when it is already written(in on-heap or off-heap) and we are intending to read it. Does it seem okay?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed.
I am also fine with keeping the current class name
DirectByteBufferInputStream
, as we also haveDirectByteBufferOutputStream
.