Skip to content
This repository has been archived by the owner on Dec 20, 2022. It is now read-only.

Commit

Permalink
Merge pull request #15 from Mellanox/release-3.1
Browse files Browse the repository at this point in the history
Release 3.1
  • Loading branch information
yuvaldeg committed Nov 29, 2018
2 parents 8476067 + 4545823 commit df827b0
Show file tree
Hide file tree
Showing 12 changed files with 128 additions and 163 deletions.
19 changes: 10 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Mellanox ConnectX-5 network adapter with 100GbE RoCE fabric, connected with a Me
For more information on configuration, performance tuning and troubleshooting, please visit the [SparkRDMA GitHub Wiki](https://github.com/Mellanox/SparkRDMA/wiki)

## Runtime requirements
* Apache Spark 2.0.0/2.1.0/2.2.0/2.3.0
* Apache Spark 2.0.0/2.1.0/2.2.0/2.3.0/2.4.0
* Java 8
* An RDMA-supported network, e.g. RoCE or InfiniBand

Expand All @@ -45,21 +45,22 @@ Please use the ["Releases"](https://github.com/Mellanox/SparkRDMA/releases) page
<br>If you would like to build the project yourself, please refer to the ["Build"](https://github.com/Mellanox/SparkRDMA#build) section below.

The pre-built binaries are packed as an archive that contains the following files:
* spark-rdma-3.0-for-spark-2.0.0-jar-with-dependencies.jar
* spark-rdma-3.0-for-spark-2.1.0-jar-with-dependencies.jar
* spark-rdma-3.0-for-spark-2.2.0-jar-with-dependencies.jar
* spark-rdma-3.0-for-spark-2.3.0-jar-with-dependencies.jar
* spark-rdma-3.1-for-spark-2.0.0-jar-with-dependencies.jar
* spark-rdma-3.1-for-spark-2.1.0-jar-with-dependencies.jar
* spark-rdma-3.1-for-spark-2.2.0-jar-with-dependencies.jar
* spark-rdma-3.1-for-spark-2.3.0-jar-with-dependencies.jar
* spark-rdma-3.1-for-spark-2.4.0-jar-with-dependencies.jar
* libdisni.so

libdisni.so **must** be in `java.library.path` on every Spark Master and Worker (usually in /usr/lib)

### Configuration

Provide Spark the location of the SparkRDMA plugin jars by using the extraClassPath option. For standalone mode this can
be added to either spark-defaults.conf or any runtime configuration file. For client mode this **must** be added to spark-defaults.conf. For Spark 2.0.0 (Replace with 2.1.0, 2.2.0 or 2.3.0 according to your Spark version):
be added to either spark-defaults.conf or any runtime configuration file. For client mode this **must** be added to spark-defaults.conf. For Spark 2.0.0 (Replace with 2.1.0, 2.2.0, 2.3.0 or 2.4.0 according to your Spark version):
```
spark.driver.extraClassPath /path/to/SparkRDMA/target/spark-rdma-2.0-for-spark-2.0.0-jar-with-dependencies.jar
spark.executor.extraClassPath /path/to/SparkRDMA/target/spark-rdma-2.0-for-spark-2.0.0-jar-with-dependencies.jar
spark.driver.extraClassPath /path/to/SparkRDMA/target/spark-rdma-3.1-for-spark-2.0.0-jar-with-dependencies.jar
spark.executor.extraClassPath /path/to/SparkRDMA/target/spark-rdma-3.1-for-spark-2.0.0-jar-with-dependencies.jar
```

### Running
Expand All @@ -76,7 +77,7 @@ Building the SparkRDMA plugin requires [Apache Maven](http://maven.apache.org/)

1. Obtain a clone of [SparkRDMA](https://github.com/Mellanox/SparkRDMA)

2. Build the plugin for your Spark version (either 2.0.0, 2.1.0, 2.2.0 or 2.3.0), e.g. for Spark 2.0.0:
2. Build the plugin for your Spark version (either 2.0.0, 2.1.0, 2.2.0, 2.3.0 or 2.4.0), e.g. for Spark 2.0.0:
```
mvn -DskipTests clean package -Pspark-2.0.0
```
Expand Down
8 changes: 7 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

<groupId>com.github.mellanox</groupId>
<artifactId>spark-rdma</artifactId>
<version>3.0</version>
<version>3.1</version>
<name>${project.artifactId}</name>
<description>SparkRDMA Shuffle Manager Plugin</description>
<inceptionYear>2017</inceptionYear>
Expand Down Expand Up @@ -61,6 +61,12 @@
<spark.version>2.3.0</spark.version>
</properties>
</profile>
<profile>
<id>spark-2.4.0</id>
<properties>
<spark.version>2.4.0</spark.version>
</properties>
</profile>
</profiles>

<dependencies>
Expand Down
30 changes: 13 additions & 17 deletions src/main/java/org/apache/spark/shuffle/rdma/RdmaBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,18 @@ class RdmaBuffer {
private final MemoryBlock block;
private AtomicInteger refCount;

public static final UnsafeMemoryAllocator unsafeAlloc = new UnsafeMemoryAllocator();
static final UnsafeMemoryAllocator unsafeAlloc = new UnsafeMemoryAllocator();
public static final Constructor<?> directBufferConstructor;

static {
try {
Class<?> classDirectByteBuffer = Class.forName("java.nio.DirectByteBuffer");
directBufferConstructor = classDirectByteBuffer.getDeclaredConstructor(long.class, int.class);
directBufferConstructor.setAccessible(true);
} catch (Exception e) {
throw new RuntimeException("java.nio.DirectByteBuffer class not found");
}
}

RdmaBuffer(IbvPd ibvPd, int length) throws IOException {
block = unsafeAlloc.allocate((long)length);
Expand Down Expand Up @@ -126,25 +137,10 @@ private void unregister() {
}

ByteBuffer getByteBuffer() throws IOException {
Class<?> classDirectByteBuffer;
try {
classDirectByteBuffer = Class.forName("java.nio.DirectByteBuffer");
} catch (ClassNotFoundException e) {
throw new IOException("java.nio.DirectByteBuffer class not found");
}
Constructor<?> constructor;
try {
constructor = classDirectByteBuffer.getDeclaredConstructor(long.class, int.class);
} catch (NoSuchMethodException e) {
throw new IOException("java.nio.DirectByteBuffer constructor not found");
}
constructor.setAccessible(true);
ByteBuffer byteBuffer;
try {
byteBuffer = (ByteBuffer)constructor.newInstance(getAddress(), getLength());
return (ByteBuffer)directBufferConstructor.newInstance(getAddress(), getLength());
} catch (Exception e) {
throw new IOException("java.nio.DirectByteBuffer exception: " + e.toString());
}
return byteBuffer;
}
}
24 changes: 8 additions & 16 deletions src/main/java/org/apache/spark/shuffle/rdma/RdmaBufferManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import com.ibm.disni.rdma.verbs.IbvMr;
import com.ibm.disni.rdma.verbs.IbvPd;
import com.ibm.disni.rdma.verbs.SVCRegMr;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.concurrent.ExecutionContext;
Expand Down Expand Up @@ -99,7 +97,7 @@ private void close() {
private final ConcurrentHashMap<Integer, AllocatorStack> allocStackMap =
new ConcurrentHashMap<>();
private IbvPd pd;
private IbvMr odpMr = null;
private final boolean useOdp;
private long maxCacheSize;
private static final ExecutionContextExecutor globalScalaExecutor =
ExecutionContext.Implicits$.MODULE$.global();
Expand All @@ -110,15 +108,12 @@ private void close() {
this.minimumAllocationSize = Math.min(conf.recvWrSize(), MIN_BLOCK_SIZE);
this.maxCacheSize = conf.maxBufferAllocationSize();
if (conf.useOdp(pd.getContext())) {
int access = IbvMr.IBV_ACCESS_LOCAL_WRITE | IbvMr.IBV_ACCESS_REMOTE_WRITE |
IbvMr.IBV_ACCESS_REMOTE_READ | IbvMr.IBV_ACCESS_ON_DEMAND;

SVCRegMr sMr = pd.regMr(0, -1, access).execute();
this.odpMr = sMr.getMr();
useOdp = true;
if (conf.collectOdpStats()) {
odpStats = new OdpStats(conf);
}
sMr.free();
} else {
useOdp = false;
}
}

Expand Down Expand Up @@ -217,9 +212,9 @@ private void cleanLRUStacks(long idleBuffersSize) {

IbvPd getPd() { return this.pd; }

IbvMr getOdpMr() { return this.odpMr; }
boolean useOdp() { return this.useOdp; }

void stop() throws IOException {
void stop() {
logger.info("Rdma buffers allocation statistics:");
for (Integer size : allocStackMap.keySet()) {
AllocatorStack allocatorStack = allocStackMap.remove(size);
Expand All @@ -230,11 +225,8 @@ void stop() throws IOException {
}
}

if (odpMr != null) {
odpMr.deregMr().execute().free();
if (odpStats != null) {
odpStats.printODPStatistics();
}
if (useOdp && odpStats != null) {
odpStats.printODPStatistics();
}
}
}
37 changes: 15 additions & 22 deletions src/main/java/org/apache/spark/shuffle/rdma/RdmaChannel.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class RdmaChannel {
private final ConcurrentHashMap<Integer, ConcurrentLinkedDeque<SVCPostSend>> svcPostSendCache =
new ConcurrentHashMap();

enum RdmaChannelType { RPC_REQUESTOR, RPC_RESPONDER, RDMA_READ_REQUESTOR, RDMA_READ_RESPONDER }
enum RdmaChannelType { RPC, RDMA_READ_REQUESTOR, RDMA_READ_RESPONDER }
private final RdmaChannelType rdmaChannelType;

private final RdmaCompletionListener receiveListener;
Expand Down Expand Up @@ -130,6 +130,7 @@ private class CompletionInfo {
// NOOP_RESERVED_INDEX is used for send operations that do not require a callback
private static final int NOOP_RESERVED_INDEX = 0;
private final AtomicInteger completionInfoIndex = new AtomicInteger(NOOP_RESERVED_INDEX);
private final RdmaShuffleConf conf;

RdmaChannel(
RdmaChannelType rdmaChannelType,
Expand All @@ -152,32 +153,20 @@ private class CompletionInfo {
this.receiveListener = receiveListener;
this.rdmaBufferManager = rdmaBufferManager;
this.cpuVector = cpuVector;
this.conf = conf;

switch (rdmaChannelType) {
case RPC_REQUESTOR:
// Requires full-size sends, and receives for credit reports only
case RPC:
// Single bidirectional QP between executors and driver.
if (conf.swFlowControl()) {
this.recvDepth = RECV_CREDIT_REPORT_RATIO;
this.remoteRecvCredits = new Semaphore(conf.recvQueueDepth(), false);
} else {
this.recvDepth = 0;
this.remoteRecvCredits = new Semaphore(
conf.recvQueueDepth() - RECV_CREDIT_REPORT_RATIO, false);
}
this.recvWrSize = 0;
this.sendDepth = conf.sendQueueDepth();
this.sendBudgetSemaphore = new Semaphore(sendDepth, false);
break;

case RPC_RESPONDER:
// Requires full-size receives and sends for credit reports only
this.recvDepth = conf.recvQueueDepth();
this.recvWrSize = conf.recvWrSize();
if (conf.swFlowControl()) {
this.sendDepth = RECV_CREDIT_REPORT_RATIO;
} else {
this.sendDepth = 0;
}
this.sendDepth = conf.sendQueueDepth();
this.sendBudgetSemaphore = new Semaphore(sendDepth - RECV_CREDIT_REPORT_RATIO, false);
break;

case RDMA_READ_REQUESTOR:
// Requires sends only, no need for any receives
this.recvDepth = 0;
Expand Down Expand Up @@ -322,6 +311,10 @@ void connect(InetSocketAddress socketAddress) throws IOException {
setRdmaChannelState(RdmaChannelState.CONNECTED);
}

InetSocketAddress getSourceSocketAddress() throws IOException {
return (InetSocketAddress)cmId.getSource();
}

void accept() throws IOException {
RdmaConnParam connParams = new RdmaConnParam();

Expand Down Expand Up @@ -778,7 +771,7 @@ private void exhaustCq() throws IOException {
}
}

if (sendDepth == RECV_CREDIT_REPORT_RATIO) {
if (conf.swFlowControl() && rdmaChannelType == RdmaChannelType.RPC) {
// Software-level flow control is enabled
localRecvCreditsPendingReport += reclaimedRecvWrs;
if (localRecvCreditsPendingReport > (recvDepth / RECV_CREDIT_REPORT_RATIO)) {
Expand Down Expand Up @@ -895,7 +888,7 @@ void stop() throws InterruptedException, IOException {
int ret = cmId.disconnect();
if (ret != 0) {
logger.error("disconnect failed with errno: " + ret);
} else if (rdmaChannelType.equals(RdmaChannelType.RPC_REQUESTOR) ||
} else if (rdmaChannelType.equals(RdmaChannelType.RPC) ||
rdmaChannelType.equals(RdmaChannelType.RDMA_READ_REQUESTOR)) {
try {
processRdmaCmEvent(RdmaCmEvent.EventType.RDMA_CM_EVENT_DISCONNECTED.ordinal(),
Expand Down
35 changes: 9 additions & 26 deletions src/main/java/org/apache/spark/shuffle/rdma/RdmaMappedFile.java
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ public class RdmaMappedFile {
private FileChannel fileChannel;

private final IbvPd ibvPd;
private IbvMr odpMr;

private final RdmaMapTaskOutput rdmaMapTaskOutput;
private final RdmaBufferManager rdmaBufferManager;

public RdmaMapTaskOutput getRdmaMapTaskOutput() { return rdmaMapTaskOutput; }

Expand Down Expand Up @@ -79,8 +79,7 @@ public RdmaMappedFile(File file, int chunkSize, long[] partitionLengths,
IllegalAccessException {
this.file = file;
this.ibvPd = rdmaBufferManager.getPd();
this.odpMr = rdmaBufferManager.getOdpMr();

this.rdmaBufferManager = rdmaBufferManager;
final RandomAccessFile backingFile = new RandomAccessFile(file, "rw");
this.fileChannel = backingFile.getChannel();

Expand Down Expand Up @@ -136,7 +135,7 @@ private void mapAndRegister(int chunkSize, long[] partitionLengths) throws IOExc
curPartition,
rdmaFileMapping.address + curLength - partitionLengths[curPartition],
(int)partitionLengths[curPartition],
(rdmaFileMapping.ibvMr != null) ? rdmaFileMapping.ibvMr.getLkey() : odpMr.getLkey());
rdmaFileMapping.ibvMr.getLkey());
curPartition++;
}
}
Expand All @@ -157,15 +156,15 @@ private void mapAndRegister(long fileOffset, long length) throws IOException,
}

IbvMr ibvMr = null;
if (odpMr == null) {
if (!rdmaBufferManager.useOdp()) {
SVCRegMr svcRegMr = ibvPd.regMr(address, (int)length, ACCESS).execute();
ibvMr = svcRegMr.getMr();
svcRegMr.free();
} else {
int ret = odpMr.expPrefetchMr(address, (int)length);
if (ret != 0) {
throw new IOException("expPrefetchMr failed with: " + ret);
}
SVCRegMr svcRegMr = ibvPd.regMr(address, (int)length,
ACCESS | IbvMr.IBV_ACCESS_ON_DEMAND).execute();
ibvMr = svcRegMr.getMr();
svcRegMr.free();
}

rdmaFileMappings.add(new RdmaFileMapping(ibvMr, address, mapAddress, length, alignedLength));
Expand Down Expand Up @@ -201,30 +200,14 @@ public void dispose() throws IOException, InvocationTargetException, IllegalAcce
}

private ByteBuffer getByteBuffer(long address, int length) throws IOException {
Class<?> classDirectByteBuffer;
try {
classDirectByteBuffer = Class.forName("java.nio.DirectByteBuffer");
} catch (ClassNotFoundException e) {
throw new IOException("java.nio.DirectByteBuffer class not found");
}
Constructor<?> constructor;
try {
constructor = classDirectByteBuffer.getDeclaredConstructor(long.class, int.class);
} catch (NoSuchMethodException e) {
throw new IOException("java.nio.DirectByteBuffer constructor not found");
}
constructor.setAccessible(true);
ByteBuffer byteBuffer;
try {
byteBuffer = (ByteBuffer)constructor.newInstance(address, length);
return (ByteBuffer)RdmaBuffer.directBufferConstructor.newInstance(address, length);
} catch (InvocationTargetException ex) {
throw new IOException("java.nio.DirectByteBuffer: " +
"InvocationTargetException: " + ex.getTargetException());
} catch (Exception e) {
throw new IOException("java.nio.DirectByteBuffer exception: " + e.toString());
}

return byteBuffer;
}

public ByteBuffer getByteBufferForPartition(int partitionId) throws IOException {
Expand Down
Loading

0 comments on commit df827b0

Please sign in to comment.