[docker] Fixes onnxruntime engine installation #2121

Merged 1 commit on Jun 28, 2024
9 changes: 0 additions & 9 deletions serving/build.gradle.kts
@@ -32,15 +32,6 @@ dependencies {
runtimeOnly("ai.djl.tensorrt:tensorrt")
runtimeOnly(project(":engines:python"))

if (hasGpu) {
runtimeOnly("ai.djl.onnxruntime:onnxruntime-engine") {
exclude(group = "com.microsoft.onnxruntime", module = "onnxruntime")
}
runtimeOnly(libs.onnxruntime.gpu)
} else {
runtimeOnly("ai.djl.onnxruntime:onnxruntime-engine")
}

testRuntimeOnly("org.bouncycastle:bcpkix-jdk18on:1.78")
testRuntimeOnly("org.bouncycastle:bcprov-jdk18on:1.78")
testRuntimeOnly(libs.snakeyaml)
2 changes: 2 additions & 0 deletions serving/docker/Dockerfile
@@ -67,6 +67,8 @@ RUN scripts/install_python.sh && \
scripts/install_djl_serving.sh $djl_version $torch_version && \
djl-serving -i ai.djl.pytorch:pytorch-native-cpu:$torch_version:linux-x86_64 && \
djl-serving -i ai.djl.tensorflow:tensorflow-native-cpu:2.16.1:linux-x86_64 && \
djl-serving -i ai.djl.onnxruntime:onnxruntime-engine:$djl_version && \
djl-serving -i com.microsoft.onnxruntime:onnxruntime:1.18.0 && \
scripts/patch_oss_dlc.sh python && \
echo "${djl_version} cpufull" > /opt/djl/bin/telemetry && \
rm -rf /opt/djl/logs && \
4 changes: 2 additions & 2 deletions serving/docker/lmi.Dockerfile
@@ -87,9 +87,9 @@ RUN mv *.deb djl-serving_all.deb || true

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ && \
scripts/install_djl_serving.sh $djl_version && \
rm -f /usr/local/djl-serving-*/lib/onnxruntime-1.*.jar && \
curl -o $(ls -d /usr/local/djl-serving-*/)lib/onnxruntime_gpu-$onnx_version.jar https://publish.djl.ai/onnxruntime/$onnx_version/onnxruntime_gpu-$onnx_version.jar && \
scripts/install_djl_serving.sh $djl_version ${torch_version} && \
djl-serving -i ai.djl.onnxruntime:onnxruntime-engine:$djl_version && \
djl-serving -i com.microsoft.onnxruntime:onnxruntime_gpu:$onnx_version && \
scripts/install_python.sh ${python_version} && \
scripts/install_s5cmd.sh x64 && \
mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \
4 changes: 2 additions & 2 deletions serving/docker/pytorch-gpu.Dockerfile
@@ -51,8 +51,8 @@ COPY scripts scripts/
RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh && \
scripts/install_djl_serving.sh $djl_version && \
scripts/install_djl_serving.sh $djl_version ${torch_version} && \
rm -f /usr/local/djl-serving-*/lib/onnxruntime-$onnx_version.jar && \
curl -o $(ls -d /usr/local/djl-serving-*/)lib/onnxruntime_gpu-$onnx_version.jar https://publish.djl.ai/onnxruntime/$onnx_version/onnxruntime_gpu-$onnx_version.jar && \
djl-serving -i ai.djl.onnxruntime:onnxruntime-engine:$djl_version && \
djl-serving -i com.microsoft.onnxruntime:onnxruntime_gpu:$onnx_version && \
scripts/install_python.sh ${python_version} && \
scripts/install_s5cmd.sh x64 && \
pip3 install numpy==${numpy_version} && pip3 install torch==${torch_version} torchvision==${torch_vision_version} --extra-index-url https://download.pytorch.org/whl/cu121 && \
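The two GPU Dockerfiles above previously deleted the bundled CPU onnxruntime jar and pulled the GPU jar from publish.djl.ai with curl; after this change they run djl-serving -i and let the dependency manager (diffed below) resolve the jar instead. As a hedged sketch of what that resolution amounts to for the GPU build, using the same URL pattern as the removed curl lines (the class and method names are illustrative assumptions, not part of the PR):

class OrtGpuUrlSketch {
    // Hypothetical helper for illustration only; the real logic lives in the
    // dependency manager's installDependency method shown in the diff below.
    static String onnxruntimeGpuJarUrl(String onnxVersion) {
        String jarName = "onnxruntime_gpu-" + onnxVersion + ".jar";
        // Same location the removed curl commands downloaded from
        return "https://publish.djl.ai/onnxruntime/" + onnxVersion + "/" + jarName;
    }

    public static void main(String[] args) {
        System.out.println(onnxruntimeGpuJarUrl("1.18.0"));
    }
}

For example, with version 1.18.0 (the version the CPU Dockerfile above installs), this yields https://publish.djl.ai/onnxruntime/1.18.0/onnxruntime_gpu-1.18.0.jar.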
@@ -19,6 +19,7 @@
import ai.djl.serving.util.ConfigManager;
import ai.djl.serving.util.MutableClassLoader;
import ai.djl.util.Utils;
import ai.djl.util.cuda.CudaUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -75,6 +76,15 @@ public void installEngine(String engineName) throws IOException {
installDependency("ai.djl.mxnet:mxnet-engine:" + djlVersion);
installDependency("ai.djl.mxnet:mxnet-model-zoo:" + djlVersion);
break;
case "OnnxRuntime":
installDependency("ai.djl.onnxruntime:onnxruntime-engine:" + djlVersion);
String ortVersion = getOrtVersion(djlVersion);
if (CudaUtils.hasCuda()) {
installDependency("com.microsoft.onnxruntime:onnxruntime_gpu:" + ortVersion);
} else {
installDependency("com.microsoft.onnxruntime:onnxruntime:" + ortVersion);
}
break;
case "XGBoost":
installDependency("ai.djl.ml.xgboost:xgboost:" + djlVersion);
// TODO: Avoid hard code version
@@ -120,7 +130,10 @@ public synchronized void installDependency(String dependency) throws IOException
logger.info("Found existing dependency: {}", name);
} else {
String link;
if (version.endsWith("-SNAPSHOT")) {
if ("onnxruntime_gpu".equals(artifactId)) {
// TODO: Remove this hack when OnnxRuntime support cudnn9
link = "https://publish.djl.ai/onnxruntime/" + version + '/' + name;
} else if (version.endsWith("-SNAPSHOT")) {
link = getSnapshotUrl(groupId, artifactId, version) + ".jar";
} else {
String maven = "https://search.maven.org/remotecontent?filepath=";
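Taken together, the Java diff above makes the dependency manager install the OnnxRuntime engine on demand and choose between the CPU and GPU native jars at runtime via CudaUtils.hasCuda(). A minimal, self-contained sketch of that selection, assuming a hard-coded 1.18.0 version (the PR resolves it through getOrtVersion(djlVersion)) and an illustrative class name:

import ai.djl.util.cuda.CudaUtils;

/** Illustrative sketch of the CPU/GPU ONNX Runtime artifact selection added by this PR. */
public final class OrtArtifactSelectionSketch {

    /** Returns the Maven coordinate the dependency manager would install. */
    static String selectOrtArtifact(String ortVersion) {
        if (CudaUtils.hasCuda()) {
            // GPU build; downloaded from publish.djl.ai until OnnxRuntime supports cudnn9
            return "com.microsoft.onnxruntime:onnxruntime_gpu:" + ortVersion;
        }
        // CPU build; fetched from Maven Central
        return "com.microsoft.onnxruntime:onnxruntime:" + ortVersion;
    }

    public static void main(String[] args) {
        System.out.println(selectOrtArtifact("1.18.0"));
    }
}

On a CUDA machine this prints the onnxruntime_gpu coordinate, which is what the GPU Dockerfiles now install with djl-serving -i; on CPU-only hosts it falls back to the plain onnxruntime artifact, as the cpu-full Dockerfile does.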