From 523c243a37139831a6289804dc415f1a104501d9 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 6 Oct 2021 01:09:20 +0000
Subject: [PATCH 01/17] added bert batch test handler

---
 test/pytest/test_handler.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 0a25f5e0a5..9f1b1ac831 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -224,7 +224,25 @@ def test_kfserving_mnist_model_register_and_inference_on_valid_model_explain():
     assert np.array(json.loads(response.content)['explanations']).shape == (1, 1, 28, 28)
     test_utils.unregister_model("mnist")
 
+
+def test_huggingface_bert_batch_inference():
+    batch_size = 4
+    batch_delay = 10000 # 10 seconds
+    test_utils.register_model_with_params(model_name='BERTSeqClassification', url='https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar', batch_size=batch_size, batch_delay=batch_delay)
+    input_text = "../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
+    files = {
+        'data': (input_text,
+                 open(input_text, 'rb')),
+    }
+    for _ in range(batch_size):
+        response = run_inference_using_url_with_data(TF_INFERENCE_API + '/v1/models/BERTSeqClassification:predict', pfiles=files)
+
+    response = response.content.decode("utf-8")
+    response = ast.literal_eval(response)
+    # custom handler returns number of responses not the actual responses
+    assert int(response[0]) == batch_size
+    test_utils.unregister_model('BERTSeqClassification')
 
 def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():

From 3068a495ec585f22270e5c05b16d3d33dbf546a2 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 6 Oct 2021 04:03:57 +0000
Subject: [PATCH 02/17] updated param dict

---
 test/pytest/test_handler.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 9f1b1ac831..fae29f82a9 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -228,7 +228,15 @@ def test_kfserving_mnist_model_register_and_inference_on_valid_model_explain():
 def test_huggingface_bert_batch_inference():
     batch_size = 4
     batch_delay = 10000 # 10 seconds
-    test_utils.register_model_with_params(model_name='BERTSeqClassification', url='https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar', batch_size=batch_size, batch_delay=batch_delay)
+    params = (
+        ('model_name', 'BERTSeqClassification'),
+        ('url', 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'),
+        ('initial_workers', '1'),
+        ('synchronous', 'true'),
+        ('batch_size', '4'),
+        ('batch_delay', '10000')
+    )
+    test_utils.register_model_with_params(params)
     input_text = "../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
     files = {

From a4932b69a23922cdf358f25cf4650bae7c1e55b5 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 6 Oct 2021 19:47:31 +0000
Subject: [PATCH 03/17] added torchserve start setup

---
 test/pytest/test_handler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index fae29f82a9..e032299db6 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -236,6 +236,7 @@ def test_huggingface_bert_batch_inference():
         ('batch_size', '4'),
         ('batch_delay', '10000')
     )
+    test_utils.start_torchserve(no_config_snapshots=True)
     test_utils.register_model_with_params(params)
     input_text = "../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
     files = {
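Patches 1-3 drive registration through `test_utils.register_model_with_params`, whose body is not shown in this series; presumably it forwards the tuple as query parameters to TorchServe's management API. A minimal sketch of the equivalent raw call, assuming a local TorchServe on the default management port 8081 (note that the query parameter TorchServe documents is `max_batch_delay`; the `batch_delay` spelling used above gets corrected later in the series):

```python
# Sketch of the registration request the params tuple presumably turns into.
# TorchServe's management API accepts registration options as query parameters.
import requests

params = {
    "url": "https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar",
    "model_name": "BERTSeqClassification",
    "initial_workers": "1",
    "synchronous": "true",
    "batch_size": "4",
    "max_batch_delay": "10000",  # milliseconds the frontend waits to fill a batch
}
response = requests.post("http://localhost:8081/models", params=params)
print(response.status_code, response.text)
```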
"../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt" files = { From 1e5b901c5d3bca7cced3d1bd104c4a570a6689a2 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 6 Oct 2021 20:59:14 +0000 Subject: [PATCH 04/17] simplified content decoding --- test/pytest/test_handler.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py index e032299db6..f3fccb41ac 100644 --- a/test/pytest/test_handler.py +++ b/test/pytest/test_handler.py @@ -227,18 +227,17 @@ def test_kfserving_mnist_model_register_and_inference_on_valid_model_explain(): def test_huggingface_bert_batch_inference(): batch_size = 4 - batch_delay = 10000 # 10 seconds + batch_delay = 20000 # 20 seconds params = ( ('model_name', 'BERTSeqClassification'), ('url', 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'), ('initial_workers', '1'), - ('synchronous', 'true'), - ('batch_size', '4'), - ('batch_delay', '10000') + ('batch_size', str(batch_size)), + ('batch_delay', str(batch_delay)) ) test_utils.start_torchserve(no_config_snapshots=True) test_utils.register_model_with_params(params) - input_text = "../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt" + input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt') files = { 'data': (input_text, open(input_text, 'rb')), @@ -247,10 +246,10 @@ def test_huggingface_bert_batch_inference(): for _ in range(batch_size): response = run_inference_using_url_with_data(TF_INFERENCE_API + '/v1/models/BERTSeqClassification:predict', pfiles=files) - response = response.content.decode("utf-8") - response = ast.literal_eval(response) + response = response.content + # response = ast.literal_eval(response) # custom handler returns number of responses not the actual responses - assert int(response[0]) == batch_size + assert int(response) == batch_size test_utils.unregister_model('BERTSeqClassification') def test_MMF_activity_recognition_model_register_and_inference_on_valid_model(): From 7e751c5ef7bc6f6c427222272dc722a96cde7b71 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 7 Oct 2021 00:35:01 +0000 Subject: [PATCH 05/17] added concurrency batch aggregator fix --- .../pytorch/serve/wlm/BatchAggregator.java | 33 ++++++++++--------- test/pytest/test_handler.py | 24 ++++++-------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java index 8c95383d96..5a02283126 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java +++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java @@ -63,12 +63,13 @@ public void sendResponse(ModelWorkerResponse message) { // this is from initial load. 
From 7e751c5ef7bc6f6c427222272dc722a96cde7b71 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 7 Oct 2021 00:35:01 +0000
Subject: [PATCH 05/17] added concurrency batch aggregator fix

---
 .../pytorch/serve/wlm/BatchAggregator.java    | 33 ++++++++++---------
 test/pytest/test_handler.py                   | 24 ++++++--------
 2 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
index 8c95383d96..5a02283126 100644
--- a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
+++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
@@ -63,12 +63,13 @@ public void sendResponse(ModelWorkerResponse message) {
                 // this is from initial load.
                 return;
             }
-
             for (Predictions prediction : message.getPredictions()) {
                 String jobId = prediction.getRequestId();
-                Job job = jobs.remove(jobId);
+                Job job = jobs.get(jobId);
+
                 if (job == null) {
-                    throw new IllegalStateException("Unexpected job: " + jobId);
+                    throw new IllegalStateException(
+                            "Unexpected job in sendResponse() with 200 status code: " + jobId);
                 }
                 job.response(
                         prediction.getResp(),
@@ -77,18 +78,19 @@ public void sendResponse(ModelWorkerResponse message) {
                         prediction.getReasonPhrase(),
                         prediction.getHeaders());
             }
+
         } else {
-            for (String reqId : jobs.keySet()) {
-                Job j = jobs.remove(reqId);
-                if (j == null) {
-                    throw new IllegalStateException("Unexpected job: " + reqId);
+            for (Map.Entry<String, Job> j : jobs.entrySet()) {
+
+                if (j.getValue() == null) {
+                    throw new IllegalStateException(
+                            "Unexpected job in sendResponse() with non 200 status code: "
+                                    + j.getKey());
                 }
-                j.sendError(message.getCode(), message.getMessage());
-            }
-            if (!jobs.isEmpty()) {
-                throw new IllegalStateException("Not all jobs get response.");
+                j.getValue().sendError(message.getCode(), message.getMessage());
             }
         }
+        jobs.clear();
     }
 
     public void sendError(BaseModelRequest message, String error, int status) {
@@ -103,20 +105,20 @@ public void sendError(BaseModelRequest message, String error, int status) {
                 String requestId = req.getRequestId();
                 Job job = jobs.remove(requestId);
                 if (job == null) {
-                    logger.error("Unexpected job: " + requestId);
+                    logger.error("Unexpected job in sendError(): " + requestId);
                 } else {
                     job.sendError(status, error);
                 }
             }
             if (!jobs.isEmpty()) {
                 jobs.clear();
-                logger.error("Not all jobs get response.");
+                logger.error("Not all jobs got an error response.");
             }
         } else {
            // Send the error message to all the jobs
            for (Map.Entry<String, Job> j : jobs.entrySet()) {
                String jobsId = j.getValue().getJobId();
-               Job job = jobs.remove(jobsId);
+               Job job = jobs.get(jobsId);
 
                if (job.isControlCmd()) {
                    job.sendError(status, error);
@@ -127,5 +129,6 @@ public void sendError(BaseModelRequest message, String error, int status) {
                }
            }
         }
+        jobs.clear();
     }
-}
+}
\ No newline at end of file
diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index f3fccb41ac..37554e6359 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -2,6 +2,8 @@
 import requests
 import json
 import test_utils
+import asyncio
+import multiprocessing
 import numpy as np
 import ast
 REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")
@@ -226,8 +228,8 @@ def test_kfserving_mnist_model_register_and_inference_on_valid_model_explain():
     test_utils.unregister_model("mnist")
 
 def test_huggingface_bert_batch_inference():
-    batch_size = 4
-    batch_delay = 20000 # 20 seconds
+    batch_size = 2
+    batch_delay = 10000 # 10 seconds
     params = (
         ('model_name', 'BERTSeqClassification'),
         ('url', 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'),
@@ -238,18 +240,12 @@ def test_huggingface_bert_batch_inference():
     test_utils.start_torchserve(no_config_snapshots=True)
     test_utils.register_model_with_params(params)
     input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt')
-    files = {
-        'data': (input_text,
-                 open(input_text, 'rb')),
-    }
-
-    for _ in range(batch_size):
-        response = run_inference_using_url_with_data(TF_INFERENCE_API + '/v1/models/BERTSeqClassification:predict', pfiles=files)
-
-    response = response.content
-    # response = ast.literal_eval(response)
-    # custom handler returns number of responses not the actual responses
-    assert int(response) == batch_size
+
+    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
+
+    # handler responds with number of inferences
+    response = response.read()
+    assert response == batch_size
     test_utils.unregister_model('BERTSeqClassification')
 
 def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():
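The BatchAggregator change swaps `jobs.remove(...)` inside each loop for `jobs.get(...)` and defers cleanup to a single `jobs.clear()` after the loop, so the map is never mutated while it is being walked. The failure mode being avoided is the usual modify-while-iterating one; a Python analogue of both patterns (an analogy only, not TorchServe code):

```python
# Analogy, not TorchServe code: mutating a mapping while iterating over it
# is the hazard the BatchAggregator fix sidesteps.
jobs = {"req-1": "job-1", "req-2": "job-2"}

# Broken pattern: removing entries during iteration raises RuntimeError in
# Python (and risks ConcurrentModificationException or skipped entries in Java).
try:
    for req_id in jobs.keys():
        jobs.pop(req_id)
except RuntimeError as e:
    print("mutation during iteration:", e)

# Safe pattern mirroring the fix: visit every entry first, then clear once.
jobs = {"req-1": "job-1", "req-2": "job-2"}
for req_id, job in jobs.items():
    print("responding to", req_id, job)
jobs.clear()
```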
From 2664f6c57a3d825493c5c9694e6ca83aecec5ef7 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 7 Oct 2021 00:58:30 +0000
Subject: [PATCH 06/17] removed asyncio and mp

---
 test/pytest/test_handler.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 37554e6359..3019be8cff 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -2,8 +2,6 @@
 import requests
 import json
 import test_utils
-import asyncio
-import multiprocessing
 import numpy as np
 import ast
 REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")
@@ -240,12 +238,13 @@ def test_huggingface_bert_batch_inference():
     test_utils.start_torchserve(no_config_snapshots=True)
     test_utils.register_model_with_params(params)
     input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt')
-
+
+    # Make 2 curl requests in parallel with &
     response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
-
-    # handler responds with number of inferences
     response = response.read()
-    assert response == batch_size
+
+    ## Assert that 2 responses are returned from the same batch
+    assert response == 'Not AcceptedNot Accepted'
     test_utils.unregister_model('BERTSeqClassification')
 
 def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():

From cd0aa8e371b234d3a9e424db709329a8c643b05b Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 7 Oct 2021 01:08:15 +0000
Subject: [PATCH 07/17] formatted java

---
 .../src/main/java/org/pytorch/serve/wlm/BatchAggregator.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
index 5a02283126..9aa246bc8a 100644
--- a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
+++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
@@ -131,4 +131,4 @@ public void sendError(BaseModelRequest message, String error, int status) {
         }
         jobs.clear();
     }
-}
\ No newline at end of file
+}
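The new assertion compares the concatenated stdout of the two backgrounded curls against `'Not AcceptedNot Accepted'`. That only works because both responses are identical; if the model returned distinct labels, the interleaving order would be nondeterministic. A possible order-insensitive tightening (hypothetical, not part of the series):

```python
# Hypothetical variant: count occurrences instead of relying on the order in
# which the two curl processes flush their output.
assert response.count("Not Accepted") == 2
```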
From 6529750b4c74fbc036c8de8677f3739917e08b57 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Wed, 13 Oct 2021 10:48:41 -0700
Subject: [PATCH 08/17] changed client id for second curl request

---
 test/pytest/test_handler.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 3019be8cff..58bd1c6f2a 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -240,9 +240,10 @@ def test_huggingface_bert_batch_inference():
     input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt')
 
     # Make 2 curl requests in parallel with &
-    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
+    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl --header \"X-Forwarded-For: 1.2.3.4\" http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
     response = response.read()
 
+
     ## Assert that 2 responses are returned from the same batch
     assert response == 'Not AcceptedNot Accepted'
     test_utils.unregister_model('BERTSeqClassification')

From 971f43e0c83d6af52ad6cba6924f1a2f0b6e5621 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Wed, 13 Oct 2021 10:52:49 -0700
Subject: [PATCH 09/17] test_handler

---
 test/pytest/test_handler.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 58bd1c6f2a..5b5e1efc07 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -240,7 +240,8 @@ def test_huggingface_bert_batch_inference():
     input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt')
 
     # Make 2 curl requests in parallel with &
-    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl --header \"X-Forwarded-For: 1.2.3.4\" http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
+    # curl --header \"X-Forwarded-For: 1.2.3.4\" won't work since you can't access localhost anymore
+    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
     response = response.read()

From 66fe2eca1770eab3e7e6be9be9ca8db0c662ece0 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Thu, 14 Oct 2021 11:16:57 -0700
Subject: [PATCH 10/17] updated to torchserve bucket url

---
 test/pytest/test_handler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 5b5e1efc07..9e9526daf9 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -230,7 +230,7 @@ def test_huggingface_bert_batch_inference():
     batch_delay = 10000 # 10 seconds
     params = (
         ('model_name', 'BERTSeqClassification'),
-        ('url', 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'),
+        ('url', 'https://torchserve.pytorch.org/mar_files/BERTSeqClassification.mar'),
         ('initial_workers', '1'),
         ('batch_size', str(batch_size)),
         ('batch_delay', str(batch_delay))
@@ -268,4 +268,4 @@ def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():
     response = ast.literal_eval(response)
     response = [n.strip() for n in response]
     assert response == ['Sitting at a table','Someone is sneezing','Watching a laptop or something on a laptop']
-    test_utils.unregister_model("MMF_activity_recognition_v2")
\ No newline at end of file
+    test_utils.unregister_model("MMF_activity_recognition_v2")
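With the X-Forwarded-For trick ruled out, the same two-requests-in-one-batch-window pattern can be had without shelling out to curl at all. A sketch using `requests` plus a thread pool; the endpoint and expected body mirror the test, while `predict` and the `input_text` value here are illustrative names rather than anything from the patches:

```python
# Sketch: issue both requests from worker threads so they are in flight
# together and land in the same TorchServe batch window.
from concurrent.futures import ThreadPoolExecutor
import requests

URL = "http://127.0.0.1:8080/predictions/BERTSeqClassification"
input_text = "examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"

def predict(path):
    with open(path, "rb") as f:
        # streaming the file as the request body, the equivalent of curl -T <file>
        return requests.post(URL, data=f).text

with ThreadPoolExecutor(max_workers=2) as pool:
    responses = list(pool.map(predict, [input_text, input_text]))

assert responses == ["Not Accepted", "Not Accepted"]
```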
From 554d626ca076e24f3b7ab2bad52c31936a64e1e5 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Fri, 15 Oct 2021 14:07:20 -0700
Subject: [PATCH 11/17] addressed li feedback

---
 .../org/pytorch/serve/ModelServerTest.java    | 52 ++++++++++++++++++-
 .../java/org/pytorch/serve/TestUtils.java     | 24 +++++++++
 test/pytest/test_handler.py                   |  2 +-
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 71a4c9abfd..60670bb465 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1065,9 +1065,59 @@ public void testPredictionMemoryError() throws InterruptedException {
         Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.OK);
     }
 
-    @Test(
+@Test(
             alwaysRun = true,
             dependsOnMethods = {"testPredictionMemoryError"})
+    public void testSuccessBatch() throws InterruptedException {
+        int batch_size = 4;
+        int max_batch_delay = 10000;
+        Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
+        Assert.assertNotNull(channel);
+
+        TestUtils.setHttpStatus(null);
+        TestUtils.setResult(null);
+        TestUtils.setLatch(new CountDownLatch(1));
+
+        TestUtils.registerModel(channel, "noop.mar", "noop", true, false, batch_size, max_batch_delay);
+        TestUtils.getLatch().await();
+
+        StatusResponse status =
+                JsonUtils.GSON.fromJson(TestUtils.getResult(), StatusResponse.class);
+        Assert.assertEquals(
+                status.getStatus(),
+                "Model \"noop\" Version: 1.0 registered with 1 initial workers");
+
+        channel.close().sync();
+
+        channel = TestUtils.connect(ConnectorType.INFERENCE_CONNECTOR, configManager);
+        Assert.assertNotNull(channel);
+
+        TestUtils.setResult(null);
+        TestUtils.setLatch(new CountDownLatch(1));
+        TestUtils.setHttpStatus(null);
+
+        for(int i = 0; i < batch_size ; i ++) {
+            DefaultFullHttpRequest req =
+                    new DefaultFullHttpRequest(
+                            HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");
+            // req.content().writeCharSequence("data=invalid_output", CharsetUtil.UTF_8);
+            HttpUtil.setContentLength(req, req.content().readableBytes());
+            req.headers()
+                    .set(
+                            HttpHeaderNames.CONTENT_TYPE,
+                            HttpHeaderValues.APPLICATION_X_WWW_FORM_URLENCODED);
+            channel.writeAndFlush(req);
+
+            TestUtils.getLatch().await();
+            Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.ACCEPTED);
+        }
+        channel.close().sync();
+    }
+
+
+    @Test(
+            alwaysRun = true,
+            dependsOnMethods = {"testSuccessBatch"})
     public void testErrorBatch() throws InterruptedException {
         Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
         Assert.assertNotNull(channel);
diff --git a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
index 6b5900817d..4965df4608 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
@@ -174,6 +174,30 @@ public static void registerModel(
         }
     }
 
+    public static void registerModel(
+            Channel channel,
+            String url,
+            String modelName,
+            boolean withInitialWorkers,
+            boolean syncChannel,
+            int batchSize,
+            int maxBatchDelay)
+            throws InterruptedException {
+        String requestURL = "/models?url=" + url + "&model_name=" + modelName + "&runtime=python" + "&batch_size=" + batchSize + "&max_batch_delay=" + maxBatchDelay;
+        if (withInitialWorkers) {
+            requestURL += "&initial_workers=1&synchronous=true";
+        }
+
+        HttpRequest req =
+                new DefaultFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.POST, requestURL);
+        if (syncChannel) {
+            channel.writeAndFlush(req).sync();
+            channel.closeFuture().sync();
+        } else {
+            channel.writeAndFlush(req);
+        }
+    }
+
     public static void registerWorkflow(
             Channel channel, String url, String workflowName, boolean syncChannel)
             throws InterruptedException {
diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 9e9526daf9..ad3bd3b6a3 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -233,7 +233,7 @@ def test_huggingface_bert_batch_inference():
         ('url', 'https://torchserve.pytorch.org/mar_files/BERTSeqClassification.mar'),
         ('initial_workers', '1'),
         ('batch_size', str(batch_size)),
-        ('batch_delay', str(batch_delay))
+        ('max_batch_delay', str(batch_delay))
     )
     test_utils.start_torchserve(no_config_snapshots=True)
     test_utils.register_model_with_params(params)
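The Java overload and the corrected pytest params now agree on semantics: `batch_size` caps how many queued requests the frontend hands a worker per inference call, and `max_batch_delay` is how long it waits for the batch to fill before dispatching a partial one. One observable consequence, sketched against the noop model registered by testSuccessBatch (a sketch, assuming a local TorchServe on the default inference port):

```python
# Sketch: with batch_size=4 and max_batch_delay=10000, a lone request sits in
# the queue until the 10 s window expires, so its end-to-end latency
# approaches max_batch_delay when nothing else arrives to fill the batch.
import time
import requests

start = time.time()
resp = requests.post("http://127.0.0.1:8080/predictions/noop", data=b"hello")
print(resp.status_code, f"{time.time() - start:.1f}s")  # expect roughly 10 s
```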
From 1c7ab03e546aefca3e11e4663ac3bb3b95690855 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Fri, 15 Oct 2021 21:30:36 +0000
Subject: [PATCH 12/17] gradlew formatJava

---
 .../org/pytorch/serve/ModelServerTest.java    | 34 +++++++++----------
 .../java/org/pytorch/serve/TestUtils.java     | 11 +++++-
 2 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 60670bb465..4e53d55402 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1065,7 +1065,7 @@ public void testPredictionMemoryError() throws InterruptedException {
         Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.OK);
     }
 
-@Test(
+    @Test(
             alwaysRun = true,
             dependsOnMethods = {"testPredictionMemoryError"})
     public void testSuccessBatch() throws InterruptedException {
@@ -1078,7 +1078,8 @@ public void testSuccessBatch() throws InterruptedException {
         TestUtils.setResult(null);
         TestUtils.setLatch(new CountDownLatch(1));
 
-        TestUtils.registerModel(channel, "noop.mar", "noop", true, false, batch_size, max_batch_delay);
+        TestUtils.registerModel(
+                channel, "noop.mar", "noop", true, false, batch_size, max_batch_delay);
         TestUtils.getLatch().await();
 
         StatusResponse status =
@@ -1095,26 +1096,25 @@ public void testSuccessBatch() throws InterruptedException {
         TestUtils.setResult(null);
         TestUtils.setLatch(new CountDownLatch(1));
         TestUtils.setHttpStatus(null);
-
-        for(int i = 0; i < batch_size ; i ++) {
-            DefaultFullHttpRequest req =
-                    new DefaultFullHttpRequest(
-                            HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");
-            // req.content().writeCharSequence("data=invalid_output", CharsetUtil.UTF_8);
-            HttpUtil.setContentLength(req, req.content().readableBytes());
-            req.headers()
-                    .set(
-                            HttpHeaderNames.CONTENT_TYPE,
-                            HttpHeaderValues.APPLICATION_X_WWW_FORM_URLENCODED);
-            channel.writeAndFlush(req);
 
-            TestUtils.getLatch().await();
-            Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.ACCEPTED);
+        for (int i = 0; i < batch_size; i++) {
+            DefaultFullHttpRequest req =
+                    new DefaultFullHttpRequest(
+                            HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");
+            // req.content().writeCharSequence("data=invalid_output", CharsetUtil.UTF_8);
+            HttpUtil.setContentLength(req, req.content().readableBytes());
+            req.headers()
+                    .set(
+                            HttpHeaderNames.CONTENT_TYPE,
+                            HttpHeaderValues.APPLICATION_X_WWW_FORM_URLENCODED);
+            channel.writeAndFlush(req);
+
+            TestUtils.getLatch().await();
+            Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.ACCEPTED);
         }
         channel.close().sync();
     }
 
-
     @Test(
             alwaysRun = true,
             dependsOnMethods = {"testSuccessBatch"})
diff --git a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
index 4965df4608..d0b2256e71 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
@@ -183,7 +183,16 @@ public static void registerModel(
             int batchSize,
             int maxBatchDelay)
             throws InterruptedException {
-        String requestURL = "/models?url=" + url + "&model_name=" + modelName + "&runtime=python" + "&batch_size=" + batchSize + "&max_batch_delay=" + maxBatchDelay;
+        String requestURL =
+                "/models?url="
+                        + url
+                        + "&model_name="
+                        + modelName
+                        + "&runtime=python"
+                        + "&batch_size="
+                        + batchSize
+                        + "&max_batch_delay="
+                        + maxBatchDelay;
         if (withInitialWorkers) {
             requestURL += "&initial_workers=1&synchronous=true";
         }
From f713d49d9e5ccf61e6be2ea099f2cbe3f9643dff Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Fri, 15 Oct 2021 14:41:00 -0700
Subject: [PATCH 13/17] fixed casing

---
 .../src/test/java/org/pytorch/serve/ModelServerTest.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 4e53d55402..1ec1c98cd8 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1069,8 +1069,8 @@ public void testPredictionMemoryError() throws InterruptedException {
             alwaysRun = true,
             dependsOnMethods = {"testPredictionMemoryError"})
     public void testSuccessBatch() throws InterruptedException {
-        int batch_size = 4;
-        int max_batch_delay = 10000;
+        int batchSize = 4;
+        int maxBatchDelay = 10000;
         Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
         Assert.assertNotNull(channel);
 
@@ -1079,7 +1079,7 @@ public void testSuccessBatch() throws InterruptedException {
         TestUtils.setLatch(new CountDownLatch(1));
 
         TestUtils.registerModel(
-                channel, "noop.mar", "noop", true, false, batch_size, max_batch_delay);
+                channel, "noop.mar", "noop", true, false, batchSize, maxBatchDelay);
         TestUtils.getLatch().await();
 
         StatusResponse status =

From cfae3930f85419e278617d339aee43b5fb9f217f Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 18 Oct 2021 18:43:30 +0000
Subject: [PATCH 14/17] trigger-build

From e79d9a21dd15cee4edb50de4185c2ca2205fb7d5 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 18 Oct 2021 19:23:07 +0000
Subject: [PATCH 15/17] fixed syntax error

---
 .../server/src/test/java/org/pytorch/serve/ModelServerTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 1ec1c98cd8..75e2274849 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1097,7 +1097,7 @@ public void testSuccessBatch() throws InterruptedException {
         TestUtils.setLatch(new CountDownLatch(1));
         TestUtils.setHttpStatus(null);
 
-        for (int i = 0; i < batch_size; i++) {
+        for (int i = 0; i < batchSize; i++) {
             DefaultFullHttpRequest req =
                     new DefaultFullHttpRequest(
                             HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");

From c78bddb2f28e6325d03d7c90ec75a50017d61a0a Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 18 Oct 2021 22:34:07 +0000
Subject: [PATCH 16/17] removed batch inference test but kept registration in java

---
 .../org/pytorch/serve/ModelServerTest.java | 24 -------------------
 1 file changed, 24 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 75e2274849..e272e5a5d2 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1089,30 +1089,6 @@ public void testSuccessBatch() throws InterruptedException {
                 "Model \"noop\" Version: 1.0 registered with 1 initial workers");
 
         channel.close().sync();
-
-        channel = TestUtils.connect(ConnectorType.INFERENCE_CONNECTOR, configManager);
-        Assert.assertNotNull(channel);
-
-        TestUtils.setResult(null);
-        TestUtils.setLatch(new CountDownLatch(1));
-        TestUtils.setHttpStatus(null);
-
-        for (int i = 0; i < batchSize; i++) {
-            DefaultFullHttpRequest req =
-                    new DefaultFullHttpRequest(
-                            HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");
-            // req.content().writeCharSequence("data=invalid_output", CharsetUtil.UTF_8);
-            HttpUtil.setContentLength(req, req.content().readableBytes());
-            req.headers()
-                    .set(
-                            HttpHeaderNames.CONTENT_TYPE,
-                            HttpHeaderValues.APPLICATION_X_WWW_FORM_URLENCODED);
-            channel.writeAndFlush(req);
-
-            TestUtils.getLatch().await();
-            Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.ACCEPTED);
-        }
-        channel.close().sync();
     }
 
     @Test(
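The loop removed here awaited a single `CountDownLatch(1)` once per request, and in Java a latch that has hit zero stays open, so every await after the first returns immediately; with responses arriving together as one batch, the per-iteration status assertions were racy, which is one plausible reason the batched-inference half moved to pytest. The coordination the loop was reaching for is a counting latch; a Python analogue (an analogy only, not repo code):

```python
# Analogy, not repo code: wait for all N batched responses with one latch
# counted down N times, rather than re-awaiting a spent single-count latch.
import threading

class CountingLatch:
    def __init__(self, count):
        self.count = count
        self.cond = threading.Condition()

    def count_down(self):
        with self.cond:
            self.count -= 1
            if self.count <= 0:
                self.cond.notify_all()

    def wait(self):
        with self.cond:
            while self.count > 0:
                self.cond.wait()

latch = CountingLatch(4)  # one count per request in the batch
# each response callback would call latch.count_down(); the test thread then
# calls latch.wait() exactly once after sending all four requests.
```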
From fff83ab19878c2279b375f2f7f634e06849d7dfa Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 18 Oct 2021 22:34:42 +0000
Subject: [PATCH 17/17] removed java test

---
 .../org/pytorch/serve/ModelServerTest.java | 26 ------------------
 1 file changed, 26 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index e272e5a5d2..71a4c9abfd 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1068,32 +1068,6 @@ public void testPredictionMemoryError() throws InterruptedException {
     @Test(
             alwaysRun = true,
             dependsOnMethods = {"testPredictionMemoryError"})
-    public void testSuccessBatch() throws InterruptedException {
-        int batchSize = 4;
-        int maxBatchDelay = 10000;
-        Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
-        Assert.assertNotNull(channel);
-
-        TestUtils.setHttpStatus(null);
-        TestUtils.setResult(null);
-        TestUtils.setLatch(new CountDownLatch(1));
-
-        TestUtils.registerModel(
-                channel, "noop.mar", "noop", true, false, batchSize, maxBatchDelay);
-        TestUtils.getLatch().await();
-
-        StatusResponse status =
-                JsonUtils.GSON.fromJson(TestUtils.getResult(), StatusResponse.class);
-        Assert.assertEquals(
-                status.getStatus(),
-                "Model \"noop\" Version: 1.0 registered with 1 initial workers");
-
-        channel.close().sync();
-    }
-
-    @Test(
-            alwaysRun = true,
-            dependsOnMethods = {"testSuccessBatch"})
     public void testErrorBatch() throws InterruptedException {
         Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
         Assert.assertNotNull(channel);