From 523c243a37139831a6289804dc415f1a104501d9 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 6 Oct 2021 01:09:20 +0000
Subject: [PATCH 01/17] added bert batch test handler

---
 test/pytest/test_handler.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 0a25f5e0a5..9f1b1ac831 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -224,7 +224,25 @@ def test_kfserving_mnist_model_register_and_inference_on_valid_model_explain():
     assert np.array(json.loads(response.content)['explanations']).shape == (1, 1, 28, 28)
     test_utils.unregister_model("mnist")
 
+
+def test_huggingface_bert_batch_inference():
+    batch_size = 4
+    batch_delay = 10000 # 10 seconds
+    test_utils.register_model_with_params(model_name='BERTSeqClassification', url='https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar', batch_size=batch_size, batch_delay=batch_delay)
+    input_text = "../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
+    files = {
+        'data': (input_text,
+                 open(input_text, 'rb')),
+    }
+    for _ in range(batch_size):
+        response = run_inference_using_url_with_data(TF_INFERENCE_API + '/v1/models/BERTSeqClassification:predict', pfiles=files)
+
+    response = response.content.decode("utf-8")
+    response = ast.literal_eval(response)
+    # custom handler returns number of responses not the actual responses
+    assert int(response[0]) == batch_size
+    test_utils.unregister_model('BERTSeqClassification')
 
 def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():

From 3068a495ec585f22270e5c05b16d3d33dbf546a2 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 6 Oct 2021 04:03:57 +0000
Subject: [PATCH 02/17] updated param dict

---
 test/pytest/test_handler.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 9f1b1ac831..fae29f82a9 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -228,7 +228,15 @@ def test_kfserving_mnist_model_register_and_inference_on_valid_model_explain():
 def test_huggingface_bert_batch_inference():
     batch_size = 4
     batch_delay = 10000 # 10 seconds
-    test_utils.register_model_with_params(model_name='BERTSeqClassification', url='https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar', batch_size=batch_size, batch_delay=batch_delay)
+    params = (
+        ('model_name', 'BERTSeqClassification'),
+        ('url', 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'),
+        ('initial_workers', '1'),
+        ('synchronous', 'true'),
+        ('batch_size', '4'),
+        ('batch_delay', '10000')
+    )
+    test_utils.register_model_with_params(params)
     input_text = "../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
     files = {

From a4932b69a23922cdf358f25cf4650bae7c1e55b5 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 6 Oct 2021 19:47:31 +0000
Subject: [PATCH 03/17] added torchserve start setup

---
 test/pytest/test_handler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index fae29f82a9..e032299db6 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -236,6 +236,7 @@ def test_huggingface_bert_batch_inference():
         ('batch_size', '4'),
         ('batch_delay', '10000')
     )
+    test_utils.start_torchserve(no_config_snapshots=True)
     test_utils.register_model_with_params(params)
     input_text = "../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
     files = {
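Patches 1-3 drive registration through `test_utils.register_model_with_params`, whose body is not shown in this series; presumably it forwards the tuple as query parameters to TorchServe's management API. A minimal sketch of the equivalent raw call, assuming a local TorchServe on the default management port 8081 (note that the query parameter TorchServe documents is `max_batch_delay`; the `batch_delay` spelling used above gets corrected later in the series):

```python
# Sketch of the registration request the params tuple presumably turns into.
# TorchServe's management API accepts registration options as query parameters.
import requests

params = {
    "url": "https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar",
    "model_name": "BERTSeqClassification",
    "initial_workers": "1",
    "synchronous": "true",
    "batch_size": "4",
    "max_batch_delay": "10000",  # milliseconds the frontend waits to fill a batch
}
response = requests.post("http://localhost:8081/models", params=params)
print(response.status_code, response.text)
```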
"../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt" files = { From 1e5b901c5d3bca7cced3d1bd104c4a570a6689a2 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 6 Oct 2021 20:59:14 +0000 Subject: [PATCH 04/17] simplified content decoding --- test/pytest/test_handler.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py index e032299db6..f3fccb41ac 100644 --- a/test/pytest/test_handler.py +++ b/test/pytest/test_handler.py @@ -227,18 +227,17 @@ def test_kfserving_mnist_model_register_and_inference_on_valid_model_explain(): def test_huggingface_bert_batch_inference(): batch_size = 4 - batch_delay = 10000 # 10 seconds + batch_delay = 20000 # 20 seconds params = ( ('model_name', 'BERTSeqClassification'), ('url', 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'), ('initial_workers', '1'), - ('synchronous', 'true'), - ('batch_size', '4'), - ('batch_delay', '10000') + ('batch_size', str(batch_size)), + ('batch_delay', str(batch_delay)) ) test_utils.start_torchserve(no_config_snapshots=True) test_utils.register_model_with_params(params) - input_text = "../../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt" + input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt') files = { 'data': (input_text, open(input_text, 'rb')), @@ -247,10 +246,10 @@ def test_huggingface_bert_batch_inference(): for _ in range(batch_size): response = run_inference_using_url_with_data(TF_INFERENCE_API + '/v1/models/BERTSeqClassification:predict', pfiles=files) - response = response.content.decode("utf-8") - response = ast.literal_eval(response) + response = response.content + # response = ast.literal_eval(response) # custom handler returns number of responses not the actual responses - assert int(response[0]) == batch_size + assert int(response) == batch_size test_utils.unregister_model('BERTSeqClassification') def test_MMF_activity_recognition_model_register_and_inference_on_valid_model(): From 7e751c5ef7bc6f6c427222272dc722a96cde7b71 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 7 Oct 2021 00:35:01 +0000 Subject: [PATCH 05/17] added concurrency batch aggregator fix --- .../pytorch/serve/wlm/BatchAggregator.java | 33 ++++++++++--------- test/pytest/test_handler.py | 24 ++++++-------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java index 8c95383d96..5a02283126 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java +++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java @@ -63,12 +63,13 @@ public void sendResponse(ModelWorkerResponse message) { // this is from initial load. 
From 7e751c5ef7bc6f6c427222272dc722a96cde7b71 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 7 Oct 2021 00:35:01 +0000
Subject: [PATCH 05/17] added concurrency batch aggregator fix

---
 .../pytorch/serve/wlm/BatchAggregator.java    | 33 ++++++++++---------
 test/pytest/test_handler.py                   | 24 ++++++--------
 2 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
index 8c95383d96..5a02283126 100644
--- a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
+++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
@@ -63,12 +63,13 @@ public void sendResponse(ModelWorkerResponse message) {
                 // this is from initial load.
                 return;
             }
-
             for (Predictions prediction : message.getPredictions()) {
                 String jobId = prediction.getRequestId();
-                Job job = jobs.remove(jobId);
+                Job job = jobs.get(jobId);
+
                 if (job == null) {
-                    throw new IllegalStateException("Unexpected job: " + jobId);
+                    throw new IllegalStateException(
+                            "Unexpected job in sendResponse() with 200 status code: " + jobId);
                 }
                 job.response(
                         prediction.getResp(),
@@ -77,18 +78,19 @@ public void sendResponse(ModelWorkerResponse message) {
                         prediction.getReasonPhrase(),
                         prediction.getHeaders());
             }
+
         } else {
-            for (String reqId : jobs.keySet()) {
-                Job j = jobs.remove(reqId);
-                if (j == null) {
-                    throw new IllegalStateException("Unexpected job: " + reqId);
+            for (Map.Entry<String, Job> j : jobs.entrySet()) {
+
+                if (j.getValue() == null) {
+                    throw new IllegalStateException(
+                            "Unexpected job in sendResponse() with non 200 status code: "
+                                    + j.getKey());
                 }
-                j.sendError(message.getCode(), message.getMessage());
-            }
-            if (!jobs.isEmpty()) {
-                throw new IllegalStateException("Not all jobs get response.");
+                j.getValue().sendError(message.getCode(), message.getMessage());
             }
         }
+        jobs.clear();
     }
 
     public void sendError(BaseModelRequest message, String error, int status) {
@@ -103,20 +105,20 @@ public void sendError(BaseModelRequest message, String error, int status) {
                 String requestId = req.getRequestId();
                 Job job = jobs.remove(requestId);
                 if (job == null) {
-                    logger.error("Unexpected job: " + requestId);
+                    logger.error("Unexpected job in sendError(): " + requestId);
                 } else {
                     job.sendError(status, error);
                 }
             }
             if (!jobs.isEmpty()) {
                 jobs.clear();
-                logger.error("Not all jobs get response.");
+                logger.error("Not all jobs got an error response.");
             }
         } else {
            // Send the error message to all the jobs
            for (Map.Entry<String, Job> j : jobs.entrySet()) {
                String jobsId = j.getValue().getJobId();
-               Job job = jobs.remove(jobsId);
+               Job job = jobs.get(jobsId);
 
                if (job.isControlCmd()) {
                    job.sendError(status, error);
@@ -127,5 +129,6 @@ public void sendError(BaseModelRequest message, String error, int status) {
                }
            }
         }
+        jobs.clear();
     }
-}
+}
\ No newline at end of file
diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index f3fccb41ac..37554e6359 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -2,6 +2,8 @@
 import requests
 import json
 import test_utils
+import asyncio
+import multiprocessing
 import numpy as np
 import ast
 REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")
@@ -226,8 +228,8 @@ def test_kfserving_mnist_model_register_and_inference_on_valid_model_explain():
     test_utils.unregister_model("mnist")
 
 def test_huggingface_bert_batch_inference():
-    batch_size = 4
-    batch_delay = 20000 # 20 seconds
+    batch_size = 2
+    batch_delay = 10000 # 10 seconds
     params = (
         ('model_name', 'BERTSeqClassification'),
         ('url', 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'),
@@ -238,18 +240,12 @@ def test_huggingface_bert_batch_inference():
     test_utils.start_torchserve(no_config_snapshots=True)
     test_utils.register_model_with_params(params)
     input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt')
-    files = {
-        'data': (input_text,
-                 open(input_text, 'rb')),
-    }
-
-    for _ in range(batch_size):
-        response = run_inference_using_url_with_data(TF_INFERENCE_API + '/v1/models/BERTSeqClassification:predict', pfiles=files)
-
-    response = response.content
-    # response = ast.literal_eval(response)
-    # custom handler returns number of responses not the actual responses
-    assert int(response) == batch_size
+
+    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
+
+    # handler responds with number of inferences
+    response = response.read()
+    assert response == batch_size
     test_utils.unregister_model('BERTSeqClassification')
 
 def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():
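The BatchAggregator change swaps `jobs.remove(...)` inside each loop for `jobs.get(...)` and defers cleanup to a single `jobs.clear()` after the loop, so the map is never mutated while it is being walked. The failure mode being avoided is the usual modify-while-iterating one; a Python analogue of both patterns (an analogy only, not TorchServe code):

```python
# Analogy, not TorchServe code: mutating a mapping while iterating over it
# is the hazard the BatchAggregator fix sidesteps.
jobs = {"req-1": "job-1", "req-2": "job-2"}

# Broken pattern: removing entries during iteration raises RuntimeError in
# Python (and risks ConcurrentModificationException or skipped entries in Java).
try:
    for req_id in jobs.keys():
        jobs.pop(req_id)
except RuntimeError as e:
    print("mutation during iteration:", e)

# Safe pattern mirroring the fix: visit every entry first, then clear once.
jobs = {"req-1": "job-1", "req-2": "job-2"}
for req_id, job in jobs.items():
    print("responding to", req_id, job)
jobs.clear()
```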
From 2664f6c57a3d825493c5c9694e6ca83aecec5ef7 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 7 Oct 2021 00:58:30 +0000
Subject: [PATCH 06/17] removed asyncio and mp

---
 test/pytest/test_handler.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 37554e6359..3019be8cff 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -2,8 +2,6 @@
 import requests
 import json
 import test_utils
-import asyncio
-import multiprocessing
 import numpy as np
 import ast
 REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")
@@ -240,12 +238,13 @@ def test_huggingface_bert_batch_inference():
     test_utils.start_torchserve(no_config_snapshots=True)
     test_utils.register_model_with_params(params)
     input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt')
-
+
+    # Make 2 curl requests in parallel with &
     response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
-
-    # handler responds with number of inferences
     response = response.read()
-    assert response == batch_size
+
+    ## Assert that 2 responses are returned from the same batch
+    assert response == 'Not AcceptedNot Accepted'
     test_utils.unregister_model('BERTSeqClassification')
 
 def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():

From cd0aa8e371b234d3a9e424db709329a8c643b05b Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 7 Oct 2021 01:08:15 +0000
Subject: [PATCH 07/17] formatted java

---
 .../src/main/java/org/pytorch/serve/wlm/BatchAggregator.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
index 5a02283126..9aa246bc8a 100644
--- a/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
+++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/BatchAggregator.java
@@ -131,4 +131,4 @@ public void sendError(BaseModelRequest message, String error, int status) {
         }
         jobs.clear();
     }
-}
\ No newline at end of file
+}
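The new assertion compares the concatenated stdout of the two backgrounded curls against `'Not AcceptedNot Accepted'`. That only works because both responses are identical; if the model returned distinct labels, the interleaving order would be nondeterministic. A possible order-insensitive tightening (hypothetical, not part of the series):

```python
# Hypothetical variant: count occurrences instead of relying on the order in
# which the two curl processes flush their output.
assert response.count("Not Accepted") == 2
```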
From 6529750b4c74fbc036c8de8677f3739917e08b57 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Wed, 13 Oct 2021 10:48:41 -0700
Subject: [PATCH 08/17] changed client id for second curl request

---
 test/pytest/test_handler.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 3019be8cff..58bd1c6f2a 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -240,9 +240,10 @@ def test_huggingface_bert_batch_inference():
     input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt')
 
     # Make 2 curl requests in parallel with &
-    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
+    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl --header \"X-Forwarded-For: 1.2.3.4\" http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
     response = response.read()
 
+
     ## Assert that 2 responses are returned from the same batch
     assert response == 'Not AcceptedNot Accepted'
     test_utils.unregister_model('BERTSeqClassification')

From 971f43e0c83d6af52ad6cba6924f1a2f0b6e5621 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Wed, 13 Oct 2021 10:52:49 -0700
Subject: [PATCH 09/17] test_handler

---
 test/pytest/test_handler.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 58bd1c6f2a..5b5e1efc07 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -240,7 +240,8 @@ def test_huggingface_bert_batch_inference():
     input_text = os.path.join(REPO_ROOT, 'examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt')
 
     # Make 2 curl requests in parallel with &
-    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl --header \"X-Forwarded-For: 1.2.3.4\" http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
+    # curl --header \"X-Forwarded-For: 1.2.3.4\" won't work since you can't access localhost anymore
+    response = os.popen(f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}")
     response = response.read()

From 66fe2eca1770eab3e7e6be9be9ca8db0c662ece0 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Thu, 14 Oct 2021 11:16:57 -0700
Subject: [PATCH 10/17] updated to torchserve bucket url

---
 test/pytest/test_handler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 5b5e1efc07..9e9526daf9 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -230,7 +230,7 @@ def test_huggingface_bert_batch_inference():
     batch_delay = 10000 # 10 seconds
     params = (
         ('model_name', 'BERTSeqClassification'),
-        ('url', 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'),
+        ('url', 'https://torchserve.pytorch.org/mar_files/BERTSeqClassification.mar'),
         ('initial_workers', '1'),
         ('batch_size', str(batch_size)),
         ('batch_delay', str(batch_delay))
@@ -268,4 +268,4 @@ def test_MMF_activity_recognition_model_register_and_inference_on_valid_model():
     response = ast.literal_eval(response)
     response = [n.strip() for n in response]
     assert response == ['Sitting at a table','Someone is sneezing','Watching a laptop or something on a laptop']
-    test_utils.unregister_model("MMF_activity_recognition_v2")
\ No newline at end of file
+    test_utils.unregister_model("MMF_activity_recognition_v2")
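With the X-Forwarded-For trick ruled out, the same two-requests-in-one-batch-window pattern can be had without shelling out to curl at all. A sketch using `requests` plus a thread pool; the endpoint and expected body mirror the test, while `predict` and the `input_text` value here are illustrative names rather than anything from the patches:

```python
# Sketch: issue both requests from worker threads so they are in flight
# together and land in the same TorchServe batch window.
from concurrent.futures import ThreadPoolExecutor
import requests

URL = "http://127.0.0.1:8080/predictions/BERTSeqClassification"
input_text = "examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"

def predict(path):
    with open(path, "rb") as f:
        # streaming the file as the request body, the equivalent of curl -T <file>
        return requests.post(URL, data=f).text

with ThreadPoolExecutor(max_workers=2) as pool:
    responses = list(pool.map(predict, [input_text, input_text]))

assert responses == ["Not Accepted", "Not Accepted"]
```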
From 554d626ca076e24f3b7ab2bad52c31936a64e1e5 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Fri, 15 Oct 2021 14:07:20 -0700
Subject: [PATCH 11/17] addressed li feedback

---
 .../org/pytorch/serve/ModelServerTest.java    | 52 ++++++++++++++++++-
 .../java/org/pytorch/serve/TestUtils.java     | 24 +++++++++
 test/pytest/test_handler.py                   |  2 +-
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 71a4c9abfd..60670bb465 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1065,9 +1065,59 @@ public void testPredictionMemoryError() throws InterruptedException {
         Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.OK);
     }
 
-    @Test(
+@Test(
             alwaysRun = true,
             dependsOnMethods = {"testPredictionMemoryError"})
+    public void testSuccessBatch() throws InterruptedException {
+        int batch_size = 4;
+        int max_batch_delay = 10000;
+        Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
+        Assert.assertNotNull(channel);
+
+        TestUtils.setHttpStatus(null);
+        TestUtils.setResult(null);
+        TestUtils.setLatch(new CountDownLatch(1));
+
+        TestUtils.registerModel(channel, "noop.mar", "noop", true, false, batch_size, max_batch_delay);
+        TestUtils.getLatch().await();
+
+        StatusResponse status =
+                JsonUtils.GSON.fromJson(TestUtils.getResult(), StatusResponse.class);
+        Assert.assertEquals(
+                status.getStatus(),
+                "Model \"noop\" Version: 1.0 registered with 1 initial workers");
+
+        channel.close().sync();
+
+        channel = TestUtils.connect(ConnectorType.INFERENCE_CONNECTOR, configManager);
+        Assert.assertNotNull(channel);
+
+        TestUtils.setResult(null);
+        TestUtils.setLatch(new CountDownLatch(1));
+        TestUtils.setHttpStatus(null);
+
+        for(int i = 0; i < batch_size ; i ++) {
+            DefaultFullHttpRequest req =
+                    new DefaultFullHttpRequest(
+                            HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");
+            // req.content().writeCharSequence("data=invalid_output", CharsetUtil.UTF_8);
+            HttpUtil.setContentLength(req, req.content().readableBytes());
+            req.headers()
+                    .set(
+                            HttpHeaderNames.CONTENT_TYPE,
+                            HttpHeaderValues.APPLICATION_X_WWW_FORM_URLENCODED);
+            channel.writeAndFlush(req);
+
+            TestUtils.getLatch().await();
+            Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.ACCEPTED);
+        }
+        channel.close().sync();
+    }
+
+
+    @Test(
+            alwaysRun = true,
+            dependsOnMethods = {"testSuccessBatch"})
     public void testErrorBatch() throws InterruptedException {
         Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
         Assert.assertNotNull(channel);
diff --git a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
index 6b5900817d..4965df4608 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
@@ -174,6 +174,30 @@ public static void registerModel(
         }
     }
 
+    public static void registerModel(
+            Channel channel,
+            String url,
+            String modelName,
+            boolean withInitialWorkers,
+            boolean syncChannel,
+            int batchSize,
+            int maxBatchDelay)
+            throws InterruptedException {
+        String requestURL = "/models?url=" + url + "&model_name=" + modelName + "&runtime=python" + "&batch_size=" + batchSize + "&max_batch_delay=" + maxBatchDelay;
+        if (withInitialWorkers) {
+            requestURL += "&initial_workers=1&synchronous=true";
+        }
+
+        HttpRequest req =
+                new DefaultFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.POST, requestURL);
+        if (syncChannel) {
+            channel.writeAndFlush(req).sync();
+            channel.closeFuture().sync();
+        } else {
+            channel.writeAndFlush(req);
+        }
+    }
+
     public static void registerWorkflow(
             Channel channel, String url, String workflowName, boolean syncChannel)
             throws InterruptedException {
diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py
index 9e9526daf9..ad3bd3b6a3 100644
--- a/test/pytest/test_handler.py
+++ b/test/pytest/test_handler.py
@@ -233,7 +233,7 @@ def test_huggingface_bert_batch_inference():
         ('url', 'https://torchserve.pytorch.org/mar_files/BERTSeqClassification.mar'),
         ('initial_workers', '1'),
         ('batch_size', str(batch_size)),
-        ('batch_delay', str(batch_delay))
+        ('max_batch_delay', str(batch_delay))
     )
     test_utils.start_torchserve(no_config_snapshots=True)
     test_utils.register_model_with_params(params)
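The Java overload and the corrected pytest params now agree on semantics: `batch_size` caps how many queued requests the frontend hands a worker per inference call, and `max_batch_delay` is how long it waits for the batch to fill before dispatching a partial one. One observable consequence, sketched against the noop model registered by testSuccessBatch (a sketch, assuming a local TorchServe on the default inference port):

```python
# Sketch: with batch_size=4 and max_batch_delay=10000, a lone request sits in
# the queue until the 10 s window expires, so its end-to-end latency
# approaches max_batch_delay when nothing else arrives to fill the batch.
import time
import requests

start = time.time()
resp = requests.post("http://127.0.0.1:8080/predictions/noop", data=b"hello")
print(resp.status_code, f"{time.time() - start:.1f}s")  # expect roughly 10 s
```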
From 1c7ab03e546aefca3e11e4663ac3bb3b95690855 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Fri, 15 Oct 2021 21:30:36 +0000
Subject: [PATCH 12/17] gradlew formatJava

---
 .../org/pytorch/serve/ModelServerTest.java    | 34 +++++++++----------
 .../java/org/pytorch/serve/TestUtils.java     | 11 +++++-
 2 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 60670bb465..4e53d55402 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1065,7 +1065,7 @@ public void testPredictionMemoryError() throws InterruptedException {
         Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.OK);
     }
 
-@Test(
+    @Test(
             alwaysRun = true,
             dependsOnMethods = {"testPredictionMemoryError"})
     public void testSuccessBatch() throws InterruptedException {
@@ -1078,7 +1078,8 @@ public void testSuccessBatch() throws InterruptedException {
         TestUtils.setResult(null);
         TestUtils.setLatch(new CountDownLatch(1));
 
-        TestUtils.registerModel(channel, "noop.mar", "noop", true, false, batch_size, max_batch_delay);
+        TestUtils.registerModel(
+                channel, "noop.mar", "noop", true, false, batch_size, max_batch_delay);
         TestUtils.getLatch().await();
 
         StatusResponse status =
@@ -1095,26 +1096,25 @@ public void testSuccessBatch() throws InterruptedException {
         TestUtils.setResult(null);
         TestUtils.setLatch(new CountDownLatch(1));
         TestUtils.setHttpStatus(null);
-
-        for(int i = 0; i < batch_size ; i ++) {
-            DefaultFullHttpRequest req =
-                    new DefaultFullHttpRequest(
-                            HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");
-            // req.content().writeCharSequence("data=invalid_output", CharsetUtil.UTF_8);
-            HttpUtil.setContentLength(req, req.content().readableBytes());
-            req.headers()
-                    .set(
-                            HttpHeaderNames.CONTENT_TYPE,
-                            HttpHeaderValues.APPLICATION_X_WWW_FORM_URLENCODED);
-            channel.writeAndFlush(req);
 
-            TestUtils.getLatch().await();
-            Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.ACCEPTED);
+        for (int i = 0; i < batch_size; i++) {
+            DefaultFullHttpRequest req =
+                    new DefaultFullHttpRequest(
+                            HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");
+            // req.content().writeCharSequence("data=invalid_output", CharsetUtil.UTF_8);
+            HttpUtil.setContentLength(req, req.content().readableBytes());
+            req.headers()
+                    .set(
+                            HttpHeaderNames.CONTENT_TYPE,
+                            HttpHeaderValues.APPLICATION_X_WWW_FORM_URLENCODED);
+            channel.writeAndFlush(req);
+
+            TestUtils.getLatch().await();
+            Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.ACCEPTED);
         }
         channel.close().sync();
     }
 
-
     @Test(
             alwaysRun = true,
             dependsOnMethods = {"testSuccessBatch"})
diff --git a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
index 4965df4608..d0b2256e71 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java
@@ -183,7 +183,16 @@ public static void registerModel(
             int batchSize,
             int maxBatchDelay)
             throws InterruptedException {
-        String requestURL = "/models?url=" + url + "&model_name=" + modelName + "&runtime=python" + "&batch_size=" + batchSize + "&max_batch_delay=" + maxBatchDelay;
+        String requestURL =
+                "/models?url="
+                        + url
+                        + "&model_name="
+                        + modelName
+                        + "&runtime=python"
+                        + "&batch_size="
+                        + batchSize
+                        + "&max_batch_delay="
+                        + maxBatchDelay;
         if (withInitialWorkers) {
             requestURL += "&initial_workers=1&synchronous=true";
         }
From f713d49d9e5ccf61e6be2ea099f2cbe3f9643dff Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Fri, 15 Oct 2021 14:41:00 -0700
Subject: [PATCH 13/17] fixed casing

---
 .../src/test/java/org/pytorch/serve/ModelServerTest.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 4e53d55402..1ec1c98cd8 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1069,8 +1069,8 @@ public void testPredictionMemoryError() throws InterruptedException {
             alwaysRun = true,
             dependsOnMethods = {"testPredictionMemoryError"})
     public void testSuccessBatch() throws InterruptedException {
-        int batch_size = 4;
-        int max_batch_delay = 10000;
+        int batchSize = 4;
+        int maxBatchDelay = 10000;
         Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
         Assert.assertNotNull(channel);
 
@@ -1079,7 +1079,7 @@ public void testSuccessBatch() throws InterruptedException {
         TestUtils.setLatch(new CountDownLatch(1));
 
         TestUtils.registerModel(
-                channel, "noop.mar", "noop", true, false, batch_size, max_batch_delay);
+                channel, "noop.mar", "noop", true, false, batchSize, maxBatchDelay);
         TestUtils.getLatch().await();
 
         StatusResponse status =

From cfae3930f85419e278617d339aee43b5fb9f217f Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 18 Oct 2021 18:43:30 +0000
Subject: [PATCH 14/17] trigger-build

From e79d9a21dd15cee4edb50de4185c2ca2205fb7d5 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 18 Oct 2021 19:23:07 +0000
Subject: [PATCH 15/17] fixed syntax error

---
 .../server/src/test/java/org/pytorch/serve/ModelServerTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 1ec1c98cd8..75e2274849 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1097,7 +1097,7 @@ public void testSuccessBatch() throws InterruptedException {
         TestUtils.setLatch(new CountDownLatch(1));
         TestUtils.setHttpStatus(null);
 
-        for (int i = 0; i < batch_size; i++) {
+        for (int i = 0; i < batchSize; i++) {
             DefaultFullHttpRequest req =
                     new DefaultFullHttpRequest(
                             HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");

From c78bddb2f28e6325d03d7c90ec75a50017d61a0a Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 18 Oct 2021 22:34:07 +0000
Subject: [PATCH 16/17] removed batch inference test but kept registration in java

---
 .../org/pytorch/serve/ModelServerTest.java | 24 -------------------
 1 file changed, 24 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index 75e2274849..e272e5a5d2 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1089,30 +1089,6 @@ public void testSuccessBatch() throws InterruptedException {
                 "Model \"noop\" Version: 1.0 registered with 1 initial workers");
 
         channel.close().sync();
-
-        channel = TestUtils.connect(ConnectorType.INFERENCE_CONNECTOR, configManager);
-        Assert.assertNotNull(channel);
-
-        TestUtils.setResult(null);
-        TestUtils.setLatch(new CountDownLatch(1));
-        TestUtils.setHttpStatus(null);
-
-        for (int i = 0; i < batchSize; i++) {
-            DefaultFullHttpRequest req =
-                    new DefaultFullHttpRequest(
-                            HttpVersion.HTTP_1_1, HttpMethod.POST, "/predictions/noop");
-            // req.content().writeCharSequence("data=invalid_output", CharsetUtil.UTF_8);
-            HttpUtil.setContentLength(req, req.content().readableBytes());
-            req.headers()
-                    .set(
-                            HttpHeaderNames.CONTENT_TYPE,
-                            HttpHeaderValues.APPLICATION_X_WWW_FORM_URLENCODED);
-            channel.writeAndFlush(req);
-
-            TestUtils.getLatch().await();
-            Assert.assertEquals(TestUtils.getHttpStatus(), HttpResponseStatus.ACCEPTED);
-        }
-        channel.close().sync();
     }
 
     @Test(
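The loop removed here awaited a single `CountDownLatch(1)` once per request, and in Java a latch that has hit zero stays open, so every await after the first returns immediately; with responses arriving together as one batch, the per-iteration status assertions were racy, which is one plausible reason the batched-inference half moved to pytest. The coordination the loop was reaching for is a counting latch; a Python analogue (an analogy only, not repo code):

```python
# Analogy, not repo code: wait for all N batched responses with one latch
# counted down N times, rather than re-awaiting a spent single-count latch.
import threading

class CountingLatch:
    def __init__(self, count):
        self.count = count
        self.cond = threading.Condition()

    def count_down(self):
        with self.cond:
            self.count -= 1
            if self.count <= 0:
                self.cond.notify_all()

    def wait(self):
        with self.cond:
            while self.count > 0:
                self.cond.wait()

latch = CountingLatch(4)  # one count per request in the batch
# each response callback would call latch.count_down(); the test thread then
# calls latch.wait() exactly once after sending all four requests.
```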
From fff83ab19878c2279b375f2f7f634e06849d7dfa Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 18 Oct 2021 22:34:42 +0000
Subject: [PATCH 17/17] removed java test

---
 .../org/pytorch/serve/ModelServerTest.java | 26 ------------------
 1 file changed, 26 deletions(-)

diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
index e272e5a5d2..71a4c9abfd 100644
--- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
+++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java
@@ -1068,32 +1068,6 @@ public void testPredictionMemoryError() throws InterruptedException {
     @Test(
             alwaysRun = true,
             dependsOnMethods = {"testPredictionMemoryError"})
-    public void testSuccessBatch() throws InterruptedException {
-        int batchSize = 4;
-        int maxBatchDelay = 10000;
-        Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
-        Assert.assertNotNull(channel);
-
-        TestUtils.setHttpStatus(null);
-        TestUtils.setResult(null);
-        TestUtils.setLatch(new CountDownLatch(1));
-
-        TestUtils.registerModel(
-                channel, "noop.mar", "noop", true, false, batchSize, maxBatchDelay);
-        TestUtils.getLatch().await();
-
-        StatusResponse status =
-                JsonUtils.GSON.fromJson(TestUtils.getResult(), StatusResponse.class);
-        Assert.assertEquals(
-                status.getStatus(),
-                "Model \"noop\" Version: 1.0 registered with 1 initial workers");
-
-        channel.close().sync();
-    }
-
-    @Test(
-            alwaysRun = true,
-            dependsOnMethods = {"testSuccessBatch"})
     public void testErrorBatch() throws InterruptedException {
         Channel channel = TestUtils.connect(ConnectorType.MANAGEMENT_CONNECTOR, configManager);
         Assert.assertNotNull(channel);