From 580a4a152dbf0a7309918f04d7b1e1092b6733ea Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Oct 2023 14:24:55 -0700
Subject: [PATCH 1/6] Add dependencies needed for llm examples to the examples
 yaml. Widen the networkx version to allow earlier versions, matching
 langchain's version requirements

---
 docker/conda/environments/cuda11.8_dev.yml      |  2 +-
 .../conda/environments/cuda11.8_examples.yml    | 22 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml
index 2b44feb419..5785546684 100644
--- a/docker/conda/environments/cuda11.8_dev.yml
+++ b/docker/conda/environments/cuda11.8_dev.yml
@@ -68,7 +68,7 @@ dependencies:
   - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
   - mlflow>=2.2.1,<3
   - mrc=23.11
-  - networkx=3.1
+  - networkx>=2.8
   - ninja=1.10
   - nodejs=18.*
   - numba>=0.56.2
diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml
index a7b8b1b49b..517004f5af 100644
--- a/docker/conda/environments/cuda11.8_examples.yml
+++ b/docker/conda/environments/cuda11.8_examples.yml
@@ -22,16 +22,38 @@ channels:
   - rapidsai
   - nvidia
+  - huggingface
   - conda-forge
   - dglteam/label/cu118
 
 dependencies:
+  - arxiv=1.4
   - boto3
   - cuml=23.06
   - dask>=2023.1.1
   - dgl=1.0.2
   - dill=0.3.6
   - distributed>=2023.1.1
+  - langchain=0.0.190
   - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
   - mlflow>=2.2.1,<3
+  - newspaper3k=0.2
   - papermill=2.3.4
+  - pypdf=3.16
+  - requests-cache=1.1
   - s3fs>=2023.6
+  - sentence-transformers
+  - transformers
+
+  ####### Pip Transitive Dependencies (keep sorted!) #######
+  # These are dependencies that are available on conda, but are required by the pip packages listed below. It's much
+  # better to install them with conda than pip to allow for better dependency resolution.
+  - environs=9.5
+  - minio=7.1
+  - python-dotenv=1.0
+  - ujson=5.8
+
+
+  ####### Pip Dependencies (keep sorted!) #######
+  - pip:
+      - grpcio-status==1.58 # To keep in sync with 1.58 grpcio which is installed for Morpheus
+      - nemollm

From 35f1035e15f8a3fb73ae854c1c65ea2e707bc42a Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Oct 2023 15:58:10 -0700
Subject: [PATCH 2/6] Fix handling of stop_after feature

---
 morpheus/stages/input/rss_source_stage.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/morpheus/stages/input/rss_source_stage.py b/morpheus/stages/input/rss_source_stage.py
index 39f7c38cd3..b238c1dafc 100644
--- a/morpheus/stages/input/rss_source_stage.py
+++ b/morpheus/stages/input/rss_source_stage.py
@@ -64,6 +64,12 @@ def __init__(self,
         if (batch_size is None):
             batch_size = c.pipeline_batch_size
 
+        if (stop_after > 0):
+            if (run_indefinitely):
+                raise ValueError("Cannot set both `stop_after` and `run_indefinitely` to True.")
+
+            run_indefinitely = False
+
         self._records_emitted = 0
         self._controller = RSSController(feed_input=feed_input,
                                          batch_size=batch_size,
@@ -101,13 +107,13 @@ def _fetch_feeds(self) -> MessageMeta:
 
                     yield MessageMeta(df=df)
 
-                if not self._controller.run_indefinitely:
-                    self._stop_requested = True
-                    continue
+                if (self._stop_after > 0 and self._records_emitted >= self._stop_after):
+                    self._stop_requested = True
+                    logger.debug("Stop limit reached...preparing to halt the source.")
+                    break
 
-                if (self._stop_after > 0 and self._records_emitted >= self._stop_after):
+                if not self._controller.run_indefinitely:
                     self._stop_requested = True
-                    logger.debug("Stop limit reached...preparing to halt the source.")
                     continue
 
                 logger.debug("Waiting for %d seconds before fetching again...", self._interval_secs)
@@ -128,6 +134,8 @@ def _fetch_feeds(self) -> MessageMeta:
             logger.error("Max retries reached. Unable to fetch feed entries.")
             raise RuntimeError(f"Failed to fetch feed entries after max retries: {exc}") from exc
 
+        logger.debug("Source stopped.")
+
     def _build_source(self, builder: mrc.Builder) -> StreamPair:
         source = builder.make_source(self.unique_name, self._fetch_feeds)
         return source, MessageMeta

From 81c3b72955f1459b1a4128c2263b6f40623509e2 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Oct 2023 16:00:44 -0700
Subject: [PATCH 3/6] Add a stop-after flag

---
 examples/llm/vdb_upload/run.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py
index a4949d0d81..f9040762ab 100644
--- a/examples/llm/vdb_upload/run.py
+++ b/examples/llm/vdb_upload/run.py
@@ -168,11 +168,17 @@ def run():
 @click.option("--isolate_embeddings",
               is_flag=True,
               default=False,
-              help="Whether to pre-calculate the embeddings using Triton")
+              help="Whether to fetch all data prior to executing the rest of the pipeline.")
 @click.option("--use_cache",
               type=click.Path(file_okay=True, dir_okay=False),
               default=None,
               help="What cache to use for the confluence documents")
+@click.option(
+    "--stop_after",
+    default=0,
+    type=click.IntRange(min=0),
+    help="Stop after emitting this many records from the RSS source stage. Useful for testing. Disabled if `0`",
+)
 def pipeline(num_threads,
              pipeline_batch_size,
              model_max_batch_size,
@@ -184,11 +190,14 @@ def pipeline(num_threads,
              model_name,
              pre_calc_embeddings,
              isolate_embeddings,
-             use_cache):
+             use_cache,
+             stop_after: int):
 
     from morpheus.config import Config
     from morpheus.config import CppConfig
     from morpheus.config import PipelineModes
+    from morpheus.messages import ControlMessage
+    from morpheus.messages import MessageMeta
     from morpheus.pipeline.linear_pipeline import LinearPipeline
     from morpheus.stages.general.monitor_stage import MonitorStage
     from morpheus.stages.general.trigger_stage import TriggerStage
@@ -218,7 +227,7 @@ def pipeline(num_threads,
     pipe = LinearPipeline(config)
 
     # add doca source stage
-    pipe.set_source(RSSSourceStage(config, feed_input=_build_rss_urls(), batch_size=128))
+    pipe.set_source(RSSSourceStage(config, feed_input=_build_rss_urls(), batch_size=128, stop_after=stop_after))
 
     pipe.add_stage(MonitorStage(config, description="Source rate", unit='pages'))
 
@@ -226,14 +235,18 @@ def pipeline(num_threads,
 
     pipe.add_stage(MonitorStage(config, description="Download rate", unit='pages'))
 
+    if (pre_calc_embeddings):
+        message_type = MessageMeta
+    else:
+        message_type = ControlMessage
+
+    # add deserialize stage
+    pipe.add_stage(DeserializeStage(config, message_type=message_type))
+
     if (isolate_embeddings):
         pipe.add_stage(TriggerStage(config))
 
     if (pre_calc_embeddings):
-
-        # add deserialize stage
-        pipe.add_stage(DeserializeStage(config))
 
         # add preprocessing stage
         pipe.add_stage(
             PreprocessNLPStage(config,

From e8213d9f5b47489638d65a0b76cdab8bae6fd5e5 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 18 Oct 2023 09:00:41 -0700
Subject: [PATCH 4/6] Update model name and feature length. Remove the
 pre_calc_embeddings flag and make this behavior enabled by default. Move the
 deserialize stage ahead of the trigger when --isolate_embeddings is enabled.
 Add a minimal README

---
 examples/llm/vdb_upload/README.md | 33 ++++++++++++++++++++
 examples/llm/vdb_upload/run.py    | 53 ++++++++++++-------------------
 2 files changed, 54 insertions(+), 32 deletions(-)
 create mode 100644 examples/llm/vdb_upload/README.md

diff --git a/examples/llm/vdb_upload/README.md b/examples/llm/vdb_upload/README.md
new file mode 100644
index 0000000000..13f38d2858
--- /dev/null
+++ b/examples/llm/vdb_upload/README.md
@@ -0,0 +1,33 @@
+## Launching Triton
+
+Pull the Docker image for Triton:
+```bash
+docker pull nvcr.io/nvidia/tritonserver:23.06-py3
+```
+
+From the Morpheus repo root directory, run the following to launch Triton and load the `all-MiniLM-L6-v2` model:
+```bash
+docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model all-MiniLM-L6-v2
+```
+
+This will launch Triton and load only the `all-MiniLM-L6-v2` model. Once Triton has loaded the model, the following will be displayed:
+```
++------------------+---------+--------+
+| Model            | Version | Status |
++------------------+---------+--------+
+| all-MiniLM-L6-v2 | 1       | READY  |
++------------------+---------+--------+
+```
+
+## Running the Pipeline
+
+```bash
+python examples/llm/main.py vdb_upload pipeline
+```
+
+> **Note**: This pipeline will, by default, run continuously, repeatedly polling the configured RSS sources. To stop after a fixed number of records instead, add the `--stop_after=N` flag, as in the example below.
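+
+For example, to stop once at least 100 records have been emitted (100 is an arbitrary choice; any positive integer works):
+```bash
+python examples/llm/main.py vdb_upload pipeline --stop_after=100
+```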
diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py
index ebc2cce1aa..0818712540 100644
--- a/examples/llm/vdb_upload/run.py
+++ b/examples/llm/vdb_upload/run.py
@@ -135,7 +135,7 @@ def run():
 )
 @click.option(
     "--model_fea_length",
-    default=256,
+    default=512,
     type=click.IntRange(min=1),
     help="Features length to use for the model",
 )
@@ -159,13 +159,9 @@ def run():
 @click.option(
     "--model_name",
     required=True,
-    default='all-mpnet-base-v2',
+    default='all-MiniLM-L6-v2',
     help="The name of the model that is deployed on Triton server",
 )
-@click.option("--pre_calc_embeddings",
-              is_flag=True,
-              default=False,
-              help="Whether to pre-calculate the embeddings using Triton")
 @click.option("--isolate_embeddings",
               is_flag=True,
               default=False,
@@ -189,7 +185,6 @@ def pipeline(num_threads,
              output_file,
              server_url,
              model_name,
-             pre_calc_embeddings,
              isolate_embeddings,
              use_cache,
              stop_after: int):
@@ -236,36 +231,30 @@ def pipeline(num_threads,
 
     pipe.add_stage(MonitorStage(config, description="Download rate", unit='pages'))
 
-    if (pre_calc_embeddings):
-        message_type = MessageMeta
-    else:
-        message_type = ControlMessage
-
     # add deserialize stage
-    pipe.add_stage(DeserializeStage(config, message_type=message_type))
+    pipe.add_stage(DeserializeStage(config))
 
     if (isolate_embeddings):
         pipe.add_stage(TriggerStage(config))
 
-    if (pre_calc_embeddings):
-        # add preprocessing stage
-        pipe.add_stage(
-            PreprocessNLPStage(config,
-                               vocab_hash_file="data/bert-base-uncased-hash.txt",
-                               do_lower_case=True,
-                               truncation=True,
-                               add_special_tokens=False,
-                               column='page_content'))
-
-        pipe.add_stage(MonitorStage(config, description="Tokenize rate", unit='events', delayed_start=True))
-
-        pipe.add_stage(
-            TritonInferenceStage(config,
-                                 model_name=model_name,
-                                 server_url="localhost:8001",
-                                 force_convert_inputs=True,
-                                 use_shared_memory=True))
-        pipe.add_stage(MonitorStage(config, description="Inference rate", unit="events", delayed_start=True))
+    # add preprocessing stage
+    pipe.add_stage(
+        PreprocessNLPStage(config,
+                           vocab_hash_file="data/bert-base-uncased-hash.txt",
+                           do_lower_case=True,
+                           truncation=True,
+                           add_special_tokens=False,
+                           column='page_content'))
+
+    pipe.add_stage(MonitorStage(config, description="Tokenize rate", unit='events', delayed_start=True))
+
+    pipe.add_stage(
+        TritonInferenceStage(config,
+                             model_name=model_name,
+                             server_url="localhost:8001",
+                             force_convert_inputs=True,
+                             use_shared_memory=True))
+    pipe.add_stage(MonitorStage(config, description="Inference rate", unit="events", delayed_start=True))
 
     pipe.add_stage(
         WriteToVectorDBStage(config,

From 6ec023660083e3de14967ef7921e6bb4715803a3 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 18 Oct 2023 09:27:24 -0700
Subject: [PATCH 5/6] Update model name

---
 examples/llm/vdb_upload/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py
index 0818712540..b2479151d4 100644
--- a/examples/llm/vdb_upload/run.py
+++ b/examples/llm/vdb_upload/run.py
@@ -279,7 +279,7 @@ def pipeline(num_threads,
 @click.option(
     "--model_name",
     required=True,
-    default='all-mpnet-base-v2',
+    default='all-MiniLM-L6-v2',
     help="The name of the model that is deployed on Triton server",
 )
 @click.option(
@@ -374,7 +374,7 @@ def _save_model(model, max_seq_length: int, batch_size: int, output_model_path:
 @click.option(
     "--model_name",
     required=True,
-    default='all-mpnet-base-v2',
+    default='all-MiniLM-L6-v2',
     help="The name of the model that is deployed on Triton server",
 )
 @click.option(

From e748df64677b94ed12c157647ad5406f2133935b Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 18 Oct 2023 11:34:19 -0700
Subject: [PATCH 6/6] Pin to an older version of huggingface_hub to work
 around https://github.com/UKPLab/sentence-transformers/issues/1762

---
 docker/conda/environments/cuda11.8_examples.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml
index 517004f5af..bd56baf9e5 100644
--- a/docker/conda/environments/cuda11.8_examples.yml
+++ b/docker/conda/environments/cuda11.8_examples.yml
@@ -33,6 +33,7 @@ dependencies:
   - dgl=1.0.2
   - dill=0.3.6
   - distributed>=2023.1.1
+  - huggingface_hub=0.10.1 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762
   - langchain=0.0.190
   - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
   - mlflow>=2.2.1,<3
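
---

One possible way to exercise the series end-to-end, sketched below. It assumes the patches above are saved as an mbox file named `llm-examples.mbox` and that the conda environment is named `morpheus`; both names, and the record count, are arbitrary choices for illustration:

```bash
# Apply the series onto a Morpheus checkout
git am llm-examples.mbox

# Update the environment with the new example dependencies,
# including the pinned huggingface_hub from the final patch
conda env update -n morpheus --file docker/conda/environments/cuda11.8_examples.yml

# Smoke-test the upload pipeline, stopping after at least 100 records
python examples/llm/main.py vdb_upload pipeline --stop_after=100
```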