From 580a4a152dbf0a7309918f04d7b1e1092b6733ea Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Oct 2023 14:24:55 -0700
Subject: [PATCH 1/6] Add dependencies needed for llm examples to the examples
 yaml. Widen the networkx version to allow earlier versions, matching
 langchain's version requirements

---
 docker/conda/environments/cuda11.8_dev.yml      |  2 +-
 .../conda/environments/cuda11.8_examples.yml    | 22 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml
index 2b44feb419..5785546684 100644
--- a/docker/conda/environments/cuda11.8_dev.yml
+++ b/docker/conda/environments/cuda11.8_dev.yml
@@ -68,7 +68,7 @@ dependencies:
   - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
   - mlflow>=2.2.1,<3
   - mrc=23.11
-  - networkx=3.1
+  - networkx>=2.8
   - ninja=1.10
   - nodejs=18.*
   - numba>=0.56.2
diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml
index a7b8b1b49b..517004f5af 100644
--- a/docker/conda/environments/cuda11.8_examples.yml
+++ b/docker/conda/environments/cuda11.8_examples.yml
@@ -22,16 +22,38 @@ channels:
   - rapidsai
   - nvidia
+  - huggingface
   - conda-forge
   - dglteam/label/cu118
 
 dependencies:
+  - arxiv=1.4
   - boto3
   - cuml=23.06
   - dask>=2023.1.1
   - dgl=1.0.2
   - dill=0.3.6
   - distributed>=2023.1.1
+  - langchain=0.0.190
   - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
   - mlflow>=2.2.1,<3
+  - newspaper3k=0.2
   - papermill=2.3.4
+  - pypdf=3.16
+  - requests-cache=1.1
   - s3fs>=2023.6
+  - sentence-transformers
+  - transformers
+
+  ####### Pip Transitive Dependencies (keep sorted!) #######
+  # These are dependencies that are available on conda, but are required by the pip packages listed below. It's much
+  # better to install them with conda than pip to allow for better dependency resolution.
+  - environs=9.5
+  - minio=7.1
+  - python-dotenv=1.0
+  - ujson=5.8
+
+
+  ####### Pip Dependencies (keep sorted!) #######
+  - pip:
+      - grpcio-status==1.58 # To keep in sync with 1.58 grpcio which is installed for Morpheus
+      - nemollm

From 35f1035e15f8a3fb73ae854c1c65ea2e707bc42a Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Oct 2023 15:58:10 -0700
Subject: [PATCH 2/6] Fix handling of stop_after feature

---
 morpheus/stages/input/rss_source_stage.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/morpheus/stages/input/rss_source_stage.py b/morpheus/stages/input/rss_source_stage.py
index 39f7c38cd3..b238c1dafc 100644
--- a/morpheus/stages/input/rss_source_stage.py
+++ b/morpheus/stages/input/rss_source_stage.py
@@ -64,6 +64,12 @@ def __init__(self,
         if (batch_size is None):
             batch_size = c.pipeline_batch_size
 
+        if (stop_after > 0):
+            if (run_indefinitely):
+                raise ValueError("Cannot set both `stop_after` and `run_indefinitely` to True.")
+
+            run_indefinitely = False
+
         self._records_emitted = 0
         self._controller = RSSController(feed_input=feed_input,
                                          batch_size=batch_size,
@@ -101,13 +107,13 @@ def _fetch_feeds(self) -> MessageMeta:
 
                     yield MessageMeta(df=df)
 
-                if not self._controller.run_indefinitely:
-                    self._stop_requested = True
-                    continue
+                if (self._stop_after > 0 and self._records_emitted >= self._stop_after):
+                    self._stop_requested = True
+                    logger.debug("Stop limit reached...preparing to halt the source.")
+                    break
 
-                if (self._stop_after > 0 and self._records_emitted >= self._stop_after):
+                if not self._controller.run_indefinitely:
                     self._stop_requested = True
-                    logger.debug("Stop limit reached...preparing to halt the source.")
                     continue
 
                 logger.debug("Waiting for %d seconds before fetching again...", self._interval_secs)
@@ -128,6 +134,8 @@ def _fetch_feeds(self) -> MessageMeta:
             logger.error("Max retries reached. Unable to fetch feed entries.")
             raise RuntimeError(f"Failed to fetch feed entries after max retries: {exc}") from exc
 
+        logger.debug("Source stopped.")
+
     def _build_source(self, builder: mrc.Builder) -> StreamPair:
         source = builder.make_source(self.unique_name, self._fetch_feeds)
         return source, MessageMeta

From 81c3b72955f1459b1a4128c2263b6f40623509e2 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Oct 2023 16:00:44 -0700
Subject: [PATCH 3/6] Add a stop-after flag

---
 examples/llm/vdb_upload/run.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py
index a4949d0d81..f9040762ab 100644
--- a/examples/llm/vdb_upload/run.py
+++ b/examples/llm/vdb_upload/run.py
@@ -168,11 +168,17 @@ def run():
 @click.option("--isolate_embeddings",
               is_flag=True,
               default=False,
-              help="Whether to pre-calculate the embeddings using Triton")
+              help="Whether to fetch all data prior to executing the rest of the pipeline.")
 @click.option("--use_cache",
               type=click.Path(file_okay=True, dir_okay=False),
               default=None,
               help="What cache to use for the confluence documents")
+@click.option(
+    "--stop_after",
+    default=0,
+    type=click.IntRange(min=0),
+    help="Stop after emitting this many records from the RSS source stage. Useful for testing. Disabled if `0`",
+)
 def pipeline(num_threads,
              pipeline_batch_size,
              model_max_batch_size,
@@ -184,11 +190,14 @@ def pipeline(num_threads,
              model_name,
              pre_calc_embeddings,
              isolate_embeddings,
-             use_cache):
+             use_cache,
+             stop_after: int):
 
     from morpheus.config import Config
     from morpheus.config import CppConfig
     from morpheus.config import PipelineModes
+    from morpheus.messages import ControlMessage
+    from morpheus.messages import MessageMeta
     from morpheus.pipeline.linear_pipeline import LinearPipeline
     from morpheus.stages.general.monitor_stage import MonitorStage
     from morpheus.stages.general.trigger_stage import TriggerStage
@@ -218,7 +227,7 @@ def pipeline(num_threads,
     pipe = LinearPipeline(config)
 
     # add doca source stage
-    pipe.set_source(RSSSourceStage(config, feed_input=_build_rss_urls(), batch_size=128))
+    pipe.set_source(RSSSourceStage(config, feed_input=_build_rss_urls(), batch_size=128, stop_after=stop_after))
 
     pipe.add_stage(MonitorStage(config, description="Source rate", unit='pages'))
 
@@ -226,14 +235,18 @@ def pipeline(num_threads,
 
     pipe.add_stage(MonitorStage(config, description="Download rate", unit='pages'))
 
+    if (pre_calc_embeddings):
+        message_type = MessageMeta
+    else:
+        message_type = ControlMessage
+
+    # add deserialize stage
+    pipe.add_stage(DeserializeStage(config, message_type=message_type))
+
     if (isolate_embeddings):
         pipe.add_stage(TriggerStage(config))
 
     if (pre_calc_embeddings):
-
-        # add deserialize stage
-        pipe.add_stage(DeserializeStage(config))
 
         # add preprocessing stage
         pipe.add_stage(
             PreprocessNLPStage(config,

From e8213d9f5b47489638d65a0b76cdab8bae6fd5e5 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 18 Oct 2023 09:00:41 -0700
Subject: [PATCH 4/6] Update model name and feature length. Remove the
 pre_calc_embeddings flag and make this behavior enabled by default. Move the
 deserialize stage ahead of the trigger when --isolate_embeddings is enabled.
 Add a minimal README

---
 examples/llm/vdb_upload/README.md | 33 ++++++++++++++++++++
 examples/llm/vdb_upload/run.py    | 53 ++++++++++++-------------------
 2 files changed, 54 insertions(+), 32 deletions(-)
 create mode 100644 examples/llm/vdb_upload/README.md

diff --git a/examples/llm/vdb_upload/README.md b/examples/llm/vdb_upload/README.md
new file mode 100644
index 0000000000..13f38d2858
--- /dev/null
+++ b/examples/llm/vdb_upload/README.md
@@ -0,0 +1,33 @@
+## Launching Triton
+
+Pull the Docker image for Triton:
+```bash
+docker pull nvcr.io/nvidia/tritonserver:23.06-py3
+```
+
+From the Morpheus repo root directory, run the following to launch Triton and load the `all-MiniLM-L6-v2` model:
+```bash
+docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model all-MiniLM-L6-v2
+```
+
+This will launch Triton and load only the `all-MiniLM-L6-v2` model. Once Triton has loaded the model, the following will be displayed:
+```
++------------------+---------+--------+
+| Model            | Version | Status |
++------------------+---------+--------+
+| all-MiniLM-L6-v2 | 1       | READY  |
++------------------+---------+--------+
+```
+
+## Running the Pipeline
+
+```bash
+python examples/llm/main.py vdb_upload pipeline
+```
+
+> **Note**: This pipeline will, by default, run continuously, repeatedly polling the configured RSS sources. To stop after a fixed number of records instead, add the `--stop_after=N` flag, as in the example below.
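+
+For example, to stop once at least 100 records have been emitted (100 is an arbitrary choice; any positive integer works):
+```bash
+python examples/llm/main.py vdb_upload pipeline --stop_after=100
+```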
diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py
index ebc2cce1aa..0818712540 100644
--- a/examples/llm/vdb_upload/run.py
+++ b/examples/llm/vdb_upload/run.py
@@ -135,7 +135,7 @@ def run():
 )
 @click.option(
     "--model_fea_length",
-    default=256,
+    default=512,
     type=click.IntRange(min=1),
     help="Features length to use for the model",
 )
@@ -159,13 +159,9 @@ def run():
 @click.option(
     "--model_name",
     required=True,
-    default='all-mpnet-base-v2',
+    default='all-MiniLM-L6-v2',
     help="The name of the model that is deployed on Triton server",
 )
-@click.option("--pre_calc_embeddings",
-              is_flag=True,
-              default=False,
-              help="Whether to pre-calculate the embeddings using Triton")
 @click.option("--isolate_embeddings",
               is_flag=True,
               default=False,
@@ -189,7 +185,6 @@ def pipeline(num_threads,
              output_file,
              server_url,
              model_name,
-             pre_calc_embeddings,
              isolate_embeddings,
              use_cache,
              stop_after: int):
@@ -236,36 +231,30 @@ def pipeline(num_threads,
 
     pipe.add_stage(MonitorStage(config, description="Download rate", unit='pages'))
 
-    if (pre_calc_embeddings):
-        message_type = MessageMeta
-    else:
-        message_type = ControlMessage
-
     # add deserialize stage
-    pipe.add_stage(DeserializeStage(config, message_type=message_type))
+    pipe.add_stage(DeserializeStage(config))
 
     if (isolate_embeddings):
         pipe.add_stage(TriggerStage(config))
 
-    if (pre_calc_embeddings):
-        # add preprocessing stage
-        pipe.add_stage(
-            PreprocessNLPStage(config,
-                               vocab_hash_file="data/bert-base-uncased-hash.txt",
-                               do_lower_case=True,
-                               truncation=True,
-                               add_special_tokens=False,
-                               column='page_content'))
-
-        pipe.add_stage(MonitorStage(config, description="Tokenize rate", unit='events', delayed_start=True))
-
-        pipe.add_stage(
-            TritonInferenceStage(config,
-                                 model_name=model_name,
-                                 server_url="localhost:8001",
-                                 force_convert_inputs=True,
-                                 use_shared_memory=True))
-        pipe.add_stage(MonitorStage(config, description="Inference rate", unit="events", delayed_start=True))
+    # add preprocessing stage
+    pipe.add_stage(
+        PreprocessNLPStage(config,
+                           vocab_hash_file="data/bert-base-uncased-hash.txt",
+                           do_lower_case=True,
+                           truncation=True,
+                           add_special_tokens=False,
+                           column='page_content'))
+
+    pipe.add_stage(MonitorStage(config, description="Tokenize rate", unit='events', delayed_start=True))
+
+    pipe.add_stage(
+        TritonInferenceStage(config,
+                             model_name=model_name,
+                             server_url="localhost:8001",
+                             force_convert_inputs=True,
+                             use_shared_memory=True))
+    pipe.add_stage(MonitorStage(config, description="Inference rate", unit="events", delayed_start=True))
 
     pipe.add_stage(
         WriteToVectorDBStage(config,

From 6ec023660083e3de14967ef7921e6bb4715803a3 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 18 Oct 2023 09:27:24 -0700
Subject: [PATCH 5/6] Update model name

---
 examples/llm/vdb_upload/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py
index 0818712540..b2479151d4 100644
--- a/examples/llm/vdb_upload/run.py
+++ b/examples/llm/vdb_upload/run.py
@@ -279,7 +279,7 @@ def pipeline(num_threads,
 @click.option(
     "--model_name",
     required=True,
-    default='all-mpnet-base-v2',
+    default='all-MiniLM-L6-v2',
     help="The name of the model that is deployed on Triton server",
 )
 @click.option(
@@ -374,7 +374,7 @@ def _save_model(model, max_seq_length: int, batch_size: int, output_model_path:
 @click.option(
     "--model_name",
     required=True,
-    default='all-mpnet-base-v2',
+    default='all-MiniLM-L6-v2',
     help="The name of the model that is deployed on Triton server",
 )
 @click.option(

From e748df64677b94ed12c157647ad5406f2133935b Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 18 Oct 2023 11:34:19 -0700
Subject: [PATCH 6/6] Pin to an older version of huggingface_hub to work
 around https://github.com/UKPLab/sentence-transformers/issues/1762

---
 docker/conda/environments/cuda11.8_examples.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml
index 517004f5af..bd56baf9e5 100644
--- a/docker/conda/environments/cuda11.8_examples.yml
+++ b/docker/conda/environments/cuda11.8_examples.yml
@@ -33,6 +33,7 @@ dependencies:
   - dgl=1.0.2
   - dill=0.3.6
   - distributed>=2023.1.1
+  - huggingface_hub=0.10.1 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762
   - langchain=0.0.190
   - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863
   - mlflow>=2.2.1,<3
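
---

One possible way to exercise the series end-to-end, sketched below. It assumes the patches above are saved as an mbox file named `llm-examples.mbox` and that the conda environment is named `morpheus`; both names, and the record count, are arbitrary choices for illustration:

```bash
# Apply the series onto a Morpheus checkout
git am llm-examples.mbox

# Update the environment with the new example dependencies,
# including the pinned huggingface_hub from the final patch
conda env update -n morpheus --file docker/conda/environments/cuda11.8_examples.yml

# Smoke-test the upload pipeline, stopping after at least 100 records
python examples/llm/main.py vdb_upload pipeline --stop_after=100
```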