From 87ab1ca2820cbb2123a28025c4eaf8e1c4b8327d Mon Sep 17 00:00:00 2001
From: Jeffrey Martin
Date: Wed, 11 Dec 2024 08:30:59 -0600
Subject: [PATCH 1/6] detect chat support from tokenizer

Signed-off-by: Jeffrey Martin
---
 garak/generators/huggingface.py | 40 +++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index abfddc9c..564056e9 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -87,6 +87,10 @@ def _load_client(self):
             self.generator.tokenizer = AutoTokenizer.from_pretrained(
                 pipeline_kwargs["model"]
             )
+        self.use_chat = (
+            hasattr(self.generator.tokenizer, "chat_template")
+            and self.generator.tokenizer.chat_template is not None
+        )
         if not hasattr(self, "deprefix_prompt"):
             self.deprefix_prompt = self.name in models_to_deprefix
         if _config.loaded:
@@ -98,6 +102,9 @@ def _load_client(self):
     def _clear_client(self):
         self.generator = None
 
+    def _format_chat_prompt(self, prompt: str) -> List[dict]:
+        return [{"role": "user", "content": prompt}]
+
     def _call_model(
         self, prompt: str, generations_this_call: int = 1
     ) -> List[Union[str, None]]:
@@ -107,7 +114,21 @@ def _call_model(
             try:
                 with torch.no_grad():
                     # workaround for pipeline to truncate the input
-                    encoded_prompt = self.generator.tokenizer(prompt, truncation=True)
+
+                    # according to docs https://huggingface.co/docs/transformers/main/en/chat_templating
+                    # chat template should be automatically utilized if the pipeline tokenizer has support
+                    if self.use_chat:
+                        formatted_prompt = self.generator.tokenizer.apply_chat_template(
+                            self._format_chat_prompt(prompt),
+                            tokenize=False,
+                            add_generation_prompt=True,
+                        )
+                    else:
+                        formatted_prompt = prompt
+
+                    encoded_prompt = self.generator.tokenizer(
+                        formatted_prompt, truncation=True
+                    )
                     truncated_prompt = self.generator.tokenizer.decode(
                         encoded_prompt["input_ids"], skip_special_tokens=True
                     )
@@ -468,6 +489,12 @@ def _load_client(self):
             self.name, padding_side="left"
         )
 
+        # test tokenizer for `apply_chat_template` support
+        self.use_chat = (
+            hasattr(self.tokenizer, "chat_template")
+            and self.tokenizer.chat_template is not None
+        )
+
         self.generation_config = transformers.GenerationConfig.from_pretrained(
             self.name
         )
@@ -496,8 +523,17 @@ def _call_model(
         with warnings.catch_warnings():
             warnings.simplefilter("ignore", category=UserWarning)
             with torch.no_grad():
+                if self.use_chat:
+                    formatted_prompt = self.tokenizer.apply_chat_template(
+                        self._format_chat_prompt(prompt),
+                        tokenize=False,
+                        add_generation_prompt=True,
+                    )
+                else:
+                    formatted_prompt = prompt
+
                 inputs = self.tokenizer(
-                    prompt, truncation=True, return_tensors="pt"
+                    formatted_prompt, truncation=True, return_tensors="pt"
                 ).to(self.device)
 
                 try:

From 9449258e82baead1d25c0063ffa454f294f8e4ba Mon Sep 17 00:00:00 2001
From: Jeffrey Martin
Date: Mon, 16 Dec 2024 14:34:00 -0600
Subject: [PATCH 2/6] test a chat enabled hf model

Signed-off-by: Jeffrey Martin
---
 tests/generators/test_huggingface.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/generators/test_huggingface.py b/tests/generators/test_huggingface.py
index f784d95d..8c264eed 100644
--- a/tests/generators/test_huggingface.py
+++ b/tests/generators/test_huggingface.py
@@ -50,6 +50,18 @@ def test_pipeline(hf_generator_config):
         assert isinstance(item, str)
 
 
+def test_pipeline_chat(mocker, hf_generator_config):
+    # uses a ~350M model with chat support
+    g = garak.generators.huggingface.Pipeline(
"microsoft/DialoGPT-small", config_root=hf_generator_config + ) + mock_format = mocker.patch.object( + g, "_format_chat_prompt", wraps=g._format_chat_prompt + ) + g.generate("Hello world!") + mock_format.assert_called_once() + + def test_inference(mocker, hf_mock_response, hf_generator_config): model_name = "gpt2" mock_request = mocker.patch.object( @@ -121,6 +133,18 @@ def test_model(hf_generator_config): assert item is None # gpt2 is known raise exception returning `None` +def test_model_chat(mocker, hf_generator_config): + # uses a ~350M model with chat support + g = garak.generators.huggingface.Model( + "microsoft/DialoGPT-small", config_root=hf_generator_config + ) + mock_format = mocker.patch.object( + g, "_format_chat_prompt", wraps=g._format_chat_prompt + ) + g.generate("Hello world!") + mock_format.assert_called_once() + + def test_select_hf_device(): from garak.generators.huggingface import HFCompatible import torch From 9ec57943650a07b0a67c27e687a2fa31858fcd2d Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Tue, 17 Dec 2024 11:33:10 -0600 Subject: [PATCH 3/6] always deprefix when using chat template --- garak/generators/huggingface.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index 564056e9..fa6acb06 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -148,10 +148,11 @@ def _call_model( i["generated_text"] for i in raw_output ] # generator returns 10 outputs by default in __init__ - if not self.deprefix_prompt: + if not self.deprefix_prompt and not self.use_chat: return outputs else: - return [re.sub("^" + re.escape(prompt), "", _o) for _o in outputs] + # consider using formatted_prompt in removal as a `list` or `str` + return [re.sub("^" + re.escape(formatted_prompt), "", _o) for _o in outputs] class OptimumPipeline(Pipeline, HFCompatible): @@ -552,10 +553,13 @@ def _call_model( outputs, skip_special_tokens=True, device=self.device ) - if not self.deprefix_prompt: + if not self.deprefix_prompt and not self.use_chat: return text_output else: - return [re.sub("^" + re.escape(prompt), "", i) for i in text_output] + # consider using formatted_prompt in removal as a `list` or `str` + return [ + re.sub("^" + re.escape(formatted_prompt), "", i) for i in text_output + ] class LLaVA(Generator, HFCompatible): From 99c760a87ba4f21e3bb58f1b387d69cb383ca3ce Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Tue, 17 Dec 2024 14:53:45 -0600 Subject: [PATCH 4/6] use pipeline explicit truncation, use tokenizer decode in Model * When using a pipeline with `chat` support defer to library to enable response formatting to be in canonical chat list of dict form. * Models responses may have the chat template based prompt as a prefix decode to ensure the template mutated prompt is removed. 
---
 garak/generators/huggingface.py | 55 ++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index fa6acb06..ddd3f3e8 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -79,6 +79,9 @@ def _load_client(self):
             set_seed(_config.run.seed)
 
         pipeline_kwargs = self._gather_hf_params(hf_constructor=pipeline)
+        pipeline_kwargs["truncation"] = (
+            True  # this is forced to maintain existing pipeline expectations
+        )
         self.generator = pipeline("text-generation", **pipeline_kwargs)
         if self.generator.tokenizer is None:
             # account for possible model without a stored tokenizer
@@ -113,27 +116,16 @@ def _call_model(
             warnings.simplefilter("ignore", category=UserWarning)
             try:
                 with torch.no_grad():
-                    # workaround for pipeline to truncate the input
-
                     # according to docs https://huggingface.co/docs/transformers/main/en/chat_templating
                     # chat template should be automatically utilized if the pipeline tokenizer has support
+                    # and a properly formatted list[dict] is supplied
                     if self.use_chat:
-                        formatted_prompt = self.generator.tokenizer.apply_chat_template(
-                            self._format_chat_prompt(prompt),
-                            tokenize=False,
-                            add_generation_prompt=True,
-                        )
+                        formatted_prompt = self._format_chat_prompt(prompt)
                     else:
                         formatted_prompt = prompt
 
-                    encoded_prompt = self.generator.tokenizer(
-                        formatted_prompt, truncation=True
-                    )
-                    truncated_prompt = self.generator.tokenizer.decode(
-                        encoded_prompt["input_ids"], skip_special_tokens=True
-                    )
                     raw_output = self.generator(
-                        truncated_prompt,
+                        formatted_prompt,
                         pad_token_id=self.generator.tokenizer.eos_token_id,
                         max_new_tokens=self.max_tokens,
                         num_return_sequences=generations_this_call,
@@ -148,11 +140,15 @@ def _call_model(
                 i["generated_text"] for i in raw_output
             ]  # generator returns 10 outputs by default in __init__
 
-        if not self.deprefix_prompt and not self.use_chat:
-            return outputs
+        if self.use_chat:
+            text_outputs = [_o[-1]["content"].strip() for _o in outputs]
         else:
-            # consider using formatted_prompt in removal as a `list` or `str`
-            return [re.sub("^" + re.escape(formatted_prompt), "", _o) for _o in outputs]
+            text_outputs = outputs
+
+        if not self.deprefix_prompt:
+            return text_outputs
+        else:
+            return [re.sub("^" + re.escape(prompt), "", _o) for _o in text_outputs]
 
 
 class OptimumPipeline(Pipeline, HFCompatible):
@@ -520,7 +516,7 @@ def _call_model(
         if self.top_k is not None:
             self.generation_config.top_k = self.top_k
 
-        text_output = []
+        raw_text_output = []
        with warnings.catch_warnings():
             warnings.simplefilter("ignore", category=UserWarning)
             with torch.no_grad():
@@ -537,6 +533,10 @@ def _call_model(
                     formatted_prompt, truncation=True, return_tensors="pt"
                 ).to(self.device)
 
+                prefix_prompt = self.tokenizer.decode(
+                    inputs["input_ids"][0], skip_special_tokens=True
+                )
+
                 try:
                     outputs = self.model.generate(
                         **inputs, generation_config=self.generation_config
@@ -549,17 +549,22 @@ def _call_model(
                         return returnval
                     else:
                         raise e
-            text_output = self.tokenizer.batch_decode(
+            raw_text_output = self.tokenizer.batch_decode(
                 outputs, skip_special_tokens=True, device=self.device
             )
 
-        if not self.deprefix_prompt and not self.use_chat:
+        if self.use_chat:
+            text_output = [
+                re.sub("^" + re.escape(prefix_prompt), "", i).strip()
+                for i in raw_text_output
+            ]
+        else:
+            text_output = raw_text_output
+
+        if not self.deprefix_prompt:
             return text_output
         else:
-            # consider using formatted_prompt in removal as a `list` or `str`
-            return [
-                re.sub("^" + re.escape(formatted_prompt), "", i) for i in text_output
-            ]
+            return [re.sub("^" + re.escape(prefix_prompt), "", i) for i in text_output]
 
 
 class LLaVA(Generator, HFCompatible):

From 900fc8f5db3341f7f72c68e77e088116182f8141 Mon Sep 17 00:00:00 2001
From: Jeffrey Martin
Date: Thu, 19 Dec 2024 10:33:14 -0600
Subject: [PATCH 5/6] allow user provided `use_chat` override

While not exposed as a `DEFAULT_PARAM`, allowing the user to suppress
chat template usage for a model can enable exploration of possible
weaknesses when an application passes input to the model with
insufficient or improper prompt handling.

Signed-off-by: Jeffrey Martin
---
 garak/generators/huggingface.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py
index ddd3f3e8..341c3f0e 100644
--- a/garak/generators/huggingface.py
+++ b/garak/generators/huggingface.py
@@ -90,10 +90,11 @@ def _load_client(self):
             self.generator.tokenizer = AutoTokenizer.from_pretrained(
                 pipeline_kwargs["model"]
             )
-        self.use_chat = (
-            hasattr(self.generator.tokenizer, "chat_template")
-            and self.generator.tokenizer.chat_template is not None
-        )
+        if not hasattr(self, "use_chat"):
+            self.use_chat = (
+                hasattr(self.generator.tokenizer, "chat_template")
+                and self.generator.tokenizer.chat_template is not None
+            )
         if not hasattr(self, "deprefix_prompt"):
             self.deprefix_prompt = self.name in models_to_deprefix
         if _config.loaded:
@@ -486,11 +487,12 @@ def _load_client(self):
             self.name, padding_side="left"
         )
 
-        # test tokenizer for `apply_chat_template` support
-        self.use_chat = (
-            hasattr(self.tokenizer, "chat_template")
-            and self.tokenizer.chat_template is not None
-        )
+        if not hasattr(self, "use_chat"):
+            # test tokenizer for `apply_chat_template` support
+            self.use_chat = (
+                hasattr(self.tokenizer, "chat_template")
+                and self.tokenizer.chat_template is not None
+            )
 
         self.generation_config = transformers.GenerationConfig.from_pretrained(
             self.name
         )

From 86de1161969bc84c72291f8aa67da0e9dc9eff44 Mon Sep 17 00:00:00 2001
From: Jeffrey Martin
Date: Thu, 19 Dec 2024 10:58:49 -0600
Subject: [PATCH 6/6] ensure output format on chat enabled model tests

Signed-off-by: Jeffrey Martin
---
 tests/generators/test_huggingface.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/generators/test_huggingface.py b/tests/generators/test_huggingface.py
index 8c264eed..fd830027 100644
--- a/tests/generators/test_huggingface.py
+++ b/tests/generators/test_huggingface.py
@@ -58,8 +58,11 @@ def test_pipeline_chat(mocker, hf_generator_config):
     mock_format = mocker.patch.object(
         g, "_format_chat_prompt", wraps=g._format_chat_prompt
     )
-    g.generate("Hello world!")
+    output = g.generate("Hello world!")
     mock_format.assert_called_once()
+    assert len(output) == 1
+    for item in output:
+        assert isinstance(item, str)
 
 
 def test_inference(mocker, hf_mock_response, hf_generator_config):
@@ -141,8 +144,11 @@ def test_model_chat(mocker, hf_generator_config):
     mock_format = mocker.patch.object(
         g, "_format_chat_prompt", wraps=g._format_chat_prompt
     )
-    g.generate("Hello world!")
+    output = g.generate("Hello world!")
     mock_format.assert_called_once()
+    assert len(output) == 1
+    for item in output:
+        assert isinstance(item, str)
 
 
 def test_select_hf_device():
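
A minimal usage sketch tying the series together (illustrative only, not
part of the patches; `hf_generator_config` stands in for a garak config
object like the test fixture above, and the direct attribute override
relies on the `hasattr` guard added in PATCH 5/6):

    import garak.generators.huggingface

    # chat template detected from the tokenizer (PATCH 1/6)
    g = garak.generators.huggingface.Pipeline(
        "microsoft/DialoGPT-small", config_root=hf_generator_config
    )
    print(g.use_chat)  # True when the tokenizer ships a chat_template

    # the prompt is wrapped by _format_chat_prompt() and the assistant reply
    # is unwrapped from the chat-form pipeline output (PATCH 4/6)
    chat_output = g.generate("Hello world!")

    # suppress chat templating to probe raw-prompt handling (PATCH 5/6); a
    # value supplied via config before _load_client() runs has the same effect
    g.use_chat = False
    raw_output = g.generate("Hello world!")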