From ecd9791b4a51b75753014e6b921fc26a56485a85 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Sun, 12 Jan 2025 23:56:35 -0800 Subject: [PATCH 01/17] Add support for output messages for streaming/non-streaming and sync/async --- .../instrumentation/litellm/__init__.py | 27 +++++++++--- .../tests/test_instrumentor.py | 43 ++++++++++++++++++- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index f15af0bcf..f37cdf69a 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -1,7 +1,7 @@ import json from enum import Enum from functools import wraps -from typing import Any, Callable, Collection, Dict, Iterable, Iterator, Mapping, Tuple, TypeVar +from typing import Any, Callable, Collection, Dict, Iterable, Iterator, Mapping, Tuple, TypeVar, Union from openai.types.image import Image from opentelemetry import context as context_api @@ -12,6 +12,7 @@ import litellm from litellm.types.utils import ( + StreamingChoices, Choices, EmbeddingResponse, ImageResponse, @@ -153,10 +154,9 @@ def _instrument_func_type_image_generation(span: trace_api.Span, kwargs: Dict[st def _finalize_span(span: trace_api.Span, result: Any) -> None: if isinstance(result, ModelResponse): - if (choices := result.choices) and len(choices) > 0: - choice = choices[0] - if isinstance(choice, Choices) and (output := choice.message.content): - _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) + for idx, choice in enumerate(result.choices): + _process_choice(span, choice, idx) + elif isinstance(result, EmbeddingResponse): if result_data := result.data: first_embedding = result_data[0] @@ -187,6 +187,23 @@ def _finalize_span(span: trace_api.Span, result: Any) -> None: span, SpanAttributes.LLM_TOKEN_COUNT_TOTAL, result.usage["total_tokens"] ) +def _process_choice(span: trace_api.Span, choice: Union[Choices, StreamingChoices], idx: int) -> None: + if isinstance(choice, Choices): + if idx == 0 and choice.message and (output := choice.message.content): + _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) + + for key, value in _get_attributes_from_message_param(choice.message): + _set_span_attribute( + span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value + ) + elif isinstance(choice, StreamingChoices): + if idx == 0 and choice.delta and (output := choice.delta.content): + _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) + + for key, value in _get_attributes_from_message_param(choice.delta): + _set_span_attribute( + span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value + ) class LiteLLMInstrumentor(BaseInstrumentor): # type: ignore original_litellm_funcs: Dict[ diff --git a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py index 2240eb027..43e460da6 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py @@ -47,6 +47,7 @@ def test_oitracer( 
@pytest.mark.parametrize("use_context_attributes", [False, True]) +@pytest.mark.parametrize("n", [1, 5]) def test_completion( tracer_provider: TracerProvider, in_memory_span_exporter: InMemorySpanExporter, @@ -58,11 +59,13 @@ def test_completion( prompt_template: str, prompt_template_version: str, prompt_template_variables: Dict[str, Any], + n: int, ) -> None: in_memory_span_exporter.clear() LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider) input_messages = [{"content": "What's the capital of China?", "role": "user"}] + response = None if use_context_attributes: with using_attributes( session_id=session_id, @@ -73,15 +76,17 @@ def test_completion( prompt_template_version=prompt_template_version, prompt_template_variables=prompt_template_variables, ): - litellm.completion( + response = litellm.completion( model="gpt-3.5-turbo", messages=input_messages, + n=n, mock_response="Beijing", ) else: - litellm.completion( + response = litellm.completion( model="gpt-3.5-turbo", messages=input_messages, + n=n, mock_response="Beijing", ) @@ -94,6 +99,9 @@ def test_completion( assert attributes.get(SpanAttributes.INPUT_VALUE) == json.dumps(input_messages) assert attributes.get(SpanAttributes.OUTPUT_VALUE) == "Beijing" + for i, choice in enumerate(response['choices']): + _check_llm_message(SpanAttributes.LLM_OUTPUT_MESSAGES, i, attributes, choice.message) + assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_PROMPT) == 10 assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_COMPLETION) == 20 assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_TOTAL) == 30 @@ -111,6 +119,37 @@ def test_completion( ) LiteLLMInstrumentor().uninstrument() +@pytest.mark.parametrize("n", [1, 4]) +def test_completion_streaming( + tracer_provider: TracerProvider, + in_memory_span_exporter: InMemorySpanExporter, + n: int, +) -> None: + in_memory_span_exporter.clear() + LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider) + + input_messages = [{"content": "What's the capital of China?", "role": "user"}] + + response = litellm.completion( + model="gpt-3.5-turbo", + messages=input_messages, + mock_response="Beijing", + stream=True, + n=n, + ) + + spans = in_memory_span_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + assert span.name == "completion" + attributes = dict(cast(Mapping[str, AttributeValue], span.attributes)) + assert attributes.get(SpanAttributes.LLM_MODEL_NAME) == "gpt-3.5-turbo" + assert attributes.get(SpanAttributes.INPUT_VALUE) == json.dumps(input_messages) + + for i, chunk in response.chunks: + _check_llm_message(SpanAttributes.LLM_OUTPUT_MESSAGES, i, attributes, chunk.message.delta) + + LiteLLMInstrumentor().uninstrument() def test_completion_with_parameters( tracer_provider: TracerProvider, in_memory_span_exporter: InMemorySpanExporter From e3aada2df402def8648b1e350b2af9d41e8ab612 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 01:15:33 -0800 Subject: [PATCH 02/17] lint --- .../instrumentation/litellm/__init__.py | 33 ++++++++++++------- .../instrumentation/litellm/version.py | 2 +- .../tests/test_instrumentor.py | 5 ++- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index f37cdf69a..f6931807f 100644 --- 
a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -1,7 +1,18 @@ import json from enum import Enum from functools import wraps -from typing import Any, Callable, Collection, Dict, Iterable, Iterator, Mapping, Tuple, TypeVar, Union +from typing import ( + Any, + Callable, + Collection, + Dict, + Iterable, + Iterator, + Mapping, + Tuple, + TypeVar, + Union, +) from openai.types.image import Image from opentelemetry import context as context_api @@ -12,11 +23,11 @@ import litellm from litellm.types.utils import ( - StreamingChoices, Choices, EmbeddingResponse, ImageResponse, ModelResponse, + StreamingChoices, ) from openinference.instrumentation import ( OITracer, @@ -156,7 +167,7 @@ def _finalize_span(span: trace_api.Span, result: Any) -> None: if isinstance(result, ModelResponse): for idx, choice in enumerate(result.choices): _process_choice(span, choice, idx) - + elif isinstance(result, EmbeddingResponse): if result_data := result.data: first_embedding = result_data[0] @@ -187,23 +198,23 @@ def _finalize_span(span: trace_api.Span, result: Any) -> None: span, SpanAttributes.LLM_TOKEN_COUNT_TOTAL, result.usage["total_tokens"] ) -def _process_choice(span: trace_api.Span, choice: Union[Choices, StreamingChoices], idx: int) -> None: - if isinstance(choice, Choices): + +def _process_choice( + span: trace_api.Span, choice: Union[Choices, StreamingChoices], idx: int +) -> None: + if isinstance(choice, Choices): if idx == 0 and choice.message and (output := choice.message.content): _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) for key, value in _get_attributes_from_message_param(choice.message): - _set_span_attribute( - span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value - ) + _set_span_attribute(span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value) elif isinstance(choice, StreamingChoices): if idx == 0 and choice.delta and (output := choice.delta.content): _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) for key, value in _get_attributes_from_message_param(choice.delta): - _set_span_attribute( - span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value - ) + _set_span_attribute(span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value) + class LiteLLMInstrumentor(BaseInstrumentor): # type: ignore original_litellm_funcs: Dict[ diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/version.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/version.py index 1276d0254..0a8da8825 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/version.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/version.py @@ -1 +1 @@ -__version__ = "0.1.5" +__version__ = "0.1.6" diff --git a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py index 43e460da6..46b99afb1 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py @@ -99,7 +99,7 @@ def test_completion( assert 
attributes.get(SpanAttributes.INPUT_VALUE) == json.dumps(input_messages) assert attributes.get(SpanAttributes.OUTPUT_VALUE) == "Beijing" - for i, choice in enumerate(response['choices']): + for i, choice in enumerate(response["choices"]): _check_llm_message(SpanAttributes.LLM_OUTPUT_MESSAGES, i, attributes, choice.message) assert attributes.get(SpanAttributes.LLM_TOKEN_COUNT_PROMPT) == 10 @@ -119,6 +119,7 @@ def test_completion( ) LiteLLMInstrumentor().uninstrument() + @pytest.mark.parametrize("n", [1, 4]) def test_completion_streaming( tracer_provider: TracerProvider, @@ -149,8 +150,10 @@ def test_completion_streaming( for i, chunk in response.chunks: _check_llm_message(SpanAttributes.LLM_OUTPUT_MESSAGES, i, attributes, chunk.message.delta) + # usage is not used in streaming completion LiteLLMInstrumentor().uninstrument() + def test_completion_with_parameters( tracer_provider: TracerProvider, in_memory_span_exporter: InMemorySpanExporter ) -> None: From dc4e9c2c66fd2d1041169e3194b081e2dd47a4b1 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 13:05:52 -0800 Subject: [PATCH 03/17] removed streaming --- .../instrumentation/litellm/__init__.py | 39 +++++++++---------- .../tests/test_instrumentor.py | 34 ---------------- 2 files changed, 18 insertions(+), 55 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index f6931807f..e525772e5 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -1,6 +1,7 @@ import json from enum import Enum from functools import wraps +from time import sleep from typing import ( Any, Callable, @@ -27,7 +28,7 @@ EmbeddingResponse, ImageResponse, ModelResponse, - StreamingChoices, + ModelResponseStream ) from openinference.instrumentation import ( OITracer, @@ -164,9 +165,17 @@ def _instrument_func_type_image_generation(span: trace_api.Span, kwargs: Dict[st def _finalize_span(span: trace_api.Span, result: Any) -> None: - if isinstance(result, ModelResponse): + if isinstance(result, ModelResponse) or isinstance(result, ModelResponseStream): for idx, choice in enumerate(result.choices): - _process_choice(span, choice, idx) + if not isinstance(choice, Choices): + continue + + if idx == 0 and choice.message and (output := choice.message.content): + _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) + + for key, value in _get_attributes_from_message_param(choice.message): + _set_span_attribute(span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value) + elif isinstance(result, EmbeddingResponse): if result_data := result.data: @@ -199,23 +208,6 @@ def _finalize_span(span: trace_api.Span, result: Any) -> None: ) -def _process_choice( - span: trace_api.Span, choice: Union[Choices, StreamingChoices], idx: int -) -> None: - if isinstance(choice, Choices): - if idx == 0 and choice.message and (output := choice.message.content): - _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) - - for key, value in _get_attributes_from_message_param(choice.message): - _set_span_attribute(span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value) - elif isinstance(choice, StreamingChoices): - if idx == 0 and choice.delta and (output := 
choice.delta.content): - _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) - - for key, value in _get_attributes_from_message_param(choice.delta): - _set_span_attribute(span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value) - - class LiteLLMInstrumentor(BaseInstrumentor): # type: ignore original_litellm_funcs: Dict[ str, Callable[..., Any] @@ -272,8 +264,13 @@ def _completion_wrapper(self, *args: Any, **kwargs: Any) -> ModelResponse: name="completion", attributes=dict(get_attributes_from_context()) ) as span: _instrument_func_type_completion(span, kwargs) + print("1: ", kwargs) result = self.original_litellm_funcs["completion"](*args, **kwargs) - _finalize_span(span, result) + print("1.5:", result) + if isinstance(result, litellm.CustomStreamWrapper): + _finalize_span(span, result.completion_stream) + else: + _finalize_span(span, result) return result # type:ignore @wraps(litellm.acompletion) diff --git a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py index 46b99afb1..1b3e78acc 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py @@ -120,40 +120,6 @@ def test_completion( LiteLLMInstrumentor().uninstrument() -@pytest.mark.parametrize("n", [1, 4]) -def test_completion_streaming( - tracer_provider: TracerProvider, - in_memory_span_exporter: InMemorySpanExporter, - n: int, -) -> None: - in_memory_span_exporter.clear() - LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider) - - input_messages = [{"content": "What's the capital of China?", "role": "user"}] - - response = litellm.completion( - model="gpt-3.5-turbo", - messages=input_messages, - mock_response="Beijing", - stream=True, - n=n, - ) - - spans = in_memory_span_exporter.get_finished_spans() - assert len(spans) == 1 - span = spans[0] - assert span.name == "completion" - attributes = dict(cast(Mapping[str, AttributeValue], span.attributes)) - assert attributes.get(SpanAttributes.LLM_MODEL_NAME) == "gpt-3.5-turbo" - assert attributes.get(SpanAttributes.INPUT_VALUE) == json.dumps(input_messages) - - for i, chunk in response.chunks: - _check_llm_message(SpanAttributes.LLM_OUTPUT_MESSAGES, i, attributes, chunk.message.delta) - - # usage is not used in streaming completion - LiteLLMInstrumentor().uninstrument() - - def test_completion_with_parameters( tracer_provider: TracerProvider, in_memory_span_exporter: InMemorySpanExporter ) -> None: From e00229b04343762f2e6903325c38689011d2339d Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 13:12:50 -0800 Subject: [PATCH 04/17] remove prints --- .../src/openinference/instrumentation/litellm/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index e525772e5..4a43a2acb 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -264,9 +264,7 @@ def _completion_wrapper(self, *args: Any, **kwargs: Any) -> ModelResponse: name="completion", 
attributes=dict(get_attributes_from_context()) ) as span: _instrument_func_type_completion(span, kwargs) - print("1: ", kwargs) result = self.original_litellm_funcs["completion"](*args, **kwargs) - print("1.5:", result) if isinstance(result, litellm.CustomStreamWrapper): _finalize_span(span, result.completion_stream) else: From 754b08071c65f8aae90815a8a233490713bb4276 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 13:13:50 -0800 Subject: [PATCH 05/17] lint --- .../openinference/instrumentation/litellm/__init__.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index 4a43a2acb..1d2e76b0a 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -1,7 +1,6 @@ import json from enum import Enum from functools import wraps -from time import sleep from typing import ( Any, Callable, @@ -12,7 +11,6 @@ Mapping, Tuple, TypeVar, - Union, ) from openai.types.image import Image @@ -28,7 +26,7 @@ EmbeddingResponse, ImageResponse, ModelResponse, - ModelResponseStream + ModelResponseStream, ) from openinference.instrumentation import ( OITracer, @@ -169,13 +167,14 @@ def _finalize_span(span: trace_api.Span, result: Any) -> None: for idx, choice in enumerate(result.choices): if not isinstance(choice, Choices): continue - + if idx == 0 and choice.message and (output := choice.message.content): _set_span_attribute(span, SpanAttributes.OUTPUT_VALUE, output) for key, value in _get_attributes_from_message_param(choice.message): - _set_span_attribute(span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value) - + _set_span_attribute( + span, f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}.{key}", value + ) elif isinstance(result, EmbeddingResponse): if result_data := result.data: From 0d9ba2f3d2dc2b1ef6a6dcbcd02ee1a989214e83 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 13:20:15 -0800 Subject: [PATCH 06/17] fix ci errors --- .../src/openinference/instrumentation/litellm/__init__.py | 5 +---- .../tests/test_instrumentor.py | 8 ++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index 1d2e76b0a..180c695ba 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -264,10 +264,7 @@ def _completion_wrapper(self, *args: Any, **kwargs: Any) -> ModelResponse: ) as span: _instrument_func_type_completion(span, kwargs) result = self.original_litellm_funcs["completion"](*args, **kwargs) - if isinstance(result, litellm.CustomStreamWrapper): - _finalize_span(span, result.completion_stream) - else: - _finalize_span(span, result) + _finalize_span(span, result) return result # type:ignore @wraps(litellm.acompletion) diff --git 
a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py index 1b3e78acc..70c7724fe 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py @@ -5,7 +5,7 @@ import litellm import pytest from litellm import OpenAIChatCompletion # type: ignore[attr-defined] -from litellm.types.utils import EmbeddingResponse, ImageResponse, Usage +from litellm.types.utils import EmbeddingResponse, ImageObject, ImageResponse, Usage from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor @@ -548,7 +548,7 @@ def test_image_generation_url( mock_response_image_gen = ImageResponse( created=1722359754, - data=[{"b64_json": None, "revised_prompt": None, "url": "https://dummy-url"}], + data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], ) with patch.object( @@ -618,7 +618,7 @@ def test_image_generation_b64json( mock_response_image_gen = ImageResponse( created=1722359754, - data=[{"b64_json": "dummy_b64_json", "revised_prompt": None, "url": None}], + data=[ImageObject(b64_json="dummy_b64_json", revised_prompt=None, url=None)], ) with patch.object( @@ -688,7 +688,7 @@ async def test_aimage_generation( mock_response_image_gen = ImageResponse( created=1722359754, - data=[{"b64_json": None, "revised_prompt": None, "url": "https://dummy-url"}], + data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], ) with patch.object( OpenAIChatCompletion, "aimage_generation", return_value=mock_response_image_gen From 9d5148fd8e08a9415debe34e2069de963867e6ad Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 15:29:21 -0800 Subject: [PATCH 07/17] Update __init__.py --- .../src/openinference/instrumentation/litellm/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index 180c695ba..19ef943de 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -11,6 +11,7 @@ Mapping, Tuple, TypeVar, + Union, ) from openai.types.image import Image @@ -59,7 +60,7 @@ def is_iterable_of(lst: Iterable[object], tp: T) -> bool: def _get_attributes_from_message_param( - message: Mapping[str, Any], + message: Union[Mapping[str, Any], litellm.Message], ) -> Iterator[Tuple[str, AttributeValue]]: if not hasattr(message, "get"): return From 1f52f5535a56ef1a04323f08603e62919fe72b9d Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 15:50:55 -0800 Subject: [PATCH 08/17] Update test_instrumentor.py --- .../tests/test_instrumentor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py index 70c7724fe..5b3ca7a37 100644 --- 
a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py @@ -548,7 +548,7 @@ def test_image_generation_url( mock_response_image_gen = ImageResponse( created=1722359754, - data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], + data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: List[ImageObject] ) with patch.object( @@ -618,7 +618,7 @@ def test_image_generation_b64json( mock_response_image_gen = ImageResponse( created=1722359754, - data=[ImageObject(b64_json="dummy_b64_json", revised_prompt=None, url=None)], + data=[ImageObject(b64_json="dummy_b64_json", revised_prompt=None, url=None)], # type: List[ImageObject] ) with patch.object( @@ -688,7 +688,7 @@ async def test_aimage_generation( mock_response_image_gen = ImageResponse( created=1722359754, - data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], + data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: List[ImageObject] ) with patch.object( OpenAIChatCompletion, "aimage_generation", return_value=mock_response_image_gen From 7d9d58b8d6a74b2218dc773c28dc4350c101fa59 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 16:03:11 -0800 Subject: [PATCH 09/17] fix typing --- .../src/openinference/instrumentation/litellm/__init__.py | 3 ++- .../tests/test_instrumentor.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index 19ef943de..6af1bfb18 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -23,6 +23,7 @@ import litellm from litellm.types.utils import ( + Message, Choices, EmbeddingResponse, ImageResponse, @@ -60,7 +61,7 @@ def is_iterable_of(lst: Iterable[object], tp: T) -> bool: def _get_attributes_from_message_param( - message: Union[Mapping[str, Any], litellm.Message], + message: Union[Mapping[str, Any], Message], ) -> Iterator[Tuple[str, AttributeValue]]: if not hasattr(message, "get"): return diff --git a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py index 5b3ca7a37..1e848757a 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py @@ -548,7 +548,7 @@ def test_image_generation_url( mock_response_image_gen = ImageResponse( created=1722359754, - data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: List[ImageObject] + data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: ignore ) with patch.object( @@ -618,7 +618,7 @@ def test_image_generation_b64json( mock_response_image_gen = ImageResponse( created=1722359754, - data=[ImageObject(b64_json="dummy_b64_json", revised_prompt=None, url=None)], # type: List[ImageObject] + data=[ImageObject(b64_json="dummy_b64_json", revised_prompt=None, 
url=None)], # type: ignore ) with patch.object( @@ -686,9 +686,9 @@ async def test_aimage_generation( in_memory_span_exporter.clear() LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider) - mock_response_image_gen = ImageResponse( + mock_response_image_gen = ImageResponse( created=1722359754, - data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: List[ImageObject] + data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: ignore ) with patch.object( OpenAIChatCompletion, "aimage_generation", return_value=mock_response_image_gen From 5aef266d358df9ab457c832ef9ebd32e59d07662 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 16:25:03 -0800 Subject: [PATCH 10/17] lint --- .../src/openinference/instrumentation/litellm/__init__.py | 2 +- .../tests/test_instrumentor.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index 6af1bfb18..df54192f8 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -23,10 +23,10 @@ import litellm from litellm.types.utils import ( - Message, Choices, EmbeddingResponse, ImageResponse, + Message, ModelResponse, ModelResponseStream, ) diff --git a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py index 1e848757a..6321a8d03 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-litellm/tests/test_instrumentor.py @@ -548,7 +548,7 @@ def test_image_generation_url( mock_response_image_gen = ImageResponse( created=1722359754, - data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: ignore + data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: ignore ) with patch.object( @@ -686,9 +686,9 @@ async def test_aimage_generation( in_memory_span_exporter.clear() LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider) - mock_response_image_gen = ImageResponse( + mock_response_image_gen = ImageResponse( created=1722359754, - data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: ignore + data=[ImageObject(b64_json=None, revised_prompt=None, url="https://dummy-url")], # type: ignore ) with patch.object( OpenAIChatCompletion, "aimage_generation", return_value=mock_response_image_gen From e2b6f6333060481d728f23cdb2af36cf150dfb99 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 16:26:52 -0800 Subject: [PATCH 11/17] remove additional stream reference --- .../src/openinference/instrumentation/litellm/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py index df54192f8..5e5782d81 100644 --- 
a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py +++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/__init__.py @@ -28,7 +28,6 @@ ImageResponse, Message, ModelResponse, - ModelResponseStream, ) from openinference.instrumentation import ( OITracer, @@ -165,7 +164,7 @@ def _instrument_func_type_image_generation(span: trace_api.Span, kwargs: Dict[st def _finalize_span(span: trace_api.Span, result: Any) -> None: - if isinstance(result, ModelResponse) or isinstance(result, ModelResponseStream): + if isinstance(result, ModelResponse): for idx, choice in enumerate(result.choices): if not isinstance(choice, Choices): continue From 87f82ca45abd1cc22e84a4500b4c2d72955b0487 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 16:42:11 -0800 Subject: [PATCH 12/17] avoid bad tokenizer lib release that was yanked --- .../openinference-instrumentation-litellm/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml index 14501977d..7076d352d 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml @@ -39,6 +39,7 @@ test = [ "opentelemetry-sdk", "opentelemetry-instrumentation-httpx", "tenacity", + "tokenizers>=0.21.0" ] [project.urls] From 5187c2e7479311c22825b3d3f5da0ddead437982 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 16:58:45 -0800 Subject: [PATCH 13/17] Update pyproject.toml --- .../openinference-instrumentation-litellm/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml index 7076d352d..eb8336926 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml @@ -39,7 +39,7 @@ test = [ "opentelemetry-sdk", "opentelemetry-instrumentation-httpx", "tenacity", - "tokenizers>=0.21.0" + "tokenizers==0.20.3; python_version == '3.8'" ] [project.urls] From f07d53d077e87da286b270159e188aa4b4971b2f Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 17:23:37 -0800 Subject: [PATCH 14/17] Update pyproject.toml --- .../openinference-instrumentation-litellm/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml index eb8336926..99abf5dd5 100644 --- a/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-litellm/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "openinference-instrumentation>=0.1.17", "openinference-semantic-conventions>=0.1.9", "wrapt", + "setuptools", ] [project.optional-dependencies] From 7c0917cb7b988ee03a0da47eb62e1f64b5623fc8 Mon Sep 17 00:00:00 2001 From: Nate Mar Date: Mon, 13 Jan 2025 17:34:28 -0800 Subject: [PATCH 15/17] Update tox.ini --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 7db3507da..5603c6c6d 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ 
-76,7 +76,7 @@ commands_pre =
     groq: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-groq[test]
     groq-latest: uv pip install -U groq 'httpx<0.28'
     litellm: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-litellm[test]
-    litellm-latest: uv pip install -U litellm 'httpx<0.28'
+    litellm-latest: uv pip install -U litellm 'httpx<0.28' 'tokenizers>=0.21.0'
     ; instructor: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-instructor[test]
     ; instructor-latest: uv pip install -U instructor
     anthropic: uv pip uninstall -r test-requirements.txt

From a2b615e2b9d3e8dfbe1e28b6ead92592538ef687 Mon Sep 17 00:00:00 2001
From: Nate Mar
Date: Mon, 13 Jan 2025 17:41:01 -0800
Subject: [PATCH 16/17] Update tox.ini

---
 python/tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tox.ini b/python/tox.ini
index 5603c6c6d..ee5643c83 100644
--- a/python/tox.ini
+++ b/python/tox.ini
@@ -76,7 +76,7 @@ commands_pre =
     groq: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-groq[test]
    groq-latest: uv pip install -U groq 'httpx<0.28'
     litellm: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-litellm[test]
-    litellm-latest: uv pip install -U litellm 'httpx<0.28' 'tokenizers>=0.21.0'
+    litellm-latest: uv pip install -U --only-binary=tokenizers litellm 'httpx<0.28' 'tokenizers<=0.20.3'
     ; instructor: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-instructor[test]
     ; instructor-latest: uv pip install -U instructor
     anthropic: uv pip uninstall -r test-requirements.txt

From 76342aa8254b495e5676c6451c7127f39bf90a28 Mon Sep 17 00:00:00 2001
From: Nate Mar
Date: Wed, 15 Jan 2025 15:08:16 -0800
Subject: [PATCH 17/17] restore version

---
 .../src/openinference/instrumentation/litellm/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/version.py b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/version.py
index 0a8da8825..1276d0254 100644
--- a/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/version.py
+++ b/python/instrumentation/openinference-instrumentation-litellm/src/openinference/instrumentation/litellm/version.py
@@ -1 +1 @@
-__version__ = "0.1.6"
+__version__ = "0.1.5"
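
Net effect of the series: "completion" spans now carry one indexed llm.output_messages.{i}.* group per choice, alongside output.value for the first choice. Below is a minimal sketch of how that surfaces, assuming the same in-memory exporter setup used in the tests above; the attribute key strings are taken from openinference-semantic-conventions, and mock_response keeps the call offline.

    import litellm
    from openinference.instrumentation.litellm import LiteLLMInstrumentor
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

    # In-memory OTel pipeline, mirroring the test fixtures above.
    exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
    LiteLLMInstrumentor().instrument(tracer_provider=tracer_provider)

    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"content": "What's the capital of China?", "role": "user"}],
        n=2,
        mock_response="Beijing",  # mocked response; no API key required
    )

    (span,) = exporter.get_finished_spans()
    attributes = dict(span.attributes or {})
    # Expected shape, per the assertions in test_completion:
    #   output.value == "Beijing"
    #   llm.output_messages.0.message.content == "Beijing"
    #   llm.output_messages.1.message.content == "Beijing"
    for key in sorted(k for k in attributes if k.startswith("llm.output_messages.")):
        print(key, "=", attributes[key])

    LiteLLMInstrumentor().uninstrument()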