Deprecate call_custom (#18)
pkelaita authored Jan 17, 2025
2 parents ddd24ce + cbf5a03 commit 1d31b77
Showing 7 changed files with 14 additions and 197 deletions.
11 changes: 10 additions & 1 deletion CHANGELOG.md
@@ -1,9 +1,18 @@
# Changelog

_Current version: 0.0.39_
_Current version: 0.0.40_

[PyPi link](https://pypi.org/project/l2m2/)

### 0.0.40 - January 16, 2025

> [!CAUTION]
> This release has breaking changes! Please read the changelog carefully.

#### Removed

- The `call_custom` method has been removed from `LLMClient` and `AsyncLLMClient` due to lack of use and unnecessary complexity. **This is a breaking change!!!** If you need to call a model that is not officially supported by L2M2, please open an issue on the [Github repo](https://github.com/pkelaita/l2m2/issues).
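
As a rough migration sketch (not part of this commit; it assumes `LLMClient` is importable from `l2m2.client`, and the model names shown are only illustrative), code that previously used `call_custom` would move to `call` with an officially supported model:

```python
from l2m2.client import LLMClient

client = LLMClient()
client.add_provider("openai", "<your-openai-api-key>")

# Pre-0.0.40 usage that this release removes:
# response = client.call_custom(
#     provider="openai",
#     model_id="gpt-4-0125-preview",
#     prompt="Hello",
# )

# From 0.0.40 onward, only officially supported models can be called:
response = client.call(model="gpt-4o", prompt="Hello")
```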

### 0.0.39 - December 17, 2024

> [!CAUTION]
21 changes: 3 additions & 18 deletions README.md
@@ -1,6 +1,6 @@
# L2M2: A Simple Python LLM Manager 💬👍

[![Tests](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml/badge.svg?timestamp=1734494879)](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/pkelaita/l2m2/graph/badge.svg?token=UWIB0L9PR8)](https://codecov.io/github/pkelaita/l2m2) [![PyPI version](https://badge.fury.io/py/l2m2.svg?timestamp=1734494879)](https://badge.fury.io/py/l2m2)
[![Tests](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml/badge.svg?timestamp=1737088596)](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/pkelaita/l2m2/graph/badge.svg?token=UWIB0L9PR8)](https://codecov.io/github/pkelaita/l2m2) [![PyPI version](https://badge.fury.io/py/l2m2.svg?timestamp=1737088596)](https://badge.fury.io/py/l2m2)

**L2M2** ("LLM Manager" → "LLMM" → "L2M2") is a tiny and very simple LLM manager for Python that exposes lots of models through a unified API. This is useful for evaluation, demos, production applications, etc. that need to be easily model-agnostic.

@@ -650,24 +650,9 @@ print(prompt)
Your name is Pierce and you are a software engineer.
```
### Other Capabilities
### Other Capabilities: Extra Parameters
#### Call Custom
If you'd like to call a language model from one of the supported providers that isn't officially supported by L2M2 (for example, older models such as `gpt-4-0125-preview`), you can similarly `call_custom` with the additional required parameter `provider`, and pass in the model name expected by the provider's API. Unlike `call`, `call_custom` doesn't guarantee correctness or well-defined behavior.
```python
response = client.call_custom(
    provider="<provider name>",
    model_id="<model id for given provider>",
    prompt="<prompt>",
    ...
)
```
#### Extra Parameters
You can pass in extra parameters to the provider's API (For example, [reasoning_effort](https://platform.openai.com/docs/api-reference/chat/create#chat-create-reasoning_effort) on OpenAI's o1 series) by passing in the `extra_params` parameter to `call` or `call_custom`. These parameters are passed in as a dictionary of key-value pairs, where the values are of type `str`, `int`, or `float`. Similarly, using `extra_params` does not guarantee correctness or well-defined behavior, and you should refer to the provider's documentation for correct usage.
You can pass in extra parameters to the provider's API (For example, [reasoning_effort](https://platform.openai.com/docs/api-reference/chat/create#chat-create-reasoning_effort) on OpenAI's o1 series) by passing in the `extra_params` parameter to `call`. These parameters are passed in as a dictionary of key-value pairs, where the values are of type `str`, `int`, or `float`. Using `extra_params` does not guarantee correctness or well-defined behavior, and you should refer to the provider's documentation for correct usage.
```python
response = client.call(
    # ... (snippet truncated in the diff view)
```
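
For illustration (not part of the diff; the `o1` model name and the `reasoning_effort` value are assumptions based on the OpenAI documentation linked above), a minimal sketch of what a complete `extra_params` call might look like:

```python
# Hypothetical sketch: forwards a provider-specific parameter via extra_params.
response = client.call(
    model="o1",
    prompt="<prompt>",
    extra_params={"reasoning_effort": "high"},
)
```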
2 changes: 1 addition & 1 deletion l2m2/__init__.py
@@ -1 +1 @@
__version__ = "0.0.39"
__version__ = "0.0.40"
85 changes: 0 additions & 85 deletions l2m2/client/base_llm_client.py
@@ -353,91 +353,6 @@ async def call(
            extra_params,
        )

    async def call_custom(
        self,
        *,
        provider: str,
        model_id: str,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        json_mode: bool = False,
        json_mode_strategy: Optional[JsonModeStrategy] = None,
        timeout: Optional[int] = DEFAULT_TIMEOUT_SECONDS,
        bypass_memory: bool = False,
        alt_memory: Optional[BaseMemory] = None,
        extra_params: Optional[Dict[str, Union[str, int, float]]] = None,
    ) -> str:
        """Performs inference on any model from an active provider that is not officially supported
        by L2M2. This method does not guarantee correctness.

        Args:
            provider (str): The provider to use. Must be one of the active providers.
            model_id (str): The ID of model to call. Must be the exact match to how you would call
                it with the provider's API. For example, `gpt-3.5-turbo-0125` can be used to call
                a legacy model from OpenAI as per the OpenAI API docs.
                (https://platform.openai.com/docs/api-reference/chat)
            prompt (str): The user prompt for which to generate a completion.
            system_prompt (str, optional): The system prompt to send to the model. Defaults to None.
            temperature (float, optional): The sampling temperature for the model. If not specified,
                the provider's default value for the model is used. Defaults to None.
            max_tokens (int, optional): The maximum number of tokens to generate. If not specified,
                the provider's default value for the model is used. Defaults to None.
            json_mode (bool, optional): Whether to return the response in JSON format. Defaults to False.
            json_mode_strategy (JsonModeStrategy, optional): The strategy to use to enforce JSON outputs
                when `json_mode` is True. If `None`, the default strategy will be used:
                `JsonModeStrategy.prepend()` for Anthropic, and `JsonModeStrategy.strip()` for all other
                providers. Defaults to `None`.
            timeout (int, optional): The timeout in seconds for the LLM request. Can be set to `None`,
                in which case the request will be allowed to run indefinitely. Defaults to `10`.
            bypass_memory (bool, optional): Whether to bypass memory when calling the model. If `True`, the
                model will not read from or write to memory during the call if memory is enabled. Defaults
                to `False`.
            alt_memory (BaseMemory, optional): An alternative memory object to use for this call only. This
                is very useful for asynchronous workflows where you want to keep track of multiple memory
                streams in parallel without risking race conditions. Defaults to `None`.

        Raises:
            ValueError: If the provided model is not active and/or not available.

        Returns:
            str: The model's completion for the prompt (correctness not guaranteed).
        """
        if provider not in self.get_available_providers():
            raise ValueError(f"Invalid provider: {provider}")
        if provider not in self.active_providers:
            raise ValueError(f"Provider not active: {provider}")

        # Get the param info from the first model where the provider matches.
        # Not ideal, but the best we can do for user-provided models.
        model_info: ModelEntry = {
            "model_id": model_id,
            "params": MODEL_INFO[
                next(
                    model
                    for model in self.get_available_models()
                    if provider in MODEL_INFO[model].keys()
                )
            ][provider]["params"],
            "extras": {},
        }

        return await self._call_impl(
            model_info,
            provider,
            prompt,
            system_prompt,
            temperature,
            max_tokens,
            json_mode,
            json_mode_strategy,
            timeout,
            bypass_memory,
            alt_memory,
            extra_params,
        )

    async def _call_impl(
        self,
        model_info: ModelEntry,
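The optional parameters documented in the removed docstring remain available on `call`, which delegates to the same `_call_impl`. A hedged sketch (assuming `call` accepts these keyword arguments; the model name is illustrative):

```python
# Hedged sketch, not from this commit: call() with the optional parameters
# documented above; "gpt-4o" is an illustrative supported model.
response = await client.call(
    model="gpt-4o",
    prompt="Hello",
    system_prompt="Respond in one sentence.",
    temperature=0.5,
    max_tokens=100,
    timeout=10,
    bypass_memory=False,
)
```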
36 changes: 0 additions & 36 deletions l2m2/client/llm_client.py
@@ -59,42 +59,6 @@ def call( # type: ignore
        )
        return str(result)

    def call_custom( # type: ignore
        self,
        *,
        provider: str,
        model_id: str,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        json_mode: bool = False,
        json_mode_strategy: Optional[JsonModeStrategy] = None,
        timeout: Optional[int] = DEFAULT_TIMEOUT_SECONDS,
        bypass_memory: bool = False,
        alt_memory: Optional[BaseMemory] = None,
        extra_params: Optional[Dict[str, Union[str, int, float]]] = None,
    ) -> str:
        result = asyncio.run(
            self._sync_fn_wrapper(
                super(LLMClient, self).call_custom,
                provider=provider,
                model_id=model_id,
                prompt=prompt,
                system_prompt=system_prompt,
                temperature=temperature,
                max_tokens=max_tokens,
                json_mode=json_mode,
                json_mode_strategy=json_mode_strategy,
                timeout=timeout,
                bypass_memory=bypass_memory,
                alt_memory=alt_memory,
                extra_params=extra_params,
            )
        )
        return str(result)

    # Inherit docstrings
    __init__.__doc__ = BaseLLMClient.__init__.__doc__
    call.__doc__ = BaseLLMClient.call.__doc__
    call_custom.__doc__ = BaseLLMClient.call_custom.__doc__
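
The removed method above follows the same pattern as the remaining `call` wrapper: a synchronous method that runs the async base implementation to completion with `asyncio.run`. A generic, self-contained sketch of that pattern (not l2m2 code):

```python
import asyncio

class AsyncBase:
    async def call(self, *, prompt: str) -> str:
        await asyncio.sleep(0)  # stand-in for an async LLM request
        return f"echo: {prompt}"

class SyncClient(AsyncBase):
    # Synchronous override that drives the async implementation to completion.
    def call(self, *, prompt: str) -> str:  # type: ignore[override]
        return asyncio.run(super().call(prompt=prompt))

print(SyncClient().call(prompt="Hello"))
```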
46 changes: 0 additions & 46 deletions tests/l2m2/client/test_base_llm_client.py
@@ -375,52 +375,6 @@ async def test_call_temperature_too_high(llm_client):
        await llm_client.call(prompt="Hello", model="gpt-4o", temperature=3.0)


# -- Tests for call_custom -- #


@pytest.mark.asyncio
@patch(LLM_POST_PATH)
async def test_call_custom(mock_call_openai, llm_client):
    mock_call_openai.return_value = {"choices": [{"message": {"content": "response"}}]}
    llm_client.add_provider("openai", "fake-api-key")
    response_default = await llm_client.call_custom(
        provider="openai",
        prompt="Hello",
        model_id="custom-model-xyz",
    )
    response_custom = await llm_client.call_custom(
        provider="openai",
        prompt="Hello",
        model_id="custom-model-xyz",
        system_prompt="System prompt",
        temperature=0.5,
        max_tokens=100,
    )

    assert response_default == "response"
    assert response_custom == "response"


@pytest.mark.asyncio
async def test_call_custom_invalid_provider(llm_client):
    with pytest.raises(ValueError):
        await llm_client.call_custom(
            provider="invalid_provider",
            prompt="Hello",
            model_id="custom-model-xyz",
        )


@pytest.mark.asyncio
async def test_call_custom_not_active(llm_client):
    with pytest.raises(ValueError):
        await llm_client.call_custom(
            provider="openai",
            prompt="Hello",
            model_id="custom-model-xyz",
        )


# -- Tests for multi provider -- #


10 changes: 0 additions & 10 deletions tests/l2m2/client/test_llm_client.py
@@ -17,16 +17,6 @@ def test_call(mock_asyncio_run, llm_client):
    assert result == "async_call_result"


@patch("l2m2.client.llm_client.asyncio.run", return_value="async_call_custom_result")
def test_call_custom(mock_asyncio_run, llm_client):
    result = llm_client.call_custom(
        provider="test-provider", model_id="test-model-id", prompt="test prompt"
    )

    mock_asyncio_run.assert_called_once()
    assert result == "async_call_custom_result"


@pytest.mark.asyncio
async def test_sync_fn_wrapper():
    async def dummy_fn(*args, **kwargs):
