dspy.RM/retrieve refactor #1739

Open · wants to merge 9 commits into base: main
4 changes: 2 additions & 2 deletions dspy/__init__.py
@@ -3,8 +3,7 @@
from dspy.retrieve import *
from dspy.signatures import *
from dspy.teleprompt import *

import dspy.retrievers
from dspy.retrievers import *

from dspy.evaluate import Evaluate # isort: skip
from dspy.clients import * # isort: skip
@@ -27,6 +26,7 @@

import dspy.teleprompt

ColBERTv2 = ColBERTv2
LabeledFewShot = dspy.teleprompt.LabeledFewShot
BootstrapFewShot = dspy.teleprompt.BootstrapFewShot
BootstrapFewShotWithRandomSearch = dspy.teleprompt.BootstrapFewShotWithRandomSearch
10 changes: 5 additions & 5 deletions dspy/adapters/json_adapter.py
@@ -15,10 +15,9 @@

from dspy.adapters.base import Adapter
from dspy.adapters.utils import find_enum_member, format_field_value, serialize_for_json

from ..adapters.image_utils import Image
from ..signatures.signature import SignatureMeta
from ..signatures.utils import get_dspy_field_type
from dspy.adapters.image_utils import Image
from dspy.signatures.signature import SignatureMeta
from dspy.signatures.utils import get_dspy_field_type

_logger = logging.getLogger(__name__)

@@ -38,7 +37,8 @@ def __call__(self, lm, lm_kwargs, signature, demos, inputs):

try:
provider = lm.model.split("/", 1)[0] or "openai"
if "response_format" in litellm.get_supported_openai_params(model=lm.model, custom_llm_provider=provider):
params = litellm.get_supported_openai_params(model=lm.model, custom_llm_provider=provider)
if params and "response_format" in params:
try:
response_format = _get_structured_outputs_response_format(signature)
outputs = lm(**inputs, **lm_kwargs, response_format=response_format)
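A note on the change above: litellm.get_supported_openai_params can return None when it does not recognize the provider, and a membership test against None raises TypeError. A minimal sketch of the guard, with a hypothetical lookup standing in for the litellm call:

from typing import List, Optional

def get_supported_params(provider: str) -> Optional[List[str]]:
    # Hypothetical stand-in for litellm.get_supported_openai_params,
    # which may return None for an unrecognized provider.
    known = {"openai": ["response_format", "temperature"]}
    return known.get(provider)

def supports_response_format(provider: str) -> bool:
    params = get_supported_params(provider)
    # Truthiness check first, so a None result short-circuits before
    # the membership test can raise TypeError.
    return bool(params) and "response_format" in params

assert supports_response_format("openai")
assert not supports_response_format("unknown-provider")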
9 changes: 2 additions & 7 deletions dspy/clients/anyscale.py
@@ -5,9 +5,9 @@
import yaml
import logging

from dspy.clients.finetune import (
from dspy.clients.lm import (
FinetuneJob,
# TrainingMethod,
TrainingMethod,
save_data,
)
from dspy.clients.openai import openai_data_validation
@@ -182,11 +182,6 @@ def start_remote_training(job_config) -> str:
return job_id


def wait_for_training(job_id):
print("Waiting for training to complete")
anyscale.job.wait(id=job_id, timeout_s=18000)


def get_model_info(job_id):
print("[Finetune] Retrieving model information from Anyscale Models SDK...")
info = anyscale.llm.model.get(job_id=job_id).to_dict()
25 changes: 8 additions & 17 deletions dspy/dsp/colbertv2.py
@@ -1,4 +1,5 @@
import functools
import importlib.util
from typing import Any, List, Optional, Union

import requests
@@ -8,7 +9,6 @@

# TODO: Ideally, this takes the name of the index and looks up its port.


class ColBERTv2:
"""Wrapper for the ColBERTv2 Retrieval."""

@@ -76,14 +76,17 @@ def colbertv2_post_request_v2_wrapped(*args, **kwargs):
colbertv2_post_request = colbertv2_post_request_v2_wrapped

class ColBERTv2RetrieverLocal:
def __init__(self,passages:List[str],colbert_config=None,load_only:bool=False):
def __init__(self,passages: List[str], colbert_config=None, load_only=False):
"""Colbertv2 retriever module

Args:
passages (List[str]): list of passages
colbert_config (ColBERTConfig, optional): colbert config for building and searching. Defaults to None.
load_only (bool, optional): whether to load the index or build and then load. Defaults to False.
"""
if importlib.util.find_spec("colbert") is None:
raise ImportError("Colbert not found. Please check your installation or install the module using pip install colbert-ai[faiss-gpu,torch].")

assert colbert_config is not None, "Please pass a valid colbert_config; you can import ColBERTConfig from colbert.infra.config and modify it."
self.colbert_config = colbert_config

@@ -101,24 +104,13 @@ def __init__(self,passages:List[str],colbert_config=None,load_only:bool=False):
self.searcher = self.get_index()

def build_index(self):

try:
import colbert
except ImportError:
print("Colbert not found. Please check your installation or install the module using pip install colbert-ai[faiss-gpu,torch].")

from colbert import Indexer
from colbert.infra import Run, RunConfig
with Run().context(RunConfig(nranks=self.colbert_config.nranks, experiment=self.colbert_config.experiment)):
indexer = Indexer(checkpoint=self.colbert_config.checkpoint, config=self.colbert_config)
indexer.index(name=self.colbert_config.index_name, collection=self.passages, overwrite=True)

def get_index(self):
try:
import colbert
except ImportError:
print("Colbert not found. Please check your installation or install the module using pip install colbert-ai[faiss-gpu,torch].")

from colbert import Searcher
from colbert.infra import Run, RunConfig

@@ -153,16 +145,15 @@ def forward(self,query:str,k:int=7,**kwargs):
class ColBERTv2RerankerLocal:

def __init__(self,colbert_config=None,checkpoint:str='bert-base-uncased'):
try:
import colbert
except ImportError:
print("Colbert not found. Please check your installation or install the module using pip install colbert-ai[faiss-gpu,torch].")
"""_summary_

Args:
colbert_config (ColBERTConfig, optional): Colbert config. Defaults to None.
checkpoint_name (str, optional): checkpoint for embeddings. Defaults to 'bert-base-uncased'.
"""

if importlib.util.find_spec("colbert") is None:
raise ImportError("Colbert not found. Please check your installation or install the module using pip install colbert-ai[faiss-gpu,torch].")
self.colbert_config = colbert_config
self.checkpoint = checkpoint
self.colbert_config.checkpoint = checkpoint
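The retriever and reranker now fail fast with an ImportError instead of printing a warning and crashing later (the old try/except around import colbert only printed, so execution continued to a less helpful failure). A minimal sketch of the importlib.util.find_spec guard, using a hypothetical helper name:

import importlib.util

def require_package(name: str, pip_hint: str) -> None:
    # find_spec locates the module without importing it, so the check
    # is cheap and side-effect free.
    if importlib.util.find_spec(name) is None:
        raise ImportError(
            f"{name} not found. Please check your installation or "
            f"install the module using pip install {pip_hint}."
        )

require_package("json", "json")  # stdlib module, so this passes
# require_package("colbert", "colbert-ai[faiss-gpu,torch]")  # raises if colbert is missing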
5 changes: 5 additions & 0 deletions dspy/dsp/utils/dpr.py
@@ -5,10 +5,15 @@
"""

import unicodedata
import logging
import copy

import regex


logger = logging.getLogger(__name__)


class Tokens:
"""A class to represent a list of tokenized text."""
TEXT = 0
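The new module-level logger follows the standard one-logger-per-module pattern, so applications embedding dspy can configure or silence this module by name. A minimal sketch with a hypothetical function:

import logging

# Named after the module's import path (here dspy.dsp.utils.dpr), which
# lets callers tune output via logging.getLogger("dspy").setLevel(...).
logger = logging.getLogger(__name__)

def normalize(text: str) -> str:
    logger.debug("normalizing %d characters", len(text))
    return text.lower()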
2 changes: 1 addition & 1 deletion dspy/dsp/utils/utils.py
@@ -232,7 +232,7 @@ def load_batch_backgrounds(args, qids):
for qid in qids:
back = args.qid2backgrounds[qid]

if len(back) and type(back[0]) == int:
if len(back) and isinstance(back[0], int):
x = [args.collection[pid] for pid in back]
else:
x = [args.collectionX.get(pid, "") for pid in back]
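isinstance is the idiomatic type check and, unlike the type(...) == int comparison, it accepts subclasses; since bool is a subclass of int in Python, the two forms can disagree. A short illustration:

# bool subclasses int, so the two checks differ on True/False.
assert isinstance(True, int)   # passes: subclass instances count
assert type(True) is not int   # strict type comparison rejects bool
assert isinstance(3, int)      # a plain int satisfies both forms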
4 changes: 2 additions & 2 deletions dspy/predict/parallel.py
@@ -2,8 +2,8 @@

from typing import Tuple, List, Any

from ..primitives.example import Example
from ..utils.parallelizer import ParallelExecutor
from dspy.primitives.example import Example
from dspy.utils.parallelizer import ParallelExecutor


class Parallel:
4 changes: 2 additions & 2 deletions dspy/predict/program_of_thought.py
@@ -3,8 +3,8 @@
import dspy
from dspy.signatures.signature import ensure_signature

from ..primitives.program import Module
from ..primitives.python_interpreter import CodePrompt, PythonInterpreter
from dspy.primitives.program import Module
from dspy.primitives.python_interpreter import CodePrompt, PythonInterpreter


class ProgramOfThought(Module):
3 changes: 3 additions & 0 deletions dspy/primitives/python_interpreter.py
@@ -26,8 +26,11 @@ class InterpreterError(ValueError):
expression, due to syntax error or unsupported operations.
"""

class BreakException(Exception):
pass

class ContinueException(Exception):
pass

class PythonInterpreter:
r"""A customized python interpreter to control the execution of
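BreakException and ContinueException are the conventional way a tree-walking interpreter models break and continue: evaluating the statement raises, and the enclosing loop construct catches. A minimal sketch of the control flow, not the actual PythonInterpreter internals:

class BreakException(Exception):
    pass

class ContinueException(Exception):
    pass

def run_loop(iterable, body):
    # body raises when it evaluates a break/continue node; the loop
    # translates the exception back into native control flow.
    for item in iterable:
        try:
            body(item)
        except ContinueException:
            continue
        except BreakException:
            break

out = []

def body(i):
    if i == 2:
        raise ContinueException()  # models `continue`
    if i == 4:
        raise BreakException()     # models `break`
    out.append(i)

run_loop(range(6), body)
assert out == [0, 1, 3]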
17 changes: 11 additions & 6 deletions dspy/propose/dataset_summary_generator.py
@@ -45,7 +45,8 @@ def reorder_keys(match):
return ordered_repr

def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_file=None, verbose=False):
if verbose: print("\nBootstrapping dataset summary (this will be used to generate instructions)...")
if verbose:
print("\nBootstrapping dataset summary (this will be used to generate instructions)...")
upper_lim = min(len(trainset), view_data_batch_size)
prompt_model = prompt_model if prompt_model else dspy.settings.lm
with dspy.settings.context(lm=prompt_model):
@@ -63,7 +64,8 @@ def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_fil
calls+=1
if calls >= max_calls:
break
if verbose: print(f"b: {b}")
if verbose:
print(f"b: {b}")
upper_lim = min(len(trainset), b+view_data_batch_size)
with dspy.settings.context(lm=prompt_model):
output = dspy.Predict(DatasetDescriptorWithPriorObservations, n=1, temperature=1.0)(prior_observations=observations, examples=order_input_keys_in_string(trainset[b:upper_lim].__repr__()))
@@ -77,17 +79,20 @@ def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_fil
if log_file:
log_file.write(f"observations {observations}\n")
except Exception as e:
if verbose: print(f"e {e}. using observations from past round for a summary.")
if verbose:
print(f"e {e}. using observations from past round for a summary.")

if prompt_model:
with dspy.settings.context(lm=prompt_model):
summary = dspy.Predict(ObservationSummarizer, n=1, temperature=1.0)(observations=observations)
else:
summary = dspy.Predict(ObservationSummarizer, n=1, temperature=1.0)(observations=observations)
if verbose: print(f"summary: {summary}")

if log_file:
log_file.write(f"summary: {summary}\n")

if verbose: print(f"\nGenerated summary: {strip_prefix(summary.summary)}\n")

if verbose:
print(f"summary: {summary}")
print(f"\nGenerated summary: {strip_prefix(summary.summary)}\n")

return strip_prefix(summary.summary)
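create_dataset_summary routes its Predict calls through dspy.settings.context, which temporarily overrides the default LM and restores the previous one on exit. A hedged sketch of the pattern (the signature string is illustrative):

import dspy

def summarize_with(prompt_model, observations: str):
    # Inside the context, dspy.Predict uses prompt_model; the prior
    # default LM is restored when the with-block exits.
    with dspy.settings.context(lm=prompt_model):
        return dspy.Predict("observations -> summary")(observations=observations)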
41 changes: 25 additions & 16 deletions dspy/propose/grounded_proposer.py
@@ -195,7 +195,8 @@ def forward(
program_code=self.program_code_string, program_example=task_demos,
).program_description,
)
if self.verbose: print(f"PROGRAM DESCRIPTION: {program_description}")
if self.verbose:
print(f"PROGRAM DESCRIPTION: {program_description}")

inputs = []
outputs = []
@@ -218,12 +219,14 @@
module=module_code,
max_depth=10,
).module_description
except:
if self.verbose: print("Error getting program description. Running without program aware proposer.")
except Exception:
if self.verbose:
print("Error getting program description. Running without program aware proposer.")
self.program_aware = False

# Generate an instruction for our chosen module
if self.verbose: print(f"task_demos {task_demos}")
if self.verbose:
print(f"task_demos {task_demos}")
instruct = self.generate_module_instruction(
dataset_description=data_summary,
program_code=self.program_code_string,
Expand All @@ -237,7 +240,8 @@ def forward(
)
if hasattr(instruct, "module_description"):
module_description = strip_prefix(instruct.module_description)
if self.verbose: print(f"MODULE DESCRIPTION: {module_description}")
if self.verbose:
print(f"MODULE DESCRIPTION: {module_description}")
proposed_instruction = strip_prefix(instruct.proposed_instruction)

return dspy.Prediction(proposed_instruction=proposed_instruction)
@@ -278,7 +282,8 @@ def __init__(
if self.program_aware:
try:
self.program_code_string = get_dspy_source_code(program)
if self.verbose: print("SOURCE CODE:",self.program_code_string)
if self.verbose:
print("SOURCE CODE:",self.program_code_string)
except Exception as e:
print(f"Error getting source code: {e}.\n\nRunning without program aware proposer.")
self.program_aware = False
@@ -289,7 +294,8 @@ def __init__(
self.data_summary = create_dataset_summary(
trainset=trainset, view_data_batch_size=view_data_batch_size, prompt_model=prompt_model,
)
if self.verbose: print(f"DATA SUMMARY: {self.data_summary}")
if self.verbose:
print(f"DATA SUMMARY: {self.data_summary}")
except Exception as e:
print(f"Error getting data summary: {e}.\n\nRunning without data aware proposer.")
self.use_dataset_summary = False
@@ -313,12 +319,14 @@ def propose_instructions_for_program(
# Randomly select whether or not we're using instruction history
use_history = self.rng.random() < 0.5
self.use_instruct_history = use_history
if self.verbose: print(f"Use history T/F: {self.use_instruct_history}")
if self.verbose:
print(f"Use history T/F: {self.use_instruct_history}")

num_demos = max(len(demo_candidates[0]) if demo_candidates else N, 1)

if not demo_candidates:
if self.verbose: print("No demo candidates provided. Running without task demos.")
if self.verbose:
print("No demo candidates provided. Running without task demos.")
self.use_task_demos = False

# Create an instruction for each predictor
@@ -327,14 +335,14 @@
if pred_i not in proposed_instructions:
proposed_instructions[pred_i] = []
if self.set_tip_randomly:
if self.verbose: print("Using a randomly generated configuration for our grounded proposer.")
if self.verbose:
print("Using a randomly generated configuration for our grounded proposer.")
# Randomly select the tip
selected_tip_key = self.rng.choice(list(TIPS.keys()))
selected_tip = TIPS[selected_tip_key]
self.use_tip = bool(
selected_tip,
)
if self.verbose: print(f"Selected tip: {selected_tip_key}")
self.use_tip = bool(selected_tip)
if self.verbose:
print(f"Selected tip: {selected_tip_key}")

proposed_instructions[pred_i].append(
self.propose_instruction_for_predictor(
@@ -399,7 +407,8 @@ def propose_instruction_for_predictor(
self.prompt_model.kwargs["temperature"] = original_temp

# Log the trace used to generate the new instruction, along with the new instruction itself
if self.verbose: self.prompt_model.inspect_history(n=1)
if self.verbose: print(f"PROPOSED INSTRUCTION: {proposed_instruction}")
if self.verbose:
self.prompt_model.inspect_history(n=1)
print(f"PROPOSED INSTRUCTION: {proposed_instruction}")

return strip_prefix(proposed_instruction)
9 changes: 2 additions & 7 deletions dspy/propose/instruction_proposal.py
@@ -1,4 +1,6 @@
import dspy
import dsp

from dspy.signatures import Signature


@@ -80,13 +82,6 @@ class BasicGenerateInstructionWithDataObservations(Signature):
proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task")


class BasicGenerateInstruction(Signature):
"""You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative."""

basic_instruction = dspy.InputField(desc="The initial instructions before optimization")
proposed_instruction = dspy.OutputField(desc="The improved instructions for the language model")
proposed_prefix_for_output_field = dspy.OutputField(desc="The string at the end of the prompt, which will help the model start solving the task")

class BasicGenerateInstructionAllFields(Signature):
"""You are an instruction optimizer for large language models. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative."""
("""You are an instruction optimizer for large language models. I will provide you with""")