diff --git a/slack_bot/run.py b/slack_bot/run.py
index 4475e5de..19d8d623 100755
--- a/slack_bot/run.py
+++ b/slack_bot/run.py
@@ -21,7 +21,11 @@ async def main():
     # Parse command line arguments
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "--model", "-m", help="Select which model to use", default=None, choices=MODELS
+        "--model",
+        "-m",
+        help="Select which model to use",
+        default=os.environ.get("REGINALD_MODEL") or "hello",
+        choices=MODELS,
     )
     parser.add_argument(
         "--model-name",
@@ -45,17 +49,18 @@ async def main():
             "(ignored if not using llama-index-llama-cpp or llama-index-hf). "
             "Default is 'chat'."
         ),
-        default=None,
+        default=os.environ.get("LLAMA_INDEX_MODE") or "chat",
         choices=["chat", "query"],
     )
     parser.add_argument(
-        "--path",
+        "--is-path",
         "-p",
         help=(
             "Whether or not the model_name passed is a path to the model "
             "(ignored if not using llama-index-llama-cpp)"
         ),
-        action="store_true",
+        action=argparse.BooleanOptionalAction,
+        default=None,
     )
     parser.add_argument(
         "--max-input-size",
@@ -65,7 +70,7 @@ async def main():
             "Select maximum input size for LlamaCPP or HuggingFace model "
             "(ignored if not using llama-index-llama-cpp or llama-index-hf)"
         ),
-        default=4096,
+        default=os.environ.get("LLAMA_INDEX_MAX_INPUT_SIZE") or 4096,
     )
     parser.add_argument(
         "--n-gpu-layers",
@@ -75,7 +80,7 @@ async def main():
             "Select number of GPU layers for LlamaCPP model "
             "(ignored if not using llama-index-llama-cpp)"
         ),
-        default=0,
+        default=os.environ.get("LLAMA_INDEX_N_GPU_LAYERS") or 0,
     )
     parser.add_argument(
         "--device",
@@ -85,20 +90,22 @@ async def main():
             "Select device for HuggingFace model "
             "(ignored if not using llama-index-hf model)"
         ),
-        default="auto",
+        default=os.environ.get("LLAMA_INDEX_DEVICE") or "auto",
     )
     parser.add_argument(
         "--force-new-index",
         "-f",
         help="Recreate the index vector store or not",
-        action="store_true",
+        action=argparse.BooleanOptionalAction,
+        default=None,
     )
     parser.add_argument(
         "--data-dir",
         "-d",
         type=pathlib.Path,
         help="Location for data",
-        default=None,
+        default=os.environ.get("LLAMA_INDEX_DATA_DIR")
+        or (pathlib.Path(__file__).parent.parent / "data").resolve(),
     )
     parser.add_argument(
         "--which-index",
@@ -111,7 +118,7 @@ async def main():
             "files in the data directory, 'handbook' will "
             "only use 'handbook.csv' file."
         ),
-        default=None,
+        default=os.environ.get("LLAMA_INDEX_WHICH_INDEX") or "all_data",
         choices=["all_data", "public", "handbook"],
     )

@@ -124,83 +131,79 @@ async def main():
         level=logging.INFO,
     )

-    # Set model name
-    model_name = os.environ.get("REGINALD_MODEL")
-    if args.model:
-        model_name = args.model
-    if not model_name:
-        model_name = "hello"
-
-    # Set force new index
+    # Set force new index (by default, don't)
     force_new_index = False
-    if os.environ.get("LLAMA_FORCE_NEW_INDEX"):
-        force_new_index = os.environ.get("LLAMA_FORCE_NEW_INDEX").lower() == "true"
-    if args.force_new_index:
-        force_new_index = True
-
-    # Set data directory
-    data_dir = os.environ.get("LLAMA_DATA_DIR")
-    if args.data_dir:
-        data_dir = args.data_dir
-    if not data_dir:
-        data_dir = pathlib.Path(__file__).parent.parent / "data"
-    data_dir = pathlib.Path(data_dir).resolve()
-
-    # Set which index
-    which_index = os.environ.get("LLAMA_WHICH_INDEX")
-    if args.which_index:
-        which_index = args.which_index
-    if not which_index:
-        which_index = "all_data"
-
-    # Set mode
-    mode = os.environ.get("LLAMA_MODE")
-    if args.mode:
-        mode = args.mode
-    if not mode:
-        mode = "chat"
+    # try to obtain force_new_index from env var
+    if os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX"):
+        force_new_index = (
+            os.environ.get("LLAMA_INDEX_FORCE_NEW_INDEX").lower() == "true"
+        )
+    # if force_new_index is provided via command line, override env var
+    if args.force_new_index is not None:
+        force_new_index = args.force_new_index
+
+    # Set is_path bool (by default, False)
+    is_path = False
+    # try to obtain is_path from env var
+    if os.environ.get("LLAMA_INDEX_IS_PATH"):
+        is_path = os.environ.get("LLAMA_INDEX_IS_PATH").lower() == "true"
+    # if is_path bool is provided via command line, override env var
+    if args.is_path is not None:
+        is_path = args.is_path

     # Initialise a new Slack bot with the requested model
     try:
-        model = MODELS[model_name.lower()]
+        model = MODELS[args.model.lower()]
     except KeyError:
-        logging.error(f"Model {model_name} was not recognised")
+        logging.error(f"Model {args.model} was not recognised")
         sys.exit(1)

     # Initialise LLM reponse model
-    logging.info(f"Initialising bot with model: {model_name}")
+    logging.info(f"Initialising bot with model: {args.model}")

     # Set up any model args that are required
-    if model_name == "llama-index-llama-cpp":
-        if args.model_name is None:
-            args.model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL
+    if args.model == "llama-index-llama-cpp":
+        # try to obtain model name from env var
+        # if model name is provided via command line, override env var
+        model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME")
+
+        # if no model name is provided by command line or env var,
+        # default to DEFAULT_LLAMA_CPP_GGUF_MODEL
+        if model_name is None:
+            model_name = DEFAULT_LLAMA_CPP_GGUF_MODEL

         model_args = {
-            "model_name": args.model_name,
-            "path": args.path,
+            "model_name": model_name,
+            "is_path": is_path,
             "n_gpu_layers": args.n_gpu_layers,
             "max_input_size": args.max_input_size,
         }
-    elif model_name == "llama-index-hf":
-        if args.model_name is None:
-            args.model_name = DEFAULT_HF_MODEL
+    elif args.model == "llama-index-hf":
+        # try to obtain model name from env var
+        # if model name is provided via command line, override env var
+        model_name = args.model_name or os.environ.get("LLAMA_INDEX_MODEL_NAME")
+
+        # if no model name is provided by command line or env var,
+        # default to DEFAULT_HF_MODEL
+        if model_name is None:
+            model_name = DEFAULT_HF_MODEL

         model_args = {
-            "model_name": args.model_name,
+ "model_name": model_name, "device": args.device, "max_input_size": args.max_input_size, } else: model_args = {} - if model_name == "hello": + if model == "hello": response_model = model() else: response_model = model( force_new_index=force_new_index, - data_dir=data_dir, - which_index=which_index, - mode=mode, + data_dir=args.data_dir, + which_index=args.which_index, + mode=args.mode, **model_args, ) diff --git a/slack_bot/slack_bot/models/llama_index.py b/slack_bot/slack_bot/models/llama_index.py index d5005e79..83eae921 100644 --- a/slack_bot/slack_bot/models/llama_index.py +++ b/slack_bot/slack_bot/models/llama_index.py @@ -68,7 +68,7 @@ def __init__( The type of engine to use when interacting with the data, options of "chat" or "query". Default is "chat". k : int, optional - `similarity_top_k` to use in query engine, by default 3 + `similarity_top_k` to use in char or query engine, by default 3 chunk_overlap_ratio : float, optional Chunk overlap as a ratio of chunk size, by default 0.1 force_new_index : bool, optional @@ -79,6 +79,14 @@ def __init__( """ super().__init__(emoji="llama") logging.info("Setting up Huggingface backend.") + if mode == "chat": + logging.info("Setting up chat engine.") + elif mode == "query": + logging.info("Setting up query engine.") + else: + logging.error("Mode must either be 'query' or 'chat'.") + sys.exit(1) + self.max_input_size = max_input_size self.model_name = model_name self.num_output = num_output @@ -138,17 +146,14 @@ def __init__( storage_context=storage_context, service_context=service_context ) - if self.mode == "query": - self.query_engine = self.index.as_query_engine(similarity_top_k=k) - logging.info("Done setting up Huggingface backend for query engine.") - elif self.mode == "chat": + if self.mode == "chat": self.chat_engine = self.index.as_chat_engine( chat_mode="context", similarity_top_k=k ) logging.info("Done setting up Huggingface backend for chat engine.") - else: - logging.error("Mode must either be 'query' or 'chat'.") - sys.exit(1) + elif self.mode == "query": + self.query_engine = self.index.as_query_engine(similarity_top_k=k) + logging.info("Done setting up Huggingface backend for query engine.") self.error_response_template = ( "Oh no! When I tried to get a response to your prompt, " @@ -356,7 +361,7 @@ class LlamaIndexLlamaCPP(LlamaIndex): def __init__( self, model_name: str, - path: bool, + is_path: bool, n_gpu_layers: int = 0, *args: Any, **kwargs: Any, @@ -369,14 +374,14 @@ def __init__( ---------- model_name : str Either the path to the model or the URL to download the model from - path : bool, optional + is_path : bool, optional If True, model_name is used as a path to the model file, otherwise it should be the URL to download the model n_gpu_layers : int, optional Number of layers to offload to GPU. If -1, all layers are offloaded, by default 0 """ - self.path = path + self.is_path = is_path self.n_gpu_layers = n_gpu_layers super().__init__(*args, model_name=model_name, **kwargs) @@ -389,8 +394,8 @@ def _prep_llm(self) -> LLM: ) return LlamaCPP( - model_url=self.model_name if not self.path else None, - model_path=self.model_name if self.path else None, + model_url=self.model_name if not self.is_path else None, + model_path=self.model_name if self.is_path else None, temperature=0.1, max_new_tokens=self.num_output, context_window=self.max_input_size,