diff --git a/README.md b/README.md
index 7fda70b..f5bb178 100644
--- a/README.md
+++ b/README.md
@@ -22,13 +22,14 @@ This assistant can run offline on your local machine, and it respects your priva
 
 ## TODO
 
-- [ ] Support other text models: Llama 3.x.
-- [ ] Support multimodal models: LLaVA, Llama 3.2 + Vision.
+- [x] Support multimodal model: [moondream2](https://huggingface.co/vikhyatk/moondream2).
 - [ ] Add offline STT support: WhisperCPP.
 - [ ] Add wake word detection: "Hey Llama!".
-- [ ] Knowledge database.
-- [ ] Video interaction support.
+- [ ] Support 5 other text models.
+- [ ] Support 5 other multimodal models.
+- [ ] Knowledge database: Langchain or LlamaIndex?
 - [ ] Plugin system for extensibility.
+- [ ] Package for Windows, Linux, and macOS.
 
 ## Features
diff --git a/llama_assistant/custom_plaintext_editor.py b/llama_assistant/custom_plaintext_editor.py
index 5889d2c..2258024 100644
--- a/llama_assistant/custom_plaintext_editor.py
+++ b/llama_assistant/custom_plaintext_editor.py
@@ -2,6 +2,7 @@
 from PyQt6.QtGui import QKeyEvent
 from PyQt6.QtCore import Qt, pyqtSignal
 
+
 class CustomPlainTextEdit(QPlainTextEdit):
     submit = pyqtSignal()
 
@@ -19,7 +20,10 @@ def __init__(self, submit_callback, parent=None):
         )
 
     def keyPressEvent(self, event: QKeyEvent):
-        if event.key() == Qt.Key.Key_Return and not event.modifiers() & Qt.KeyboardModifier.ShiftModifier:
+        if (
+            event.key() == Qt.Key.Key_Return
+            and not event.modifiers() & Qt.KeyboardModifier.ShiftModifier
+        ):
             self.submit.emit()
         else:
-            super().keyPressEvent(event)
\ No newline at end of file
+            super().keyPressEvent(event)
diff --git a/llama_assistant/global_hotkey.py b/llama_assistant/global_hotkey.py
index 4378c62..707e8ed 100644
--- a/llama_assistant/global_hotkey.py
+++ b/llama_assistant/global_hotkey.py
@@ -1,18 +1,18 @@
 from PyQt6.QtCore import QObject, pyqtSignal
 from pynput import keyboard
 
+
 class GlobalHotkey(QObject):
     activated = pyqtSignal()
 
     def __init__(self, hotkey):
         super().__init__()
         self.hotkey = keyboard.HotKey(
-            keyboard.HotKey.parse(hotkey),
-            self.on_activate
+            keyboard.HotKey.parse(hotkey), self.on_activate
         )
         self.listener = keyboard.Listener(
             on_press=self.for_canonical(self.hotkey.press),
-            on_release=self.for_canonical(self.hotkey.release)
+            on_release=self.for_canonical(self.hotkey.release),
         )
         self.listener.start()
 
@@ -24,4 +24,4 @@ def for_canonical(self, f):
 
     def stop(self):
         if self.listener:
-            self.listener.stop()
\ No newline at end of file
+            self.listener.stop()
diff --git a/llama_assistant/llama_assistant.py b/llama_assistant/llama_assistant.py
index 7fd5e09..80ce1d9 100644
--- a/llama_assistant/llama_assistant.py
+++ b/llama_assistant/llama_assistant.py
@@ -1,7 +1,7 @@
 import json
 import markdown
-from llama_cpp import Llama
 from pathlib import Path
+from importlib import resources
 
 from PyQt6.QtWidgets import (
     QApplication,
@@ -14,6 +14,7 @@
     QSystemTrayIcon,
     QMenu,
     QLabel,
+    QScrollArea,
 )
 from PyQt6.QtCore import (
     Qt,
@@ -41,7 +42,8 @@
 from llama_assistant.loading_animation import LoadingAnimation
 from llama_assistant.setting_dialog import SettingsDialog
 from llama_assistant.speech_recognition import SpeechRecognitionThread
-from importlib import resources
+from llama_assistant.utils import image_to_base64_data_uri
+from llama_assistant.model_handler import handler as model_handler
 
 
 class LlamaAssistant(QMainWindow):
@@ -51,19 +53,12 @@ def __init__(self):
         self.init_ui()
         self.init_tray()
         self.setup_global_shortcut()
-        self.load_model()
         self.last_response = ""
         self.dropped_image = None
         self.speech_thread = None
         self.is_listening = False
         self.image_label = None
 
-    def load_model(self):
-        self.model = Llama.from_pretrained(
-            repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
-            filename="*q4_k_m.gguf",
-        )
-
     def load_settings(self):
         home_dir = Path.home()
         settings_dir = home_dir / "llama_assistant"
@@ -112,7 +107,8 @@ def init_ui(self):
         self.setWindowTitle("AI Assistant")
         self.setFixedSize(600, 200)
         self.setWindowFlags(
-            Qt.WindowType.FramelessWindowHint | Qt.WindowType.WindowStaysOnTopHint
+            Qt.WindowType.FramelessWindowHint
+            | Qt.WindowType.WindowStaysOnTopHint
         )
         self.setAttribute(Qt.WidgetAttribute.WA_TranslucentBackground)
@@ -132,6 +128,7 @@ def init_ui(self):
         self.input_field = CustomPlainTextEdit(self.on_submit, self)
         self.input_field.setPlaceholderText("Ask me anything...")
         self.input_field.setAcceptDrops(True)
+        self.input_field.setFixedHeight(100)
         self.input_field.dragEnterEvent = self.dragEnterEvent
         self.input_field.dropEvent = self.dropEvent
         self.input_field.setStyleSheet(
@@ -150,7 +147,9 @@ def init_ui(self):
         top_layout.addWidget(self.input_field)
 
         # Load the mic icon from resources
-        with resources.path("llama_assistant.resources", "mic_icon.png") as path:
+        with resources.path(
+            "llama_assistant.resources", "mic_icon.png"
+        ) as path:
             mic_icon = QIcon(str(path))
 
         self.mic_button = QPushButton(self)
@@ -197,38 +196,73 @@ def init_ui(self):
         # Add new buttons
         button_layout = QHBoxLayout()
+        button_layout.setAlignment(Qt.AlignmentFlag.AlignLeft)
         self.summarize_button = QPushButton("Summarize", self)
         self.rephrase_button = QPushButton("Rephrase", self)
         self.fix_grammar_button = QPushButton("Fix Grammar", self)
         self.brainstorm_button = QPushButton("Brainstorm", self)
         self.write_email_button = QPushButton("Write Email", self)
 
-        for button in [self.summarize_button, self.rephrase_button, self.fix_grammar_button, self.brainstorm_button, self.write_email_button]:
-            button.clicked.connect(self.on_task_button_clicked)
-            button_layout.addWidget(button)
-
-        main_layout.addLayout(button_layout)
-
-        self.chat_box = QTextBrowser(self)
-        self.chat_box.setOpenExternalLinks(True)
-        self.chat_box.setFixedHeight(300)
-        self.chat_box.hide()
-        main_layout.addWidget(self.chat_box)
-
-        result_button_layout = QHBoxLayout()
-        result_button_layout.setContentsMargins(0, 10, 0, 0)  # Add top margin
-
+        # Add result buttons to layout
+        result_layout = QHBoxLayout()
+        result_layout.setAlignment(Qt.AlignmentFlag.AlignLeft)
         self.copy_button = QPushButton("Copy Result", self)
         self.copy_button.clicked.connect(self.copy_result)
         self.copy_button.hide()
-        result_button_layout.addWidget(self.copy_button)
-
         self.clear_button = QPushButton("Clear", self)
         self.clear_button.clicked.connect(self.clear_chat)
         self.clear_button.hide()
-        result_button_layout.addWidget(self.clear_button)
+        result_layout.addWidget(self.copy_button)
+        result_layout.addWidget(self.clear_button)
+
+        for button in [
+            self.summarize_button,
+            self.rephrase_button,
+            self.fix_grammar_button,
+            self.brainstorm_button,
+            self.write_email_button,
+        ]:
+            button.clicked.connect(self.on_task_button_clicked)
+            button_layout.addWidget(button)
 
-        main_layout.addLayout(result_button_layout)
+        main_layout.addLayout(button_layout)
+        main_layout.addLayout(result_layout)
+
+        # Create a scroll area for the chat box
+        self.scroll_area = QScrollArea(self)
+        self.scroll_area.setWidgetResizable(True)
+        self.scroll_area.setHorizontalScrollBarPolicy(
+            Qt.ScrollBarPolicy.ScrollBarAlwaysOff
+        )
+        self.scroll_area.setStyleSheet(
+            """
+            QScrollArea {
+                border: none;
+                background-color: transparent;
+            }
+            QScrollBar:vertical {
+                border: none;
+                background: rgba(255, 255, 255, 0.1);
+                width: 10px;
+                margin: 0px 0px 0px 0px;
+            }
+            QScrollBar::handle:vertical {
+                background: rgba(255, 255, 255, 0.3);
+                min-height: 20px;
+                border-radius: 5px;
+            }
+            QScrollBar::add-line:vertical, QScrollBar::sub-line:vertical {
+                border: none;
+                background: none;
+            }
+            """
+        )
+
+        self.chat_box = QTextBrowser(self.scroll_area)
+        self.chat_box.setOpenExternalLinks(True)
+        self.scroll_area.setWidget(self.chat_box)
+        self.scroll_area.hide()
+        main_layout.addWidget(self.scroll_area)
 
         self.loading_animation = LoadingAnimation(self)
         self.loading_animation.setFixedSize(50, 50)
@@ -276,7 +310,26 @@ def update_styles(self):
                 background-color: rgba{QColor(self.settings["color"]).lighter(120).getRgb()[:3] + (opacity,)};
             }}
         """
-        for button in [self.copy_button, self.clear_button, self.rephrase_button, self.fix_grammar_button, self.brainstorm_button, self.write_email_button, self.summarize_button]:
+        for button in [
+            self.rephrase_button,
+            self.fix_grammar_button,
+            self.brainstorm_button,
+            self.write_email_button,
+            self.summarize_button,
+        ]:
+            button.setStyleSheet(button_style)
+
+        button_style = f"""
+            QPushButton {{
+                {base_style}
+                padding: 2.5px 5px;
+                border-radius: 5px;
+            }}
+            QPushButton:hover {{
+                background-color: rgba(200, 200, 200, 0.8);
+            }}
+        """
+        for button in [self.copy_button, self.clear_button]:
             button.setStyleSheet(button_style)
 
     def center_on_screen(self):
@@ -329,7 +382,9 @@ def toggle_visibility(self):
     def on_submit(self):
         message = self.input_field.toPlainText()
         self.input_field.clear()
-        self.loading_animation.move(self.width() // 2 - 25, self.height() // 2 - 25)
+        self.loading_animation.move(
+            self.width() // 2 - 25, self.height() // 2 - 25
+        )
         self.loading_animation.start_animation()
 
         if self.dropped_image:
@@ -353,39 +408,36 @@ def process_text(self, message, task="chat"):
         elif task == "write email":
             prompt = f"Write an email about: {message}"
 
-        output = self.model.create_chat_completion(
-            messages = [
-                {
-                    "role": "user",
-                    "content": prompt
-                }
-            ]
-        )
-        response = output["choices"][0]["message"]["content"]
-
+        response = model_handler.chat_completion("llama_text", prompt)
         self.last_response = response
 
         self.chat_box.append(f"You: {message}")
-        self.chat_box.append(f"AI ({task}): {markdown.markdown(response)}")
+        self.chat_box.append(
+            f"AI ({task}): {markdown.markdown(response)}"
+        )
         self.loading_animation.stop_animation()
         self.show_chat_box()
 
     def process_image_with_prompt(self, image_path, prompt):
+        response = model_handler.chat_completion(
+            "moondream", prompt, image=image_to_base64_data_uri(image_path)
+        )
         self.chat_box.append(f"You: [Uploaded an image: {image_path}]")
         self.chat_box.append(f"You: {prompt}")
         self.chat_box.append(
-            f"AI: I've received your image and prompt. However, image processing is not yet implemented. Currently, only text input is supported. Here's a response to your text prompt:"
+            f"AI: {markdown.markdown(response)}"
+            if response
+            else "No response"
         )
-
-        # Process the text prompt
-        self.process_text(prompt)
+        self.loading_animation.stop_animation()
+        self.show_chat_box()
 
     def show_chat_box(self):
-        if self.chat_box.isHidden():
-            self.chat_box.show()
+        if self.scroll_area.isHidden():
+            self.scroll_area.show()
             self.copy_button.show()
             self.clear_button.show()
-            self.setFixedHeight(450)
+            self.setFixedHeight(600)  # Increase this value if needed
         self.chat_box.verticalScrollBar().setValue(
             self.chat_box.verticalScrollBar().maximum()
         )
@@ -409,9 +461,13 @@ def dragEnterEvent(self, event: QDragEnterEvent):
     def dropEvent(self, event: QDropEvent):
         files = [u.toLocalFile() for u in event.mimeData().urls()]
        for file_path in files:
-            if file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
+            if file_path.lower().endswith(
+                (".png", ".jpg", ".jpeg", ".gif", ".bmp")
+            ):
                 self.dropped_image = file_path
-                self.input_field.setPlaceholderText("Enter a prompt for the image...")
+                self.input_field.setPlaceholderText(
+                    "Enter a prompt for the image..."
+                )
                 self.show_image_thumbnail(file_path)
                 break
@@ -549,4 +605,11 @@ def on_speech_recognized(self, text):
         self.input_field.setPlainText(text)
 
     def on_speech_error(self, error_message):
-        print(error_message)
\ No newline at end of file
+        print(error_message)
+
+
+if __name__ == "__main__":
+    app = QApplication([])
+    assistant = LlamaAssistant()
+    assistant.show()
+    app.exec()
diff --git a/llama_assistant/loading_animation.py b/llama_assistant/loading_animation.py
index 4dd591b..5f93a1f 100644
--- a/llama_assistant/loading_animation.py
+++ b/llama_assistant/loading_animation.py
@@ -56,7 +56,9 @@ def paintEvent(self, event):
             painter.setBrush(color)
             painter.setPen(Qt.PenStyle.NoPen)
-            painter.drawEllipse(QPointF(x, y), self.dot_radius, self.dot_radius)
+            painter.drawEllipse(
+                QPointF(x, y), self.dot_radius, self.dot_radius
+            )
 
     @property
     def rotation(self):
@@ -65,4 +67,4 @@ def rotation(self):
     @rotation.setter
     def rotation(self, value):
         self._rotation = value
-        self.update()
\ No newline at end of file
+        self.update()
diff --git a/llama_assistant/main.py b/llama_assistant/main.py
index 7894616..f84dc6d 100644
--- a/llama_assistant/main.py
+++ b/llama_assistant/main.py
@@ -9,5 +9,6 @@ def main():
     ex.show()
     sys.exit(app.exec())
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/llama_assistant/model_handler.py b/llama_assistant/model_handler.py
new file mode 100644
index 0000000..475615b
--- /dev/null
+++ b/llama_assistant/model_handler.py
@@ -0,0 +1,191 @@
+from typing import List, Dict, Optional
+import time
+from threading import Timer
+from llama_cpp import Llama
+from llama_cpp.llama_chat_format import MoondreamChatHandler
+
+
+class Model:
+    def __init__(
+        self,
+        model_type: str,
+        model_id: str,
+        model_name: str,
+        model_path: Optional[str] = None,
+        repo_id: Optional[str] = None,
+        filename: Optional[str] = None,
+    ):
+        self.model_type = model_type
+        self.model_id = model_id
+        self.model_name = model_name
+        self.model_path = model_path
+        self.repo_id = repo_id
+        self.filename = filename
+
+    def is_online(self) -> bool:
+        return self.repo_id is not None and self.filename is not None
+
+
+class ModelHandler:
+    def __init__(self):
+        self.supported_models: List[Model] = []
+        self.loaded_models: Dict[str, Dict] = {}
+        self.model_timers: Dict[str, Timer] = {}
+
+    def list_supported_models(self) -> List[Model]:
+        return self.supported_models
+
+    def add_supported_model(self, model: Model):
+        self.supported_models.append(model)
+
+    def remove_supported_model(self, model_id: str):
+        self.supported_models = [
+            m for m in self.supported_models if m.model_id != model_id
+        ]
+        if model_id in self.loaded_models:
+            self.unload_model(model_id)
+
+    def load_model(self, model_id: str) -> Optional[Dict]:
+        model = next(
+            (m for m in self.supported_models if m.model_id == model_id), None
+        )
+        if not model:
+            print(f"Model with ID {model_id} not found.")
+            return None
+
+        if model_id not in self.loaded_models:
+            print(f"Loading model: {model.model_name}")
+            if model.is_online():
+                if model.model_type == "text":
+                    loaded_model = Llama.from_pretrained(
+                        repo_id=model.repo_id,
+                        filename=model.filename,
+                    )
+                elif model.model_type == "image":
+                    chat_handler = MoondreamChatHandler.from_pretrained(
+                        repo_id="vikhyatk/moondream2",
+                        filename="*mmproj*",
+                    )
+                    loaded_model = Llama.from_pretrained(
+                        repo_id=model.repo_id,
+                        filename=model.filename,
+                        chat_handler=chat_handler,
+                        n_ctx=2048,
+                    )
+                else:
+                    print(f"Unsupported model type: {model.model_type}")
+                    return None
+            else:
+                # Load model from local path
+                loaded_model = Llama(model_path=model.model_path)
+
+            self.loaded_models[model_id] = {
+                "model": loaded_model,
+                "last_used": time.time(),
+            }
+            self._schedule_unload(model_id)
+
+        return self.loaded_models[model_id]
+
+    def unload_model(self, model_id: str):
+        if model_id in self.loaded_models:
+            print(f"Unloading model: {model_id}")
+            del self.loaded_models[model_id]
+            if model_id in self.model_timers:
+                self.model_timers[model_id].cancel()
+                del self.model_timers[model_id]
+
+    def chat_completion(
+        self,
+        model_id: str,
+        message: str,
+        image: Optional[str] = None,
+        n_ctx: int = 2048,
+    ) -> str:
+        model_data = self.load_model(model_id)
+        if not model_data:
+            return "Failed to load model"
+
+        model = model_data["model"]
+        model_data["last_used"] = time.time()
+        self._schedule_unload(model_id)
+
+        if image:
+            response = model.create_chat_completion(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": message},
+                            {"type": "image_url", "image_url": {"url": image}},
+                        ],
+                    }
+                ]
+            )
+        else:
+            response = model.create_chat_completion(
+                messages=[{"role": "user", "content": message}]
+            )
+
+        return response["choices"][0]["message"]["content"]
+
+    def _schedule_unload(self, model_id: str):
+        if model_id in self.model_timers:
+            self.model_timers[model_id].cancel()
+
+        timer = Timer(3600, self.unload_model, args=[model_id])
+        timer.start()
+        self.model_timers[model_id] = timer
+
+
+# Example usage
+handler = ModelHandler()
+
+# Add supported models
+handler.add_supported_model(
+    Model(
+        model_type="text",
+        model_id="llama_text",
+        model_name="Llama 3.2 1B Instruct",
+        repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
+        filename="*q4_k_m.gguf",
+    )
+)
+handler.add_supported_model(
+    Model(
+        model_type="image",
+        model_id="moondream",
+        model_name="Moondream2",
+        repo_id="vikhyatk/moondream2",
+        filename="*text-model*",
+    )
+)
+# handler.add_supported_model(
+#     Model(
+#         model_type="text",
+#         model_id="local_model",
+#         model_name="Local Text Model",
+#         model_path="/path/to/local/model.gguf",
+#     )
+# )
+
+if __name__ == "__main__":
+    # List supported models
+    print("Supported models:")
+    for model in handler.list_supported_models():
+        print(f"- {model.model_name} (ID: {model.model_id})")
+
+    # Use text model
+    result = handler.chat_completion("llama_text", "Tell me a joke")
+    print(result)
+
+    # Use image model
+    image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+    result = handler.chat_completion(
+        "moondream", "What's in this image?", image=image_url
+    )
+    print(result)
+
+    # Use local model (uncomment the local model registration above first)
+    # result = handler.chat_completion("local_model", "Hello, local model!")
+    # print(result)
diff --git a/llama_assistant/setting_dialog.py b/llama_assistant/setting_dialog.py
index 3d8b410..e351680 100644
--- a/llama_assistant/setting_dialog.py
+++ b/llama_assistant/setting_dialog.py
@@ -1,13 +1,19 @@
 import json
 from pathlib import Path
 from PyQt6.QtWidgets import (
-    QDialog, QFormLayout, QPushButton, QSlider, QComboBox,
-    QColorDialog
+    QDialog,
+    QFormLayout,
+    QPushButton,
+    QSlider,
+    QComboBox,
+    QColorDialog,
+    QLabel,
 )
 from PyQt6.QtCore import Qt
 from PyQt6.QtGui import QColor
 from llama_assistant.shortcut_recorder import ShortcutRecorder
 
+
 class SettingsDialog(QDialog):
     def __init__(self, parent=None):
         super().__init__(parent)
@@ -31,9 +37,14 @@ def __init__(self, parent=None):
         self.layout.addRow("Transparency:", self.transparency_slider)
 
         self.ai_model_combo = QComboBox()
-        self.ai_model_combo.addItems(["Llama 1B", "Llama 3B"])
+        self.ai_model_combo.addItems(["Llama 1B + Moondream2"])
         self.layout.addRow("AI Model:", self.ai_model_combo)
 
+        self.label = QLabel(
+            "Note: Changing the AI model will be supported in the future."
+        )
+        self.layout.addRow(self.label)
+
         self.save_button = QPushButton("Save")
         self.save_button.clicked.connect(self.accept)
         self.layout.addRow(self.save_button)
@@ -55,10 +66,17 @@ def load_settings(self):
         if settings_file.exists():
             with open(settings_file, "r") as f:
                 settings = json.load(f)
-            self.shortcut_recorder.setText(settings.get("shortcut", "<cmd>+<shift>+<space>"))
+            self.shortcut_recorder.setText(
+                settings.get("shortcut", "<cmd>+<shift>+<space>")
+            )
             self.color = QColor(settings.get("color", "#1E1E1E"))
-            self.transparency_slider.setValue(int(settings.get("transparency", 90)))
-            self.ai_model_combo.setCurrentText(settings.get("ai_model", "Llama 1B"))
+            self.transparency_slider.setValue(
+                int(settings.get("transparency", 90))
+            )
+            # self.ai_model_combo.setCurrentText(
+            #     settings.get("ai_model", "Llama 1B")
+            # )  # TODO: Implement this feature
+            self.ai_model_combo.setCurrentText("Llama 1B + Moondream2")
         else:
             self.color = QColor("#1E1E1E")
             self.shortcut_recorder.setText("<cmd>+<shift>+<space>")
@@ -81,4 +99,4 @@ def save_settings(self):
         settings = self.get_settings()
 
         with open(settings_file, "w") as f:
-            json.dump(settings, f)
\ No newline at end of file
+            json.dump(settings, f)
diff --git a/llama_assistant/shortcut_recorder.py b/llama_assistant/shortcut_recorder.py
index 54c4c3a..f86fd75 100644
--- a/llama_assistant/shortcut_recorder.py
+++ b/llama_assistant/shortcut_recorder.py
@@ -3,6 +3,7 @@
 from PyQt6.QtGui import QKeyEvent, QKeySequence
 from pynput import keyboard
 
+
 class ShortcutRecorder(QLineEdit):
     def __init__(self, parent=None):
         super().__init__(parent)
@@ -56,11 +57,20 @@ def qt_to_pynput_key(self, qt_key):
             "Esc": "<esc>",
             "Backspace": "<backspace>",
             # Function keys
-            "F1": "<f1>", "F2": "<f2>", "F3": "<f3>", "F4": "<f4>",
-            "F5": "<f5>", "F6": "<f6>", "F7": "<f7>", "F8": "<f8>",
-            "F9": "<f9>", "F10": "<f10>", "F11": "<f11>", "F12": "<f12>",
+            "F1": "<f1>",
+            "F2": "<f2>",
+            "F3": "<f3>",
+            "F4": "<f4>",
+            "F5": "<f5>",
+            "F6": "<f6>",
+            "F7": "<f7>",
+            "F8": "<f8>",
+            "F9": "<f9>",
+            "F10": "<f10>",
+            "F11": "<f11>",
+            "F12": "<f12>",
         }
         return key_map.get(qt_key, qt_key.lower())
 
     def get_pynput_hotkey(self):
-        return self.recorded_shortcut
\ No newline at end of file
+        return self.recorded_shortcut
diff
--git a/llama_assistant/speech_recognition.py b/llama_assistant/speech_recognition.py
index 1b0b66a..4573ece 100644
--- a/llama_assistant/speech_recognition.py
+++ b/llama_assistant/speech_recognition.py
@@ -30,4 +30,4 @@ def run(self):
             self.error.emit(f"Could not request results; {e}")
 
     def stop(self):
-        self.stop_listening = True
\ No newline at end of file
+        self.stop_listening = True
diff --git a/llama_assistant/utils.py b/llama_assistant/utils.py
new file mode 100644
index 0000000..6e28dba
--- /dev/null
+++ b/llama_assistant/utils.py
@@ -0,0 +1,7 @@
+import base64
+
+
+def image_to_base64_data_uri(file_path):
+    with open(file_path, "rb") as img_file:
+        base64_data = base64.b64encode(img_file.read()).decode("utf-8")
+    return f"data:image/png;base64,{base64_data}"
diff --git a/pyproject.toml b/pyproject.toml
index 346ea9e..13c8d6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llama-assistant"
-version = "0.1.15"
+version = "0.1.16"
 authors = [
     {name = "Viet-Anh Nguyen", email = "vietanh.dev@gmail.com"},
 ]
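
A few notes on the patch, with small illustrative sketches.

First, the new `model_handler.py` loads each model on first use and unloads it after an hour idle: `load_model` caches the `Llama` instance, and `_schedule_unload` arms a `threading.Timer` that every subsequent use pushes back. A minimal, self-contained sketch of that same pattern (the `LazyCache` name and the 5-second timeout are illustrative, not from the patch):

```python
import time
from threading import Timer

IDLE_SECONDS = 5  # model_handler.py uses 3600 (one hour)


class LazyCache:
    """Load a resource on first use; drop it after an idle period."""

    def __init__(self, loader):
        self.loader = loader
        self.entry = None
        self.timer = None

    def get(self):
        if self.entry is None:
            self.entry = self.loader()  # the expensive load happens here
        self._reschedule()
        return self.entry

    def unload(self):
        self.entry = None  # drop the reference so it can be freed

    def _reschedule(self):
        if self.timer:
            self.timer.cancel()  # each use pushes the deadline back
        self.timer = Timer(IDLE_SECONDS, self.unload)
        self.timer.daemon = True  # don't keep the process alive
        self.timer.start()


cache = LazyCache(lambda: "pretend this is a loaded GGUF model")
print(cache.get())   # triggers the load
time.sleep(6)
print(cache.entry)   # None: unloaded after 5 idle seconds
```

One design note: `threading.Timer` threads are non-daemon by default, so a pending unload timer in `ModelHandler` can keep the interpreter alive for up to an hour after the window closes unless the timer is cancelled; marking it as a daemon, as in the sketch above, avoids that.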
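Second, `utils.image_to_base64_data_uri` labels every file as `image/png`, even for the `.jpg`, `.gif`, and `.bmp` drops that `dropEvent` accepts. Decoders often sniff the actual bytes, so this may work in practice, but deriving the MIME type from the filename is a safer default. A possible variant (the `image_to_data_uri` name is illustrative):

```python
import base64
import mimetypes


def image_to_data_uri(file_path: str) -> str:
    # Guess the MIME type from the file extension instead of
    # hardcoding image/png; fall back to PNG when unknown.
    mime, _ = mimetypes.guess_type(file_path)
    mime = mime or "image/png"
    with open(file_path, "rb") as img_file:
        base64_data = base64.b64encode(img_file.read()).decode("utf-8")
    return f"data:{mime};base64,{base64_data}"


# image_to_data_uri("photo.jpg") -> "data:image/jpeg;base64,..."
```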
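Finally, for anyone reading `global_hotkey.py` in isolation: it wraps pynput's documented `HotKey` plus `Listener` pattern, where `for_canonical` maps each incoming key to its canonical form so modifier variants still match the parsed combination. A stand-alone sketch of that pattern (the accelerator shown is the default restored in `setting_dialog.py`; the print callback is illustrative):

```python
from pynput import keyboard

HOTKEY = "<cmd>+<shift>+<space>"  # default restored in setting_dialog.py

hotkey = keyboard.HotKey(keyboard.HotKey.parse(HOTKEY), lambda: print("activated"))


def for_canonical(f):
    # Normalize keys (e.g. shifted variants) so the combination
    # matches regardless of which physical event produced it.
    return lambda k: f(listener.canonical(k))


listener = keyboard.Listener(
    on_press=for_canonical(hotkey.press),
    on_release=for_canonical(hotkey.release),
)
listener.start()
listener.join()  # block until the listener stops (Ctrl+C to exit)
```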