Merge pull request #1381 from Srayash/feature-TTS

Feature: Added Text-To-Speech Functionality
arc53 · Oct 30, 2024 · af2cef1 · af2cef1
2 parents 3be74b1 + 5c99615
commit af2cef1
Show file tree

Hide file tree

Showing 6 changed files with 133 additions and 1 deletion.
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
diff --git a/frontend/src/assets/Loading.svg b/frontend/src/assets/Loading.svg
diff --git a/frontend/src/assets/speaker.svg b/frontend/src/assets/speaker.svg
diff --git a/frontend/src/assets/stopspeech.svg b/frontend/src/assets/stopspeech.svg
diff --git a/frontend/src/components/TextToSpeechButton.tsx b/frontend/src/components/TextToSpeechButton.tsx
@@ -0,0 +1,94 @@
+import { useState, useRef } from 'react';
+import Speaker from '../assets/speaker.svg?react';
+import Stopspeech from '../assets/stopspeech.svg?react';
+import LoadingIcon from '../assets/Loading.svg?react'; // Spinner shown while the audio loads
+const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
+
+/**
+ * Round icon button that reads `text` aloud.
+ *
+ * On click it POSTs `{ text }` to `<apiHost>/api/tts`, expects a JSON reply
+ * containing `success` and `audio_base64`, and plays the returned audio as an
+ * MP3 data URL. While the request is in flight a spinner is shown; while audio
+ * is playing the icon switches to a "stop" icon and a click stops playback.
+ *
+ * @param text - The text sent to the TTS endpoint.
+ * @param colorLight - Optional background colour used when not hovered
+ *   (defaults to `#FFFFFF`).
+ * @param colorDark - Optional dark-mode background colour used when not
+ *   hovered (defaults to `transparent`).
+ */
+export default function SpeakButton({
+  text,
+  colorLight,
+  colorDark,
+}: {
+  text: string;
+  colorLight?: string;
+  colorDark?: string;
+}) {
+  const [isSpeaking, setIsSpeaking] = useState(false);
+  const [isLoading, setIsLoading] = useState(false);
+  const [isSpeakHovered, setIsSpeakHovered] = useState(false);
+  // Holds the element that is currently playing so a later click can pause it.
+  const audioRef = useRef<HTMLAudioElement | null>(null);
+
+  const handleSpeakClick = async () => {
+    if (isSpeaking) {
+      // Stop audio if it's currently playing
+      audioRef.current?.pause();
+      audioRef.current = null;
+      setIsSpeaking(false);
+      return;
+    }
+
+    // Show the spinner for the whole request + playback start-up; the
+    // `finally` below guarantees it is cleared on every exit path.
+    setIsLoading(true);
+
+    try {
+      const response = await fetch(apiHost + '/api/tts', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ text }),
+      });
+
+      const data = await response.json();
+
+      if (!data.success || !data.audio_base64) {
+        console.error('Failed to retrieve audio.');
+        return;
+      }
+
+      const audio = new Audio(`data:audio/mp3;base64,${data.audio_base64}`);
+
+      // Register the reset handler before playback starts so the `ended`
+      // event cannot be missed.
+      audio.onended = () => {
+        setIsSpeaking(false);
+        audioRef.current = null;
+      };
+      audioRef.current = audio;
+
+      try {
+        // play() returns a promise that rejects when playback cannot start
+        // (e.g. blocked by the browser or undecodable data). Awaiting it here
+        // keeps that failure from leaving the spinner stuck.
+        await audio.play();
+        setIsSpeaking(true);
+      } catch (playError) {
+        audioRef.current = null;
+        console.error('Error playing TTS audio', playError);
+      }
+    } catch (error) {
+      console.error('Error fetching audio from TTS endpoint', error);
+    } finally {
+      setIsLoading(false);
+    }
+  };
+
+  // NOTE(review): the non-hover background classes are built by string
+  // interpolation. Tailwind only emits classes it can find as complete strings
+  // in the source, so these may not be generated unless safelisted - confirm.
+  const backgroundClasses = isSpeakHovered
+    ? `bg-[#EEEEEE] dark:bg-purple-taupe`
+    : `bg-[${colorLight ? colorLight : '#FFFFFF'}] dark:bg-[${colorDark ? colorDark : 'transparent'}]`;
+
+  // Same props for both states; only the icon differs.
+  const ToggleIcon = isSpeaking ? Stopspeech : Speaker;
+
+  return (
+    <div
+      className={`flex items-center justify-center rounded-full p-2 ${backgroundClasses}`}
+    >
+      {isLoading ? (
+        <LoadingIcon className="animate-spin" />
+      ) : (
+        <ToggleIcon
+          className="cursor-pointer fill-none"
+          onClick={handleSpeakClick}
+          onMouseEnter={() => setIsSpeakHovered(true)}
+          onMouseLeave={() => setIsSpeakHovered(false)}
+        />
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/conversation/ConversationBubble.tsx b/frontend/src/conversation/ConversationBubble.tsx
@@ -7,7 +7,6 @@ import remarkGfm from 'remark-gfm';
 import remarkMath from 'remark-math';
 import rehypeKatex from 'rehype-katex';
 import 'katex/dist/katex.min.css';
-
 import DocsGPT3 from '../assets/cute_docsgpt3.svg';
 import Dislike from '../assets/dislike.svg?react';
 import Document from '../assets/document.svg';
@@ -23,6 +22,7 @@ import {
 } from '../preferences/preferenceSlice';
 import classes from './ConversationBubble.module.css';
 import { FEEDBACK, MESSAGE_TYPE } from './conversationModels';
+import SpeakButton from '../components/TextToSpeechButton';
 
 const DisableSourceFE = import.meta.env.VITE_DISABLE_SOURCE_FE || false;
 
@@ -336,6 +336,14 @@ const ConversationBubble = forwardRef<
               <CopyButton text={message} />
             </div>
           </div>
+          <div
+            className={`relative mr-5 block items-center justify-center lg:invisible 
+            ${type !== 'ERROR' ? 'group-hover:lg:visible' : 'hidden'}`}
+          >
+            <div>
+              <SpeakButton text={message} /> {/* Add SpeakButton here */}
+            </div>
+          </div>
           {type === 'ERROR' && (
             <div className="relative mr-5 block items-center justify-center">
               <div>{retryBtn}</div>