diff --git a/pipes/search/src/components/example-search-cards.tsx b/pipes/search/src/components/example-search-cards.tsx index 588be3870..c8ff98209 100644 --- a/pipes/search/src/components/example-search-cards.tsx +++ b/pipes/search/src/components/example-search-cards.tsx @@ -1,4 +1,4 @@ -import React from "react"; +import React, { useEffect, useState } from "react"; import { Card, CardContent } from "@/components/ui/card"; import { Search, Mail, Clock, AlertCircle } from "lucide-react"; import { Badge } from "./ui/badge"; @@ -24,33 +24,37 @@ interface ExampleSearchCardsProps { onSelect: (example: ExampleSearch) => void; } -const exampleSearches: ExampleSearch[] = [ - { - title: "summarize last hour meeting", - contentType: "audio", - limit: 120, - minLength: 10, - startDate: new Date(Date.now() - 60 * 60 * 1000), // 1 hour ago - }, - { - title: "summarize my mails", - contentType: "ocr", - windowName: "gmail", - limit: 25, - minLength: 50, - startDate: new Date(new Date().setHours(0, 0, 0, 0)), // since midnight local time - }, - { - title: "time spent last hour", - contentType: "ocr", - limit: 25, - minLength: 50, - startDate: new Date(Date.now() - 60 * 60 * 1000), // 1 hour ago - }, -]; - export function ExampleSearchCards({ onSelect }: ExampleSearchCardsProps) { + const [exampleSearches, setExampleSearches] = useState([]); const { health } = useHealthCheck(); + + useEffect(() => { + setExampleSearches([ + { + title: "summarize last hour meeting", + contentType: "audio", + limit: 120, + minLength: 10, + startDate: new Date(Date.now() - 60 * 60 * 1000), // 1 hour ago + }, + { + title: "summarize my mails", + contentType: "ocr", + windowName: "gmail", + limit: 25, + minLength: 50, + startDate: new Date(new Date().setHours(0, 0, 0, 0)), // since midnight local time + }, + { + title: "time spent last hour", + contentType: "ocr", + limit: 25, + minLength: 50, + startDate: new Date(Date.now() - 60 * 60 * 1000), // 1 hour ago + }, + ]); + }, []); + const getIcon = (title: string) => { switch (title) { case "summarize last hour meeting": diff --git a/pipes/search/src/components/search-chat.tsx b/pipes/search/src/components/search-chat.tsx index eb510add1..9fb0d6fc3 100644 --- a/pipes/search/src/components/search-chat.tsx +++ b/pipes/search/src/components/search-chat.tsx @@ -992,7 +992,11 @@ export function SearchChat() { {item.content.filePath && item.content.filePath.trim() !== "" ? (
-                                <VideoComponent filePath={item.content.filePath} />
+                                <VideoComponent
+                                  filePath={item.content.filePath}
+                                  startTime={item.content.startTime}
+                                  endTime={item.content.endTime}
+                                />
                              ) : (

diff --git a/pipes/search/src/components/video.tsx b/pipes/search/src/components/video.tsx index 20adfda27..0028a7f23 100644 --- a/pipes/search/src/components/video.tsx +++ b/pipes/search/src/components/video.tsx @@ -1,15 +1,19 @@ -import { memo, useCallback, useEffect, useState } from "react"; +import { memo, useCallback, useEffect, useState, useRef, useMemo } from "react"; +import { getMediaFile } from "@/lib/actions/video-actions"; import { cn } from "@/lib/utils"; -import { getMediaFile } from '@/lib/actions/video-actions' export const VideoComponent = memo(function VideoComponent({ filePath, customDescription, className, + startTime, + endTime, }: { filePath: string; customDescription?: string; className?: string; + startTime?: number; + endTime?: number; }) { const [mediaSrc, setMediaSrc] = useState(null); const [error, setError] = useState(null); @@ -28,39 +32,19 @@ export const VideoComponent = memo(function VideoComponent({ const renderFileLink = () => ( // TODO button open link -

+

{customDescription || filePath} -

+
); - const getMimeType = (path: string): string => { - const ext = path.split(".").pop()?.toLowerCase(); - switch (ext) { - case "mp4": - return "video/mp4"; - case "webm": - return "video/webm"; - case "ogg": - return "video/ogg"; - case "mp3": - return "audio/mpeg"; - case "wav": - return "audio/wav"; - default: - return isAudio ? "audio/mpeg" : "video/mp4"; - } - }; - useEffect(() => { async function loadMedia() { try { console.log("Loading media:", filePath); const sanitizedPath = sanitizeFilePath(filePath); console.log("Sanitized path:", sanitizedPath); - if (!sanitizedPath) { - throw new Error("Invalid file path"); - } + // Set isAudio based on path check setIsAudio( sanitizedPath.toLowerCase().includes("input") || sanitizedPath.toLowerCase().includes("output") @@ -115,12 +99,11 @@ export const VideoComponent = memo(function VideoComponent({ return (
      {isAudio ? (
-
-
-
+        <AudioPlayer startTime={startTime} endTime={endTime} mediaSrc={mediaSrc} />
      ) : (
); }); + +const AudioPlayer = memo(function AudioPlayer({ + startTime, + endTime, + mediaSrc, +}: { + startTime?: number; + endTime?: number; + mediaSrc: string; +}) { + const [duration, setDuration] = useState(0); + const [currentTime, setCurrentTime] = useState(0); + const [isPlaying, setIsPlaying] = useState(false); + const audioRef = useRef(null); + + const audioElement = useMemo( + () => ( + + ), + [mediaSrc, startTime, currentTime] + ); + + const togglePlay = async () => { + if (!audioRef.current) return; + + try { + if (isPlaying) { + audioRef.current.pause(); + } else { + await audioRef.current.play(); + } + setIsPlaying(!isPlaying); + } catch (error) { + console.error("Playback failed:", error); + setIsPlaying(false); + } + }; + + const handleTimeChange = async (e: React.ChangeEvent) => { + if (!audioRef.current) return; + + const time = parseFloat(e.target.value); + const wasPlaying = isPlaying; + + if (wasPlaying) { + audioRef.current.pause(); + } + + // Set the time directly on the audio element first + audioRef.current.currentTime = time; + // Then update the state + setCurrentTime(time); + + if (wasPlaying) { + try { + await audioRef.current.play(); + } catch (error) { + console.error("Playback failed:", error); + setIsPlaying(false); + } + } + }; + + return ( +
+
+ {startTime !== null && ( +
+
+ Start +
+
+ )} + {endTime !== null && ( +
+
+ End +
+
+ )} + +
+
+
+
+
+ +
+ {audioElement} +
+
+ ); +}); diff --git a/screenpipe-app-tauri/components/identify-speakers.tsx b/screenpipe-app-tauri/components/identify-speakers.tsx index 72bde93fb..bf048e844 100644 --- a/screenpipe-app-tauri/components/identify-speakers.tsx +++ b/screenpipe-app-tauri/components/identify-speakers.tsx @@ -226,6 +226,8 @@ export default function IdentifySpeakers({ { path: longestAudioSample?.path || "", transcript: longestAudioSample?.transcript || "", + startTime: longestAudioSample?.startTime, + endTime: longestAudioSample?.endTime, }, ], }, @@ -269,7 +271,7 @@ export default function IdentifySpeakers({ async (speaker: Speaker) => { const durations: Map = new Map(); for (const sample of speaker.metadata?.audioSamples || []) { - const size = await getFileSize(sample.path); + const size = (sample.endTime ?? 0) - (sample.startTime ?? 0); durations.set(sample.path, size); } @@ -637,6 +639,8 @@ export default function IdentifySpeakers({ ))} @@ -681,6 +685,8 @@ export default function IdentifySpeakers({ @@ -1078,6 +1084,8 @@ export default function IdentifySpeakers({ key={index} filePath={sample.path} customDescription={`transcript: ${sample.transcript}`} + startTime={sample.startTime} + endTime={sample.endTime} /> ))}
@@ -1123,6 +1131,8 @@ export default function IdentifySpeakers({ filePath={sample.path} customDescription={`transcript: ${sample.transcript}`} className="max-w-[300px]" + startTime={sample.startTime} + endTime={sample.endTime} /> ) )} diff --git a/screenpipe-app-tauri/components/video.tsx b/screenpipe-app-tauri/components/video.tsx index 3fcd9af80..2afcda48c 100644 --- a/screenpipe-app-tauri/components/video.tsx +++ b/screenpipe-app-tauri/components/video.tsx @@ -1,4 +1,4 @@ -import { memo, useCallback, useEffect, useState } from "react"; +import { memo, useCallback, useEffect, useState, useRef, useMemo } from "react"; import { readFile, open } from "@tauri-apps/plugin-fs"; import { platform } from "@tauri-apps/plugin-os"; import { cn } from "@/lib/utils"; @@ -7,10 +7,14 @@ export const VideoComponent = memo(function VideoComponent({ filePath, customDescription, className, + startTime, + endTime, }: { filePath: string; customDescription?: string; className?: string; + startTime?: number; + endTime?: number; }) { const [mediaSrc, setMediaSrc] = useState(null); const [error, setError] = useState(null); @@ -28,10 +32,9 @@ export const VideoComponent = memo(function VideoComponent({ }, []); const renderFileLink = () => ( - // TODO button open link -

+

{customDescription || filePath} -

+
); const getMimeType = (path: string): string => { @@ -58,17 +61,17 @@ export const VideoComponent = memo(function VideoComponent({ console.log("Loading media:", filePath); const sanitizedPath = await sanitizeFilePath(filePath); console.log("Sanitized path:", sanitizedPath); - if (!sanitizedPath) { - throw new Error("Invalid file path"); - } + const mediaData = await readFile(sanitizedPath); + const mimeType = getMimeType(sanitizedPath); + + // Set isAudio based on path check setIsAudio( sanitizedPath.toLowerCase().includes("input") || sanitizedPath.toLowerCase().includes("output") ); - const mediaData = await readFile(sanitizedPath); - const mimeType = getMimeType(sanitizedPath); + // Create blob URL directly const blob = new Blob([mediaData], { type: mimeType }); setMediaSrc(URL.createObjectURL(blob)); } catch (error) { @@ -112,12 +115,11 @@ export const VideoComponent = memo(function VideoComponent({ return (
      {isAudio ? (
-
-
-
+        <AudioPlayer startTime={startTime} endTime={endTime} mediaSrc={mediaSrc} />
      ) : (
); }); + +const AudioPlayer = memo(function AudioPlayer({ + startTime, + endTime, + mediaSrc, +}: { + startTime?: number; + endTime?: number; + mediaSrc: string; +}) { + const [duration, setDuration] = useState(0); + const [currentTime, setCurrentTime] = useState(0); + const [isPlaying, setIsPlaying] = useState(false); + const audioRef = useRef(null); + + const audioElement = useMemo( + () => ( + + ), + [mediaSrc, startTime, currentTime] + ); + + const togglePlay = async () => { + if (!audioRef.current) return; + + try { + if (isPlaying) { + audioRef.current.pause(); + } else { + await audioRef.current.play(); + } + setIsPlaying(!isPlaying); + } catch (error) { + console.error("Playback failed:", error); + setIsPlaying(false); + } + }; + + const handleTimeChange = async (e: React.ChangeEvent) => { + if (!audioRef.current) return; + + const time = parseFloat(e.target.value); + const wasPlaying = isPlaying; + + if (wasPlaying) { + audioRef.current.pause(); + } + + // Set the time directly on the audio element first + audioRef.current.currentTime = time; + // Then update the state + setCurrentTime(time); + + if (wasPlaying) { + try { + await audioRef.current.play(); + } catch (error) { + console.error("Playback failed:", error); + setIsPlaying(false); + } + } + }; + + return ( +
+
+ {startTime !== undefined && startTime > 0 && ( +
+
+ Start +
+
+ )} + {endTime !== undefined && endTime < duration - 0.1 && ( +
+
+ End +
+
+ )} + +
+
+
+
+
+ +
+ {audioElement} +
+
+ ); +}); diff --git a/screenpipe-app-tauri/lib/screenpipe.ts b/screenpipe-app-tauri/lib/screenpipe.ts index 731a5f7bf..30a92b25b 100644 --- a/screenpipe-app-tauri/lib/screenpipe.ts +++ b/screenpipe-app-tauri/lib/screenpipe.ts @@ -25,6 +25,8 @@ export type AudioContent = { device_name: string; device_type: string; speaker: Speaker; + start_time?: number; + end_time?: number; }; export type FTSContent = { diff --git a/screenpipe-app-tauri/lib/types/speaker.ts b/screenpipe-app-tauri/lib/types/speaker.ts index 7b5222c04..3d0eb12fd 100644 --- a/screenpipe-app-tauri/lib/types/speaker.ts +++ b/screenpipe-app-tauri/lib/types/speaker.ts @@ -1,6 +1,8 @@ export type AudioSample = { path: string; transcript: string; + startTime?: number; + endTime?: number; }; export interface Speaker { diff --git a/screenpipe-audio/src/stt.rs b/screenpipe-audio/src/stt.rs index a5e86076c..1c36deb0d 100644 --- a/screenpipe-audio/src/stt.rs +++ b/screenpipe-audio/src/stt.rs @@ -169,11 +169,9 @@ pub fn stt_sync( whisper_model: &mut WhisperModel, audio_transcription_engine: Arc, deepgram_api_key: Option, - output_path: &PathBuf, languages: Vec, -) -> Result<(String, String)> { +) -> Result { let mut whisper_model = whisper_model.clone(); - let output_path = output_path.clone(); let audio = audio.to_vec(); let device = device.to_string(); @@ -187,8 +185,6 @@ pub fn stt_sync( &mut whisper_model, audio_transcription_engine, deepgram_api_key, - &output_path, - false, languages, )) }); @@ -301,22 +297,13 @@ fn parse_time_tokens(start: &str, end: &str, min_time: &mut f32, max_time: &mut } pub async fn prepare_segments( - audio_input: &AudioInput, + audio_data: &[f32], vad_engine: Arc>>, segmentation_model_path: &PathBuf, embedding_manager: EmbeddingManager, embedding_extractor: Arc>, + device: &str, ) -> Result> { - let audio_data = if audio_input.sample_rate != m::SAMPLE_RATE as u32 { - resample( - audio_input.data.as_ref(), - audio_input.sample_rate, - m::SAMPLE_RATE as u32, - )? - } else { - audio_input.data.as_ref().to_vec() - }; - let audio_data = normalize_v2(&audio_data); let frame_size = 1600; @@ -352,7 +339,7 @@ pub async fn prepare_segments( info!( "device: {}, speech ratio: {}, min_speech_ratio: {}, audio_frames: {}, speech_frames: {}", - audio_input.device, + device, speech_ratio, min_speech_ratio, audio_frames.len(), @@ -387,10 +374,8 @@ pub async fn stt( whisper_model: &mut WhisperModel, audio_transcription_engine: Arc, deepgram_api_key: Option, - output_path: &PathBuf, - skip_encoding: bool, languages: Vec, -) -> Result<(String, String)> { +) -> Result { let model = &whisper_model.model; debug!("Loading mel filters"); @@ -426,25 +411,7 @@ pub async fn stt( process_with_whisper(&mut *whisper_model, audio, &mel_filters, languages) }; - let new_file_name = Utc::now().format("%Y-%m-%d_%H-%M-%S").to_string(); - let sanitized_device_name = device.replace(['/', '\\'], "_"); - let file_path = PathBuf::from(output_path) - .join(format!("{}_{}.mp4", sanitized_device_name, new_file_name)) - .to_str() - .expect("Failed to create valid path") - .to_string(); - let file_path_clone = file_path.clone(); - // Run FFmpeg in a separate task - if !skip_encoding { - encode_single_audio( - bytemuck::cast_slice(audio), - sample_rate, - 1, - &file_path.into(), - )?; - } - - Ok((transcription?, file_path_clone)) + Ok(transcription?) 
} pub fn resample(input: &[f32], from_sample_rate: u32, to_sample_rate: u32) -> Result> { @@ -488,6 +455,8 @@ pub struct TranscriptionResult { pub transcription: Option, pub timestamp: u64, pub error: Option, + pub start_time: f64, + pub end_time: f64, } impl TranscriptionResult { @@ -572,14 +541,33 @@ pub async fn create_whisper_channel( crossbeam::select! { recv(input_receiver) -> input_result => { match input_result { - Ok(audio) => { + Ok(mut audio) => { debug!("Received input from input_receiver"); let timestamp = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("Time went backwards") .as_secs(); - let mut segments = match prepare_segments(&audio, vad_engine.clone(), &segmentation_model_path, embedding_manager.clone(), embedding_extractor.clone()).await { + let audio_data = if audio.sample_rate != m::SAMPLE_RATE as u32 { + match resample( + audio.data.as_ref(), + audio.sample_rate, + m::SAMPLE_RATE as u32, + ) { + Ok(data) => data, + Err(e) => { + error!("Error resampling audio: {:?}", e); + continue; + } + } + } else { + audio.data.as_ref().to_vec() + }; + + audio.data = Arc::new(audio_data.clone()); + audio.sample_rate = m::SAMPLE_RATE as u32; + + let mut segments = match prepare_segments(&audio_data, vad_engine.clone(), &segmentation_model_path, embedding_manager.clone(), embedding_extractor.clone(), &audio.device.to_string()).await { Ok(segments) => segments, Err(e) => { error!("Error preparing segments: {:?}", e); @@ -587,14 +575,30 @@ pub async fn create_whisper_channel( } }; + + let path = match write_audio_to_file( + &audio.data.to_vec(), + audio.sample_rate, + &output_path, + &audio.device.to_string(), + false, + ) { + Ok(file_path) => file_path, + Err(e) => { + error!("Error writing audio to file: {:?}", e); + "".to_string() + } + }; + while let Some(segment) = segments.recv().await { + let path = path.clone(); let transcription_result = if cfg!(target_os = "macos") { let timestamp = timestamp + segment.start.round() as u64; #[cfg(target_os = "macos")] { autoreleasepool(|| { - match stt_sync(&segment.samples, segment.sample_rate, &audio.device.to_string(), &mut whisper_model, audio_transcription_engine.clone(), deepgram_api_key.clone(), &output_path, languages.clone()) { - Ok((transcription, path)) => TranscriptionResult { + match stt_sync(&segment.samples, segment.sample_rate, &audio.device.to_string(), &mut whisper_model, audio_transcription_engine.clone(), deepgram_api_key.clone(), languages.clone()) { + Ok(transcription) => TranscriptionResult { input: AudioInput { data: Arc::new(segment.samples), sample_rate: segment.sample_rate, @@ -606,6 +610,8 @@ pub async fn create_whisper_channel( timestamp, error: None, speaker_embedding: segment.embedding.clone(), + start_time: segment.start, + end_time: segment.end, }, Err(e) => { error!("STT error for input {}: {:?}", audio.device, e); @@ -617,10 +623,12 @@ pub async fn create_whisper_channel( device: audio.device.clone(), }, transcription: None, - path: "".to_string(), + path, timestamp, error: Some(e.to_string()), speaker_embedding: Vec::new(), + start_time: segment.start, + end_time: segment.end, } }, } @@ -631,8 +639,8 @@ pub async fn create_whisper_channel( unreachable!("This code should not be reached on non-macOS platforms") } } else { - match stt_sync(&segment.samples, segment.sample_rate, &audio.device.to_string(), &mut whisper_model, audio_transcription_engine.clone(), deepgram_api_key.clone(), &output_path, languages.clone()) { - Ok((transcription, path)) => TranscriptionResult { + match 
stt_sync(&segment.samples, segment.sample_rate, &audio.device.to_string(), &mut whisper_model, audio_transcription_engine.clone(), deepgram_api_key.clone(), languages.clone()) { + Ok(transcription) => TranscriptionResult { input: AudioInput { data: Arc::new(segment.samples), sample_rate: segment.sample_rate, @@ -644,6 +652,8 @@ pub async fn create_whisper_channel( timestamp, error: None, speaker_embedding: segment.embedding.clone(), + start_time: segment.start, + end_time: segment.end, }, Err(e) => { error!("STT error for input {}: {:?}", audio.device, e); @@ -655,10 +665,12 @@ pub async fn create_whisper_channel( device: audio.device.clone(), }, transcription: None, - path: "".to_string(), + path, timestamp, error: Some(e.to_string()), speaker_embedding: Vec::new(), + start_time: segment.start, + end_time: segment.end, } }, } @@ -723,3 +735,30 @@ pub fn longest_common_word_substring(s1: &str, s2: &str) -> Option<(usize, usize _ => None, } } + +pub fn write_audio_to_file( + audio: &[f32], + sample_rate: u32, + output_path: &PathBuf, + device: &str, + skip_encoding: bool, +) -> Result { + let new_file_name = Utc::now().format("%Y-%m-%d_%H-%M-%S").to_string(); + let sanitized_device_name = device.replace(['/', '\\'], "_"); + let file_path = PathBuf::from(output_path) + .join(format!("{}_{}.mp4", sanitized_device_name, new_file_name)) + .to_str() + .expect("Failed to create valid path") + .to_string(); + let file_path_clone = file_path.clone(); + // Run FFmpeg in a separate task + if !skip_encoding { + encode_single_audio( + bytemuck::cast_slice(audio), + sample_rate, + 1, + &file_path.into(), + )?; + } + Ok(file_path_clone) +} diff --git a/screenpipe-server/src/core.rs b/screenpipe-server/src/core.rs index 24adfcbcc..7710548e8 100644 --- a/screenpipe-server/src/core.rs +++ b/screenpipe-server/src/core.rs @@ -448,7 +448,7 @@ async fn process_audio_result( } } } - match db.insert_audio_chunk(&result.path).await { + match db.get_or_insert_audio_chunk(&result.path).await { Ok(audio_chunk_id) => { if transcription.is_empty() { return Ok(Some(audio_chunk_id)); @@ -462,6 +462,8 @@ async fn process_audio_result( &transcription_engine, &result.input.device, Some(speaker.id), + Some(result.start_time), + Some(result.end_time), ) .await { diff --git a/screenpipe-server/src/db.rs b/screenpipe-server/src/db.rs index ea662d826..ea912a586 100644 --- a/screenpipe-server/src/db.rs +++ b/screenpipe-server/src/db.rs @@ -21,7 +21,7 @@ use tokio::time::{timeout, Duration as TokioDuration}; use zerocopy::AsBytes; use crate::db_types::{ - AudioChunk, AudioEntry, AudioResult, AudioResultRaw, FrameData, OCREntry, OCRResult, + AudioChunksResponse, AudioEntry, AudioResult, AudioResultRaw, FrameData, OCREntry, OCRResult, OCRResultRaw, Speaker, TagContentType, }; use crate::db_types::{ContentType, UiContent}; @@ -111,6 +111,22 @@ impl DatabaseManager { Ok(id) } + async fn get_audio_chunk_id(&self, file_path: &str) -> Result { + let id = sqlx::query_scalar::<_, i64>("SELECT id FROM audio_chunks WHERE file_path = ?1") + .bind(file_path) + .fetch_optional(&self.pool) + .await?; + Ok(id.unwrap_or(0)) + } + + pub async fn get_or_insert_audio_chunk(&self, file_path: &str) -> Result { + let mut id = self.get_audio_chunk_id(file_path).await?; + if id == 0 { + id = self.insert_audio_chunk(file_path).await?; + } + Ok(id) + } + pub async fn insert_audio_transcription( &self, audio_chunk_id: i64, @@ -119,12 +135,14 @@ impl DatabaseManager { transcription_engine: &str, device: &AudioDevice, speaker_id: Option, + start_time: Option, 
+ end_time: Option, ) -> Result { let mut tx = self.pool.begin().await?; // Insert the full transcription let id = sqlx::query( - "INSERT INTO audio_transcriptions (audio_chunk_id, transcription, offset_index, timestamp, transcription_engine, device, is_input_device, speaker_id) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)", + "INSERT INTO audio_transcriptions (audio_chunk_id, transcription, offset_index, timestamp, transcription_engine, device, is_input_device, speaker_id, start_time, end_time) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)", ) .bind(audio_chunk_id) .bind(transcription) @@ -134,6 +152,8 @@ impl DatabaseManager { .bind(&device.name) .bind(device.device_type == DeviceType::Input) .bind(speaker_id) + .bind(start_time) + .bind(end_time) .execute(&mut *tx) .await? .last_insert_rowid(); @@ -814,7 +834,9 @@ impl DatabaseManager { GROUP_CONCAT(tags.name, ',') as tags, audio_transcriptions.device as device_name, audio_transcriptions.is_input_device, - audio_transcriptions.speaker_id + audio_transcriptions.speaker_id, + audio_transcriptions.start_time, + audio_transcriptions.end_time FROM {} JOIN audio_chunks ON audio_transcriptions.audio_chunk_id = audio_chunks.id LEFT JOIN speakers on audio_transcriptions.speaker_id = speakers.id @@ -873,6 +895,8 @@ impl DatabaseManager { DeviceType::Output }, speaker, + start_time: raw.start_time, + end_time: raw.end_time, }) }); @@ -944,7 +968,7 @@ impl DatabaseManager { ContentType::Audio => { format!( r#" - SELECT COUNT(DISTINCT audio_transcriptions.audio_chunk_id) + SELECT COUNT(DISTINCT audio_transcriptions.audio_chunk_id || '_' || COALESCE(audio_transcriptions.start_time, '') || '_' || COALESCE(audio_transcriptions.end_time, '')) FROM audio_transcriptions_fts JOIN audio_transcriptions ON audio_transcriptions_fts.audio_chunk_id = audio_transcriptions.audio_chunk_id WHERE {} @@ -1527,13 +1551,23 @@ impl DatabaseManager { pub async fn get_audio_chunks_for_speaker( &self, speaker_id: i64, - ) -> Result, sqlx::Error> { - sqlx::query_as::<_, AudioChunk>( - "SELECT * FROM audio_chunks WHERE id IN (SELECT audio_chunk_id FROM audio_transcriptions WHERE speaker_id = ?)", + ) -> Result, sqlx::Error> { + sqlx::query_as::<_, AudioChunksResponse>( + r#" + SELECT + ac.*, + at.start_time, + at.end_time, + ac.file_path + FROM audio_chunks ac + JOIN audio_transcriptions at ON ac.id = at.audio_chunk_id + WHERE at.speaker_id = ? 
+ ORDER BY at.start_time + "#, ) - .bind(speaker_id) - .fetch_all(&self.pool) - .await + .bind(speaker_id) + .fetch_all(&self.pool) + .await } // get unnamed speakers @@ -1548,7 +1582,9 @@ impl DatabaseManager { SELECT DISTINCT s.id as speaker_id, ac.file_path, - at.transcription + at.transcription, + at.start_time, + at.end_time FROM speakers s JOIN audio_transcriptions at ON s.id = at.speaker_id JOIN audio_chunks ac ON at.audio_chunk_id = ac.id @@ -1583,7 +1619,9 @@ impl DatabaseManager { THEN json_object('audio_samples', json_group_array( DISTINCT json_object( 'path', rap.file_path, - 'transcript', rap.transcription + 'transcript', rap.transcription, + 'start_time', rap.start_time, + 'end_time', rap.end_time ) )) ELSE json_patch( @@ -1591,7 +1629,9 @@ impl DatabaseManager { json_object('audio_samples', json_group_array( DISTINCT json_object( 'path', rap.file_path, - 'transcript', rap.transcription + 'transcript', rap.transcription, + 'start_time', rap.start_time, + 'end_time', rap.end_time ) )) ) @@ -1674,7 +1714,7 @@ impl DatabaseManager { "audio transcriptions", ), ( - "DELETE FROM audio_chunks WHERE id IN (SELECT audio_chunk_id FROM audio_transcriptions WHERE speaker_id = ?)", + "DELETE FROM audio_chunks WHERE id IN (SELECT audio_chunk_id FROM audio_transcriptions WHERE speaker_id = ? AND start_time IS NULL)", "audio chunks", ), ( @@ -1719,7 +1759,9 @@ impl DatabaseManager { SELECT DISTINCT s.id as speaker_id, ac.file_path, - at.transcription + at.transcription, + at.start_time, + at.end_time FROM speakers s JOIN audio_transcriptions at ON s.id = at.speaker_id JOIN audio_chunks ac ON at.audio_chunk_id = ac.id @@ -1742,13 +1784,17 @@ impl DatabaseManager { WHEN s.metadata = '' OR s.metadata IS NULL OR json_valid(s.metadata) = 0 THEN json_object('audio_samples', json_group_array(DISTINCT json_object( 'path', rap.file_path, - 'transcript', rap.transcription + 'transcript', rap.transcription, + 'start_time', rap.start_time, + 'end_time', rap.end_time ))) ELSE json_patch( json(s.metadata), json_object('audio_samples', json_group_array(DISTINCT json_object( 'path', rap.file_path, - 'transcript', rap.transcription + 'transcript', rap.transcription, + 'start_time', rap.start_time, + 'end_time', rap.end_time ))) ) END as metadata diff --git a/screenpipe-server/src/db_types.rs b/screenpipe-server/src/db_types.rs index 5459ce4c4..f3b3b0b1d 100644 --- a/screenpipe-server/src/db_types.rs +++ b/screenpipe-server/src/db_types.rs @@ -82,6 +82,8 @@ pub struct AudioResultRaw { pub device_name: String, pub is_input_device: bool, pub speaker_id: Option, + pub start_time: Option, + pub end_time: Option, } #[derive(Debug, Serialize, Deserialize, FromRow, Clone)] @@ -103,6 +105,8 @@ pub struct AudioResult { pub device_name: String, pub device_type: DeviceType, pub speaker: Option, + pub start_time: Option, + pub end_time: Option, } #[derive(Debug, Deserialize, PartialEq)] @@ -181,3 +185,12 @@ pub struct AudioChunk { pub file_path: String, pub timestamp: DateTime, } + +#[derive(Debug, FromRow)] +pub struct AudioChunksResponse { + pub audio_chunk_id: i64, + pub start_time: Option, + pub end_time: Option, + pub file_path: String, + pub timestamp: DateTime, +} diff --git a/screenpipe-server/src/migrations/20241213220649_create_segment_start_time_and_end_time_columns.sql b/screenpipe-server/src/migrations/20241213220649_create_segment_start_time_and_end_time_columns.sql new file mode 100644 index 000000000..296b99ea6 --- /dev/null +++ 
b/screenpipe-server/src/migrations/20241213220649_create_segment_start_time_and_end_time_columns.sql @@ -0,0 +1,67 @@ +-- Add migration script here +ALTER TABLE audio_transcriptions ADD COLUMN start_time REAL; +ALTER TABLE audio_transcriptions ADD COLUMN end_time REAL; + +PRAGMA foreign_keys = OFF; + +-- Drop existing triggers and FTS tables +DROP TRIGGER IF EXISTS audio_transcriptions_ai; +DROP TRIGGER IF EXISTS audio_transcriptions_update; +DROP TRIGGER IF EXISTS audio_transcriptions_delete; +DROP TABLE IF EXISTS audio_transcriptions_fts; + +CREATE VIRTUAL TABLE IF NOT EXISTS audio_transcriptions_fts USING fts5( + transcription, + device, + audio_chunk_id UNINDEXED, + speaker_id, + start_time UNINDEXED, + end_time UNINDEXED, + tokenize='unicode61' +); + +INSERT OR IGNORE INTO audio_transcriptions_fts(transcription, device, audio_chunk_id, speaker_id, start_time, end_time) +SELECT + COALESCE(transcription, '') as transcription, + COALESCE(device, '') as device, + audio_chunk_id, + speaker_id, + start_time, + end_time +FROM audio_transcriptions +WHERE transcription IS NOT NULL + AND transcription != '' + AND audio_chunk_id IS NOT NULL; + +CREATE TRIGGER IF NOT EXISTS audio_transcriptions_ai AFTER INSERT ON audio_transcriptions +WHEN NEW.transcription IS NOT NULL AND NEW.transcription != '' AND NEW.audio_chunk_id IS NOT NULL +BEGIN + INSERT OR IGNORE INTO audio_transcriptions_fts(transcription, device, audio_chunk_id, speaker_id, start_time, end_time) + VALUES ( + NEW.transcription, + COALESCE(NEW.device, ''), + NEW.audio_chunk_id, + NEW.speaker_id, + NEW.start_time, + NEW.end_time + ); +END; + +CREATE TRIGGER IF NOT EXISTS audio_transcriptions_update AFTER UPDATE ON audio_transcriptions +WHEN NEW.transcription IS NOT NULL AND NEW.transcription != '' AND OLD.audio_chunk_id IS NOT NULL +BEGIN + UPDATE audio_transcriptions_fts + SET transcription = NEW.transcription, + device = COALESCE(NEW.device, ''), + start_time = NEW.start_time, + end_time = NEW.end_time + WHERE audio_chunk_id = OLD.audio_chunk_id; +END; + +CREATE TRIGGER IF NOT EXISTS audio_transcriptions_delete AFTER DELETE ON audio_transcriptions +BEGIN + DELETE FROM audio_transcriptions_fts + WHERE audio_chunk_id = OLD.audio_chunk_id; +END; + +PRAGMA foreign_keys = ON; \ No newline at end of file diff --git a/screenpipe-server/src/server.rs b/screenpipe-server/src/server.rs index df197e3ed..b00eceda6 100644 --- a/screenpipe-server/src/server.rs +++ b/screenpipe-server/src/server.rs @@ -185,6 +185,8 @@ pub struct AudioContent { pub device_name: String, pub device_type: DeviceType, pub speaker: Option, + pub start_time: Option, + pub end_time: Option, } #[derive(Serialize, Deserialize, Debug)] @@ -338,6 +340,8 @@ pub(crate) async fn search( device_name: audio.device_name.clone(), device_type: audio.device_type.clone(), speaker: audio.speaker.clone(), + start_time: audio.start_time, + end_time: audio.end_time, }), SearchResult::UI(ui) => ContentItem::UI(UiContent { id: ui.id, @@ -521,7 +525,7 @@ pub async fn health_check(State(state): State>) -> JsonResponse) { "test_engine", &AudioDevice::new("test".to_string(), DeviceType::Output), None, + None, + None, ) .await .unwrap(); diff --git a/screenpipe-vision/bin/ui_monitor-aarch64-apple-darwin b/screenpipe-vision/bin/ui_monitor-aarch64-apple-darwin index 1546bce73..27dc6d78e 100755 Binary files a/screenpipe-vision/bin/ui_monitor-aarch64-apple-darwin and b/screenpipe-vision/bin/ui_monitor-aarch64-apple-darwin differ diff --git a/screenpipe-vision/lib/libscreenpipe_arm64.dylib 
b/screenpipe-vision/lib/libscreenpipe_arm64.dylib index 817087c34..d936a8fe0 100755 Binary files a/screenpipe-vision/lib/libscreenpipe_arm64.dylib and b/screenpipe-vision/lib/libscreenpipe_arm64.dylib differ diff --git a/screenpipe-vision/src/core.rs b/screenpipe-vision/src/core.rs index 3dea68919..b0c46743a 100644 --- a/screenpipe-vision/src/core.rs +++ b/screenpipe-vision/src/core.rs @@ -296,3 +296,4 @@ pub fn trigger_screen_capture_permission() -> Result<()> { Ok(()) } + \ No newline at end of file