Merge pull request #96 from engineervix/feat/audio-enhancements

Audio Enhancements
engineervix · Jul 20, 2024 · 8512817 · 8512817
2 parents fd122b4 + 4ac75ac
commit 8512817
Show file tree

Hide file tree

Showing 14 changed files with 105 additions and 114 deletions.
diff --git a/README.md b/README.md
@@ -233,33 +233,28 @@ See `pre-commit-config.yaml` for more details. In addition, please note the foll
 
 ### Features for future releases
 
-- [ ] Add [Diamond TV](https://diamondtvzambia.com) as a news source. Might be a good idea to replace Muvi TV with Diamond TV because the latter seems to have infrequent updates. Also, we don't want too many news items -- it kills the whole point of this project -- to get the latest updates delivered in a _concise_ manner.
+- [x] Clean up the news by consolidating similar articles from different sources. In other words, let's make this [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself).
 - [x] Connect with social media platforms and automagically tweet and post to Facebook when a new episode is out.
-- [ ] Incorporate a newsletter version where the news is sent to your mailbox in a nice, clean format. People can subscribe / unsubscribe.
+- [x] Keep the background music running throughout the show
+- [x] Different background music for each day of the week
 - [ ] Mention the weather in Lusaka, Livingstone, Kabwe, etc. Perhaps the weather forecast for the following day?
 - [ ] Mention exchange rates
-- [x] Cleanup the news by consolidating similar articles from different sources. In other words, let's make this [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself).
-- [ ] Find a way of training the voice to learn how to pronounce Zambian words.
-- [ ] Find a way to summarize for free, without relying on OpenAI's API. Perhaps train your own model, learn how to leverage tools like [NLTK](https://www.nltk.org/), [spaCy](https://spacy.io/), etc.
 - [ ] Find a way to make a closing statement based on the news. Something like, "Don't forget to register your sim card before the ZICTA deadline ..."
-- [ ] Keep the background music running throughout the show
-- [ ] Different background music for each day of the week
 - [ ] Possibly allow for passing of an argument variable for the voice, or dynamically choose a voice from a list, just like the random intros and outros.
+- [ ] Find a way of training the voice to learn how to pronounce Zambian words.
+- [ ] Find a way to summarize for free, without relying on OpenAI's API. Perhaps train your own model, learn how to leverage tools like [NLTK](https://www.nltk.org/), [spaCy](https://spacy.io/), etc.
+- [ ] Incorporate a newsletter version where the news is sent to your mailbox in a nice, clean format. People can subscribe / unsubscribe.
+- [ ] Add [Diamond TV](https://diamondtvzambia.com) as a news source. Might be a good idea to replace Muvi TV with Diamond TV because the latter seems to have infrequent updates. Also, we don't want too many news items -- it kills the whole point of this project -- to get the latest updates delivered in a _concise_ manner.
 
 ## Credits
 
 ### Music
 
-- opening: <https://pixabay.com/music/future-bass-extreme-energy-burst-short-version-2-197550/>
-- closing: <https://pixabay.com/music/future-bass-fashion-future-bass-143924/>
-
-> **Note**
->
-> These audio files have the [**gain**](https://www.antarestech.com/community/whats-the-difference-between-gain-and-volume) reduced by -20dB, like this:
->
-> ```bash
-> ffmpeg -i intro.src.mp3 -af "volume=-20dB" intro.mp3
-> ```
+- <https://pixabay.com/music/beats-sweet-breeze-167504/>
+- <https://pixabay.com/music/beats-aesthetic-beat-royalty-free-music-215851/>
+- <https://pixabay.com/music/beats-digital-technology-131644/>
+- <https://pixabay.com/music/beats-stellar-echoes-202315/>
+- <https://pixabay.com/music/afrobeat-it-afrobeat-149308/>
 
 ### Icon
 

diff --git a/app/core/podcast/content.py b/app/core/podcast/content.py
@@ -6,12 +6,12 @@
 # import time
 from typing import Callable
 
-# import replicate
-from together import Together
-
 # from langchain.llms import OpenAI
 from pydantic import HttpUrl
 
+# import replicate
+from together import Together
+
 from app.core.db.models import Article, Episode
 from app.core.summarization.backends.together import brief_summary
 from app.core.utilities import (
@@ -86,7 +86,7 @@ def create_transcript(news: list[dict[str, str]], dest: str, summarizer: Callabl
     if today_iso_fmt == "2024-07-01":
         prompt = f"You are {podcast_host}, a lively and funny scriptwriter, content creator, and the host of the Zed News Podcast, which runs Monday to Friday. Today is {today_human_readable}, and you're gearing up for episode number {get_episode_number()} after a five-week holiday. Your task is to present the day's news in a conversational tone, covering everything logically and coherently without repetition. Consolidate information from different sources if needed. At the end of the podcast, leave your audience with a witty anecdote to end on a high note. Remember to cover all the news items from the sources provided below, but without repeating any content. Don't worry about sound effects, music, or captions – just speak directly, naturally and engagingly as if you're live on air. Start by sharing a few highlights from your holiday and express your genuine excitement to reconnect with your loyal listeners. Thank them for their patience during your absence.\n\n"
     else:
-        prompt = f"You are {podcast_host}, host of the Zed News Podcast, which runs Monday to Friday. Today is {today_human_readable}, and your task is to produce the text for episode number {get_episode_number()} based on the news items below. Your text will be read as-is by a text-to-speech engine who will take on your persona. This means that you should not add any captions or placeholders for sound effects, music, etc -- all we want is just natural, plain text. Ensure that you cover all items and avoid repetion. Feel free to add constructive comments and jokes where necessary and appropriate. Your persona should reflect a positive tone. End the episode by personally reflecting on the news and any implications for the future, with a challenge for the listener.\n\n"
+        prompt = f"You are {podcast_host}, host of the Zed News Podcast, which runs Monday to Friday. Today is {today_human_readable}, and your task is to produce the text for episode number {get_episode_number()} based on the news items below. Your text will be read as-is by a text-to-speech engine who will take on your persona. This means that you should not add any captions or placeholders for sound effects, music, etc -- all we want is just natural, plain text. Ensure that you cover all items and avoid repetion. Feel free to add constructive comments and jokes where necessary and appropriate. Your persona should reflect a positive tone. End the episode by personally reflecting on the news and any implications for the future.\n\n"
 
     metadata = f"Title: Zed News Podcast episode {get_episode_number()}\nDate: {today_human_readable}\nHost: {podcast_host}\n\n"
 

diff --git a/app/core/podcast/eleventify.py b/app/core/podcast/eleventify.py
@@ -3,9 +3,9 @@
 from datetime import datetime, timedelta, timezone
 
 import pytz
-from together import Together
 from babel import Locale
 from jinja2 import Environment, PackageLoader, select_autoescape
+from together import Together
 
 # from num2words import num2words
 from app.core.db.models import Article, Episode, Mp3
@@ -34,7 +34,7 @@
 TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
 client = Together(api_key=TOGETHER_API_KEY)
 
-news_headlines = f"{DATA_DIR}/{today_iso_fmt}_news_headlines.txt"
+transcript = f"{DATA_DIR}/{today_iso_fmt}_podcast-content.txt"
 
 logger = logging.getLogger(__name__)
 
@@ -47,13 +47,14 @@ def create_episode_summary(content: str, episode: str) -> str:
     https://docs.together.ai/reference/complete
     """
 
-    prompt = f"Given the details of today's episode below, write a very brief summary to use as a description for the media file. Your summary should be a single paragraph, not exceeding 2 sentences.\n\n```\n{content}\n```"
+    prompt = f"Given the transcript of today's episode below, write a very brief summary to use as a description for the media file. Your summary should be a single paragraph, not exceeding 2 sentences.\n\n```\n{content}\n```"
 
     # model = "lmsys/vicuna-13b-v1.5-16k"
     # model = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"
     # model = "openchat/openchat-3.5-1210"
-    model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-    temperature = 0.7
+    # model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+    model = "mistralai/Mixtral-8x22B-Instruct-v0.1"
+    temperature = 0.75
     max_tokens = 512
 
     response = client.completions.create(
@@ -86,7 +87,7 @@ def create_episode_summary(content: str, episode: str) -> str:
 
 def get_content() -> str:
     """Get the headlines"""
-    with open(news_headlines, "r") as f:
+    with open(transcript, "r") as f:
         return f.read()
 
 

diff --git a/app/core/podcast/mix.py b/app/core/podcast/mix.py
@@ -48,16 +48,14 @@ def extract_duration_in_milliseconds(output: str) -> int:
         return 0
 
 
-def mix_audio(voice_track, intro_track, outro_track, dest=f"{DATA_DIR}/{today_iso_fmt}_podcast_dist.mp3"):
+def mix_audio(voice_track, music_track, dest=f"{DATA_DIR}/{today_iso_fmt}_podcast_dist.mp3"):
     """
-    Mix the voice track, intro track, and outro track into a single audio file
+    Mix the voice track and music track into a single audio file
     """
 
     voice_track_file_name = os.path.splitext(voice_track)[0]
     mix_44100 = f"{voice_track_file_name}.44.1kHz.mp3"
     voice_track_in_stereo = f"{voice_track_file_name}.stereo.mp3"
-    eq_mix = f"{voice_track_file_name}.eq-mix.mp3"
-    initial_mix = f"{voice_track_file_name}.mix-01.mp3"
 
     # change the voice track sample rate to 44.1 kHz
     subprocess.run(
@@ -71,64 +69,48 @@ def mix_audio(voice_track, intro_track, outro_track, dest=f"{DATA_DIR}/{today_is
         shell=True,
     )
 
-    # adjust the treble (high-frequency).
-    # The g=3 parameter specifies the gain in decibels (dB) to be applied to the treble frequencies.
+    # mix the voice track and music track
+    # we bump the volume on the voice track by 12dB
+    # and reduce it on the music track by 11dB
     subprocess.run(
-        f'ffmpeg -i {voice_track_in_stereo} -af "treble=g=3" {eq_mix}',
-        shell=True,
+        [
+            "ffmpeg",
+            "-i",
+            voice_track_in_stereo,
+            "-i",
+            music_track,
+            "-filter_complex",
+            "[0:a]volume=12dB[voice]; [1:a]volume=-11dB,aloop=loop=-1:size=2e+09[instrumental]; [voice][instrumental]amix=inputs=2:duration=first:dropout_transition=3[out]",
+            "-map",
+            "[out]",
+            "-c:a",
+            "libmp3lame",
+            "-b:a",
+            "128k",
+            dest,
+        ]
     )
 
-    # initial mix: the intro + voice track
-    subprocess.run(
-        f'ffmpeg -i {eq_mix} -i {intro_track} -filter_complex amix=inputs=2:duration=longest:dropout_transition=0:weights="1 0.25":normalize=0 {initial_mix}',
-        shell=True,
-    )
-
-    # get duration of the initial mix
-    command_1 = f'ffmpeg -i {initial_mix} 2>&1 | grep "Duration"'
-    output_1 = run_ffmpeg_command(command_1)
-    duration_1 = extract_duration_in_milliseconds(output_1)
-
-    # get duration of the outro instrumental
-    command_2 = f'ffmpeg -i {outro_track} 2>&1 | grep "Duration"'
-    output_2 = run_ffmpeg_command(command_2)
-    duration_2 = extract_duration_in_milliseconds(output_2)
-
-    # pad the outro instrumental with silence, using initial mix duration and
-    # the outro instrumental's duration
-    # adelay = (duration of initial mix - outro instrumental duration) in milliseconds
-    if duration_1 != 0 and duration_2 != 0:
-        padded_outro = f"{voice_track_file_name}.mix-02.mp3"
-
-        adelay = duration_1 - duration_2
-        subprocess.run(f'ffmpeg -i {outro_track} -af "adelay={adelay}|{adelay}" {padded_outro}', shell=True)
-
-        # final mix: the initial mix + the padded outro
-        subprocess.run(
-            f'ffmpeg -i {initial_mix} -i {padded_outro} -filter_complex amix=inputs=2:duration=longest:dropout_transition=0:weights="1 0.25":normalize=0 {dest}',
-            shell=True,
-        )
-
-        # add Id3 tags
-        episode = get_episode_number()
-        audio_file = dest
-        tag = eyed3.load(audio_file).tag
-        tag.artist = "Victor Miti"
-        tag.album = "Zed News"
-        tag.title = f"Zed News Podcast, Episode {episode:03} ({today_human_readable})"
-        tag.track_num = episode
-        tag.release_date = eyed3.core.Date(today.year, today.month, today.day)
-        tag.genre = "Podcast"
-        album_art_file = f"{IMAGE_DIR}/album-art.jpg"
-        with open(album_art_file, "rb") as cover_art:
-            # The value 3 indicates that the front cover shall be set
-            # # https://eyed3.readthedocs.io/en/latest/eyed3.id3.html#eyed3.id3.frames.ImageFrame
-            tag.images.set(3, cover_art.read(), "image/jpeg")
-        tag.save()
-
-        # Clean up
-        for f in [voice_track_in_stereo, mix_44100, eq_mix, initial_mix, padded_outro]:
-            delete_file(f)
+    # add Id3 tags
+    episode = get_episode_number()
+    audio_file = dest
+    tag = eyed3.load(audio_file).tag
+    tag.artist = "Victor Miti"
+    tag.album = "Zed News"
+    tag.title = f"Zed News Podcast, Episode {episode:03} ({today_human_readable})"
+    tag.track_num = episode
+    tag.release_date = eyed3.core.Date(today.year, today.month, today.day)
+    tag.genre = "Podcast"
+    album_art_file = f"{IMAGE_DIR}/album-art.jpg"
+    with open(album_art_file, "rb") as cover_art:
+        # The value 3 indicates that the front cover shall be set
+        # # https://eyed3.readthedocs.io/en/latest/eyed3.id3.html#eyed3.id3.frames.ImageFrame
+        tag.images.set(3, cover_art.read(), "image/jpeg")
+    tag.save()
+
+    # Clean up
+    for f in [voice_track_in_stereo, mix_44100]:
+        delete_file(f)
 
 
 def upload_to_s3(src: FilePath, dest_folder: str, dest_filename: str):

diff --git a/app/core/podcast/voice.py b/app/core/podcast/voice.py
@@ -28,9 +28,11 @@ def create_audio(transcript: FilePath, podcast_host: str = podcast_host, lingo:
     3. download the audio file
     """
 
-    content = transcript
-    with open(content, "r") as f:
-        podcast_content = f.read()
+    with open(transcript, "r") as f:
+        content = f.read()
+        # Add dynamic range compression
+        # https://docs.aws.amazon.com/polly/latest/dg/supportedtags.html#drc-tag
+        podcast_content = f'<speak><amazon:effect name="drc">{content}</amazon:effect></speak>'
 
     # Create a Polly client
     polly = boto3.client(
@@ -48,6 +50,7 @@ def create_audio(transcript: FilePath, podcast_host: str = podcast_host, lingo:
         LanguageCode=lingo,
         VoiceId=podcast_host,
         Text=podcast_content,
+        TextType="ssml",
         OutputS3BucketName=AWS_BUCKET_NAME,
         OutputS3KeyPrefix=f"{s3_podcast_dir}/{today_iso_fmt}-raw",
         OutputFormat="mp3",

diff --git a/app/core/run.py b/app/core/run.py
@@ -2,6 +2,7 @@
 Toolchain for fetching news content and processing it into a podcast.
 """
 
+import calendar
 import json
 import logging
 import subprocess
@@ -17,7 +18,7 @@
 from app.core.podcast.mix import add_to_db, mix_audio, upload_to_s3
 from app.core.podcast.voice import create_audio, delete_source_mp3
 from app.core.summarization.backends import together as together_backend
-from app.core.utilities import DATA_DIR, configure_logging, count_words, today_iso_fmt
+from app.core.utilities import DATA_DIR, configure_logging, count_words, today, today_iso_fmt
 
 raw_news = f"{DATA_DIR}/{today_iso_fmt}_news.json"
 transcript = f"{DATA_DIR}/{today_iso_fmt}_podcast-content.txt"
@@ -61,10 +62,10 @@ def main():
     output_key = create_audio(transcript)
 
     # Mix audio
+    weekday = calendar.day_name[today.weekday()].lower()
     mix_audio(
         voice_track=f"{DATA_DIR}/{today_iso_fmt}/{today_iso_fmt}.src.mp3",
-        intro_track=f"{DATA_DIR}/instrumental/intro.mp3",
-        outro_track=f"{DATA_DIR}/instrumental/outro.mp3",
+        music_track=f"{DATA_DIR}/instrumental/{weekday}.mp3",
     )
 
     # Upload podcast audio to S3

diff --git a/app/tests/test_data/.gitignore b/app/tests/test_data/.gitignore
@@ -1,5 +1,4 @@
 *
 !.gitignore
 !example.mp3
-!intro.mp3
-!outro.mp3
+!background.mp3
diff --git a/app/tests/test_data/intro.mp3 → app/tests/test_data/background.mp3 b/app/tests/test_data/intro.mp3 → app/tests/test_data/background.mp3
diff --git a/app/tests/test_data/outro.mp3 b/app/tests/test_data/outro.mp3
diff --git a/app/tests/test_eleventify.py b/app/tests/test_eleventify.py
@@ -19,13 +19,13 @@ class TestEleventify(unittest.TestCase):
     def setUp(self):
         self.temp_dir = tempfile.mkdtemp()
         self.dist_file = os.path.join(self.temp_dir, f"{today_iso_fmt}.njk")
-        self.news_headlines = os.path.join(self.temp_dir, f"{today_iso_fmt}_news_headlines.txt")
+        self.transcript = os.path.join(self.temp_dir, f"{today_iso_fmt}_transcript.txt")
 
         self.patch_dist_file = patch("app.core.podcast.eleventify.dist_file", self.dist_file)
-        self.patch_news_headlines = patch("app.core.podcast.eleventify.news_headlines", self.news_headlines)
+        self.patch_transcript = patch("app.core.podcast.eleventify.transcript", self.transcript)
 
         self.patch_dist_file.start()
-        self.patch_news_headlines.start()
+        self.patch_transcript.start()
 
         # Bind model classes to test db. Since we have a complete list of
         # all models, we do not need to recursively bind dependencies.
@@ -88,7 +88,7 @@ def tearDown(self):
         # database here. But for tests this is probably not necessary.
 
         self.patch_dist_file.stop()
-        self.patch_news_headlines.stop()
+        self.patch_transcript.stop()
 
     @patch("app.core.podcast.eleventify.get_content")
     @patch("app.core.podcast.eleventify.create_episode_summary")

diff --git a/app/tests/test_mix.py b/app/tests/test_mix.py
@@ -56,19 +56,16 @@ def setUp(self):
 
         # Create example audio files
         example_audio_path = tempfile.NamedTemporaryFile(suffix=".mp3", dir=self.temp_dir, delete=False).name
-        intro_audio_path = tempfile.NamedTemporaryFile(suffix=".mp3", dir=self.temp_dir, delete=False).name
-        outro_audio_path = tempfile.NamedTemporaryFile(suffix=".mp3", dir=self.temp_dir, delete=False).name
+        background_audio_path = tempfile.NamedTemporaryFile(suffix=".mp3", dir=self.temp_dir, delete=False).name
 
         # Copy example audio files to temporary directory
         data_dir = TEST_DIR / "test_data"
         shutil.copyfile(data_dir / "example.mp3", example_audio_path)
-        shutil.copyfile(data_dir / "intro.mp3", intro_audio_path)
-        shutil.copyfile(data_dir / "outro.mp3", outro_audio_path)
+        shutil.copyfile(data_dir / "background.mp3", background_audio_path)
 
         # Assign audio file paths for the test
         self.voice_track = example_audio_path
-        self.intro_track = intro_audio_path
-        self.outro_track = outro_audio_path
+        self.music_track = background_audio_path
         self.dest = os.path.join(self.temp_dir, f"{today_iso_fmt}_podcast_dist.mp3")
 
         # Bind model classes to test db. Since we have a complete list of
@@ -123,7 +120,7 @@ def tearDown(self):
         # database here. But for tests this is probably not necessary.
 
     def test_mix_audio(self):
-        mix_audio(self.voice_track, self.intro_track, self.outro_track, self.dest)
+        mix_audio(self.voice_track, self.music_track, self.dest)
 
         # Assert that the output file exists
         self.assertTrue(os.path.exists(self.dest))

diff --git a/app/tests/test_voice.py b/app/tests/test_voice.py
@@ -65,7 +65,8 @@ def test_create_audio(self, mock_mkdir, mock_boto3_client):
             Engine=engine,
             LanguageCode=lingo,
             VoiceId=podcast_host,
-            Text="This is some content",
+            Text='<speak><amazon:effect name="drc">This is some content</amazon:effect></speak>',
+            TextType="ssml",
             OutputS3BucketName=AWS_BUCKET_NAME,
             OutputS3KeyPrefix=f"zed-news/{today_iso_fmt}-raw",
             OutputFormat="mp3",

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,5 +1,3 @@
-version: "3.8"
-
 services:
   app:
     build: