diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..2525202 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.ipynb linguist-vendored +*.sh text eol=lf diff --git a/.github/workflows/update_space.yml b/.github/workflows/update_space.yml new file mode 100644 index 0000000..67dbc84 --- /dev/null +++ b/.github/workflows/update_space.yml @@ -0,0 +1,28 @@ +name: Run Python script + +on: + push: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.9' + + - name: Install Gradio + run: python -m pip install gradio + + - name: Log in to Hugging Face + run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")' + + - name: Deploy to Spaces + run: gradio deploy diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5f15a7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,126 @@ +.pypirc + +specs/ +docs/ +data/ + +*.ipynb + +*.whl +*.wheels + +# Exclude aliases file +alias.sh + +*.ipynb_checkpoints + +# Python-specific ignores +__pycache__/ +*.py[cod] +*$py.class + +# Virtual environment +venv/ +env/ +.env + +# IDE-specific files +.vscode/ +.idea/ +*.swp +*.swo + +# OS-specific files +.DS_Store +Thumbs.db + +# Project-specific ignores +attachments/ + +# Logs +*.log + +# Temporary files +*.tmp + +# Compiled Python files +*.pyc + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# Environments +.env +.venv 
+env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# Poetry +poetry.lock +.poetry/ + +# Poetry +.venv/ +dist/ + +# Python +*.pyc +__pycache__/ +*.egg-info/ \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..67cda2c --- /dev/null +++ b/README.md @@ -0,0 +1,111 @@ +--- +title: Podcastfy.ai_demo +app_file: podcastfy-app/app.py +sdk: gradio +sdk_version: 4.44.1 +python_version: 3.11 +--- +# Podcastfy.ai +[![CodeFactor](https://www.codefactor.io/repository/github/souzatharsis/podcastfy/badge)](https://www.codefactor.io/repository/github/souzatharsis/podcastfy) +[![PyPi Status](https://img.shields.io/pypi/v/podcastfy)](https://pypi.org/project/podcastfy/) +[![Downloads](https://pepy.tech/badge/podcastfy)](https://pepy.tech/project/podcastfy) +[![Issues](https://img.shields.io/github/issues-raw/souzatharsis/podcastfy)](https://github.com/souzatharsis/podcastfy/issues) +[![License: CC BY-NC-SA 4.0](https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by-nc-sa/4.0/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) + +Transforming Multi-Sourced Text into Captivating Multi-Lingual Audio Conversations with GenAI + +https://github.com/user-attachments/assets/f1559e70-9cf9-4576-b48b-87e7dad1dd0b + +Podcastfy is an open-source Python package that transforms web content, PDFs, and text into engaging, multi-lingual audio conversations using GenAI. + +Unlike UI-based tools focused primarily on note-taking or research synthesis (e.g. NotebookLM ❤️), Podcastfy focuses on the programmatic and bespoke generation of engaging, conversational transcripts and audio from a multitude of text sources therefore enabling customization and scale. 
+ +## Audio Examples + +This sample collection is also [available at audio.com](https://audio.com/thatupiso/collections/podcastfy): +- [English] Book Networks, Crowds, and Markets: [audio](https://audio.com/thatupiso/audio/networks) +- [English] Research paper: ([audio](https://audio.com/thatupiso/audio/agro-paper) | [pdf](./data/pdf/s41598-024-58826-w.pdf)) +- [English] Personal website: ([audio](https://audio.com/thatupiso/audio/tharsis) | [website](https://www.souzatharsis.com)) +- [English] Personal website + youtube video: ([audio](https://audio.com/thatupiso/audio/tharsis-ai) | [website](https://www.souzatharsis.com) | [youtube](https://www.youtube.com/watch?v=sJE1dE2dulg)) +- [French] Website: ([audio](https://audio.com/thatupiso/audio/podcast-fr-agro) | [website](https://agroclim.inrae.fr/)) +- [Portuguese-BR] News article: ([audio](https://audio.com/thatupiso/audio/podcast-thatupiso-br) | [website](https://noticias.uol.com.br/eleicoes/2024/10/03/nova-pesquisa-datafolha-quem-subiu-e-quem-caiu-na-disputa-de-sp-03-10.htm)) + +## Quickstart + +### Setup +Before installing, ensure you have Python 3.12 or higher installed on your system. + +1. Install from PyPI + + `$ pip install podcastfy` + +2. Set up your [API keys](usage/config.md) + +3. Ensure you have ffmpeg installed on your system (required for audio processing) +``` +sudo apt update +sudo apt install ffmpeg +``` + +### Python +```python +from podcastfy.client import generate_podcast + +audio_file = generate_podcast(urls=["<url1>", "<url2>"]) +``` +### CLI +``` +python -m podcastfy.client --url <url1> --url <url2> +``` + +## Usage + +- [Python Package](podcastfy.ipynb) + +- [CLI](usage/cli.md) + + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request - see [Open Issues](https://github.com/souzatharsis/podcastfy/issues) for ideas. Even better, feel free to fork the repo and create your own app! Please let me know if I can be of help.
+ +## Features + +- Generate engaging, AI-powered conversational content from multiple sources (URLs and PDFs) +- Create high-quality transcripts from diverse textual information sources +- Convert pre-existing transcript files into dynamic podcast episodes +- Support for multiple advanced text-to-speech models (OpenAI and ElevenLabs) for natural-sounding audio +- Support for multiple languages, enabling global content creation +- Seamlessly integrate CLI for streamlined workflows + +## Example Use Cases + +1. **Content Summarization**: Busy professionals can stay informed on industry trends by listening to concise audio summaries of multiple articles, saving time and gaining knowledge efficiently. + +2. **Language Localization**: Non-native English speakers can access English content in their preferred language, breaking down language barriers and expanding access to global information. + +3. **Website Content Marketing**: Companies can increase engagement by repurposing written website content into audio format, providing visitors with the option to read or listen. + +4. **Personal Branding**: Job seekers can create unique audio-based personal presentations from their CV or LinkedIn profile, making a memorable impression on potential employers. + +5. **Research Paper Summaries**: Graduate students and researchers can quickly review multiple academic papers by listening to concise audio summaries, speeding up the research process. + +6. **Long-form Podcast Summarization**: Podcast enthusiasts with limited time can stay updated on their favorite shows by listening to condensed versions of lengthy episodes. + +7. **News Briefings**: Commuters can stay informed about daily news during travel time with personalized audio news briefings compiled from their preferred sources. + +8. **Educational Content Creation**: Educators can enhance learning accessibility by providing audio versions of course materials, catering to students with different learning styles. + +9. 
**Book Summaries**: Avid readers can preview books efficiently through audio summaries, helping them make informed decisions about which books to read in full. + +10. **Conference and Event Recaps**: Professionals can stay updated on important industry events they couldn't attend by listening to audio recaps of conference highlights and key takeaways. + + +## License + +This project is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/). + +## Disclaimer + +This tool is designed for personal or educational use. Please ensure you have the necessary rights or permissions before using content from external sources for podcast creation. All audio content is AI-generated and it is not intended to clone real-life humans! diff --git a/install.sh b/install.sh new file mode 100644 index 0000000..058423f --- /dev/null +++ b/install.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +echo "Starting custom installation script..." + +# Upgrade pip +echo "Upgrading pip..." +pip install --upgrade pip + + + +# If you have any specific packages that need special handling, install them here +# For example: +# echo "Installing specific package..." +# pip install some-package==1.2.3 + +# List installed packages +echo "Listing installed packages:" +pip list + +echo "Custom installation script completed." 
+
+python -m ensurepip --upgrade
+python -m pip install --upgrade setuptools
\ No newline at end of file
diff --git a/podcastfy-app/app.py b/podcastfy-app/app.py
new file mode 100644
index 0000000..d34b977
--- /dev/null
+++ b/podcastfy-app/app.py
@@ -0,0 +1,189 @@
+import gradio as gr
+from podcastfy.client import generate_podcast
+import os
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Environment variables exported before generate_podcast() is called, listed in
+# the same order as the key arguments of create_podcast().
+_API_KEY_NAMES = ("OPENAI_API_KEY", "JINA_API_KEY", "GEMINI_API_KEY")
+
+
+def get_api_key(key_name, ui_value):
+    """Return the key entered in the UI, falling back to the environment.
+
+    Returns None when the UI field is empty and ``key_name`` is not set.
+    """
+    return ui_value if ui_value else os.getenv(key_name)
+
+
+def create_podcast(urls, openai_key, jina_key, gemini_key):
+    """Generate a podcast from a comma-separated list of URLs.
+
+    Args:
+        urls: Comma-separated URLs as typed into the URLs textbox.
+        openai_key: OpenAI key from the UI; empty falls back to the environment.
+        jina_key: Jina key from the UI; empty falls back to the environment.
+        gemini_key: Gemini key from the UI; empty falls back to the environment.
+
+    Returns:
+        The value returned by generate_podcast(), passed to the audio output.
+
+    Raises:
+        gr.Error: If no URL was given or generation failed. The handler's
+            output is a gr.Audio component, so returning the message as a
+            string would be treated as audio data instead of being shown.
+    """
+    url_list = [url.strip() for url in (urls or "").split(',') if url.strip()]
+    if not url_list:
+        raise gr.Error("Please provide at least one URL.")
+
+    ui_values = dict(zip(_API_KEY_NAMES, (openai_key, jina_key, gemini_key)))
+    previous = {name: os.environ.get(name) for name in _API_KEY_NAMES}
+    try:
+        # Set API keys, prioritizing UI input over .env file. Keys that are
+        # missing from both are skipped: os.environ rejects None values.
+        for name, ui_value in ui_values.items():
+            api_key = get_api_key(name, ui_value)
+            if api_key:
+                os.environ[name] = api_key
+
+        return generate_podcast(urls=url_list)
+    except Exception as e:
+        raise gr.Error(str(e)) from e
+    finally:
+        # os.environ is process-wide: restore it so a key typed by one visitor
+        # is not reused for later requests.
+        for name, value in previous.items():
+            if value is None:
+                os.environ.pop(name, None)
+            else:
+                os.environ[name] = value
+
+
+# Create the Gradio interface
+with gr.Blocks(title="Podcastfy.ai", theme=gr.themes.Default()) as iface:
+    gr.Markdown("# Podcastfy.ai demo")
+    gr.Markdown("Generate a podcast from multiple URLs using Podcastfy.")
+    gr.Markdown("For full customization, please check [Podcastfy package](https://github.com/souzatharsis/podcastfy).")
+
+    # The key fields start empty on purpose: pre-filling them from os.getenv()
+    # would send the server's own keys to every visitor's browser. Empty fields
+    # still fall back to the environment inside create_podcast().
+    with gr.Accordion("API Keys", open=False):
+        with gr.Row(variant="panel"):
+            with gr.Column(scale=1):
+                openai_key = gr.Textbox(label="OpenAI API Key", type="password")
+                gr.Markdown('Get OpenAI API Key')
+            with gr.Column(scale=1):
+                jina_key = gr.Textbox(label="Jina API Key", type="password")
+                gr.Markdown('Get Jina API Key')
+            with gr.Column(scale=1):
+                gemini_key = gr.Textbox(label="Gemini API Key", type="password")
+                gr.Markdown('Get Gemini API Key')
+
+    urls = gr.Textbox(lines=2, placeholder="Enter URLs separated by commas...", label="URLs")
+
+    # elem_id must match the '#generate_podcast' selector used by the script below.
+    generate_button = gr.Button("Generate Podcast", variant="primary", elem_id="generate_podcast")
+
+    with gr.Column():
+        gr.Markdown('Note: Podcast generation may take a couple of minutes.', elem_id="generation-note")
+        audio_output = gr.Audio(type="filepath", label="Generated Podcast")
+
+    # .then() runs whether or not create_podcast raised, so the splash screen
+    # is always cleared, including when generation fails.
+    generate_button.click(
+        create_podcast,
+        inputs=[urls, openai_key, jina_key, gemini_key],
+        outputs=audio_output
+    ).then(
+        fn=None,
+        js="() => { const splash = document.getElementById('podcast-splash-screen'); if (splash) { splash.remove(); } }"
+    )
+
+    gr.Markdown('Created with ❤️ by Podcastfy')
+
+    # Add JavaScript for splash screen and positioning the disclaimer.
+    # The script is wrapped in an arrow function because Gradio expects `js`
+    # to be a function expression rather than a list of statements.
+    iface.load(js="""
+    () => {
+        const SPLASH_ID = 'podcast-splash-screen';
+
+        function removeSplashScreen() {
+            const splashScreen = document.getElementById(SPLASH_ID);
+            if (splashScreen) {
+                splashScreen.remove();
+            }
+        }
+
+        function addSplashScreen() {
+            const audioElement = document.querySelector('.audio-wrap');
+            if (!audioElement || document.getElementById(SPLASH_ID)) {
+                return;
+            }
+            const splashScreen = document.createElement('div');
+            splashScreen.id = SPLASH_ID;
+            splashScreen.textContent = 'Generating podcast... This may take a couple of minutes.';
+            splashScreen.style.cssText = `
+                position: absolute;
+                top: 0;
+                left: 0;
+                right: 0;
+                bottom: 0;
+                background-color: rgba(0, 0, 0, 0.7);
+                color: white;
+                display: flex;
+                justify-content: center;
+                align-items: center;
+                z-index: 1000;
+            `;
+            audioElement.style.position = 'relative';
+            audioElement.appendChild(splashScreen);
+        }
+
+        function positionGenerationNote() {
+            const noteElement = document.getElementById('generation-note');
+            const audioElement = document.querySelector('.audio-wrap');
+            if (noteElement && audioElement) {
+                noteElement.style.position = 'absolute';
+                noteElement.style.top = '-25px';
+                noteElement.style.left = '0';
+                noteElement.style.zIndex = '10';
+                audioElement.style.position = 'relative';
+            }
+        }
+
+        const generateButton = document.querySelector('#generate_podcast');
+        if (generateButton) {
+            generateButton.addEventListener('click', addSplashScreen);
+        }
+
+        // Use a MutationObserver to watch for changes in the audio element.
+        // The splash screen itself is appended inside .audio-wrap, so its own
+        // insertion is ignored; otherwise it would be removed immediately.
+        const audioWrap = document.querySelector('.audio-wrap');
+        if (audioWrap) {
+            const observer = new MutationObserver((mutations) => {
+                const hasNewContent = mutations.some((mutation) =>
+                    mutation.type === 'childList' &&
+                    [...mutation.addedNodes].some((node) => node.id !== SPLASH_ID));
+                if (hasNewContent) {
+                    removeSplashScreen();
+                    positionGenerationNote();
+                }
+            });
+            observer.observe(audioWrap, { childList: true, subtree: true });
+        }
+
+        // Position the note right away: this script runs from Gradio's load
+        // event, by which time the window 'load' event may already have fired.
+        positionGenerationNote();
+    }
+    """)
+
+if __name__ == "__main__":
+    iface.launch(share=True)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..3e1c9f4
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,18 @@
+[tool.poetry]
+name = "podcastfy-app"
+version = "0.1.0"
+description = "Simple application for podcastfy.ai"
+authors = ["Tharsis T. P.
Souza"] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.12" +gradio-client = "^1.3.0" +gradio = "^4.44.1" +python-dotenv = "^1.0.1" +podcastfy = "^0.1.12" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d308c6c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +gradio-client==1.3.0 +gradio==4.44.1 +podcastfy==0.1.13 +python-dotenv==1.0.1 diff --git a/runtime.txt b/runtime.txt new file mode 100644 index 0000000..67ebc4e --- /dev/null +++ b/runtime.txt @@ -0,0 +1 @@ +python-3.11