Skip to content

Commit

Permalink
feat: use vicuna-13b-v1.5-16k and garage-bAInd/Platypus2-70B-instruct
Browse files (browse the repository at this point in the history)
  • Loading branch information
engineervix committed Dec 13, 2023
1 parent dd6c918 commit 16e2877
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 44 deletions.
7 changes: 7 additions & 0 deletions app/core/news/other.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -47,6 +47,9 @@ def get_daily_mail_article_detail(url):
content = content.replace("CLICK TO READ MORE", "...")
content = content.replace("https://enews.daily-mail.co.zm/welcome/home", "")

# remove Read more: eNews Daily Mail | Without Fear Or Favour (daily-mail.co.zm)
content = content.replace("Read more: eNews Daily Mail | Without Fear Or Favour (daily-mail.co.zm)", "")

return content
elif article := soup.find("main"):
content_elements = article.select("div.e-con-inner")
Expand All @@ -57,6 +60,10 @@ def get_daily_mail_article_detail(url):
# remove Read more: eNews Daily Mail | Without Fear Or Favour (daily-mail.co.zm)
content = content.replace("Read more: eNews Daily Mail | Without Fear Or Favour (daily-mail.co.zm)", "")

# Remove "CLICK TO READ MORE" from the content
content = content.replace("CLICK TO READ MORE", "...")
content = content.replace("https://enews.daily-mail.co.zm/welcome/home", "")

return content
return None

Expand Down
47 changes: 12 additions & 35 deletions app/core/podcast/content.py
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
import datetime
import logging
import time
from typing import Callable

import together
from pydantic import HttpUrl

from app.core.db.models import Article, Episode
from app.core.utilities import TOGETHER_API_KEY, podcast_host, today, today_human_readable
from app.core.utilities import DATA_DIR, TOGETHER_API_KEY, podcast_host, today, today_human_readable, today_iso_fmt


async def get_episode_number() -> int:
Expand Down Expand Up @@ -65,33 +66,10 @@ async def create_transcript(news: list[dict[str, str]], dest: str, summarizer: C
# Add the article to the list for the corresponding source
articles_by_source[source].append(article)

prompt = f"<human>: You are {podcast_host}, an accomplished, fun and witty scriptwriter, content creator and podcast host. You have a news and current affairs podcast which runs Monday to Friday. Your secretary has gathered the news from various sources, and has given you the notes as shown below. To ensure accuracy, please read the content carefully and pay attention to any nuances or complexities in the language, then go ahead and present today's episode. It is important that you cover EVERYTHING, do not leave out anything. Feel free to consolidate any similar news items from different sources, and present the news in a logical sequence, based on common themes. At the end, add a fun and witty remark informing your audience that you are actually an AI, and not a human.\n\n"
prompt = f"You are {podcast_host}, an accomplished, fun and witty scriptwriter, content creator and podcast host. You have a news and current affairs podcast which runs Monday to Friday. Your secretary has gathered the news from various sources as indicated below. Study the content, consolidate any similar news items from different sources, and organize the news in a logical, coherent manner so it's easy to follow. You can then go ahead and present today's episode, ensuring that you cover all the news your secretary has curated. At the end, add a fun and witty remark informing your audience that you are actually an AI, and not a human.\n\n"

metadata = f"Title: Zed News Podcast episode {await get_episode_number()}\nDate: {today_human_readable}\nHost: {podcast_host}\n\n"

unwanted_text = [
# "Sure, here's a summary of the news entry in two sentences:",
# "Sure, here is a summary of the news entry in two sentences:",
# "Sure, here's a summary of the news entry in not more than two sentences:",
# "Sure, here is a summary of the news entry in not more than two sentences:",
"Sure! Here's the summary:",
"Sure! Here is the summary:",
"Sure, I can help you with that!",
"Sure, I can do that!",
# "Here's a summary of the news entry in two sentences:",
# "Here is a summary of the news entry in two sentences:",
# "Here's a summary of the news entry in not more than two sentences:",
# "Here is a summary of the news entry in not more than two sentences:",
# "Here's a two-sentence summary of the news entry:",
# "Here is a two-sentence summary of the news entry:",
"Sure! Here's a possible summary of the news entry:",
"Sure! Here is a possible summary of the news entry:",
"Sure, here's a possible summary:",
"Sure, here is a possible summary:",
# "Sure! Here's a two-sentence summary of the news entry you provided:",
# "Sure! Here is a two-sentence summary of the news entry you provided:",
]

content = ""
counter = 0
for source in articles_by_source:
Expand All @@ -101,35 +79,34 @@ async def create_transcript(news: list[dict[str, str]], dest: str, summarizer: C
text = article["content"]
summary = summarizer(text, title)

for text in unwanted_text:
summary = summary.replace(text, "")

await update_article_with_summary(title, article["url"], today, summary)

counter += 1

content += f"{counter}. '{title}' (source: {source})"
content += f"\n{summary.strip()}\n\n"

notes = prompt + "```" + metadata + "News Items:\n\n" + content + "```<bot>:"
notes = prompt + "```" + metadata + "News Items:\n\n" + content + "```"

model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
temperature = 0.4
top_p = 0.5
# Write the content to a file
with open(f"{DATA_DIR}/{today_iso_fmt}_news_headlines.txt", "w") as f:
f.write(metadata + "News Items:\n\n" + content)

model = "lmsys/vicuna-13b-v1.5-16k"
temperature = 0.7
max_tokens = 4096
together.api_key = TOGETHER_API_KEY
output = together.Complete.create(
prompt=notes,
model=model,
temperature=temperature,
top_p=top_p,
max_tokens=max_tokens,
repetition_penalty=1.1,
)
time.sleep(30)
logging.info(output)

transcript = output["output"]["choices"][0]["text"]

# Write the content to a file
# Write the transcript to a file
with open(dest, "w") as f:
f.write(transcript)
8 changes: 5 additions & 3 deletions app/core/summarization/backends/together.py
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
import logging
import time

import together

Expand All @@ -14,17 +15,18 @@ def summarize(content: str, title: str) -> str:
https://docs.together.ai/reference/complete
"""

prompt = f"<human>: You are a distinguished news editor and content publisher, your task is to summarize the following news entry. The summary should accurately reflect the main message and arguments presented in the original text, while also being concise and easy to understand. Just summarize straight away, without responding to me.\n\n ```{content}```\n<bot>:"
model = "togethercomputer/llama-2-70b-chat"
prompt = f"You are a distinguished news editor and content publisher, your task is to summarize the following news entry. The summary should accurately reflect the main message and arguments presented in the original news entry, while also being concise and easy to understand. Your summary should not exceed two sentences.\n\n ```{content}```:"
model = "garage-bAInd/Platypus2-70B-instruct"
temperature = 0.7
max_tokens = 512
max_tokens = 128

output = together.Complete.create(
prompt=prompt,
model=model,
temperature=temperature,
max_tokens=max_tokens,
)
time.sleep(1.5)
logging.info(output)

return output["output"]["choices"][0]["text"]
14 changes: 8 additions & 6 deletions social.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,6 +12,7 @@
import os
import pathlib
import sys
import time
from http import HTTPStatus

import facebook
Expand Down Expand Up @@ -40,7 +41,7 @@
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
together.api_key = TOGETHER_API_KEY

podcast_transcript = f"{DATA_DIR}/{today_iso_fmt}/{today_iso_fmt}_podcast-content.txt"
news_headlines = f"{DATA_DIR}/{today_iso_fmt}_news_headlines.txt"
podcast_url = f"https://zednews.pages.dev/episode/{today_iso_fmt}/"


Expand All @@ -61,8 +62,8 @@ def podcast_is_live(url):


def get_content() -> str:
"""Get the content of the podcast transcript"""
with open(podcast_transcript, "r") as f:
"""Get the headlines"""
with open(news_headlines, "r") as f:
return f.read()


Expand All @@ -73,8 +74,9 @@ def create_facebook_post(content: str) -> str:
https://docs.together.ai/reference/complete
"""

prompt = f"<human>: You are a social media marketing guru. You have been hired by a podcaster, {podcast_host} to create a nice, short and catchy facebook post (max 130 words) inviting people to listen to today's podcast whose transcript is below. Highlight some interesting news headlines, appropriately paraphrasing them to grab the attention of your audience. Also, appropriately utilize bullet points, emojis, whitespace and hashtags where necessary. Do not add the link to the podcast as it will be added automatically.\n\n```{content}\n```\n<bot>:"
model = "togethercomputer/llama-2-70b-chat"
prompt = f"You are a social media marketing guru. You have been hired by a podcaster, {podcast_host}, who hosts a news and current affairs podcast which runs Monday to Friday. Your task is to create a nice, short and catchy facebook post inviting people to listen to today's podcast whose details are below. Appropriately utilize bullet points, emojis, whitespace and hashtags where necessary. Do not add the link to the podcast as it will be added automatically.\n\n```{content}\n```"

model = "lmsys/vicuna-13b-v1.5-16k"
temperature = 0.7
max_tokens = 768

Expand All @@ -83,9 +85,9 @@ def create_facebook_post(content: str) -> str:
model=model,
temperature=temperature,
max_tokens=max_tokens,
repetition_penalty=1.1,
)
logger.info(output)
time.sleep(30)

return output["output"]["choices"][0]["text"]

Expand Down

0 comments on commit 16e2877

Please sign in to comment.