-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpt3.py
76 lines (63 loc) · 2.17 KB
/
gpt3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import openai
import nltk
nltk.download('punkt')
nltk.download('stopwords')
# this gives us a list of sentences
from gensim.utils import tokenize
from nltk.corpus import stopwords
import pickle
import textstat
import pandas as pd
import re
from rouge import Rouge
# OpenAI credentials: taken from the OPENAI_API_KEY environment variable so the
# secret does not have to be written into this file. Falls back to the original
# empty-string placeholder when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
openai.api_key = OPENAI_API_KEY

# English stop words from the NLTK corpus; preProcess drops these tokens.
stop_words = set(stopwords.words('english'))

# Path constant for a vectorizer pickle; not referenced by the code visible
# in this file.
VECTORIZER_FILE = "static/vectorizer.pkl"
def preProcess(text, sentSep=" ", sentMap=False):
    """Lower-case ``text``, split it into sentences and strip noise words.

    Each sentence is tokenised with gensim's ``tokenize``; tokens that are
    stop words or have two characters or fewer are discarded, and the
    surviving tokens are re-joined with single spaces.

    Args:
        text: Raw input text.
        sentSep: String placed between the cleaned sentences in the result.
        sentMap: When true, also return a mapping from sentence index to the
            (lower-cased) sentence as produced by the sentence splitter.

    Returns:
        A ``(cleaned_text, sentence_mapping)`` tuple. ``sentence_mapping`` is
        an empty dict when ``sentMap`` is false.
    """
    # NOTE(review): the text is lower-cased before sentence splitting, so the
    # mapping holds lower-cased sentences -- confirm this is intended.
    sentences = nltk.sent_tokenize(text.lower())

    cleaned_sentences = []
    for sentence in sentences:
        kept_words = [
            word
            for word in tokenize(sentence)
            if len(word) > 2 and word not in stop_words
        ]
        cleaned_sentences.append(" ".join(kept_words))

    sentenceMapping = dict(enumerate(sentences)) if sentMap else {}
    return sentSep.join(cleaned_sentences), sentenceMapping
# gpt-3 summarization
def summary_gpt3(input_text, percent, sentSep=" ", sentMap=False):
    """Summarise ``input_text`` with the OpenAI completion endpoint.

    Args:
        input_text: Transcript text that is inserted into the prompt.
        percent: Length control; the completion is capped at
            ``int(percent * 2)`` tokens.
        sentSep: Unused; kept so existing callers keep working.
        sentMap: Unused; kept so existing callers keep working.

    Returns:
        A ``(summary, sentence_count)`` tuple: the summary as a single line of
        text, and the number of non-empty sentences found in it (``0`` for an
        empty summary).
    """
    max_tokens = int(percent * 2)
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Given a transcript, get summary:\n\nTranscript: {input_text}\n\nSummary:",
        temperature=0,
        max_tokens=max_tokens,  # change summary length
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    raw_summary = response["choices"][0]["text"]

    # Turn each line break into a single space. Deleting the newline outright
    # would glue the last word of one line to the first word of the next.
    final_summary = re.sub(r"\s*\n\s*", " ", raw_summary).strip()

    # re.split leaves an empty piece after the closing punctuation mark (and
    # returns [''] for an empty string), so only non-blank pieces are counted.
    sentences = [
        piece for piece in re.split(r'[.!?]+', final_summary) if piece.strip()
    ]
    return final_summary, len(sentences)
def gpt3_readScore(text):
    """Return the Flesch reading-ease score that textstat computes for ``text``."""
    return textstat.flesch_reading_ease(text)
# F1, recall, and precision
def metrics(orginal_text, final_text):
    """Score a generated summary against its source text with ROUGE.

    Args:
        orginal_text: Source text, used as the ROUGE reference.
        final_text: Generated summary, used as the ROUGE hypothesis.

    Returns:
        The result of ``Rouge.get_scores`` for the pair (per the ``rouge``
        package documentation: f, p and r values for ROUGE-1, ROUGE-2 and
        ROUGE-L).
    """
    scorer = Rouge()
    # get_scores takes (hypothesis, reference). The summary is the text being
    # evaluated, so it goes first; passing the source text first would swap
    # the reported precision and recall.
    return scorer.get_scores(final_text, orginal_text)
def gpt3_summarizer(text, sumLen):
    """Clean ``text`` with preProcess, then summarise it with summary_gpt3.

    Args:
        text: Raw input text.
        sumLen: Length control forwarded to ``summary_gpt3`` as ``percent``.

    Returns:
        A ``(summary, summary_sentence_count, original_sentence_count)`` tuple,
        where the last item is the number of sentences preProcess mapped.
    """
    cleaned, sentence_lookup = preProcess(text, sentSep=". ", sentMap=True)
    summary_text, summary_sentences = summary_gpt3(cleaned, sumLen)
    return summary_text, summary_sentences, len(sentence_lookup)