fixed cutoff for AI utterance #426

Merged: 4 commits, Apr 26, 2023
@@ -135,9 +135,9 @@ services:
deploy:
resources:
limits:
- memory: 40G
+ memory: 50G
reservations:
- memory: 40G
+ memory: 50G

dff-dream-persona-gpt-j-prompted-skill:
env_file: [ .env ]
38 changes: 29 additions & 9 deletions common/universal_templates.py
@@ -23,7 +23,7 @@


GENERATIVE_ROBOT_TEMPLATE = re.compile(
r"(AI:|Robot:|ROBOT:|Computer:|COMPUTER:|User:|USER:|Speaker:|SPEAKER:|Human:|HUMAN:|Чат-?бот:)\s?"
r"(?:AI:|Robot:|ROBOT:|Computer:|COMPUTER:|User:|USER:|Speaker:|SPEAKER:|Human:|HUMAN:|Чат-?бот:)\s?"
)
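The only functional change to the pattern itself is the switch from a capturing group to a non-capturing (?:...) group. That matters because the generation services below now feed this pattern to re.split(), and Python's re.split() also returns the text of any capturing group. A minimal sketch with a shortened two-label version of the pattern, purely for illustration:

import re

text = "I like rockets. Human: tell me more AI: Sure!"

# With a capturing group, re.split() keeps the matched speaker labels in the result.
capturing = re.compile(r"(AI:|Human:)\s?")
print(capturing.split(text))
# ['I like rockets. ', 'Human:', 'tell me more ', 'AI:', 'Sure!']

# With a non-capturing group, the labels are dropped and only the segments remain.
non_capturing = re.compile(r"(?:AI:|Human:)\s?")
print(non_capturing.split(text))
# ['I like rockets. ', 'tell me more ', 'Sure!']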
DUMMY_DONTKNOW_RESPONSES = {
"EN": [
@@ -177,7 +177,16 @@ def nounphrases_questions(nounphrase=None):
"всякое",
"другое",
]
NOTHING_LIKE = ["nothing", "none", "neither", "ничего", "нечего", "ни о чем", "не о чем", r"ни то,? ни то"]
NOTHING_LIKE = [
"nothing",
"none",
"neither",
"ничего",
"нечего",
"ни о чем",
"не о чем",
r"ни то,? ни то",
]
DONOTKNOW_LIKE = [
r"(i )?(do not|don't) know",
"you (choose|decide|pick up)",
@@ -198,7 +207,8 @@ def nounphrases_questions(nounphrase=None):
ABOUT_TOPIC = join_words_in_or_pattern(ABOUT_LIKE) + r"\s" + ANY_WORDS
KNOW = join_words_in_or_pattern(KNOW_LIKE)
SOMETHING_ELSE = re.compile(
r"((something|anything|everything|что-нибудь|что-то|что угодно|что-либо) (else|other|другом|другое))", re.IGNORECASE
r"((something|anything|everything|что-нибудь|что-то|что угодно|что-либо) (else|other|другом|другое))",
re.IGNORECASE,
)

# --------------- Let's talk. / Can we talk? / Talk to me. ------------
@@ -315,7 +325,8 @@ def nounphrases_questions(nounphrase=None):
LIKE_PATTERN = re.compile(LIKE_WORDS, re.IGNORECASE)

NOT_LIKE_PATTERN = re.compile(
rf"(hate|loathe|((not|n't) |dis|un)({LIKE_WORDS}|for (me|you)\b)|[a-z ]+\bfan\b)", re.IGNORECASE
rf"(hate|loathe|((not|n't) |dis|un)({LIKE_WORDS}|for (me|you)\b)|[a-z ]+\bfan\b)",
re.IGNORECASE,
)

STOP_PATTERN = re.compile(r"(stop|shut|something else|change|don't want)", re.IGNORECASE)
@@ -470,7 +481,8 @@ def if_chat_about_particular_topic(annotated_uttr, prev_annotated_uttr=None, key
elif prev_what_to_chat_about or chat_about:
if key_words:
trigger_pattern = re.compile(
rf"{join_word_beginnings_in_or_pattern(key_words)}[a-zA-Z0-9,\-\' ]+\?", re.IGNORECASE
rf"{join_word_beginnings_in_or_pattern(key_words)}[a-zA-Z0-9,\-\' ]+\?",
re.IGNORECASE,
)
offered_this_topic = trigger_pattern.search(prev_uttr_)
user_agrees_or_any = ANY_TOPIC_AMONG_OFFERED.search(uttr_) or is_yes(annotated_uttr)
@@ -480,10 +492,16 @@
return False
elif compiled_pattern:
if isinstance(compiled_pattern, str):
- offered_this_topic = re.search(rf"{compiled_pattern}[a-zA-Z0-9,\-\' ]+\?", prev_uttr_, re.IGNORECASE)
+ offered_this_topic = re.search(
+ rf"{compiled_pattern}[a-zA-Z0-9,\-\' ]+\?",
+ prev_uttr_,
+ re.IGNORECASE,
+ )
else:
offered_this_topic = re.search(
rf"{compiled_pattern.pattern}[a-zA-Z0-9,\-\' ]+\?", prev_uttr_, re.IGNORECASE
rf"{compiled_pattern.pattern}[a-zA-Z0-9,\-\' ]+\?",
prev_uttr_,
re.IGNORECASE,
)
user_agrees_or_any = ANY_TOPIC_AMONG_OFFERED.search(uttr_) or is_yes(annotated_uttr)
if re.search(compiled_pattern, uttr_) or (offered_this_topic and user_agrees_or_any):
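For readers less familiar with this branch, a rough, self-contained sketch of what the key_words path above does. Here word_beginnings is a hypothetical stand-in for join_word_beginnings_in_or_pattern, and the agreement check is a simplified stand-in for is_yes; neither is the repo's actual helper:

import re

# Hypothetical stand-in: build an alternation of word beginnings,
# e.g. ["movies", "films"] -> r"\b(movie|film)".
def word_beginnings(words):
    return r"\b(" + "|".join(w[:-1] for w in words) + ")"

key_words = ["movies", "films"]
trigger_pattern = re.compile(
    rf"{word_beginnings(key_words)}[a-zA-Z0-9,\-\' ]+\?",
    re.IGNORECASE,
)

prev_uttr = "Would you like to talk about movies or books?"
user_uttr = "yes"

offered_this_topic = trigger_pattern.search(prev_uttr)  # the bot offered one of the key topics as a question
user_agrees = user_uttr.strip().lower() in ("yes", "sure", "okay")  # simplified is_yes()
print(bool(offered_this_topic and user_agrees))  # True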
@@ -570,10 +588,12 @@ def is_any_question_sentence_in_utterance(annotated_uttr):
I_LOVE_PATTERN = re.compile(r"(^|\b)(i|i'm|i am|we|we're|we are) " + WORD_LOVE, re.IGNORECASE)
I_HATE_PATTERN = re.compile(r"(^|\b)(i|i'm|i am|we|we're|we are) " + WORD_HATE, re.IGNORECASE)
WHAT_FAVORITE_PATTERN = re.compile(
r"(what|which)[a-z0-9A-Z \-]* your (favou?rite|(the )?best|beloved|(a )?loved|well-loved|truelove)", re.IGNORECASE
r"(what|which)[a-z0-9A-Z \-]* your (favou?rite|(the )?best|beloved|(a )?loved|well-loved|truelove)",
re.IGNORECASE,
)
WHAT_LESS_FAVORITE_PATTERN = re.compile(
r"(what|which)[a-z0-9A-Z \-]* your ((less|least)[- ]favou?rite|(the )?worst|unloved|unlovable)", re.IGNORECASE
r"(what|which)[a-z0-9A-Z \-]* your ((less|least)[- ]favou?rite|(the )?worst|unloved|unlovable)",
re.IGNORECASE,
)
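These two patterns are only re-wrapped here, but as a quick sanity check of what they accept (both favourite/favorite spellings plus a few synonyms), using the definitions exactly as above:

import re

WHAT_FAVORITE_PATTERN = re.compile(
    r"(what|which)[a-z0-9A-Z \-]* your (favou?rite|(the )?best|beloved|(a )?loved|well-loved|truelove)",
    re.IGNORECASE,
)
WHAT_LESS_FAVORITE_PATTERN = re.compile(
    r"(what|which)[a-z0-9A-Z \-]* your ((less|least)[- ]favou?rite|(the )?worst|unloved|unlovable)",
    re.IGNORECASE,
)

print(bool(WHAT_FAVORITE_PATTERN.search("What is your favourite sci-fi movie?")))     # True
print(bool(WHAT_LESS_FAVORITE_PATTERN.search("What is your least favorite chore?")))  # True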
WHAT_DO_YOU_THINK_PATTERN = re.compile(
r"(what (do|did|are|were) you (think|believe|recognize|sure|understand|feel|appeal|suppose|imagine|guess|"
8 changes: 6 additions & 2 deletions services/transformers_lm/server.py
@@ -60,7 +60,7 @@ def generate_responses(context, model, tokenizer, prompt, generation_params, con
for result in chat_history_ids:
output = tokenizer.decode(result, skip_special_tokens=True)
result_cut = output.replace(dialog_context + " ", "")
- result_cut = GENERATIVE_ROBOT_TEMPLATE.sub("\n", result_cut).strip()
+ result_cut = [x.strip() for x in GENERATIVE_ROBOT_TEMPLATE.split(result_cut) if x.strip()][0]
logger.info(f"hypothesis: {result_cut}")
outputs.append(result_cut)
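This line is the actual fix behind the PR title. Previously the speaker labels in the generated continuation were only replaced with newlines, so if the model kept going and invented further turns, those turns stayed in the hypothesis. Now the output is split on the (non-capturing) label pattern and only the first non-empty segment is kept, i.e. the hypothesis is cut off where the model starts a new turn. A minimal sketch with an invented model output; the two-label pattern is a shortened stand-in for GENERATIVE_ROBOT_TEMPLATE:

import re

TEMPLATE = re.compile(r"(?:AI:|Human:)\s?")  # shortened stand-in for GENERATIVE_ROBOT_TEMPLATE

# Invented continuation where the model hallucinates extra dialogue turns.
result_cut = "SpaceX wants to make life multi-planetary. Human: wow AI: Indeed."

# Old behaviour: labels become newlines, the invented turns survive in the hypothesis.
old = TEMPLATE.sub("\n", result_cut).strip()
# 'SpaceX wants to make life multi-planetary. \nwow \nIndeed.'

# New behaviour: keep only the text before the first speaker label.
new = [x.strip() for x in TEMPLATE.split(result_cut) if x.strip()][0]
# 'SpaceX wants to make life multi-planetary.'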

@@ -85,7 +85,11 @@ def generate_responses(context, model, tokenizer, prompt, generation_params, con
"num_return_sequences": 1,
}
example_response = generate_responses(
["What is the goal of SpaceX?"], model, tokenizer, "You are a SpaceX Assistant.", default_config
["What is the goal of SpaceX?"],
model,
tokenizer,
"You are a SpaceX Assistant.",
default_config,
)
logger.info(f"example response: {example_response}")
logger.info("transformers_lm is ready")
2 changes: 1 addition & 1 deletion services/transformers_peft_lm/server.py
@@ -55,7 +55,7 @@ def generate_responses(context, model, tokenizer, prompt, continue_last_uttr=Fal
for result in chat_history_ids:
output = tokenizer.decode(result, skip_special_tokens=True)
result_cut = output.replace(dialog_context + " ", "")
- result_cut = GENERATIVE_ROBOT_TEMPLATE.sub("\n", result_cut).strip()
+ result_cut = [x.strip() for x in GENERATIVE_ROBOT_TEMPLATE.split(result_cut) if x.strip()][0]
logger.info(f"hypothesis: {result_cut}")
outputs.append(result_cut)
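Same cutoff as in transformers_lm above. One hedged observation, not something this diff changes: if the decoded output contains nothing but speaker labels (or is empty), the comprehension yields an empty list and the [0] raises an IndexError, so a defensive variant could fall back to an empty hypothesis, for example:

import re

TEMPLATE = re.compile(r"(?:AI:|Human:)\s?")  # shortened stand-in for GENERATIVE_ROBOT_TEMPLATE

result_cut = "AI:"  # degenerate case: only a speaker label survives after stripping the context
segments = [x.strip() for x in TEMPLATE.split(result_cut) if x.strip()]
result_cut = segments[0] if segments else ""  # avoid IndexError on empty output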
