-
Notifications
You must be signed in to change notification settings - Fork 209
[TextMatch] QA说明文档
MachineLP edited this page Aug 24, 2020
·
2 revisions
QA库:
id | q | qkw | a | akw | intention |
---|---|---|---|---|---|
id0 | sk2神仙水多少钱 | ['神仙水','价格/多少钱'] | 神仙水是799 | ['799/七百九十九'] | 咨询价格 |
id1 | 海蓝之谜面霜有什么功效 | ['海蓝之谜/lammer', '面霜', '功效/功能/作用'] | 保湿祛斑 | ['保湿', '祛斑'] | 功效 |
注:实际使用时,需将关键词中的 '/' 换成 '|'(即正则表达式中的“或”)。
> 用户问句:海蓝之谜面霜有什么作用?
> 匹配方式如下:
>(1)通过text_embedding计算相似度;
>(2)相似度大于阈值0.7,匹配到的Qid为id1;
>(3)之后匹配关键词:用正则匹配用户问句中是否有 海蓝之谜或lammer 且 面霜 且 功效或功能或作用;
>(4)如果上述条件都满足则匹配成功。
git clone https://github.com/MachineLP/TextMatch
cd TextMatch
export PYTHONPATH=${PYTHONPATH}:../TextMatch
python tests/core_test/qa_match_test.py
import sys
from textmatch.core.qa_match import QMatch, AMatch, SemanticMatch
# Demo corpus used by the matcher tests in this script: entry id -> sentence.
test_dict = {
    "id0": "其实事物发展有自己的潮流和规律",
    "id1": "当你身处潮流之中的时候,要紧紧抓住潮流的机会",
    "id2": "想办法脱颖而出,即使没有成功,也会更加洞悉时代的脉搏",
    "id3": "收获珍贵的知识和经验。而如果潮流已经退去",
    "id4": "这个时候再去往这个方向上努力,只会收获迷茫与压抑",
    "id5": "对时代、对自己都没有什么帮助",
    "id6": "但是时代的浪潮犹如海滩上的浪花,总是一浪接着一浪,只要你站在海边,身处这个行业之中,下一个浪潮很快又会到来。你需要敏感而又深刻地去观察,略去那些浮躁的泡沫,抓住真正潮流的机会,奋力一搏,不管成败,都不会遗憾。",
}
def test_q_match(testword):
    """Run QMatch over ``test_dict`` for ``testword`` and return the prediction.

    The matcher is built with the bow, tfidf and ngram_tfidf models, each
    given weight 1, and is queried with the 'score' strategy and a vote
    threshold of 0.5. The prediction is printed before it is returned.
    """
    models = ['bow', 'tfidf', 'ngram_tfidf']
    weights = {name: 1 for name in models}
    matcher = QMatch(q_dict=test_dict, match_models=models)
    prediction = matcher.predict(
        testword,
        match_strategy='score',
        vote_threshold=0.5,
        key_weight=weights,
    )
    print('q_match_pre>>>>>', prediction)
    return prediction
def test_a_match(testword):
    """Run AMatch over ``test_dict`` for ``testword`` against ids id0 and id1.

    The matcher is built with the bow, tfidf and ngram_tfidf models, each
    given weight 1, and is queried with the 'score' strategy and a vote
    threshold of 0.5. The prediction is printed before it is returned.

    Sample output recorded for this script's test sentence:
    a_match_pre>>>>> {'id0': 0.6666665958333436, 'id1': 0.31108716457138996}
    """
    models = ['bow', 'tfidf', 'ngram_tfidf']
    weights = {name: 1 for name in models}
    candidate_ids = ['id0', 'id1']
    matcher = AMatch(a_dict=test_dict, match_models=models)
    prediction = matcher.predict(
        testword,
        candidate_ids,
        match_strategy='score',
        vote_threshold=0.5,
        key_weight=weights,
    )
    print('a_match_pre>>>>>', prediction)
    return prediction
def test_semantic_match(testword,words_dict=test_dict):
    """Run SemanticMatch for ``testword`` against ids id0, id1 and id5.

    ``words_dict`` is the id -> text mapping handed to SemanticMatch; it
    defaults to the module-level ``test_dict``. The matcher is built with the
    bow, tfidf and ngram_tfidf models, each given weight 1, and is queried
    with the 'score' strategy and a vote threshold of 0.5. The prediction is
    printed before it is returned.

    Sample output recorded for this script's test sentence:
    s_match_pre>>>>> {'id0': 0.6666665958333436, 'id1': 0.31108716457138996, 'id5': 0.18341771209693802}
    """
    models = ['bow', 'tfidf', 'ngram_tfidf']
    weights = {name: 1 for name in models}
    candidate_ids = ['id0', 'id1', "id5"]
    matcher = SemanticMatch(words_dict=words_dict, match_models=models)
    prediction = matcher.predict(
        testword,
        candidate_ids,
        match_strategy='score',
        vote_threshold=0.5,
        key_weight=weights,
    )
    print('s_match_pre>>>>>', prediction)
    return prediction
if __name__ == '__main__':
    # Feed the same sentence (identical to test_dict["id0"]) to each matcher
    # demo, in the order Q -> A -> Semantic.
    testword = "其实事物发展有自己的潮流和规律"
    for run_demo in (test_q_match, test_a_match, test_semantic_match):
        run_demo(testword)

# Sample output recorded from running the script:
'''
q_match_pre>>>>> {'id0': 0.6666665958333436}
a_match_pre>>>>> {'id0': 0.6666665958333436, 'id1': 0.31108716457138996}
s_match_pre>>>>> {'id0': 0.6666665958333436, 'id1': 0.31108716457138996, 'id5': 0.18341771209693802}
'''
git clone https://github.com/MachineLP/TextMatch
cd TextMatch
export PYTHONPATH=${PYTHONPATH}:../TextMatch
python tests/core_test/qa_match_kw_test.py
import sys
from textmatch.core.qa_match_kw import QAMatchKW
# Scores per entry id that the keyword post-processing demos receive as input.
res_dict = {'id0': 0.8, 'id1': 0.3}

# Question keywords per entry id; '|' separates alternatives within one item.
qkw_dict = {
    'id0': ['神仙水|神仙', '价格|多少钱'],
    'id1': ['海蓝之谜|lammer', '面霜', '功效|功能|作用'],
    'id2': ['快递'],
}

# Answer keywords per entry id, in the same '|'-separated form.
akw_dict = {
    'id0': ['799|七百九十九|七九九'],
    'id1': ['补水|祛斑'],
    'id2': ['顺丰'],
}
def test_qkw_match(testword):
    """Apply QAMatchKW.post_processing_q to ``testword`` and ``res_dict``.

    A QAMatchKW is built from the module-level ``qkw_dict`` and ``akw_dict``;
    the post-processed result is printed and then returned.
    """
    kw_matcher = QAMatchKW(qkw_dict=qkw_dict, akw_path=akw_dict)
    outcome = kw_matcher.post_processing_q(testword, res_dict)
    print('res>>>>>', outcome)
    return outcome
def test_akw_match(testword):
    """Apply QAMatchKW.post_processing_a to ``testword`` and ``res_dict``.

    A QAMatchKW is built from the module-level ``qkw_dict`` and ``akw_dict``;
    the post-processed result is printed and then returned.
    """
    kw_matcher = QAMatchKW(qkw_dict=qkw_dict, akw_path=akw_dict)
    outcome = kw_matcher.post_processing_a(testword, res_dict)
    print('res>>>>>', outcome)
    return outcome
if __name__ == '__main__':
    # Question-keyword demo first, then answer-keyword demo, each with its
    # own input text.
    for run_demo, testword in (
        (test_qkw_match, "神仙水多少钱"),
        (test_akw_match, "799"),
    ):
        run_demo(testword)

# Sample output recorded from running the script:
'''
res>>>>> {'id0': 1.0}
res>>>>> {'id0': [1.0, '神仙水'], 'id1': [0.0, '海蓝之谜|lammer']}
'''