Skip to content

[TextMatch] QA说明文档

MachineLP edited this page Aug 24, 2020 · 2 revisions

QA库:

| id | q | qkw | a | akw | intention |
| --- | --- | --- | --- | --- | --- |
| id0 | sk2神仙水多少钱 | ['神仙水','价格/多少钱'] | 神仙水是799 | ['799/七百九十九'] | 咨询价格 |
| id1 | 海蓝之谜面霜有什么功效 | ['海蓝之谜/lammer', '面霜', '功效/功能/作用'] | 保湿祛斑 | ['保湿', '祛斑'] | 功效 |

注:表中的 '/' 表示"或"的关系;实际使用时需将 '/' 换成 '|'(例如 '价格|多少钱')。

> 用户问句:海蓝之谜面霜有什么作用? 
> 匹配方式如下: 
>(1)通过text_embedding计算相似度;
>(2)相似度大于匹配阈值0.7,匹配到Qid为id1; 
>(3)之后匹配关键词:用正则匹配用户问句是否含有  海蓝之谜或lammer  且 面霜 且 功效或功能或作用; 
>(4)如果上述条件都满足则匹配成功。 

run examples

git clone https://github.com/MachineLP/TextMatch
cd TextMatch
export PYTHONPATH=${PYTHONPATH}:../TextMatch
python tests/core_test/qa_match_test.py

qa match

import sys
from textmatch.core.qa_match import QMatch, AMatch, SemanticMatch

# Demo corpus shared by the three match examples below: id -> sentence.
test_dict = {
    "id0": "其实事物发展有自己的潮流和规律",
    "id1": "当你身处潮流之中的时候,要紧紧抓住潮流的机会",
    "id2": "想办法脱颖而出,即使没有成功,也会更加洞悉时代的脉搏",
    "id3": "收获珍贵的知识和经验。而如果潮流已经退去",
    "id4": "这个时候再去往这个方向上努力,只会收获迷茫与压抑",
    "id5": "对时代、对自己都没有什么帮助",
    "id6": (
        "但是时代的浪潮犹如海滩上的浪花,总是一浪接着一浪,只要你站在海边,身处这个行业之中,下一个浪潮很快又会到来。"
        "你需要敏感而又深刻地去观察,略去那些浮躁的泡沫,抓住真正潮流的机会,奋力一搏,不管成败,都不会遗憾。"
    ),
}


def test_q_match(testword):
    """Run the QMatch demo for ``testword`` and return its prediction.

    A QMatch is built over the module-level ``test_dict`` with the 'bow',
    'tfidf' and 'ngram_tfidf' models; ``predict`` is then called with the
    'score' strategy, a vote threshold of 0.5 and a weight of 1 per model.
    The prediction is printed before being returned.
    """
    model_names = ['bow', 'tfidf', 'ngram_tfidf']
    # Every model contributes with the same weight.
    equal_weights = dict.fromkeys(model_names, 1)

    matcher = QMatch(q_dict=test_dict, match_models=model_names)
    prediction = matcher.predict(
        testword,
        match_strategy='score',
        vote_threshold=0.5,
        key_weight=equal_weights
    )
    print('q_match_pre>>>>>', prediction)
    return prediction

def test_a_match(testword):
    """Run the AMatch demo for ``testword`` against ids 'id0' and 'id1'.

    An AMatch is built over the module-level ``test_dict`` with the 'bow',
    'tfidf' and 'ngram_tfidf' models; ``predict`` is called with the
    candidate id list, the 'score' strategy, a vote threshold of 0.5 and a
    weight of 1 per model. The prediction is printed and returned.

    The sample run recorded at the end of this script shows, for the demo
    sentence:
    a_match_pre>>>>> {'id0': 0.6666665958333436, 'id1': 0.31108716457138996}
    """
    model_names = ['bow', 'tfidf', 'ngram_tfidf']
    candidate_ids = ['id0', 'id1']
    # Every model contributes with the same weight.
    equal_weights = dict.fromkeys(model_names, 1)

    matcher = AMatch(a_dict=test_dict, match_models=model_names)
    prediction = matcher.predict(
        testword,
        candidate_ids,
        match_strategy='score',
        vote_threshold=0.5,
        key_weight=equal_weights
    )
    print('a_match_pre>>>>>', prediction)
    return prediction


def test_semantic_match(testword, words_dict=test_dict):
    """Run the SemanticMatch demo for ``testword`` against three ids.

    A SemanticMatch is built over ``words_dict`` (the module-level
    ``test_dict`` by default) with the 'bow', 'tfidf' and 'ngram_tfidf'
    models; ``predict`` is called for ids 'id0', 'id1' and 'id5' with the
    'score' strategy, a vote threshold of 0.5 and a weight of 1 per model.
    The prediction is printed and returned.

    The sample run recorded at the end of this script shows, for the demo
    sentence:
    s_match_pre>>>>> {'id0': 0.6666665958333436, 'id1': 0.31108716457138996, 'id5': 0.18341771209693802}
    """
    model_names = ['bow', 'tfidf', 'ngram_tfidf']
    candidate_ids = ['id0', 'id1', "id5"]
    # Every model contributes with the same weight.
    equal_weights = dict.fromkeys(model_names, 1)

    matcher = SemanticMatch(words_dict=words_dict, match_models=model_names)
    prediction = matcher.predict(
        testword,
        candidate_ids,
        match_strategy='score',
        vote_threshold=0.5,
        key_weight=equal_weights
    )
    print('s_match_pre>>>>>', prediction)
    return prediction




if __name__ == '__main__':
    # The query is identical to test_dict["id0"]; run the three demos in order.
    testword = "其实事物发展有自己的潮流和规律"
    for demo in (test_q_match, test_a_match, test_semantic_match):
        demo(testword)

'''
q_match_pre>>>>> {'id0': 0.6666665958333436}
a_match_pre>>>>> {'id0': 0.6666665958333436, 'id1': 0.31108716457138996}
s_match_pre>>>>> {'id0': 0.6666665958333436, 'id1': 0.31108716457138996, 'id5': 0.18341771209693802}
'''

基于关键词辅助

run examples

git clone https://github.com/MachineLP/TextMatch
cd TextMatch
export PYTHONPATH=${PYTHONPATH}:../TextMatch
python tests/core_test/qa_match_kw_test.py
import sys
from textmatch.core.qa_match_kw import QAMatchKW

# Per-id scores handed to post_processing_q / post_processing_a below
# (presumably the output of an earlier similarity step -- confirm).
res_dict = {
    'id0': 0.8,
    'id1': 0.3,
}

# Question keywords per QA id; within one string, '|' separates alternatives.
qkw_dict = {
    'id0': ['神仙水|神仙', '价格|多少钱'],
    'id1': ['海蓝之谜|lammer', '面霜', '功效|功能|作用'],
    'id2': ['快递'],
}

# Answer keywords per QA id, written in the same '|' notation.
akw_dict = {
    'id0': ['799|七百九十九|七九九'],
    'id1': ['补水|祛斑'],
    'id2': ['顺丰'],
}

def test_qkw_match(testword):
    """Run the question-keyword demo for ``testword`` and return the result.

    Builds a QAMatchKW from the module-level keyword dictionaries, passes
    ``testword`` together with ``res_dict`` to ``post_processing_q``, then
    prints and returns whatever that call produces.

    NOTE(review): the keyword argument is named ``akw_path`` but receives
    the in-memory ``akw_dict`` -- confirm against QAMatchKW's signature.
    """
    kw_matcher = QAMatchKW(qkw_dict=qkw_dict, akw_path=akw_dict)
    outcome = kw_matcher.post_processing_q(testword, res_dict)
    print('res>>>>>', outcome)
    return outcome


def test_akw_match(testword):
    """Run the answer-keyword demo for ``testword`` and return the result.

    Builds a QAMatchKW from the module-level keyword dictionaries, passes
    ``testword`` together with ``res_dict`` to ``post_processing_a``, then
    prints and returns whatever that call produces.

    NOTE(review): the keyword argument is named ``akw_path`` but receives
    the in-memory ``akw_dict`` -- confirm against QAMatchKW's signature.
    """
    kw_matcher = QAMatchKW(qkw_dict=qkw_dict, akw_path=akw_dict)
    outcome = kw_matcher.post_processing_a(testword, res_dict)
    print('res>>>>>', outcome)
    return outcome



if __name__ == '__main__':
    # Question-side demo first, then answer-side demo, each with its own input.
    demo_cases = (
        (test_qkw_match, "神仙水多少钱"),
        (test_akw_match, "799"),
    )
    for demo, testword in demo_cases:
        demo(testword)

'''
res>>>>> {'id0': 1.0}
res>>>>> {'id0': [1.0, '神仙水'], 'id1': [0.0, '海蓝之谜|lammer']}
'''