-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #128 from fractalego/indexing-with-large-corpus
Indexing with large corpus
- Loading branch information
Showing
38 changed files
with
344 additions
and
158 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import asyncio | ||
import os | ||
|
||
from unittest import TestCase | ||
from wafl.config import Configuration | ||
from wafl.connectors.remote.remote_entailer_connector import RemoteEntailerConnector | ||
from wafl.connectors.clients.entailer_client import EntailerClient | ||
|
||
_path = os.path.dirname(__file__) | ||
|
||
|
||
class TestConnection(TestCase): | ||
def test__entailer_connector(self): | ||
config = Configuration.load_local_config() | ||
connector = RemoteEntailerConnector(config.get_value("entailer_model")) | ||
prediction = asyncio.run( | ||
connector.predict( | ||
"The first contact is a romance novel set in the middle ages.", | ||
"The first contact is a science fiction novel about the first contact between humans and aliens.", | ||
) | ||
) | ||
assert prediction["score"] < 0.5 | ||
|
||
def test__entailment_client(self): | ||
|
||
config = Configuration.load_local_config() | ||
client = EntailerClient(config) | ||
prediction = asyncio.run( | ||
client.get_entailment_score( | ||
"The first contact is a romance novel set in the middle ages.", | ||
"The first contact is a science fiction novel about the first contact between humans and aliens.", | ||
) | ||
) | ||
assert prediction < 0.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,14 @@ | ||
import os | ||
import textwrap | ||
|
||
from wafl.connectors.factories.llm_connector_factory import LLMConnectorFactory | ||
from wafl.connectors.prompt_template import PromptTemplate | ||
from wafl.interface.conversation import Utterance, Conversation | ||
|
||
_path = os.path.dirname(__file__) | ||
from wafl.connectors.clients.entailer_client import EntailerClient | ||
|
||
|
||
class Entailer: | ||
def __init__(self, config): | ||
self._connector = LLMConnectorFactory.get_connector(config) | ||
self.entailer_client = EntailerClient(config) | ||
self._config = config | ||
|
||
async def left_entails_right(self, lhs: str, rhs: str, dialogue) -> str: | ||
prompt = await self._get_answer_prompt(lhs, rhs, dialogue) | ||
result = await self._connector.generate(prompt) | ||
result = self._clean_result(result) | ||
return result == "yes" | ||
|
||
async def _get_answer_prompt(self, lhs, rhs, dialogue): | ||
return PromptTemplate( | ||
system_prompt="", | ||
conversation=self._get_dialogue_prompt(lhs, rhs, dialogue), | ||
) | ||
|
||
def _clean_result(self, result): | ||
result = result.replace("</task>", "") | ||
result = result.split("\n")[0] | ||
result = result.strip() | ||
return result.lower() | ||
async def left_entails_right(self, lhs: str, rhs: str) -> bool: | ||
prediction = await self.entailer_client.get_entailment_score(lhs, rhs) | ||
return prediction > 0.5 | ||
|
||
def _get_dialogue_prompt(self, dialogue, lhs, rhs): | ||
text = f""" | ||
Your task is to determine whether two sentences are similar. | ||
1) {lhs.lower()} | ||
2) {rhs.lower()} | ||
Please answer "yes" if the two sentences are similar or "no" if not: | ||
""".strip() | ||
return Conversation([Utterance(speaker="user", text=text)]) | ||
async def get_score(self, lhs: str, rhs: str) -> float: | ||
return await self.entailer_client.get_entailment_score(lhs, rhs) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
- version 0.1.3 | ||
* added multi-threaded support for indexing multiple files | ||
* TODO: ADD support for multiple knowledge bases. | ||
It needs to index the rules and the files separately! | ||
* the web interface should show where the facts come from | ||
* add support for wafl studio where you can concatenate actions (and create corresponding yaml files) | ||
* use <> tags for concatenation |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.