-
Notifications
You must be signed in to change notification settings - Fork 133
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Run `state.py` to exercise the workflow. Known pitfalls: - no caching - some of the state code is poorly structured: it is hard to tell what is being passed around, it lacks good type annotations, etc.
- Loading branch information
Showing
10 changed files
with
413 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
TODO: | ||
|
||
Explain what this is based on and how it works. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
"""Module to house functions for an LLM agent to use.""" | ||
import logging | ||
|
||
import arxiv_articles | ||
import pandas as pd | ||
import summarize_text | ||
|
||
from hamilton import base, driver | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def get_articles(query: str) -> list:
    """Use this function to get academic papers from arXiv to answer user questions.

    Builds a Hamilton driver over the ``arxiv_articles`` module, executes the
    ``arxiv_result_df`` and ``save_arxiv_result_df`` nodes, and returns four
    columns of the resulting frame as plain records. As a side effect, every
    call also asks the driver to display all functions to ``./get_articles``
    in PNG format and logs the value of ``save_arxiv_result_df``.

    :param query: User query in JSON. Responses should be summarized and should include the article URL reference
    :return: List of dictionaries with title, summary, article_url, pdf_url
    """
    dr = driver.Driver({}, arxiv_articles, adapter=base.SimplePythonGraphAdapter(base.DictResult()))
    inputs = {
        "embedding_model_name": "text-embedding-ada-002",
        "max_arxiv_results": 5,
        "article_query": query,
        "max_num_concurrent_requests": 5,
        "data_dir": "./data",
        "library_file_path": "./data/arxiv_library.csv",
    }
    dr.display_all_functions("./get_articles", {"format": "png"})
    result = dr.execute(["arxiv_result_df", "save_arxiv_result_df"], inputs=inputs)
    # Lazy %-formatting: the message is only built if INFO logging is enabled.
    logger.info("Added %s to our DB.", result["save_arxiv_result_df"])
    articles_df = result["arxiv_result_df"]
    # orient="records" yields one dict per row, i.e. a list of dicts, which is
    # why the return annotation is ``list`` rather than ``pd.DataFrame``.
    return articles_df[["title", "summary", "article_url", "pdf_url"]].to_dict(orient="records")
|
||
|
||
def read_article_and_summarize(query: str) -> str:
    """Use this function to read whole papers and provide a summary for users.

    You should NEVER call this function before get_articles has been called in the conversation.

    Runs the ``summarize_text`` node of the ``summarize_text`` module through a
    Hamilton driver and hands back that node's value. Each call also asks the
    driver to display all functions to ``./read_article_and_summarize`` in PNG
    format.

    :param query: Description of the article in plain text based on the user's query.
    :return: Summarized text of the article given the query.
    """
    output_node = "summarize_text"
    graph_adapter = base.SimplePythonGraphAdapter(base.DictResult())
    summarizer = driver.Driver({}, summarize_text, adapter=graph_adapter)
    run_inputs = dict(
        embedding_model_name="text-embedding-ada-002",
        openai_gpt_model="gpt-3.5-turbo-0613",
        user_query=query,
        top_n=1,
        max_token_length=1500,
        library_file_path="./data/arxiv_library.csv",
    )
    summarizer.display_all_functions("./read_article_and_summarize", {"format": "png"})
    # The DictResult builder returns a dict keyed by the requested node names.
    return summarizer.execute([output_node], inputs=run_inputs)[output_node]
|
||
|
||
if __name__ == "__main__":
    # Code to quickly integration test: fetch articles for a query, then
    # summarize the best-matching one. Both results are printed to stdout.
    from hamilton import log_setup

    log_setup.setup_logging(log_level=log_setup.LOG_LEVELS["DEBUG"])
    print(get_articles("ppo reinforcement learning"))
    print(read_article_and_summarize("PPO reinforcement learning sequence generation"))
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed
BIN
-81 KB
examples/LLM_Workflows/knowledge_retrieval/populate_arxiv_library.png
Binary file not shown.
Binary file added
BIN
+129 KB
examples/LLM_Workflows/knowledge_retrieval/read_article_and_summarize.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.