Skip to content

Commit

Permalink
rag
Browse files (browse the repository at this point in the history)
  • Loading branch information
vemonet committed Mar 18, 2024
1 parent daf518e commit c517d4e
Showing 1 changed file with 9 additions and 82 deletions.
91 changes: 9 additions & 82 deletions 2024/lab9/Lab9 - RAG over KG.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -154,22 +154,7 @@
"\n",
"SELECT ?uri ?predicate ?label ?type\n",
"WHERE {\n",
" ?uri a ?type ;\n",
" ?predicate ?label .\n",
" FILTER (\n",
" ?type = owl:Class ||\n",
" ?type = owl:DatatypeProperty ||\n",
" ?type = owl:ObjectProperty\n",
" )\n",
" FILTER (\n",
" ?predicate = rdfs:label ||\n",
" ?predicate = skos:prefLabel ||\n",
" ?predicate = skos:altLabel ||\n",
" ?predicate = skos:definition ||\n",
" ?predicate = rdfs:comment ||\n",
" ?predicate = dcterms:description ||\n",
" ?predicate = dc:title\n",
" )\n",
" TODO\n",
"}\"\"\"\n",
"\n",
"class OntologyLoader(BaseLoader):\n",
Expand Down Expand Up @@ -215,31 +200,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-03-17 11:02:48.072\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mfastembed.embedding\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m7\u001b[0m - \u001b[33m\u001b[1mDefaultEmbedding, FlagEmbedding, JinaEmbedding are deprecated. Use TextEmbedding instead.\u001b[0m\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4e0eea67b54c477ea3fd7e5ecd8e7e45",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Fetching 9 files: 0%| | 0/9 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"flag_embeddings = FastEmbedEmbeddings(model_name=\"BAAI/bge-small-en-v1.5\", max_length=512)\n",
"loader = OntologyLoader(\"https://semanticscience.org/ontology/sio.owl\", format=\"xml\")\n",
Expand Down Expand Up @@ -288,10 +251,7 @@
")\n",
"\n",
"# Prompt to reformulate the question using the chat history\n",
"reform_template = \"\"\"Given the following chat history and a follow up question,\n",
"rephrase the follow up question to be a standalone straightforward question, in its original language.\n",
"Do not answer the question! Just rephrase reusing informations from the chat history.\n",
"Make it short and straight to the point.\n",
"reform_template = \"\"\"TODO\n",
"\n",
"Chat History:\n",
"{chat_history}\n",
Expand All @@ -300,8 +260,7 @@
"REFORM_QUESTION_PROMPT = PromptTemplate.from_template(reform_template)\n",
"\n",
"# Prompt to ask to answer the reformulated question\n",
"answer_template = \"\"\"Briefly answer the question based only on the following context,\n",
"do not use any information outside this context:\n",
"answer_template = \"\"\"TODO\n",
"{context}\n",
"\n",
"Question: {question}\n",
Expand Down Expand Up @@ -392,25 +351,9 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"💭 Reformulated question: Can you give me the definition of a protein?\n",
"📚 Documents retrieved:\n",
"· protein (http://semanticscience.org/resource/SIO_010043)\n",
"· A protein is an organic polymer that is composed of one or more linear polymers of amino acids. (http://semanticscience.org/resource/SIO_010043)\n",
"· A protein complex is a molecular complex composed of at least two polypeptide chains. (http://semanticscience.org/resource/SIO_010497)\n",
"· A polypeptide is an organic polymer composed of amino acid residues, typically of less than 50 amino acids in length. (http://semanticscience.org/resource/SIO_010007)\n",
"· amino acid (http://semanticscience.org/resource/SIO_001224)\n",
"\n",
"Answer: A protein is an organic polymer composed of one or more linear polymers of amino acids."
]
}
],
"outputs": [],
"source": [
"# set_debug(True) # Uncomment to enable detailed LangChain debugging\n",
"output = stream_chain(final_chain, memory, {\n",
Expand All @@ -420,25 +363,9 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"💭 Reformulated question: What is the URI for the concept of a protein?\n",
"📚 Documents retrieved:\n",
"· protein (http://semanticscience.org/resource/SIO_010043)\n",
"· A protein complex is a molecular complex composed of at least two polypeptide chains. (http://semanticscience.org/resource/SIO_010497)\n",
"· protein complex (http://semanticscience.org/resource/SIO_010497)\n",
"· A protein-protein association is an association between two proteins. (http://semanticscience.org/resource/SIO_001438)\n",
"· protein-protein association (http://semanticscience.org/resource/SIO_001438)\n",
"\n",
"http://semanticscience.org/resource/SIO_010043"
]
}
],
"outputs": [],
"source": [
"output = stream_chain(final_chain, memory, {\n",
" \"question\": \"What is the URI for this concept?\"\n",
Expand Down

0 comments on commit c517d4e

Please sign in to comment.