Skip to content

Commit

Permalink
Fix schema access to title (#14)
Browse files Browse the repository at this point in the history
Signed-off-by: Aivin V. Solatorio <avsolatorio@gmail.com>
  • Loading branch information
avsolatorio authored Jun 16, 2023
1 parent 8f088e4 commit 2641e6e
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
2 changes: 1 addition & 1 deletion llm4data/schema/schema2info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

def get_doc_title(metadata: dict) -> str:
"""Get the title of the document from the metadata document_description."""
- return metadata["title_statement"]["title"]
+ return metadata["document_description"]["title_statement"]["title"]


def get_doc_authors(metadata: dict) -> list:
Expand Down
3 changes: 2 additions & 1 deletion llm4data/scripts/indexing/docs/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from llm4data.embeddings.docs import get_docs_embeddings
from llm4data import index
from llm4data import configs
+ from llm4data.schema.schema2info import get_doc_title

# Get the docs embeddings
docs_embeddings = get_docs_embeddings()
Expand Down Expand Up @@ -39,7 +40,7 @@ def add_pdf_document(path: Union[str, Path], metadata: Optional[dict] = None):
if len(documents):
# Index the title of the document
documents.append(
- Document(page_content=metadata["title"], metadata=documents[0].metadata)
+ Document(page_content=get_doc_title(metadata), metadata=documents[0].metadata)
)

for doc in documents:
Expand Down

0 comments on commit 2641e6e

Please sign in to comment.