Skip to content

Commit

Permalink
Merge branch 'main' into mb/highlight_hydrator
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBernstorff committed Sep 3, 2023
2 parents ef92e99 + 27f4f00 commit 44b5196
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 31 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@ _build/

# MacOS
.DS_Store
api_keys.py
cache
3 changes: 2 additions & 1 deletion .sample_env
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
OPENAI_API_KEY = ""
OPENAI_API_KEY = ""
HYPOTHESIS_API_KEY = ""
26 changes: 12 additions & 14 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,9 @@ classifiers = [
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
requires-python = ">=3.9"
dependencies = ["pydantic", "langchain", "openai", "python-dotenv"]
dependencies = ["pydantic", "langchain", "openai", "python-dotenv", "pytz"]
[project.license]
file = "LICENSE"
name = "MIT"
Expand All @@ -43,16 +41,16 @@ tests = [
"pytest-sugar>=0.9.4",
"tox>=4.5.0",
]
docs = [
"sphinx>=5.3.0",
"furo>=2022.12.7", # theme
"sphinx-copybutton>=0.5.1",
"sphinxext-opengraph>=0.7.3",
"sphinx_design>=0.3.0",
"sphinx_togglebutton>=0.2.3",
"myst-nb>=0.6.0", # for rendering notebooks
"jupyter>=1.0.0", # for tutorials
]
# docs = [
# "sphinx>=5.3.0",
# "furo>=2022.12.7", # theme
# "sphinx-copybutton>=0.5.1",
# "sphinxext-opengraph>=0.7.3",
# "sphinx_design>=0.3.0",
# "sphinx_togglebutton>=0.2.3",
# "myst-nb>=0.6.0", # for rendering notebooks
# "jupyter>=1.0.0", # for tutorials
# ]


[project.readme]
Expand Down Expand Up @@ -154,7 +152,7 @@ include-package-data = true
[tool.tox]
legacy_tox_ini = """
[tox]
envlist = py{39,310}
envlist = py{39}
[testenv]
description: run unit tests
Expand Down
2 changes: 0 additions & 2 deletions src/gpt2anki/first_test.py

This file was deleted.

6 changes: 3 additions & 3 deletions src/gpt2anki/magi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from langchain.schema.output import LLMResult

import gpt2anki.fileio as fileio
from gpt2anki.sources.hypothesis import Highlight
from gpt2anki.sources.base import HydratedHighlight

load_dotenv()
print(Path(__file__))
Expand All @@ -20,7 +20,7 @@ def initialize_model(model_name: str = "gpt-4") -> ChatOpenAI:
return ChatOpenAI(model=model_name)


def highlight_to_prompt(highlight: Highlight) -> str:
def highlight_to_prompt(highlight: HydratedHighlight) -> str:
return "<target>{target}</target><context>{context}</context>".format(
target=highlight.highlight,
context=highlight.context,
Expand All @@ -37,7 +37,7 @@ def parse_output(output: LLMResult) -> dict[str, str]:

async def prompt_gpt(
model: ChatOpenAI,
highlight: Highlight,
highlight: HydratedHighlight,
) -> dict[str, str]:
prompt = highlight_to_prompt(highlight)
messages = [SystemMessage(content=SYSTEM_PROMPT), HumanMessage(content=prompt)]
Expand Down
15 changes: 12 additions & 3 deletions src/gpt2anki/sources/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,21 @@


@dataclass(frozen=True)
class Highlight:
context: str
class OrphanHighlight:
highlight: str
uri: str
title: str


@dataclass(frozen=True)
class HydratedHighlight:
    """Immutable record pairing a highlight with its context, title and URI."""

    title: str
    highlight: str
    context: str
    uri: str


class HighlightSource(ABC):
@abstractmethod
def get_highlights_since_date(self, date: dt.datetime) -> tuple[Highlight]:
def get_highlights_since_date(self, date: dt.datetime) -> tuple[OrphanHighlight]:
raise NotImplementedError
93 changes: 88 additions & 5 deletions src/gpt2anki/sources/hypothesis.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,94 @@
import datetime as dt
import json
import os
import re

from gpt2anki.sources.base import Highlight, HighlightSource
import pytz
import requests
from dotenv import load_dotenv
from gpt2anki.sources.base import HighlightSource, OrphanHighlight
from pydantic import BaseModel

load_dotenv()


class SearchRequest(BaseModel):
    """Parameters for one search query, with derived string forms.

    The two fields are validated by pydantic; the properties below build the
    strings derived from them.
    """

    # Timestamp handed to the search as its ``search_after`` value.
    search_after: dt.datetime
    # Bare account name, without the "acct:" prefix or any "@domain" suffix.
    username: str

    @property
    def url_encoded_date(self) -> str:
        """Return ``search_after`` as an ISO 8601 string.

        Despite the name, no percent-encoding is applied here; the value is
        exactly what ``datetime.isoformat()`` produces.
        """
        return self.search_after.isoformat()

    @property
    def gmail_address(self) -> str:
        """Return ``<username>@gmail.com``."""
        return f"{self.username}@gmail.com"

    @property
    def hypothesis_user_id(self) -> str:
        """Return the account identifier ``acct:<username>@hypothes.is``.

        Raises:
            ValueError: If ``username`` is not 3-30 characters drawn from
                ASCII letters, digits, ``.`` and ``_``.
        """
        # Raise explicitly instead of asserting: assert statements are removed
        # under ``python -O``, which would silently disable this check.
        # fullmatch anchors both ends, so a username that merely *contains* a
        # valid run (e.g. one with an embedded "@") is rejected as well.
        if re.fullmatch(r"[A-Za-z0-9._]{3,30}", self.username) is None:
            raise ValueError(f"Invalid Hypothesis username: {self.username!r}")
        return f"acct:{self.username}@hypothes.is"


class HypothesisHighlightGetter(HighlightSource):
def __init__(self, username: str):
api_key = os.getenv("HYPOTHESIS_API_KEY")
if api_key is None:
raise ValueError("HYPOTHESIS_API_KEY not found in environment variables")

class HypothesisHighlight(HighlightSource):
def __init__(self, api_key: str):
self.api_key: str = api_key
self.endpoint: str = "https://api.hypothes.is/api/search"
self.username: str = username

def get_highlights_since_date(
    self,
    date: dt.datetime,
) -> tuple[OrphanHighlight, ...]:
    """Query the search endpoint and convert the returned rows to highlights.

    Args:
        date: Timestamp sent as the ``search_after`` query parameter.

    Returns:
        One ``OrphanHighlight`` per row that has the expected structure.
        Rows that do not are skipped and only counted in the printed summary.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer within the timeout.
    """
    request_spec = SearchRequest(search_after=date, username=self.username)

    # NOTE(review): results are requested newest-first ("order": "desc")
    # together with "search_after"; confirm against the Hypothesis API docs
    # that this combination really yields annotations created *after* `date`.
    params = {
        "search_after": request_spec.url_encoded_date,
        "user": request_spec.hypothesis_user_id,
        "sort": "created",
        "order": "desc",
    }
    headers = {"Authorization": f"Bearer {self.api_key}"}

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(
        url=self.endpoint,
        params=params,
        headers=headers,
        timeout=30,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error payload.
    response.raise_for_status()

    # Decode the response bytes as UTF-8 and parse the JSON document.
    response_dict = json.loads(response.content.decode("utf-8"))

    highlights: list[OrphanHighlight] = []
    skipped_rows: list[dict] = []
    for row in response_dict["rows"]:
        try:
            highlights.append(
                OrphanHighlight(
                    # NOTE(review): assumes the selector carrying the quoted
                    # text is the third entry of the first target; confirm.
                    highlight=row["target"][0]["selector"][2]["exact"],
                    uri=row["uri"],
                    title=row["document"]["title"][0],
                ),
            )
        except (KeyError, IndexError, TypeError):
            # Missing keys, too-short lists and non-subscriptable values all
            # mean "row does not have the expected shape"; skip such rows
            # rather than aborting the whole fetch.
            skipped_rows.append(row)

    print(f"n Errors: {len(skipped_rows)}")

    return tuple(highlights)


if __name__ == "__main__":
    # Manual smoke run: fetch the highlights of the last 200 days for one
    # account. The API key comes from the environment (.env file).
    getter = HypothesisHighlightGetter(username="ryqiem")
    lookback = dt.timedelta(days=200)
    since = dt.datetime.now(tz=pytz.UTC) - lookback
    response = getter.get_highlights_since_date(since)

def get_highlights_since_date(self, date: dt.datetime) -> tuple[Highlight]:
raise NotImplementedError
pass
6 changes: 4 additions & 2 deletions src/gpt2anki/test_magi.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

import gpt2anki.magi as magi
from gpt2anki.sources.hypothesis import Highlight
from gpt2anki.sources.base import HydratedHighlight


# create a pytest fixture for the model
Expand All @@ -12,9 +12,11 @@ def model() -> magi.ChatOpenAI:

@pytest.mark.asyncio()
async def test_model_response(model: magi.ChatOpenAI) -> None:
higlight = Highlight(
higlight = HydratedHighlight(
context="Mitochondria is the powerhouse of the cell",
highlight="Mitochondria",
uri="https://en.wikipedia.org/wiki/Mitochondrion",
title="Mitochondrion - Wikipedia",
)
output = await magi.prompt_gpt(model, higlight)
# check that output is a dictionary with keys "answer" and "question"
Expand Down

0 comments on commit 44b5196

Please sign in to comment.