add new source (docsbot.ai)
WilliamJlvt committed Oct 4, 2024
1 parent 5296c52 commit 6fe7721
Showing 6 changed files with 74 additions and 7 deletions.
5 changes: 3 additions & 2 deletions README.md
@@ -2,7 +2,8 @@ from llm_pricing_sdk.enums import DataSources
 
 # LLM Pricing SDK
 LLM Pricing SDK is a Python package designed to scrape and organize pricing information for large language models (LLMs)
-from the following sources:
+from the following sources:
+- https://docsbot.ai/tools/gpt-openai-api-pricing-calculator (best source for now)
 - https://huggingface.co/spaces/philschmid/llm-pricing
 - https://www.botgenuity.com/tools/llm-pricing
 - https://llm-price.com
@@ -56,7 +57,7 @@ You can also chose the source of the data you want to scrape by passing the sour
 ```python
 from llm_pricing_sdk.scrapers import LlmPricingScraper, DataSources
 
-pricing_data = LlmPricingScraper.scrape(DataSources.BOTGENUITY)
+pricing_data = LlmPricingScraper.scrape(DataSources.HUGGINGFACE)
 ```
 
 ### Example Output
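For reference, a minimal usage sketch of the new source added by this commit, mirroring the README example above; attribute names on `LLMModelPricing` are assumed to match the constructor arguments used in `docsbot.py`:

```python
from llm_pricing_sdk.scrapers import LlmPricingScraper, DataSources

# Scrape the DocsBot pricing table (the commit marks it as the preferred source).
pricing_data = LlmPricingScraper.scrape(DataSources.DOCSBOT)

for entry in pricing_data:
    # Assumed attributes, matching the LLMModelPricing(...) call in docsbot.py.
    print(entry.provider, entry.model, entry.input_tokens_price, entry.output_tokens_price)
```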
4 changes: 3 additions & 1 deletion examples/flask/app.py
@@ -10,7 +10,9 @@ def get_pricing():
     source = request.args.get('source', default='huggingface', type=str)
 
     try:
-        if source.lower() == DataSources.BOTGENUITY.value:
+        if source.lower() == DataSources.DOCSBOT.value:
+            results = LlmPricingScraper.scrape(DataSources.DOCSBOT)
+        elif source.lower() == DataSources.BOTGENUITY.value:
             results = LlmPricingScraper.scrape(DataSources.BOTGENUITY)
         elif source.lower() == DataSources.HUGGINGFACE.value:
             results = LlmPricingScraper.scrape(DataSources.HUGGINGFACE)
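As a rough illustration of the new `source` query parameter, a client call might look like the sketch below; the route path `/pricing` and the local port are placeholders, since neither is visible in this hunk:

```python
import requests

# Hypothetical endpoint path; only the 'source' query parameter is shown in the diff.
resp = requests.get("http://localhost:5000/pricing", params={"source": "docsbot"})
print(resp.status_code, resp.text)
```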
1 change: 1 addition & 0 deletions llm_pricing_sdk/enums.py
@@ -1,6 +1,7 @@
 from enum import Enum
 
 class DataSources(Enum):
+    DOCSBOT = "docsbot" # Most likely to be used
     BOTGENUITY = "botgenuity"
     HUGGINGFACE = "huggingface"
     HUHUHANG = "huhuhang"
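Because the Flask handler compares against `DataSources.DOCSBOT.value`, the enum also supports value-based lookup; a small sketch of how the string form maps back to the member:

```python
from llm_pricing_sdk.enums import DataSources

# Standard Enum behavior: construct a member from its string value.
assert DataSources("docsbot") is DataSources.DOCSBOT
assert DataSources.DOCSBOT.value == "docsbot"
```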
5 changes: 4 additions & 1 deletion llm_pricing_sdk/scrapers/__init__.py
@@ -1,5 +1,6 @@
 from llm_pricing_sdk.enums import DataSources
 from llm_pricing_sdk.scrapers.botgenuity import BotgenuityScraper
+from llm_pricing_sdk.scrapers.docsbot import DocsBotScraper
 from llm_pricing_sdk.scrapers.huggingface import HuggingfaceScraper
 from llm_pricing_sdk.scrapers.huhuhang import HuhuhangScraper
 
@@ -11,7 +12,9 @@ def scrape(source: DataSources = DataSources.HUGGINGFACE):
         :returns: A list of LLMModelPricing objects.
         """
-        if source == DataSources.BOTGENUITY:
+        if source == DataSources.DOCSBOT:
+            return DocsBotScraper.scrape()
+        elif source == DataSources.BOTGENUITY:
             return BotgenuityScraper.scrape()
         elif source == DataSources.HUGGINGFACE:
             return HuggingfaceScraper.scrape()
56 changes: 56 additions & 0 deletions llm_pricing_sdk/scrapers/docsbot.py
@@ -0,0 +1,56 @@
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from llm_pricing_sdk.models import LLMModelPricing


class DocsBotScraper:
@staticmethod
def scrape():
url = "https://docsbot.ai/tools/gpt-openai-api-pricing-calculator"
response = requests.get(url)

if response.status_code != 200:
raise Exception(
f"Failed to fetch data from {url}. Status code: {response.status_code}")

soup = BeautifulSoup(response.content, "html.parser")

table = soup.find("table")
if not table:
raise Exception("No table found on the page.")

rows = []
for tr in table.find("tbody").find_all("tr"):
cells = tr.find_all("td")

if len(cells) >= 7:
provider = cells[0].text.strip()

model_td = cells[1]
model_name = model_td.text.strip()
model_name_div = model_td.find("div")
if model_name_div:
# Use the <div> inside the <td> to get the accurate model name
model_name = model_name_div.text.strip()

context = cells[2].text.strip()
input_tokens_price = cells[3].text.strip().replace("$", "")
output_tokens_price = cells[4].text.strip().replace("$", "")
updated = datetime.now().strftime("%Y-%m-%d")

pricing_info = LLMModelPricing(
model=model_name,
provider=provider,
input_tokens_price=float(
input_tokens_price) if input_tokens_price else 0.0,
output_tokens_price=float(
output_tokens_price) if output_tokens_price else 0.0,
context=context,
source=url,
updated=updated
)

rows.append(pricing_info)

return rows
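Since the existing test suite exercises scrapers through `requests_mock`, a similar sketch for the new scraper could look like this; the HTML snippet is hypothetical and simply mirrors the 7-column row layout the parser expects, and attribute access on `LLMModelPricing` is assumed to match its constructor arguments:

```python
import requests_mock  # already used by tests/test_llm_pricing.py

from llm_pricing_sdk.scrapers.docsbot import DocsBotScraper

# Hypothetical page body containing one 7-column pricing row.
HTML = """
<html><body><table><tbody>
  <tr>
    <td>OpenAI</td><td><div>gpt-4o</div></td><td>128K</td>
    <td>$2.50</td><td>$10.00</td><td>-</td><td>-</td>
  </tr>
</tbody></table></body></html>
"""

with requests_mock.Mocker() as m:
    m.get("https://docsbot.ai/tools/gpt-openai-api-pricing-calculator", text=HTML)
    rows = DocsBotScraper.scrape()

print(rows[0].provider, rows[0].model, rows[0].input_tokens_price)  # OpenAI gpt-4o 2.5
```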
10 changes: 7 additions & 3 deletions tests/test_llm_pricing.py
@@ -6,7 +6,11 @@
 class TestLlmPricingScraper(unittest.TestCase):
 
     def test_scrape_returns_at_least_one_result(self):
-        pricing_data = LlmPricingScraper.scrape()
+        pricing_data = LlmPricingScraper.scrape(DataSources.HUGGINGFACE)
         self.assertTrue(len(pricing_data) > 0)
+        pricing_data = LlmPricingScraper.scrape(DataSources.DOCSBOT)
+        self.assertTrue(len(pricing_data) > 0)
+        pricing_data = LlmPricingScraper.scrape(DataSources.HUHUHANG)
+        self.assertTrue(len(pricing_data) > 0)
         pricing_data = LlmPricingScraper.scrape(DataSources.BOTGENUITY)
         self.assertTrue(len(pricing_data) > 0)
@@ -18,8 +22,8 @@ def test_scrape_empty_table(self, mock_request):
             <html><body><table></table></body></html>
         """)
         results = LlmPricingScraper.scrape(DataSources.BOTGENUITY)
-        self.assertEqual(len(results),
-                         0) # No data in table, expect empty list
+        # No data in table, expect empty list
+        self.assertEqual(len(results),0)
 
     @requests_mock.Mocker()
     def test_scrape_raises_error_on_failure(self, mock_request):
