-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor project structure and update doc
- Loading branch information
1 parent
b3bf0ff
commit 7bb1012
Showing
10 changed files
with
157 additions
and
152 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from enum import Enum | ||
|
||
class DataSources(Enum):
    """Closed set of pricing data sources, identified by a lowercase string."""

    # Each value is the lowercase form of the member name.
    BOTGENUITY = "botgenuity"
    HUGGINGFACE = "huggingface"
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
class LLMModelPricing:
    """Plain record holding the pricing details of one LLM model.

    Attributes:
        model: Name of the model.
        provider: Provider offering the model.
        input_tokens_price: Price per 1M input tokens in USD.
        output_tokens_price: Price per 1M output tokens in USD.
        context: Context for the model.
        source: Source of the pricing information.
        updated: Update marker for the entry, stored exactly as given.
    """

    def __init__(self, model, provider, input_tokens_price,
                 output_tokens_price, context, source, updated):
        """Store every argument unchanged on the instance."""
        self.model, self.provider = model, provider
        # Both prices are expressed per 1M tokens in USD.
        self.input_tokens_price = input_tokens_price
        self.output_tokens_price = output_tokens_price
        self.context, self.source = context, source
        self.updated = updated

    def __str__(self):
        """Return a single-line "Label: value" summary of all fields."""
        labelled_fields = (
            ("Model", self.model),
            ("Provider", self.provider),
            ("Input Price", self.input_tokens_price),
            ("Output Price", self.output_tokens_price),
            ("Context", self.context),
            ("Source", self.source),
            ("Updated", self.updated),
        )
        return ", ".join(f"{label}: {value}" for label, value in labelled_fields)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from llm_pricing_sdk.enums import DataSources | ||
from llm_pricing_sdk.scrapers.botgenuity import BotgenuityScraper | ||
from llm_pricing_sdk.scrapers.huggingface import HuggingfaceScraper | ||
|
||
class LlmPricingScraper:
    """Entry point that routes a scrape request to the matching scraper."""

    @staticmethod
    def scrape(source: DataSources = DataSources.HUGGINGFACE):
        """
        Fetch LLM pricing information from the requested source.

        :param source: The data source to scrape; defaults to
            DataSources.HUGGINGFACE.
        :returns: A list of LLMModelPricing objects, as produced by the
            scraper selected for ``source``.
        :raises Exception: If ``source`` is not one of the handled sources.
        """
        if source == DataSources.BOTGENUITY:
            return BotgenuityScraper.scrape()

        if source == DataSources.HUGGINGFACE:
            return HuggingfaceScraper.scrape()

        # Nothing matched: report the unsupported source to the caller.
        raise Exception(f"Source '{source}' is not supported.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
class BaseScraper:
    """
    Common ancestor for scraper classes.

    Intentionally empty for now; shared methods or validation logic for
    all scrapers can be added here.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import requests | ||
from bs4 import BeautifulSoup | ||
from datetime import datetime | ||
|
||
from llm_pricing_sdk.models import LLMModelPricing | ||
|
||
|
||
class BotgenuityScraper:
    """Scraper for the LLM pricing table on the Botgenuity tools page."""

    @staticmethod
    def scrape():
        """
        Download the Botgenuity pricing page and parse its first table.

        Rows that do not have enough cells to be parsed are skipped.

        :returns: A list of LLMModelPricing objects, one per parsed row.
            Prices are kept as strings with the "$" sign removed, and
            ``source`` is the page URL.
        :raises Exception: If the response status is not 200, or the page
            contains no table.
        :raises ValueError: If a row's date cell does not match the
            "%B %d, %Y" format.
        """
        url = "https://www.botgenuity.com/tools/llm-pricing"
        # Highest cell index read below is 6, so a row needs 7 cells;
        # shorter rows are skipped instead of raising IndexError.
        required_cells = 7

        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, timeout=30)

        if response.status_code != 200:
            raise Exception(f"Failed to retrieve the webpage. Status code: {response.status_code}")

        soup = BeautifulSoup(response.content, "html.parser")
        table = soup.find("table")
        if not table:
            raise Exception("No table found on the page.")

        rows = []
        for tr in table.find_all("tr")[1:]:  # Skip the header row
            cells = tr.find_all("td")
            if len(cells) < required_cells:
                continue

            provider = cells[0].text.strip()
            model = cells[1].text.strip()
            context = cells[2].text.strip()
            input_tokens_price = cells[3].text.strip().replace("$", "")
            output_tokens_price = cells[4].text.strip().replace("$", "")
            # Parsing and re-formatting validates the date and normalizes
            # its spelling (e.g. zero-padded day).
            updated = datetime.strptime(cells[6].text.strip(), "%B %d, %Y").strftime("%B %d, %Y")

            rows.append(
                LLMModelPricing(
                    provider=provider,
                    model=model,
                    context=context,
                    input_tokens_price=input_tokens_price,
                    output_tokens_price=output_tokens_price,
                    source=url,
                    updated=updated,
                )
            )

        return rows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import re | ||
from datetime import datetime | ||
|
||
from llm_pricing_sdk.utils import fetch_ts_file | ||
from llm_pricing_sdk.models import LLMModelPricing | ||
|
||
|
||
class HuggingfaceScraper:
    """Scraper for the data.ts file of the philschmid/llm-pricing Space."""

    @staticmethod
    def scrape():
        """
        Fetch the TypeScript data file and extract per-model pricing.

        The file text is split into provider blocks; a block is used only
        when both a provider name and a uri are found in it.

        :returns: A list of LLMModelPricing objects, one per model entry.
            Prices are floats, ``context`` is an empty string, ``source`` is
            the provider's uri and ``updated`` is today's date as a string.
        """
        url = "https://huggingface.co/spaces/philschmid/llm-pricing/resolve/main/src/lib/data.ts"

        provider_pattern = re.compile(r"provider: '(.*?)',")
        uri_pattern = re.compile(r"uri: '(.*?)',")
        models_pattern = re.compile(r"\{ name: '(.*?)', inputPrice: ([\d.]+), outputPrice: ([\d.]+) \}")

        content = fetch_ts_file(url)

        results = []
        # The split relies on the exact separator text between provider
        # entries in the fetched file.
        for chunk in content.split('},\n {'):
            name_hit = provider_pattern.search(chunk)
            uri_hit = uri_pattern.search(chunk)

            # Skip chunks that lack either the provider name or its uri.
            if not (name_hit and uri_hit):
                continue

            provider_name = name_hit.group(1)
            provider_uri = uri_hit.group(1)

            for model_name, input_price, output_price in models_pattern.findall(chunk):
                results.append(
                    LLMModelPricing(
                        model=model_name,
                        provider=provider_name,
                        input_tokens_price=float(input_price),
                        output_tokens_price=float(output_price),
                        context="",
                        source=provider_uri,
                        updated=str(datetime.now().date()),
                    )
                )

        return results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters