Skip to content

Commit

Permalink
refactor: Add and check type hints.
Browse files Browse the repository at this point in the history
  • Loading branch information
laipz8200 committed Jul 5, 2024
1 parent bd8b884 commit bbb6c51
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 25 deletions.
42 changes: 22 additions & 20 deletions api/core/tools/provider/builtin/firecrawl/firecrawl_appx.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import os
import time
from collections.abc import Mapping
from typing import Any

import requests
from requests.exceptions import HTTPError


class FirecrawlApp:
def __init__(self, api_key=None, base_url=None):
def __init__(self, api_key: str | None = None, base_url: str | None = None):
self.api_key = api_key
self.base_url = base_url or 'https://api.firecrawl.dev'
if not self.api_key:
raise ValueError("API key is required")

def _prepare_headers(self, idempotency_key=None):
def _prepare_headers(self, idempotency_key: str | None = None):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}'
Expand All @@ -21,7 +22,15 @@ def _prepare_headers(self, idempotency_key=None):
headers['Idempotency-Key'] = idempotency_key
return headers

def _request(self, method, url, data=None, headers=None, retries=3, backoff_factor=0.3):
def _request(
self,
method: str,
url: str,
data: Mapping[str, Any] | None = None,
headers: Mapping[str, str] | None = None,
retries: int = 3,
backoff_factor: float = 0.3,
) -> Mapping[str, Any] | None:
for i in range(retries):
try:
response = requests.request(method, url, json=data, headers=headers)
Expand All @@ -34,7 +43,7 @@ def _request(self, method, url, data=None, headers=None, retries=3, backoff_fact
raise
return None

def scrape_url(self, url, **kwargs):
def scrape_url(self, url: str, **kwargs):
endpoint = f'{self.base_url}/v0/scrape'
headers = self._prepare_headers()
data = {'url': url, **kwargs}
Expand All @@ -43,7 +52,7 @@ def scrape_url(self, url, **kwargs):
raise HTTPError("Failed to scrape URL after multiple retries")
return response

def search(self, query, **kwargs):
def search(self, query: str, **kwargs):
endpoint = f'{self.base_url}/v0/search'
headers = self._prepare_headers()
data = {'query': query, **kwargs}
Expand All @@ -52,40 +61,33 @@ def search(self, query, **kwargs):
raise HTTPError("Failed to perform search after multiple retries")
return response

def crawl_url(self, url, wait=False, poll_interval=5, idempotency_key=None, **kwargs):
def crawl_url(
self, url: str, wait: bool = False, poll_interval: int = 5, idempotency_key: str | None = None, **kwargs
):
endpoint = f'{self.base_url}/v0/crawl'
headers = self._prepare_headers(idempotency_key)
data = {'url': url, **kwargs}
response = self._request('POST', endpoint, data, headers)
if response is None:
raise HTTPError("Failed to initiate crawl after multiple retries")
job_id = response['jobId']
job_id: str = response['jobId']
if wait:
return self._monitor_job_status(job_id, headers, poll_interval)
return self._monitor_job_status(job_id=job_id, poll_interval=poll_interval)
return job_id

def check_crawl_status(self, job_id):
def check_crawl_status(self, job_id: str):
endpoint = f'{self.base_url}/v0/crawl/status/{job_id}'
headers = self._prepare_headers()
response = self._request('GET', endpoint, headers=headers)
if response is None:
raise HTTPError(f"Failed to check status for job {job_id} after multiple retries")
return response

def _monitor_job_status(self, job_id, headers, poll_interval):
def _monitor_job_status(self, job_id: str, poll_interval: int):
while True:
status = self.check_crawl_status(job_id)
if status['status'] == 'completed':
return status
elif status['status'] == 'failed':
raise HTTPError(f'Job {job_id} failed: {status["error"]}')
time.sleep(poll_interval)

if __name__ == "__main__":
    # Manual smoke test: scrape one page using the API key from the environment.
    firecrawl = FirecrawlApp(os.getenv('FIRECRAWL_API_KEY'))
    try:
        print(firecrawl.scrape_url('https://example.com'))
    except HTTPError as err:
        print("Error:", err)
8 changes: 3 additions & 5 deletions api/core/tools/provider/builtin/firecrawl/tools/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,10 @@ def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolIn
wait=True
)

if isinstance(crawl_result, dict):
result_message = json.dumps(crawl_result, ensure_ascii=False, indent=4)
else:
result_message = str(crawl_result)
if not isinstance(crawl_result, str):
crawl_result = json.dumps(crawl_result, ensure_ascii=False, indent=4)

if not crawl_result:
return self.create_text_message("Crawl request failed.")

return self.create_text_message(result_message)
return self.create_text_message(crawl_result)

0 comments on commit bbb6c51

Please sign in to comment.