Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor network #50

Merged
merged 2 commits into from
Mar 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 36 additions & 23 deletions pyeed/fetchers/entrezrequester.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from http.client import IncompleteRead
import time
from typing import List, Union
from Bio import Entrez, SeqIO
from requests import HTTPError
from urllib.error import HTTPError
from pyeed.fetchers import LOGGER


Expand Down Expand Up @@ -50,7 +52,7 @@ def make_request(self) -> List:

if self._is_multiple:
sequence_results = []
print(f"⬇️ Fetching {len(self.foreign_id)} {self.db} entries for NCBI...")
print(f"⬇️ Fetching {len(self.foreign_id)} {self.db} entries from NCBI...")

for chunk in self.make_chunks(self.foreign_id, self.chunk_size):
sequence_results.extend(
Expand All @@ -62,32 +64,43 @@ def make_request(self) -> List:
else:
return self.fetch(self.foreign_id)

def fetch(
    self, request_string: str, attempts: int = 3, delay: int = 5
) -> List[dict]:
    """
    Fetches data from NCBI using the Entrez.efetch method.

    Transient network failures (HTTPError, IncompleteRead) are retried up
    to ``attempts`` times in total, waiting ``delay`` seconds between
    consecutive attempts.

    Args:
        request_string: The ID string passed to ``Entrez.efetch`` as ``id``.
        attempts: Total number of tries before giving up. Must be >= 1.
        delay: Seconds to sleep between a failed attempt and the next one.

    Returns:
        A list of records parsed by ``SeqIO.parse`` when ``self.rettype``
        is "genbank"; otherwise whatever ``Entrez.read`` returns for the
        response handle.

    Raises:
        ValueError: If ``attempts`` is smaller than 1.
        HTTPError, IncompleteRead: Re-raised from the final attempt once
            all attempts have failed.
    """
    if attempts < 1:
        raise ValueError("attempts must be at least 1")

    LOGGER.debug(f"Fetching {self.db} data from NCBI for {request_string}...")

    # Credentials do not change between attempts, so set them once.
    Entrez.email = self.email
    Entrez.api_key = self.api_key

    for attempt in range(1, attempts + 1):
        try:
            with Entrez.efetch(
                db=self.db,
                id=request_string,
                retmode=self.retmode,
                rettype=self.rettype,
            ) as handle:
                # Parse inside the ``with`` so the handle is still open.
                if self.rettype == "genbank":
                    return list(SeqIO.parse(handle, "genbank"))
                return Entrez.read(handle)
        except (HTTPError, IncompleteRead) as e:
            message = (
                f"Attempt {attempt} of {attempts} failed: "
                f"Error fetching data from NCBI: {e}"
            )
            print(message)
            LOGGER.error(message)
            # On the last attempt surface the original error instead of
            # sleeping pointlessly and silently returning None.
            if attempt == attempts:
                raise
            time.sleep(delay)

@staticmethod
def make_chunks(input_list: list, chunk_size: int = 100) -> List[list]:
Expand Down
Loading