Skip to content

Commit

Permalink
refactored requester
Browse files: browse the repository at this point in the history
  • Loading branch information
haeussma committed Oct 12, 2024
1 parent e4430d2 commit ac9a9d0
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 293 deletions.
16 changes: 16 additions & 0 deletions pyeed/dbconnect.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@ def add_protein(self, protein_record: Protein):
# Here you can add logic to store protein_record using Neomodel models
pass

def stats(self) -> dict:
    """Count the nodes and relationships stored in the database.

    Two Cypher count queries are sent through ``self.execute_read``, the
    node query first. Each count is read from the first row of its result
    under the alias used in the query.

    Returns:
        dict: ``{"nodes": <node count>, "relationships": <relationship count>}``.
    """
    summary = {}

    # Every node, regardless of label.
    summary["nodes"] = self.execute_read(
        "MATCH (n) RETURN count(n) AS node_count"
    )[0]["node_count"]

    # Every directed relationship, regardless of type.
    summary["relationships"] = self.execute_read(
        "MATCH ()-[r]->() RETURN count(r) AS relationship_count"
    )[0]["relationship_count"]

    return summary

def _initialize_db_constraints(
self,
user: str | None,
Expand Down
5 changes: 1 addition & 4 deletions pyeed/fetch/blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def run(self, program: str, ncbi_db: str) -> io.StringIO:
), f"Invalid database: {ncbi_db}, valid databases: {NCBIDataBase}"

if program == BlastProgram.BLASTP.value:

return NCBIWWW.qblast(
program,
ncbi_db,
Expand All @@ -95,9 +94,8 @@ def run(self, program: str, ncbi_db: str) -> io.StringIO:
matrix_name=self.matrix,
hitlist_size=self.n_hits,
)

elif program == BlastProgram.BLASTN.value:

elif program == BlastProgram.BLASTN.value:
return NCBIWWW.qblast(
program=program,
database=ncbi_db,
Expand All @@ -112,7 +110,6 @@ async def async_run(
program: str,
foreign_executor: Optional[ThreadPoolExecutor] = None,
) -> io.StringIO:

if not foreign_executor:
executor = ThreadPoolExecutor()
else:
Expand Down
17 changes: 7 additions & 10 deletions pyeed/fetch/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@

class PrimaryDBtoPyeed(Generic[T]):
@abstractmethod
async def add(self, data: dict):
def add_to_db(self, data: dict):
pass


class UniprotToPyeed(PrimaryDBtoPyeed[Protein]):
async def add(self, data: dict):
def add_to_db(self, data: dict):
# Organism information
taxonomy_id = data["organism"]["taxonomy"]

organism = Organism(taxonomy_id=taxonomy_id).save()

try:
Expand All @@ -28,12 +27,11 @@ async def add(self, data: dict):
protein = Protein(
accession_id=data["accession"],
sequence=data["sequence"]["sequence"],
mol_weight=float(data["sequence"]["mass"]),
ec_number=ec_number,
name=data["protein"]["recommendedName"]["fullName"]["value"],
)
protein.mol_weight = float(data["sequence"]["mass"])
protein.ec_number = ec_number
protein.name = data["protein"]["recommendedName"]["fullName"]["value"]
protein.seq_length = len(protein.sequence)

protein.save()

protein.organism.connect(organism)
Expand All @@ -45,9 +43,8 @@ def add_sites(self, data: dict, protein: Protein):
ligand_dict = defaultdict(list)
for feature in data["features"]:
if feature["type"] == "BINDING":
if not feature["begin"] == feature["end"]:
continue
ligand_dict[feature["ligand"]["name"]].append(int(feature["begin"]))
for position in range(int(feature["begin"]), int(feature["end"]) + 1):
ligand_dict[feature["ligand"]["name"]].append(position)

for ligand, positions in ligand_dict.items():
site = Site(
Expand Down
25 changes: 0 additions & 25 deletions pyeed/fetch/proteinfetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,6 @@
from pyeed.fetch.pdbmapper import PDBMapper
from pyeed.fetch.requester import AsyncParamRequester, AsyncRequester
from pyeed.fetch.taxonomymapper import TaxonomyMapper
from pyeed.fetch.uniprotmapper import UniprotMapper


class UniprotFetcher:
def __init__(
self,
ids: List[str],
):
self.ids = ids
nest_asyncio.apply()

async def fetch(self, **console_kwargs):
with Progress(console=Console(**console_kwargs)) as progress:
task_id = progress.add_task(
"Requesting sequences from UniProt...", total=len(self.ids)
)
requester = AsyncRequester(
ids=self.ids,
url="https://www.ebi.ac.uk/proteins/api/proteins?format=json&accession=",
task_id=task_id,
progress=progress,
batch_size=1,
rate_limit=5,
n_concurrent=20,
)


class ProteinFetcher:
Expand Down
Loading

0 comments on commit ac9a9d0

Please sign in to comment.