Skip to content

Commit

Permalink
improves progress handling
Browse files: browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Mar 10, 2024
1 parent 21cc4d1 commit e763295
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 7 deletions.
2 changes: 1 addition & 1 deletion ceurws/ceur_ws_web_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def handle_args(self) -> bool:
endpoint = wdsync.dblpEndpoint
print(f"updating dblp cache from SPARQL endpoint {endpoint.sparql.url}")
# Instantiate the progress bar
pbar = tqdm(total=len(wdsync.dbpEndpoint.cache_functions))
pbar = tqdm(total=len(wdsync.dbpEndpoint.dblp_managers))
for _step, (cache_name, dblp_manager) in enumerate(
endpoint.dblp_managers.items(), start=1
):
Expand Down
11 changes: 7 additions & 4 deletions ceurws/dblp.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,9 @@ def load(self, force_query: bool = False):
for volume_number, vol_papers in sorted(self.papers_by_volume.items()):
vol_paper_lod=[dataclasses.asdict(paper) for paper in vol_papers]
cache_name=f"dblp/Vol-{volume_number}/papers"
if self.endpoint.debug:
print(f"caching {cache_name}")
if self.endpoint.progress_bar:
self.endpoint.progress_bar.update(30/3650)
#print(f"caching {cache_name}")
self.endpoint.cache_manager.store(
cache_name,
vol_paper_lod,
Expand Down Expand Up @@ -201,8 +202,8 @@ def load(self, force_query: bool = False):
volume_by_number, _errors = LOD.getLookup(volumes, "volume_number")
for number, volume in sorted(volume_by_number.items()):
cache_name=f"dblp/Vol-{number}/metadata"
if self.endpoint.debug:
print(f"caching {cache_name}")
if self.endpoint.progress_bar:
self.endpoint.progress_bar.update(30/3650)
self.endpoint.cache_manager.store(
cache_name, volume
)
Expand Down Expand Up @@ -273,6 +274,8 @@ def get_lod(self,

if self.debug:
print(f"loaded {len(lod)} records for {cache_name} in {duration:.2f} seconds")
if self.progress_bar:
self.progress_bar.update(duration*100/36)
return lod

def get_ceur_volume_papers(self, volume_number: int) -> List[DblpPaper]:
Expand Down
6 changes: 4 additions & 2 deletions tests/test_dblp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tests.basetest import Basetest
import os
import shutil
from tqdm import tqdm

class TestDblpEndpoint(Basetest):
"""tests DblpEndpoint"""
Expand Down Expand Up @@ -41,8 +42,9 @@ def setUp(self, debug=True, profile=True):
self.endpointUrl = "http://dblp.wikidata.dbis.rwth-aachen.de/api/dblp"
self.dblpEndpoint = DblpEndpoint(self.endpointUrl,debug=self.debug)
# force cache refresh
self.dblpEndpoint.cache_manager.base_dir="/tmp"

self.dblpEndpoint.cache_manager.base_dir="/tmp"
self.dblpEndpoint.progress_bar=tqdm(total=100) # expected 36 secs = 100%

#@unittest.skipIf(Basetest.inPublicCI(), "queries unreliable dblp endpoint")
def test_getWikidataIdByVolumeNumber(self):
"""tests getWikidataIdByVolumeNumber"""
Expand Down

0 comments on commit e763295

Please sign in to comment.