Skip to content

Commit

Permalink
improves indexparser
Browse files (browse the repository at this point in the history)
  • Loading branch information
WolfgangFahl committed Dec 27, 2023
1 parent b7d8f67 commit 09c1a13
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 9 deletions.
4 changes: 2 additions & 2 deletions ceurws/indexparser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,12 +68,12 @@ def findVolume(self, startLine: int, expectedTr: int = 3) -> int:
Returns:
endLine of the volume html or None
"""
trStartLine = self.find(startLine, "<tr><th")
trStartLine = self.find(startLine, "\s*<tr><th")
if trStartLine is not None:
lineNo = trStartLine + 1
trCount = 1
while lineNo < len(self.lines):
trLine = self.find(lineNo, "<tr>")
trLine = self.find(lineNo, "\s*<tr>")
if trLine is None:
break
else:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_indexhtml_scrape.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,14 +64,14 @@ def testReadingHtml(self):
test reading the HTML file
"""
debug = self.debug
# debug=True
#debug=True
if debug:
logging.basicConfig(level=logging.DEBUG)
vm = VolumeManager()
htmlText = vm.getIndexHtml(force=False)
indexParser = IndexHtmlParser(htmlText, debug=debug)
lineCount = len(indexParser.lines)
self.assertTrue(lineCount > 89500)
self.assertTrue(lineCount > 99000)
if debug or self.inPublicCI():
print(f"{lineCount} lines found in CEUR-WS index.html")
# limit=10
Expand All @@ -86,7 +86,7 @@ def testVolumesAsCsv(self):
vm = VolumeManager()
vm.load()
volumes = vm.getList()
self.volumesAsCsv(volumes, 3185, 3186)
self.volumesAsCsv(volumes, 3248,3249)

def testReadVolumePages(self):
"""
Expand Down
9 changes: 5 additions & 4 deletions tests/test_volumeparser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,11 +40,11 @@ def testVolumeParser(self):
# title >=559
# acronym > = 901
dolimit = self.inPublicCI()
# dolimit = True
dolimit = True
debug = True
if dolimit:
start = 745
limit = 746
start = 3249
limit = 3250
else:
start = 1
limit = len(self.volumeList) + 1
Expand Down Expand Up @@ -89,7 +89,8 @@ def test_issue30(self):
scrapedDict, _soup = self.volumeParser.parse_volume(volumeWithKnownIssue)
self.assertEqual("SWAT4LS 2008", scrapedDict.get("acronym"))
self.assertEqual("http://www.swat4ls.org/", scrapedDict.get("homepage"))
print(scrapedDict)
if self.debug:
print(scrapedDict)

def test_parseEditors(self):
"""
Expand Down

0 comments on commit 09c1a13

Please sign in to comment.