Skip to content

Commit

Permalink
adds urn check digit result to JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Dec 28, 2023
1 parent 8aed1f7 commit 5e9282c
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 4 deletions.
2 changes: 1 addition & 1 deletion ceurws/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.2"
__version__ = "0.3.3"
71 changes: 71 additions & 0 deletions ceurws/urn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
Created on 2023-12-28
@author: wf / ChatGPT-4 as instructed
"""


class URN:
    """
    URN check digit calculator for DNB URN service:
    see https://www.dnb.de/DE/Professionell/Services/URN-Service/urn-service_node.html
    and
    https://d-nb.info/1045320641/34
    http://nbn-resolving.de/nbnpruefziffer.php
    """

    # Code string provided in the original PHP function: two characters per
    # ASCII character, starting at '-' (45 in ASCII). "#" marks characters
    # that have no code assigned.
    _CODE = "3947450102030405060708094117############1814191516212223242542262713282931123233113435363738########43"

    @classmethod
    def check_urn_checksum(cls, urn: str, debug: bool = False) -> bool:
        """
        check whether the last character of the given URN is its valid check digit

        Args:
            urn(str): the complete URN including the trailing check digit
            debug(bool): if True show the internal values while calculating

        Returns:
            bool: True if the calculated check digit matches the last character,
            False if it does not match or the URN is malformed (empty, no
            trailing digit, nothing in front of the check digit or characters
            the check digit calculation can not handle)
        """
        try:
            # urn[-1:] is "" for an empty URN so that int() raises ValueError
            # instead of the slice raising IndexError
            expected_check_digit = int(urn[-1:])
            check_digit = cls.calc_urn_checksum(urn[:-1], debug)
        except ValueError:
            # a malformed URN can never carry a valid check digit
            return False
        return check_digit == expected_check_digit

    @classmethod
    def calc_urn_checksum(cls, test_urn: str, debug: bool = False) -> int:
        """
        converted from PHP and JavaScript code see
        see https://github.com/bohnelang/URN-Pruefziffer

        Args:
            test_urn(str): the URN without its trailing check digit
            debug(bool): if True show the internal values while calculating

        Returns:
            int: the calculated check digit (0-9)

        Raises:
            ValueError: if test_urn is empty, contains a character outside of
                the code table ('-' to '_' after upper casing) or ends with a
                character that has no code assigned
        """
        if not test_urn:
            raise ValueError("can not calculate the check digit of an empty URN")

        code = cls._CODE
        code_count = len(code) // 2

        # Initialization of variables
        _sum = 0
        pos = 1
        v2 = 0

        # Iterating through each character in the URN
        for i, char in enumerate(test_urn.upper()):
            # Getting the ASCII value and adjusting it based on the character '-' (45 in ASCII)
            x = ord(char) - 45
            # a negative x would silently read the code string from its end and
            # a too large x would run past it
            if not 0 <= x < code_count:
                raise ValueError(f"invalid character {char!r} in URN {test_urn!r}")
            # Extracting two consecutive values from the code string;
            # characters without a code ("#") count as 0 as in the PHP original
            v1 = int(code[x * 2]) if code[x * 2] != "#" else 0
            v2 = int(code[x * 2 + 1]) if code[x * 2 + 1] != "#" else 0

            if v1 == 0:
                # single digit code: weight v2 with the current position
                _sum += v2 * pos
                pos += 1
            else:
                # two digit code: each digit is weighted with its own position
                # so pos advances by 2 in this branch
                _sum += pos * v1
                pos += 1
                _sum += v2 * pos
                pos += 1

            if debug:
                print(
                    f"i: {i:2} pos: {pos:2} x: {x:2} v1: {v1:2} v2: {v2:2} sum: {_sum:4}"
                )

        # the sum is divided by the last code digit which therefore must not be 0
        if v2 == 0:
            raise ValueError(
                f"URN {test_urn!r} ends with a character that has no check digit code"
            )
        check_digit = (_sum // v2) % 10  # Using integer division for floor behavior

        return check_digit
17 changes: 15 additions & 2 deletions ceurws/volumeparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from ceurws.textparser import Textparser
from ceurws.utils.webscrape import ScrapeDescription, WebScrape

from ceurws.urn import URN

class VolumeParser(Textparser):
"""
Expand Down Expand Up @@ -125,8 +125,21 @@ def parse_volume(
"""
soup = self.get_volume_soup(number, use_cache=use_cache)
parsed_dict = self.parse_soup(number, soup)
self.check_parsed_dict(parsed_dict)
return parsed_dict, soup


def check_parsed_dict(self, parsed_dict: dict):
    """
    enrich the parsed_dict with validation results

    If the dict has a "urn" entry the check digit calculated from the urn
    (without its last character) is stored as "urn_check_digit" and the
    result of the check digit validation as "urn_ok". A dict without an
    "urn" entry is left untouched.

    Args:
        parsed_dict(dict): the parse result to check - modified in place
    """
    if "urn" not in parsed_dict:
        return
    full_urn = parsed_dict["urn"]
    # everything but the trailing check digit
    parsed_dict["urn_check_digit"] = URN.calc_urn_checksum(full_urn[:-1])
    parsed_dict["urn_ok"] = URN.check_urn_checksum(full_urn)

def parse(self, url: str) -> dict:
"""
parse the given url
Expand Down
51 changes: 51 additions & 0 deletions tests/test_urn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
Created on 2023-12-28
@author: wf
"""
from ceurws.urn import URN
from tests.basetest import Basetest


class TestURN(Basetest):
    """
    Tests for the URN check digit calculation
    """

    def test_urn_check_digits(self):
        """
        check the check digits of a list of known URNs
        """
        debug = self.debug
        verbose = False
        urns = [
            "urn:nbn:de:0183-mbi0003721",
            "urn:nbn:de:0074-1000-9",
            "urn:nbn:de:0074-1001-3",
            "urn:nbn:de:0074-1002-6",
            "urn:nbn:de:0074-1003-0",
            "urn:nbn:de:0074-1004-3",
            "urn:nbn:de:0074-1005-7",
            "urn:nbn:de:0074-1006-1",
            "urn:nbn:de:0074-1007-4",
            "urn:nbn:de:0074-1008-8",
            "urn:nbn:de:0074-1009-5",
            "urn:nbn:de:0074-1010-3",
            "urn:nbn:de:0074-1011-6",
            "urn:nbn:de:0074-1012-0",
            "urn:nbn:de:0074-1013-3",
            "urn:nbn:de:0074-1014-7",
            "urn:nbn:de:0074-1015-0",
            "urn:nbn:de:0074-1016-4",
            "urn:nbn:de:0074-1017-8",
        ]
        # first pass: calculate each check digit and optionally show an overview
        for i, urn in enumerate(urns, start=1):
            urn_prefix, expected = urn[:-1], urn[-1]
            digit = URN.calc_urn_checksum(urn_prefix)
            if not verbose:
                continue
            if expected == str(digit):
                check_mark = "✅"
            else:
                check_mark = "❌"
            print(f"{i:2} {check_mark}:{digit}:{urn_prefix}:{urn}")

        # second pass: every URN must validate against its own check digit
        for urn in urns:
            urn_ok = URN.check_urn_checksum(urn, debug)
            self.assertTrue(urn_ok)
3 changes: 2 additions & 1 deletion tests/test_volumeparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def testVolumeParser(self):
volnumber, use_cache=True
)
if debug:
print(f"Vol-{volnumber}:{scrapedDict}")
scraped_str=json.dumps(scrapedDict,indent=2)
print(f"Vol-{volnumber}:{scraped_str}")

def testIssue41(self):
"""
Expand Down

0 comments on commit 5e9282c

Please sign in to comment.