Skip to content

Commit

Permalink
MBS-12665: Widen DNB regex
Browse files Browse the repository at this point in the history
We took the previous regex from Wikidata, but the valid example
added as a test here shows that it was too strict.

phw/outsidecontext got in touch with DNB and received an answer
that was not very useful, but his proposal based on it was to
just allow digits plus X and hyphen. It seems that the length
should be either 9 or 10 characters. Further assumptions
seem a bit dangerous, so I'm not making any.
  • Loading branch information
reosarevok committed Feb 9, 2023
1 parent d08dcff commit 271b14c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
10 changes: 5 additions & 5 deletions root/static/scripts/edit/URLCleanup.js
Original file line number Diff line number Diff line change
Expand Up @@ -1963,8 +1963,8 @@ const CLEANUPS: CleanupEntries = {
restrict: [LINK_TYPES.otherdatabases],
clean: function (url) {
url = url.replace(/^(?:https?:\/\/)?(?:[^\/]+\.)?d-nb\.info\//, 'http://d-nb.info/');
url = url.replace(/^(?:https?:\/\/)?(?:[^\/]+\.)?dnb\.de\/opac(?:\.htm\?)?.*\bquery=nid%3D(1[012]?\d{7}[0-9X]|[47]\d{6}-\d|[1-9]\d{0,7}-[0-9X]|3\d{7}[0-9X]).*$/, 'http://d-nb.info/gnd/$1');
url = url.replace(/^(?:https?:\/\/)?(?:[^\/]+\.)?dnb\.de\/opac(?:\.htm\?)?.*\bquery=idn%3D(1[012]?\d{7}[0-9X]|[47]\d{6}-\d|[1-9]\d{0,7}-[0-9X]|3\d{7}[0-9X]).*$/, 'http://d-nb.info/$1');
url = url.replace(/^(?:https?:\/\/)?(?:[^\/]+\.)?dnb\.de\/opac(?:\.htm\?)?.*\bquery=nid%3D([0-9X-]{9,10}).*$/, 'http://d-nb.info/gnd/$1');
url = url.replace(/^(?:https?:\/\/)?(?:[^\/]+\.)?dnb\.de\/opac(?:\.htm\?)?.*\bquery=idn%3D([0-9X-]{9,10}).*$/, 'http://d-nb.info/$1');
return url;
},
validate: function (url, id) {
Expand All @@ -1974,17 +1974,17 @@ const CLEANUPS: CleanupEntries = {
case LINK_TYPES.otherdatabases.series:
case LINK_TYPES.otherdatabases.work:
return {
result: /^http:\/\/d-nb\.info\/(?:gnd\/)?(?:1[012]?\d{7}[0-9X]|[47]\d{6}-\d|[1-9]\d{0,7}-[0-9X]|3\d{7}[0-9X])$/.test(url),
result: /^http:\/\/d-nb\.info\/(?:gnd\/)?[0-9X-]{9,10}$/.test(url),
target: ERROR_TARGETS.ENTITY,
};
case LINK_TYPES.otherdatabases.label:
return {
result: /^http:\/\/d-nb\.info\/(?:(?:dnbn|gnd)\/)?(?:1[012]?\d{7}[0-9X]|[47]\d{6}-\d|[1-9]\d{0,7}-[0-9X]|3\d{7}[0-9X])$/.test(url),
result: /^http:\/\/d-nb\.info\/(?:(?:dnbn|gnd)\/)?[0-9X-]{9,10}$/.test(url),
target: ERROR_TARGETS.ENTITY,
};
case LINK_TYPES.otherdatabases.release:
return {
result: /^http:\/\/d-nb\.info\/(?:1[012]?\d{7}[0-9X]|[47]\d{6}-\d|[1-9]\d{0,7}-[0-9X]|3\d{7}[0-9X])$/.test(url),
result: /^http:\/\/d-nb\.info\/[0-9X-]{9,10}$/.test(url),
target: ERROR_TARGETS.ENTITY,
};
}
Expand Down
7 changes: 7 additions & 0 deletions root/static/scripts/tests/Control/URLCleanup.js
Original file line number Diff line number Diff line change
Expand Up @@ -1849,6 +1849,13 @@ limited_link_type_combinations: [
expected_clean_url: 'http://d-nb.info/1181136512',
only_valid_entity_types: ['artist', 'label', 'place', 'release', 'series', 'work'],
},
{
input_url: 'http://d-nb.info/97248485X',
input_entity_type: 'release',
expected_relationship_type: 'otherdatabases',
expected_clean_url: 'http://d-nb.info/97248485X',
only_valid_entity_types: ['artist', 'label', 'place', 'release', 'series', 'work'],
},
// Dogmazic
{
input_url: 'https://play.dogmazic.net/artists.php?action=show_all_songs&artist=2283',
Expand Down

0 comments on commit 271b14c

Please sign in to comment.