Skip to content

Commit

Permalink
issue #99: implementing B3Kat union catalogue local defined data elem…
Browse files Browse the repository at this point in the history
…ents.
  • Loading branch information
pkiraly committed Aug 27, 2021
1 parent 47258ef commit 09b1f37
Show file tree
Hide file tree
Showing 18 changed files with 582 additions and 19 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@ options:
* `BL`, fields available at the British Library
* `MARC21NO`, fields available at the MARC21 profile for Norwegian public libraries
* `UVA`, fields available at the University of Amsterdam Library
* `B3KAT`, fields available at the B3Kat union catalogue of Bibliotheksverbund
Bayern (BVB) and Kooperativer Bibliotheksverbund Berlin-Brandenburg (KOBV)
* output parameters:
* `-t [directory]`, `--outputDir [directory]` specifies the output
directory where the files will be created
Expand Down
4 changes: 2 additions & 2 deletions scripts/bayern.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
. ./setdir.sh
NAME=bayern
MARC_DIR=${BASE_INPUT_DIR}/bayern/marc
TYPE_PARAMS="--marcxml"
MASK=*.xml
TYPE_PARAMS="--marcxml --marcVersion B3KAT --emptyLargeCollectors"
MASK=b3kat*.xml

. ./common-script

Expand Down
1 change: 1 addition & 0 deletions src/main/java/de/gwdg/metadataqa/marc/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ public static MarcVersion package2version(String packageName) {
case "sztetags": version = MarcVersion.SZTE; break;
case "nkcrtags": version = MarcVersion.NKCR; break;
case "uvatags": version = MarcVersion.UVA; break;
case "b3kattags": version = MarcVersion.B3KAT; break;
default: version = MarcVersion.MARC21; break;
}
return version;
Expand Down
37 changes: 25 additions & 12 deletions src/main/java/de/gwdg/metadataqa/marc/dao/Control005.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,33 +64,46 @@ protected void processContent() {
String cleanContent = content.replaceAll("[\\. ]*$", "").replaceAll("\\.", "");
// logger.warning(String.format("005 ('%s') does not match the expected pattern", content));
if (cleanContent.length() >= 4) {
year = Integer.parseInt(cleanContent.substring(0, 4));
cleanContent = cleanContent.substring(5);
year = extractRaw(cleanContent, 4, "year");
cleanContent = cleanContent.substring(4);
}
if (cleanContent.length() >= 2) {
month = Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(3);
month = extractRaw(cleanContent, 2, "month"); // Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(2);
}
if (cleanContent.length() >= 2) {
day = Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(3);
day = extractRaw(cleanContent, 2, "day"); // Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(2);
}
if (cleanContent.length() >= 2) {
hour = Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(3);
hour = extractRaw(cleanContent, 2, "hour"); // Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(2);
}
if (cleanContent.length() >= 2) {
min = Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(3);
min = extractRaw(cleanContent, 2, "min"); // Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(2);
}
if (cleanContent.length() >= 2) {
sec = Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(3);
sec = extractRaw(cleanContent, 2, "sec"); // Integer.parseInt(cleanContent.substring(0, 2));
cleanContent = cleanContent.substring(2);
}
}
}
}

/**
 * Reads the leading {@code end} characters of the remaining 005 content and
 * interprets them as an integer.
 *
 * @param cleanContent the not yet consumed part of the field content
 * @param end number of leading characters that make up the value
 * @param field name of the date/time part, used only in the error messages
 * @return the parsed number, or {@code null} when the characters are not a
 *         valid integer (the problem is logged and recorded as an
 *         initialization error in that case)
 */
private Integer extractRaw(String cleanContent, int end, String field) {
  final String raw = cleanContent.substring(0, end);
  try {
    return Integer.parseInt(raw);
  } catch (NumberFormatException e) {
    // prefix the log line with the record identifier when a record is attached
    String id = "";
    if (marcRecord != null) {
      id = String.format("#%s) ", marcRecord.getId());
    }
    logger.severe(String.format("%sBad input for %s: %s", id, field, raw));
    initializationErrors.add(createError(String.format("invalid %s: %s", field, raw)));
    return null;
  }
}

@Override
public boolean validate(MarcVersion marcVersion) {
validationErrors = new ArrayList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ public enum MarcVersion {
SZTE("SZTE", "Szegedi Tudományegyetem"),
UNIMARC("UNIMARC", "UNIMARC"),
MARC21NO("MARC21NO", "MARC21 profile for Norwegian public libraries"),
UVA("UVA", "University of Amsterdam")
UVA("UVA", "University of Amsterdam"),
B3KAT("B3KAT", "B3Kat union catalogue of Bibliotheksverbundes Bayern (BVB) and Kooperativen Bibliotheksverbundes Berlin-Brandenburg (KOBV)"),
;

String code;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ private static MarcVersion getMarcVersion(Class<? extends DataFieldDefinition> d
version = MarcVersion.BL;
} else if (definitionClazz.getCanonicalName().contains(".uvatags.")) {
version = MarcVersion.UVA;
} else if (definitionClazz.getCanonicalName().contains(".b3kattags.")) {
version = MarcVersion.B3KAT;
}

return version;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package de.gwdg.metadataqa.marc.definition.general.codelist;

import de.gwdg.metadataqa.marc.utils.EncodedValueFileReader;

/**
 * Code list of the libraries participating in the B3Kat union catalogue.
 * Source of the list: https://www.bib-bvb.de/BibList/b3kat-biblist.html
 */
public class B3KatIdentifiers extends CodeList {

  private static B3KatIdentifiers uniqueInstance;

  private B3KatIdentifiers() {
    initialize();
  }

  /**
   * Returns the shared instance of the code list, creating it on first use.
   *
   * @return the single B3KatIdentifiers instance
   */
  public static B3KatIdentifiers getInstance() {
    if (uniqueInstance == null) {
      uniqueInstance = new B3KatIdentifiers();
    }
    return uniqueInstance;
  }

  /**
   * Sets the descriptive metadata of the list and loads its codes from the
   * bundled CSV file.
   */
  private void initialize() {
    // name and url describe the B3Kat library list itself,
    // not the Library of Congress MARC organization code list
    name = "B3Kat library identifiers (ISIL)";
    url = "https://www.bib-bvb.de/BibList/b3kat-biblist.html";
    codes = EncodedValueFileReader.fileToCodeList("marc/b3kat.isil.csv");
    indexCodes();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public enum TagCategory {
nkcr(19, "nkcrtags", "NKCR", "Locally defined tags of NKCR", false),
bl(20, "bltags", "BL", "Locally defined tags of the British Library", false),
uva(21, "uvatags", "UvA", "Locally defined tags of University of Amsterdam", false),
// the package name has to be the Java package of the tag definitions (…definition.tags.b3kattags)
b3kat(22, "b3kattags", "B3Kat", "Locally defined tags of a German union catalogue B3Kat", false),
other(99, "unknown", "unknown", "unknown origin", false)
;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package de.gwdg.metadataqa.marc.definition.tags.b3kattags;

import de.gwdg.metadataqa.marc.codes.B3KatIdentifierReader;
import de.gwdg.metadataqa.marc.definition.Cardinality;
import de.gwdg.metadataqa.marc.definition.general.codelist.B3KatIdentifiers;
import de.gwdg.metadataqa.marc.definition.general.codelist.OrganizationCodes;
import de.gwdg.metadataqa.marc.definition.general.validator.RegexValidator;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.Indicator;

/**
 * B3Kat locally defined field 049: holdings statement given as ISIL
 * ("Besitznachweis (ISIL)").
 * https://www.bib-bvb.de/web/b3kat/open-data
 */
public class Tag049 extends DataFieldDefinition {

  private static Tag049 uniqueInstance;

  private Tag049() {
    initialize();
    postCreation();
  }

  /**
   * Returns the shared definition of field 049, creating it on first use.
   *
   * @return the single Tag049 instance
   */
  public static Tag049 getInstance() {
    if (uniqueInstance == null) {
      uniqueInstance = new Tag049();
    }
    return uniqueInstance;
  }

  /**
   * Fills in the tag, label, cardinality, indicators and subfields of the field.
   */
  private void initialize() {

    tag = "049";
    label = "Besitznachweis (ISIL)";
    mqTag = "BesitznachweisISIL";
    cardinality = Cardinality.Repeatable;
    descriptionUrl = "https://www.bib-bvb.de/web/b3kat/open-data";
    // setCompilanceLevels("O");

    // both indicators are created without any codes
    ind1 = new Indicator();

    ind2 = new Indicator();

    setSubfieldsWithCardinality(
      "a", "Besitznachweis", "R"
    );

    // $a carries the ISIL; the B3Kat library list
    // (https://www.bib-bvb.de/BibList/b3kat-biblist.html) is attached as its code list
    getSubfield("a")
      .setMqTag("Besitznachweis")
      .setCodeList(B3KatIdentifiers.getInstance());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package de.gwdg.metadataqa.marc.definition.tags.b3kattags;

import de.gwdg.metadataqa.marc.definition.Cardinality;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.Indicator;

/**
 * B3Kat locally defined field 940: regional and local codes (MAB 078)
 * ("Regionale und lokale Kodierungen").
 * https://www.bib-bvb.de/web/b3kat/open-data
 */
public class Tag940 extends DataFieldDefinition {

  private static Tag940 uniqueInstance;

  private Tag940() {
    initialize();
    postCreation();
  }

  /**
   * Hands out the shared definition of field 940, building it on the first call.
   *
   * @return the single Tag940 instance
   */
  public static Tag940 getInstance() {
    if (uniqueInstance != null) {
      return uniqueInstance;
    }
    uniqueInstance = new Tag940();
    return uniqueInstance;
  }

  /**
   * Describes the field: identification, indicators, subfields and the
   * machine readable names of the subfields.
   */
  private void initialize() {
    // identification of the field
    tag = "940";
    mqTag = "RegionaleUndLokaleKodierungen";
    label = "Regionale und lokale Kodierungen (MAB 078)";
    descriptionUrl = "https://www.bib-bvb.de/web/b3kat/open-data";
    cardinality = Cardinality.Nonrepeatable;
    // setCompilanceLevels("O");

    // first indicator: codes 1 and 2 are registered, both with an empty label
    ind1 = new Indicator("").setCodes("1", "", "2", "");
    // second indicator: no codes registered
    ind2 = new Indicator();

    // code, label, cardinality triplets
    setSubfieldsWithCardinality(
      "f", "Selektionskennzeichen Sprachkreis", "NR",
      "n", "Selektionskennzeichen bibliotheksübergreifende Bibliographien und Projekte", "NR",
      "q", "Bibliotheksspezifische Selektionskennzeichen", "NR",
      "r", "ISIL der redigierenden Bibliothek (VD18-Kontext, MAB 088 Unterfeld r)", "NR"
    );

    // machine readable names of the subfields
    getSubfield("f")
      .setMqTag("SelektionskennzeichenSprachkreis");
    getSubfield("n")
      .setMqTag("SelektionskennzeichenBibliotheksübergreifendeBibliographienUndProjekte");
    getSubfield("q")
      .setMqTag("BibliotheksspezifischeSelektionskennzeichen");
    getSubfield("r")
      .setMqTag("ISIL");
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package de.gwdg.metadataqa.marc.definition.tags.b3kattags;

import de.gwdg.metadataqa.marc.definition.Cardinality;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.Indicator;

/**
 * B3Kat locally defined field 941: structured source statement for dependent
 * (not independently published) publications
 * ("Strukturierte Quellenangabe (unselbständige Publikationen)").
 * https://www.bib-bvb.de/web/b3kat/open-data
 */
public class Tag941 extends DataFieldDefinition {

  private static Tag941 uniqueInstance;

  private Tag941() {
    initialize();
    postCreation();
  }

  /**
   * Hands out the shared definition of field 941, building it on the first call.
   *
   * @return the single Tag941 instance
   */
  public static Tag941 getInstance() {
    if (uniqueInstance != null) {
      return uniqueInstance;
    }
    uniqueInstance = new Tag941();
    return uniqueInstance;
  }

  /**
   * Describes the field: identification, indicators and subfields.
   */
  private void initialize() {
    // identification of the field
    tag = "941";
    mqTag = "StrukturierteQuellenangabe";
    label = "Strukturierte Quellenangabe (unselbständige Publikationen)";
    descriptionUrl = "https://www.bib-bvb.de/web/b3kat/open-data";
    cardinality = Cardinality.Nonrepeatable;
    // setCompilanceLevels("O");

    // neither indicator has codes registered
    ind1 = new Indicator();
    ind2 = new Indicator();

    // code, label, cardinality triplets; the labels are intentionally left as
    // empty strings here because no label text is available in this definition
    setSubfieldsWithCardinality(
      "b", "", "NR",
      "h", "", "NR",
      "j", "", "NR",
      "m", "", "NR",
      "t", "", "NR",
      "r", "", "NR",
      "s", "", "NR"
    );
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package de.gwdg.metadataqa.marc.definition.tags.b3kattags;

import de.gwdg.metadataqa.marc.definition.Cardinality;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.Indicator;

/**
 * B3Kat locally defined field 942: analytical DDC (Dewey Decimal Classification)
 * ("DDC (Dewey Decimal Classification) analytisch").
 * https://www.bib-bvb.de/web/b3kat/open-data
 */
public class Tag942 extends DataFieldDefinition {

  private static Tag942 uniqueInstance;

  private Tag942() {
    initialize();
    postCreation();
  }

  /**
   * Hands out the shared definition of field 942, building it on the first call.
   *
   * @return the single Tag942 instance
   */
  public static Tag942 getInstance() {
    if (uniqueInstance != null) {
      return uniqueInstance;
    }
    uniqueInstance = new Tag942();
    return uniqueInstance;
  }

  /**
   * Describes the field: identification, indicators, subfields and the
   * machine readable names of the subfields.
   */
  private void initialize() {
    // identification of the field
    tag = "942";
    mqTag = "DDCAnalytisch";
    label = "DDC (Dewey Decimal Classification) analytisch";
    descriptionUrl = "https://www.bib-bvb.de/web/b3kat/open-data";
    cardinality = Cardinality.Nonrepeatable;
    // setCompilanceLevels("O");

    // both indicators register the single code 1 with an empty label
    ind1 = new Indicator("").setCodes("1", "");
    ind2 = new Indicator("").setCodes("1", "");

    // code, label, cardinality triplets
    setSubfieldsWithCardinality(
      "c", "Grundnotation (Sachaspekt)", "NR",
      "f", "Notation aus Hilfstafel 1 (Zeitaspekt)", "NR",
      "g", "Notation aus Hilfstafel 2 (Geographischer Aspekt)", "NR",
      "e", "Angabe der zugrunde liegenden DDC-Ausgabe", "NR"
    );

    // machine readable names of the subfields
    getSubfield("c")
      .setMqTag("Grundnotation");
    getSubfield("f")
      .setMqTag("NotationAusHilfstafel1Zeitaspekt");
    getSubfield("g")
      .setMqTag("NotationAusHilfstafel2GeographischerAspekt");
    getSubfield("e")
      .setMqTag("AngabeDerZugrundeLiegendenDDCAusgabe");
  }
}
Loading

0 comments on commit 09b1f37

Please sign in to comment.