Skip to content

Commit

Permalink
issue #127: Include version specific subfields to the JSON schema rep…
Browse files Browse the repository at this point in the history
…resentation and completeness
  • Loading branch information
pkiraly committed Feb 2, 2022
1 parent e46b319 commit b815dd8
Show file tree
Hide file tree
Showing 232 changed files with 734 additions and 678 deletions.
2 changes: 1 addition & 1 deletion catalogues/kbr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
. ./setdir.sh
NAME=kbr
MARC_DIR=${BASE_INPUT_DIR}/kbr/current
TYPE_PARAMS="--emptyLargeCollectors --marcVersion KBR --marcxml --ignorableFields 590,591,592,593,594,595,596,659,900,911,912,916,940,941,942,944,945,946,948,949,950,951,952,953,954,970,971,972,973,975,977,988,989"
TYPE_PARAMS="--emptyLargeCollectors --marcVersion KBR --marcxml --fixKbr --ignorableFields 590,591,592,593,594,595,596,659,900,911,912,916,940,941,942,944,945,946,948,949,950,951,952,953,954,970,971,972,973,975,977,988,989"
MASK=kbr-*.gz

. ./common-script
Expand Down
3 changes: 1 addition & 2 deletions common-script
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ if [[ "$1" != "help" && "$1" != "export-schema-files" ]]; then
fi
fi


do_validate() {
GENERAL_PARAMS="--details --trimId --summary --format csv --defaultRecordType BOOKS"
OUTPUT_PARAMS="--outputDir ${OUTPUT_DIR} --detailsFileName issue-details.csv --summaryFileName issue-summary.csv"
Expand Down Expand Up @@ -277,7 +276,7 @@ do_export_schema_files() {
./export-schema --withSubfieldCodelists > marc-schema/marc-schema.json
./export-schema --withSubfieldCodelists --solrFieldType human-readable --withSelfDescriptiveCode > marc-schema/marc-schema-with-solr.json
./export-schema --withSubfieldCodelists --solrFieldType human-readable --withSelfDescriptiveCode --withLocallyDefinedFields > marc-schema/marc-schema-with-solr-and-extensions.json
printf "%s %s> files generated at marc-schema\n" $(date +"%F %T")
printf "%s %s> 3 files generated at 'marc-schema' directory: marc-schema.json, marc-schema-with-solr.json, marc-schema-with-solr-and-extensions.json\n" $(date +"%F %T")
}

do_all_analyses() {
Expand Down
2 changes: 1 addition & 1 deletion marc-schema/marc-schema-with-solr-and-extensions.json

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,8 @@ private void saveLibraries003(String fileExtension, char separator) {
}

private void saveMarcElements(String fileExtension, char separator) {
Path path;
System.err.println("MARC elements");
path = Paths.get(parameters.getOutputDir(), "marc-elements" + fileExtension);
Path path = Paths.get(parameters.getOutputDir(), "marc-elements" + fileExtension);
try (var writer = Files.newBufferedWriter(path)) {
writer.write(createRow(
"documenttype", "path", "packageid", "package", "tag", "subfield",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public class CommonParameters implements Serializable {
protected Leader.Type defaultRecordType = null;
protected boolean fixAlephseq = false;
protected boolean fixAlma = false;
protected boolean fixKbr = false;
protected boolean alephseq = false;
protected boolean marcxml = false;
protected boolean lineSeparated = false;
Expand All @@ -54,6 +55,7 @@ protected void setOptions() {
options.addOption("d", "defaultRecordType", true, "the default record type if the record's type is undetectable");
options.addOption("q", "fixAlephseq", false, "fix the known issues of Alephseq format");
options.addOption("X", "fixAlma", false, "fix the known issues of Alma format");
// -R / --fixKbr: flag without an argument; switches on the fixes for known issues of KBR records
// (read back via cmd.hasOption("fixKbr") in the constructor).
options.addOption("R", "fixKbr", false, "fix the known issues of KBR format");
options.addOption("p", "alephseq", false, "the source is in Alephseq format");
options.addOption("x", "marcxml", false, "the source is in MARCXML format");
options.addOption("y", "lineSeparated", false, "the source is in line separated MARC format");
Expand Down Expand Up @@ -107,6 +109,7 @@ public CommonParameters(String[] arguments) throws ParseException {
fixAlephseq = cmd.hasOption("fixAlephseq");

fixAlma = cmd.hasOption("fixAlma");
fixKbr = cmd.hasOption("fixKbr");

setMarcxml(cmd.hasOption("marcxml"));

Expand Down Expand Up @@ -261,10 +264,18 @@ public void setFixAlma(boolean fixAlma) {
this.fixAlma = fixAlma;
}

/**
 * Tells whether the {@code fixKbr} flag is switched on.
 *
 * @return the current value of the {@code fixKbr} field ({@code false} unless it was set)
 */
public boolean fixKbr() {
return this.fixKbr;
}

/**
 * Switches the {@code fixKbr} flag on or off.
 *
 * @param fixKbr the new value of the flag
 */
public void setFixKbr(boolean fixKbr) {
this.fixKbr = fixKbr;
}

public String getReplecementInControlFields() {
if (fixAlephseq())
return "^";
else if (fixAlma())
else if (fixAlma() || fixKbr())
return "#";
else
return null;
Expand Down
17 changes: 17 additions & 0 deletions src/main/java/de/gwdg/metadataqa/marc/cli/utils/MappingToJson.java
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,23 @@ private void dataFieldToJson(Map fields, DataFieldDefinition tag) {
}
tagMap.put("subfields", subfields);

// Only when locally defined fields are requested: export the tag's version-specific
// subfields under the "versionSpecificSubfields" key, grouped first by MARC version code
// and then by subfield code.
if (parameters.isWithLocallyDefinedFields()) {
Map<MarcVersion, List<SubfieldDefinition>> versionSpecificSubfields = tag.getVersionSpecificSubfields();
if (versionSpecificSubfields != null && !versionSpecificSubfields.isEmpty()) {
// LinkedHashMap keeps the versions in the iteration order of the source map
Map<String, Map<String, Object>> versionSpecificSubfieldsMap = new LinkedHashMap<>();
for (Map.Entry<MarcVersion, List<SubfieldDefinition>> entry : versionSpecificSubfields.entrySet()) {
String version = entry.getKey().getCode();
// NOTE: this reassigns the enclosing method's `subfields` variable to a fresh map
// for each version; the map already stored under "subfields" is not modified.
subfields = new LinkedHashMap<>();
for (SubfieldDefinition subfield : entry.getValue()) {
subfields.put(subfield.getCode(), subfieldToJson(subfield, keyGenerator));
}
versionSpecificSubfieldsMap.put(version, subfields);
}
// the key is left out of the tag's JSON entirely when nothing was collected
if (!versionSpecificSubfieldsMap.isEmpty())
tagMap.put("versionSpecificSubfields", versionSpecificSubfieldsMap);
}
}

if (tag.getHistoricalSubfields() != null) {
subfields = new LinkedHashMap<>();
for (EncodedValue code : tag.getHistoricalSubfields()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,10 @@ public void putVersionSpecificSubfields(MarcVersion marcVersion,
if (versionSpecificSubfields == null)
versionSpecificSubfields = new EnumMap<>(MarcVersion.class);

for (SubfieldDefinition subfieldDefinition : subfieldDefinitions)
for (SubfieldDefinition subfieldDefinition : subfieldDefinitions) {
subfieldDefinition.setParent(this);
subfieldDefinition.setMarcVersion(marcVersion);
}

versionSpecificSubfields.put(marcVersion, subfieldDefinitions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public class SubfieldDefinition implements Serializable {
private CompilanceLevel nationalCompilanceLevel;
private CompilanceLevel minimalCompilanceLevel;
private List<MarcVersion> disallowedIn;
private MarcVersion marcVersion = null;

public String getCodeForIndex() {
if (codeForIndex == null) {
Expand All @@ -56,7 +57,14 @@ public String getCodeForIndex() {
default: codeForIndex = "_" + bibframeTag; break;
}
} else {
codeForIndex = "_" + code;
if (code.equals("#"))
codeForIndex = "_hash";
else if (code.equals("*"))
codeForIndex = "_star";
else if (code.equals("@"))
codeForIndex = "_at";
else
codeForIndex = "_" + code;
}
}
return codeForIndex;
Expand Down Expand Up @@ -345,6 +353,14 @@ public boolean isDisallowedIn(MarcVersion marcVersion) {
disallowedIn.contains(marcVersion);
}

/**
 * Returns the MARC version stored on this subfield definition.
 *
 * @return the stored {@link MarcVersion}, or {@code null} if none has been set
 */
public MarcVersion getMarcVersion() {
return this.marcVersion;
}

/**
 * Stores the MARC version on this subfield definition.
 *
 * @param marcVersion the version to store
 */
public void setMarcVersion(MarcVersion marcVersion) {
this.marcVersion = marcVersion;
}

@Override
public String toString() {
return "MarcSubfield{" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ private void initialize() {
.setFrbrFunctions(ManagementIdentify, ManagementProcess);

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ private void initialize() {
));

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,9 @@ private void initialize() {
);

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ private void initialize() {
));

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ private void initialize() {
.setCompilanceLevels("O");

putVersionSpecificSubfields(MarcVersion.KBR, Arrays.asList(
new SubfieldDefinition("*", "Link with identifier", "NR"),
new SubfieldDefinition("@", "Language of field", "NR"),
new SubfieldDefinition("#", "number/occurrence of field", "NR")
new SubfieldDefinition("*", "Link with identifier", "NR").setMqTag("link"),
new SubfieldDefinition("@", "Language of field", "NR").setMqTag("language"),
new SubfieldDefinition("#", "number/occurrence of field", "NR").setMqTag("number")
));
}
}
Loading

0 comments on commit b815dd8

Please sign in to comment.