Skip to content

Commit

Permalink
issue #118: generating different alternative Avram versions
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Dec 15, 2021
1 parent ecbb9bb commit 7538e54
Show file tree
Hide file tree
Showing 14 changed files with 89 additions and 85 deletions.
28 changes: 16 additions & 12 deletions common-script
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,20 @@ else
OUTPUT_DIR=${BASE_OUTPUT_DIR}/${NAME}
fi

if [[ "$1" != "help" ]]; then
if [[ "$1" != "help" && "$1" != "export-schema-files" ]]; then
echo "OUTPUT_DIR: ${OUTPUT_DIR}"
fi

if [[ ! -d $PREFIX ]]; then
mkdir -p $PREFIX
fi
if [[ ! -d $PREFIX ]]; then
mkdir -p $PREFIX
fi

# printf "%s %s> Logging to ${PREFIX}.log"
# printf "%s %s> Logging to ${PREFIX}.log"

if [ ! -d ${OUTPUT_DIR} ]; then
mkdir -p ${OUTPUT_DIR}
if [ ! -d ${OUTPUT_DIR} ]; then
mkdir -p ${OUTPUT_DIR}
fi
fi


do_validate() {
GENERAL_PARAMS="--details --trimId --summary --format csv --defaultRecordType BOOKS"
OUTPUT_PARAMS="--outputDir ${OUTPUT_DIR} --detailsFileName issue-details.csv --summaryFileName issue-summary.csv"
Expand Down Expand Up @@ -270,9 +270,13 @@ EOF
}

do_export_schema_files() {
if [[ ! -d marc-schema ]]; then
mkdir marc-schema
fi
printf "%s %s> [avram]\n" $(date +"%F %T")
./export-schema --withSubfieldCodelists > src/main/resources/marc-schema.json
./export-schema --withSubfieldCodelists --solrFieldType human-readable --withSelfDescriptiveCode > src/main/resources/marc-schema-with-solr.json
./export-schema --withSubfieldCodelists > marc-schema/marc-schema.json
./export-schema --withSubfieldCodelists --solrFieldType human-readable --withSelfDescriptiveCode > marc-schema/marc-schema-with-solr.json
./export-schema --withSubfieldCodelists --solrFieldType human-readable --withSelfDescriptiveCode --withLocallyDefinedFields > marc-schema/marc-schema-with-solr-and-extensions.json
}

do_all_analyses() {
Expand Down Expand Up @@ -348,7 +352,7 @@ case "$1" in
marc-history) do_marc_history ;;
record-patterns) do_record_patterns ;;
sqlite) do_sqlite ;;
do_export_schema_files) do_export_schema_files ;;
export-schema-files) do_export_schema_files ;;
all-analyses) do_all_analyses ;;
all-solr) do_all_solr ;;
all) do_all_analyses ; do_all_solr ; do_version_link ;;
Expand Down
2 changes: 1 addition & 1 deletion export-schema
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Calling MappingToJson
. ./common-variables

/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.MappingToJson $@
java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.utils.MappingToJson $@
1 change: 1 addition & 0 deletions marc-schema/marc-schema-with-solr-and-extensions.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions marc-schema/marc-schema-with-solr.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions marc-schema/marc-schema.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class MappingParameters {
private boolean exportSelfDescriptiveCodes = false;
private boolean exportFrbrFunctions = false;
private boolean exportCompilanceLevel = false;
private boolean withLocallyDefinedFields = false;

private SolrFieldType solrFieldType = SolrFieldType.MARC;

Expand All @@ -29,6 +30,7 @@ protected void setOptions() {
"type of Solr fields, could be one of 'marc-tags', 'human-readable', or 'mixed'");
options.addOption("f", "withFrbrFunctions", false, "with FRBR functions");
options.addOption("l", "withCompilanceLevel", false, "with compilance levels (national, minimal)");
options.addOption("d", "withLocallyDefinedFields", false, "with locally defined fields");
options.addOption("h", "help", false, "display help");
isOptionSet = true;
}
Expand All @@ -54,6 +56,9 @@ public MappingParameters(String[] arguments) throws ParseException {

if (cmd.hasOption("withCompilanceLevel"))
exportCompilanceLevel = true;

if (cmd.hasOption("withLocallyDefinedFields"))
withLocallyDefinedFields = true;
}

public Options getOptions() {
Expand Down Expand Up @@ -81,4 +86,8 @@ public boolean doExportFrbrFunctions() {
/**
 * Returns the {@code exportCompilanceLevel} flag.
 *
 * The flag is initialised to {@code false} and is switched to {@code true}
 * when the {@code withCompilanceLevel} command line option is present.
 *
 * @return the current value of {@code exportCompilanceLevel}
 */
public boolean doExportCompilanceLevel() {
return exportCompilanceLevel;
}

/**
 * Returns the {@code withLocallyDefinedFields} flag.
 *
 * The flag is initialised to {@code false} and is switched to {@code true}
 * when the {@code withLocallyDefinedFields} command line option is present.
 *
 * @return the current value of {@code withLocallyDefinedFields}
 */
public boolean isWithLocallyDefinedFields() {
return withLocallyDefinedFields;
}
}
68 changes: 34 additions & 34 deletions src/main/java/de/gwdg/metadataqa/marc/cli/utils/MappingToJson.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import de.gwdg.metadataqa.marc.cli.parameters.MappingParameters;
import de.gwdg.metadataqa.marc.definition.*;
import de.gwdg.metadataqa.marc.definition.controlpositions.ControlfieldPositionList;
import de.gwdg.metadataqa.marc.definition.controlpositions.tag008.Tag008all00;
import de.gwdg.metadataqa.marc.definition.structure.ControlFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.ControlfieldPositionDefinition;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
Expand Down Expand Up @@ -33,11 +32,6 @@ public class MappingToJson {

private static final Logger logger = Logger.getLogger(MappingToJson.class.getCanonicalName());

private static final List<String> nonMarc21TagLibraries = Arrays.asList(
"oclctags", "fennicatags", "dnbtags", "sztetags", "genttags", "nkcrtags",
"holdings"
);

private boolean exportSubfieldCodes = false;
private boolean exportSelfDescriptiveCodes = false;
private Map<String, Object> mapping;
Expand Down Expand Up @@ -77,14 +71,11 @@ public void build() {
fields.put("008", buildControlField(Control008Definition.getInstance(), Control008Positions.getInstance()));

for (Class<? extends DataFieldDefinition> tagClass : MarcTagLister.listTags()) {
if (isNonMarc21Tag(tagClass))
continue;

Method getInstance;
DataFieldDefinition fieldTag;
try {
getInstance = tagClass.getMethod("getInstance");
fieldTag = (DataFieldDefinition) getInstance.invoke(tagClass);
Method getInstance = tagClass.getMethod("getInstance");
DataFieldDefinition fieldTag = (DataFieldDefinition) getInstance.invoke(tagClass);
if (!parameters.isWithLocallyDefinedFields() && !fieldTag.getMarcVersion().equals(MarcVersion.MARC21))
continue;
dataFieldToJson(fields, fieldTag);
} catch (NoSuchMethodException
| IllegalAccessException
Expand Down Expand Up @@ -168,25 +159,14 @@ private PositionalControlFieldKeyGenerator getPositionalControlFieldKeyGenerator
return generator;
}

/**
 * Tells whether the canonical name of the given tag class contains any of
 * the entries listed in {@code nonMarc21TagLibraries}.
 *
 * @param tagClass the data field definition class to inspect
 * @return {@code true} as soon as one entry is found in the class's
 *         canonical name, {@code false} if none matches
 */
private static boolean isNonMarc21Tag(Class<? extends DataFieldDefinition> tagClass) {
  for (String library : nonMarc21TagLibraries) {
    // The first matching entry decides the result; no need to look further.
    if (tagClass.getCanonicalName().contains(library)) {
      return true;
    }
  }
  return false;
}

private static Map<String, Object> controlPositionToJson(ControlfieldPositionDefinition subfield, PositionalControlFieldKeyGenerator generator) {
Map<String, Object> values = new LinkedHashMap<>();
values.put("position", subfield.formatPositon());
values.put("label", subfield.getLabel());
values.put("url", subfield.getDescriptionUrl());
values.put("start", subfield.getPositionStart());
values.put("end", subfield.getPositionEnd());
values.put("repeatableCOntent", subfield.isRepeatableContent());
values.put("repeatableContent", subfield.isRepeatableContent());
if (subfield.isRepeatableContent()) {
values.put("unitLength", subfield.getUnitLength());
}
Expand Down Expand Up @@ -235,6 +215,9 @@ private void dataFieldToJson(Map fields, DataFieldDefinition tag) {
if (parameters.doExportCompilanceLevel())
extractCompilanceLevel(tagMap, tag.getNationalCompilanceLevel(), tag.getMinimalCompilanceLevel());

if (parameters.isWithLocallyDefinedFields())
tagMap.put("version", tag.getMarcVersion().getCode());

tagMap.put("indicator1", indicatorToJson(tag.getInd1()));
tagMap.put("indicator2", indicatorToJson(tag.getInd2()));

Expand All @@ -258,7 +241,23 @@ private void dataFieldToJson(Map fields, DataFieldDefinition tag) {
tagMap.put("historical-subfields", subfields);
}

fields.put(tag.getTag(), tagMap);
if (fields.containsKey(tag.getTag())) {
Object existing = fields.get(tag.getTag());
List<Map> list = null;
if (existing instanceof Map) {
list = new ArrayList<>();
list.add((Map) existing);
} else if (existing instanceof List) {
list = (List) existing;
} else {
System.err.println("a strange object: " + existing.getClass().getCanonicalName());
list = new ArrayList<>();
}
list.add(tagMap);
fields.put(tag.getTag(), list);
} else {
fields.put(tag.getTag(), tagMap);
}
}

private void extractFunctions(Map<String, Object> tagMap, List<FRBRFunction> functions) {
Expand Down Expand Up @@ -286,15 +285,16 @@ private Map<String, Object> subfieldToJson(SubfieldDefinition subfield, DataFiel
meta.put("name", codeList.getName());
meta.put("url", codeList.getUrl());

if (exportSubfieldCodes
&& !codeList.getName().equals("MARC Organization Codes")) {
Map<String, Object> codes = new LinkedHashMap<>();
for (EncodedValue code : subfield.getCodeList().getCodes()) {
Map<String, Object> codeListMap = new LinkedHashMap<>();
codeListMap.put("label", code.getLabel());
codes.put(code.getCode(), codeListMap);
if (exportSubfieldCodes && !codeList.getName().equals("MARC Organization Codes")) {
if (subfield.getCodeList() != null) {
Map<String, Object> codes = new LinkedHashMap<>();
for (EncodedValue code : subfield.getCodeList().getCodes()) {
Map<String, Object> codeListMap = new LinkedHashMap<>();
codeListMap.put("label", code.getLabel());
codes.put(code.getCode(), codeListMap);
}
meta.put("codes", codes);
}
meta.put("codes", codes);
}
codeMap.put("codelist", meta);
}
Expand Down
22 changes: 11 additions & 11 deletions src/main/java/de/gwdg/metadataqa/marc/definition/MarcVersion.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
package de.gwdg.metadataqa.marc.definition;

public enum MarcVersion {
MARC21("MARC21", "MARC21"),
BL("BL", "British Library"),
DNB("DNB", "Deutsche Nationalbibliothek"),
FENNICA("FENNICA", "National Library of Finland"),
GENT("GENT", "Universiteitsbibliotheek Gent"),
NKCR("NKCR", "National Library of the Czech Republic"),
OCLC("OCLC", "OCLC"),
SZTE("SZTE", "Szegedi Tudományegyetem"),
UNIMARC("UNIMARC", "UNIMARC"),
MARC21( "MARC21", "MARC21"),
BL( "BL", "British Library"),
DNB( "DNB", "Deutsche Nationalbibliothek"),
FENNICA( "FENNICA", "National Library of Finland"),
GENT( "GENT", "Universiteitsbibliotheek Gent"),
NKCR( "NKCR", "National Library of the Czech Republic"),
OCLC( "OCLC", "OCLC"),
SZTE( "SZTE", "Szegedi Tudományegyetem"),
UNIMARC( "UNIMARC", "UNIMARC"),
MARC21NO("MARC21NO", "MARC21 profile for Norwegian public libraries"),
UVA("UVA", "University of Amsterdam"),
B3KAT("B3KAT", "B3Kat union catalogue of Bibliotheksverbundes Bayern (BVB) and Kooperativen Bibliotheksverbundes Berlin-Brandenburg (KOBV)"),
UVA( "UVA", "University of Amsterdam"),
B3KAT( "B3KAT", "B3Kat union catalogue of Bibliotheksverbundes Bayern (BVB) and Kooperativen Bibliotheksverbundes Berlin-Brandenburg (KOBV)"),
;

String code;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package de.gwdg.metadataqa.marc.definition;

import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.utils.MarcTagLister;

Expand Down Expand Up @@ -54,7 +55,7 @@ private static void loadAndCacheTag(Class<? extends DataFieldDefinition> definit
if (dataFieldDefinition != null) {
String tag = dataFieldDefinition.getTag();
commonCache.put(tag, dataFieldDefinition);
MarcVersion version = getMarcVersion(definitionClazz);
MarcVersion version = Utils.getVersion(definitionClazz);
versionedCache.computeIfAbsent(tag, s -> new EnumMap<>(MarcVersion.class));
versionedCache.get(tag).put(version, dataFieldDefinition);
}
Expand All @@ -63,31 +64,6 @@ private static void loadAndCacheTag(Class<? extends DataFieldDefinition> definit
}
}

/**
 * Derives the MARC version of a definition class from the package segment
 * found in its canonical name.
 *
 * The segments are tested in a fixed order and the first one that occurs in
 * the name wins; when none occurs the result is {@code MarcVersion.MARC21}.
 *
 * @param definitionClazz the data field definition class to inspect
 * @return the version matching the first recognised package segment,
 *         or {@code MarcVersion.MARC21} if no segment is recognised
 */
private static MarcVersion getMarcVersion(Class<? extends DataFieldDefinition> definitionClazz) {
  final String className = definitionClazz.getCanonicalName();

  if (className.contains(".oclctags.")) {
    return MarcVersion.OCLC;
  }
  if (className.contains(".dnbtags.")) {
    return MarcVersion.DNB;
  }
  if (className.contains(".genttags.")) {
    return MarcVersion.GENT;
  }
  if (className.contains(".sztetags.")) {
    return MarcVersion.SZTE;
  }
  if (className.contains(".fennicatags.")) {
    return MarcVersion.FENNICA;
  }
  if (className.contains(".nkcrtags.")) {
    return MarcVersion.NKCR;
  }
  if (className.contains(".bltags.")) {
    return MarcVersion.BL;
  }
  if (className.contains(".uvatags.")) {
    return MarcVersion.UVA;
  }
  if (className.contains(".b3kattags.")) {
    return MarcVersion.B3KAT;
  }

  // No vendor-specific package segment found.
  return MarcVersion.MARC21;
}

public static DataFieldDefinition load(String tag) {
return load(tag, MarcVersion.MARC21);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
public class FormOfItem extends CodeList {

private void initialize() {
name = "Form of Item";
url = "https://www.oclc.org/bibformats/en/fixedfield/form.html";
codes = Utils.generateCodes(
" ", "None of the following",
"a", "Microfilm",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
public class Frequency extends CodeList {

private void initialize() {
name = "Frequency";
url = "https://www.oclc.org/bibformats/en/fixedfield/freq.html";
codes = Utils.generateCodes(
" ", "No determinable frequency. The item has no determinable frequency. Use when the frequency is known to be intentionally irregular.",
"a", "Annual. The item is issued or updated once a year.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
public class Regularity extends CodeList {

private void initialize() {
name = "Regularity";
url = "http://www.oclc.org/bibformats/en/fixedfield/regl.html";
codes = Utils.generateCodes(
"n", "Normalized irregular",
"r", "Regular",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
public class TypeOfDateOrPublicationStatus extends CodeList {

private void initialize() {
name = "Type of Date/Publication Status";
url = "https://www.oclc.org/bibformats/en/fixedfield/dtst.html";
codes = Utils.generateCodes(
"b", "B.C. date",
"e", "Detailed date",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public abstract class DataFieldDefinition implements BibliographicFieldDefinitio
protected boolean obsolete = false;
private CompilanceLevel nationalCompilanceLevel;
private CompilanceLevel minimalCompilanceLevel;
private MarcVersion marcVersion;

public String getTag() {
return tag;
Expand Down Expand Up @@ -241,13 +242,16 @@ public void setMinimalCompilanceLevel(CompilanceLevel minimalLevel) {
}

public MarcVersion getMarcVersion() {
return Utils.getVersion(this);
if (marcVersion == null)
marcVersion = Utils.getVersion(this);
return marcVersion;
}

/**
 * Returns the {@code obsolete} flag of this field definition.
 *
 * The flag is initialised to {@code false}.
 *
 * @return the current value of {@code obsolete}
 */
public boolean isObsolete() {
return obsolete;
}


@Override
public String toString() {
return "DataFieldDefinition{" +
Expand Down

0 comments on commit 7538e54

Please sign in to comment.