Skip to content

Commit

Permalink
issue #83: Include control fields in completeness calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Jan 29, 2021
1 parent ede5900 commit 665c19a
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 66 deletions.
18 changes: 11 additions & 7 deletions solr-functions
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ prepare_schema() {

echo "prepare_schema ${LOCAL_CORE}"

HAS_PROPER_SNI=$(curl -is $SCHEMA_URL/dynamicfields/*_sni | grep -c '"type":"string_big"')
HAS_PROPER_SNI=$(curl -is "$SCHEMA_URL/dynamicfields/*_sni" | grep -c '"type":"string_big"')
echo "Does ${LOCAL_CORE} have proper *_sni field definition? Answer: ${HAS_PROPER_SNI}"
if [[ $HAS_PROPER_SNI -eq 0 ]]; then

HAS_FIELD=$(curl -is $SCHEMA_URL/dynamicfields/*_sni | head -1 | grep -c "200 OK")
HAS_FIELD=$(curl -is "$SCHEMA_URL/dynamicfields/*_sni" | head -1 | grep -c "200 OK")
echo "Does ${LOCAL_CORE} have *_sni field definition? Answer: ${HAS_FIELD}"
if [[ $HAS_FIELD -eq 1 ]]; then
echo "Delete *_sni field definition from ${LOCAL_CORE}."
Expand All @@ -49,7 +49,7 @@ prepare_schema() {
}' $SCHEMA_URL
fi

HAS_TYPE=$(curl -is $SCHEMA_URL/fieldtypes/string_big | head -1 | grep -c "200 OK")
HAS_TYPE=$(curl -is "$SCHEMA_URL/fieldtypes/string_big" | head -1 | grep -c "200 OK")
if [[ $HAS_TYPE -eq 1 ]]; then
echo "Delete string_big field type definition from ${LOCAL_CORE}."
curl -X POST -H 'Content-type:application/json' --data-binary '{
Expand Down Expand Up @@ -87,7 +87,7 @@ prepare_schema() {

fi

NUMBER_OF_COPY_FIELD=$(curl -is '$SCHEMA_URL/copyfields?source.fl=*_ss' | grep -c '"source":"\*_ss"')
NUMBER_OF_COPY_FIELD=$(curl -is "$SCHEMA_URL/copyfields?source.fl=*_ss" | grep -c '"source":"\*_ss"')
echo "NUMBER_OF_COPY_FIELD: ${NUMBER_OF_COPY_FIELD}"
if [[ $NUMBER_OF_COPY_FIELD -eq 0 ]]; then
echo "add *_ss copyfield definition to ${LOCAL_CORE}"
Expand All @@ -99,9 +99,13 @@ prepare_schema() {
"dest":"_text_"
}}' $SCHEMA_URL
elif [[ $NUMBER_OF_COPY_FIELD -gt 1 ]]; then
curl -s -X POST -H 'Content-type:application/json' --data-binary '{
"delete-copy-field":{ "source":"*_ss", "dest":"_text_" }
}' $SCHEMA_URL
while [[ $NUMBER_OF_COPY_FIELD -gt 1 ]]; do
echo "delete extra copy fields"
curl -s -X POST -H 'Content-type:application/json' --data-binary '{
"delete-copy-field":{ "source":"*_ss", "dest":"_text_" }
}' $SCHEMA_URL
NUMBER_OF_COPY_FIELD=$(curl -is "$SCHEMA_URL/copyfields?source.fl=*_ss" | grep -c '"source":"\*_ss"')
done
fi
}

Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public Map<String, List<String>> getKeyValuePairs() {
@Override
public Map<String, List<String>> getKeyValuePairs(SolrFieldType type) {
Map<String, List<String>> map = new LinkedHashMap<>();

map.put(
getSolrKey(type, definition.getTag(), definition.getMqTag()),
Arrays.asList(content));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public void setMarcRecord(MarcRecord record) {

protected abstract void processContent();

/**
 * Returns the key-value pairs of this control field for the given Solr field type.
 *
 * <p>Delegates to {@code getKeyValuePairs(String, String, SolrFieldType)}, passing
 * the tag and the mqTag taken from this field's {@code definition}.
 *
 * @param type the Solr field type forwarded unchanged to the three-argument overload
 * @return the map produced by the three-argument overload
 */
@Override
public Map<String, List<String>> getKeyValuePairs(SolrFieldType type) {
return getKeyValuePairs(definition.getTag(), definition.getMqTag(), type);
}
Expand All @@ -49,8 +50,7 @@ public Map<String, List<String>> getKeyValuePairs(String tag,
String mqTag,
SolrFieldType type) {
Map<String, List<String>> map = new LinkedHashMap<>();
PositionalControlFieldKeyGenerator keyGenerator =
new PositionalControlFieldKeyGenerator(tag, mqTag, type);
PositionalControlFieldKeyGenerator keyGenerator = new PositionalControlFieldKeyGenerator(tag, mqTag, type);
if (content != null) {
map.put(keyGenerator.forTag(), Arrays.asList(content));
for (Map.Entry<ControlfieldPositionDefinition, String> entry : valuesMap.entrySet()) {
Expand Down
2 changes: 0 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.marc4j.marc.Record;

import java.io.BufferedWriter;
Expand Down Expand Up @@ -354,7 +353,6 @@ private void saveLibraries(String fileExtension, char separator) {
}
}

@NotNull
private String formatCardinality(char separator,
String marcPath,
int cardinality,
Expand Down
3 changes: 0 additions & 3 deletions src/main/java/de/gwdg/metadataqa/marc/cli/PairGenerator.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package de.gwdg.metadataqa.marc.cli;

import de.gwdg.metadataqa.marc.Utils;
import org.jetbrains.annotations.NotNull;

import java.io.BufferedWriter;
import java.io.IOException;
Expand Down Expand Up @@ -147,7 +146,6 @@ private void processConcepts(String fileName) {
}
}

@NotNull
private Object[] stringToBase36(String[] ids) {
Object[] encoded;
encoded = new String[ids.length];
Expand All @@ -157,7 +155,6 @@ private Object[] stringToBase36(String[] ids) {
return encoded;
}

@NotNull
private Object[] stringToInteger(String[] ids) {
Object[] encoded = new Integer[ids.length];
for (int i = 0; i < ids.length; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.marc4j.marc.Record;

import java.io.BufferedWriter;
Expand Down Expand Up @@ -92,7 +91,6 @@ public void beforeIteration() {
print(createRow(createHeaders()));
}

@NotNull
private List<String> createHeaders() {
List<String> headers = new ArrayList<>();
headers.add("id");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ public String forSubfield(ControlfieldPositionDefinition subfield) {
case MIXED:
if (isLeader)
key = String.format("%s_%s_%s", tag, subfield.formatPositon(), code);
else
key = String.format("%s_%s_%s_%s", tag, subfield.formatPositon(), mqTag, code);
else {
// key = String.format("%s_%s_%s_%s", tag, subfield.formatPositon(), mqTag, code);
key = String.format("%s_%s_%s", subfield.getId(), mqTag, code);
}
break;
case MARC: default:
key = String.format("%s_%s", forTag(), subfield.formatPositon());
Expand Down
43 changes: 21 additions & 22 deletions src/test/java/de/gwdg/metadataqa/marc/MarcFactoryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -493,28 +493,27 @@ public void getKeyValuePairTest() throws IOException, URISyntaxException {
"003_ControlNumberIdentifier, " +
"005_LatestTransactionTime, " +
"007_PhysicalDescription, " +
"007_00_PhysicalDescription_categoryOfMaterial, " +
"007_01_PhysicalDescription_specificMaterialDesignation, " +
"008_GeneralInformation, " +
"008_00-05_GeneralInformation_dateEnteredOnFile, " +
"008_06_GeneralInformation_typeOfDateOrPublicationStatus, " +
"008_07-10_GeneralInformation_date1, " +
"008_11-14_GeneralInformation_date2, " +
"008_15-17_GeneralInformation_placeOfPublicationProductionOrExecution, " +
"008_35-37_GeneralInformation_language, " +
"008_38_GeneralInformation_modifiedRecord, " +
"008_39_GeneralInformation_catalogingSource, " +
"008_18_GeneralInformation_frequency, " +
"008_19_GeneralInformation_regularity, " +
"008_21_GeneralInformation_typeOfContinuingResource, " +
"008_22_GeneralInformation_formOfOriginalItem, " +
"008_23_GeneralInformation_formOfItem, " +
"008_24_GeneralInformation_natureOfEntireWork, " +
"008_25-27_GeneralInformation_natureOfContents, " +
"008_28_GeneralInformation_governmentPublication, " +
"008_29_GeneralInformation_conferencePublication, " +
"008_33_GeneralInformation_originalAlphabetOrScriptOfTitle, " +
"008_34_GeneralInformation_entryConvention, " +
"007text00_PhysicalDescription_categoryOfMaterial, " +
"007text01_PhysicalDescription_specificMaterialDesignation, " +
"008_GeneralInformation, 008all00_GeneralInformation_dateEnteredOnFile, " +
"008all06_GeneralInformation_typeOfDateOrPublicationStatus, " +
"008all07_GeneralInformation_date1, " +
"008all11_GeneralInformation_date2, " +
"008all15_GeneralInformation_placeOfPublicationProductionOrExecution, " +
"008all35_GeneralInformation_language, " +
"008all38_GeneralInformation_modifiedRecord, " +
"008all39_GeneralInformation_catalogingSource, " +
"008continuing18_GeneralInformation_frequency, " +
"008continuing19_GeneralInformation_regularity, " +
"008continuing21_GeneralInformation_typeOfContinuingResource, " +
"008continuing22_GeneralInformation_formOfOriginalItem, " +
"008continuing23_GeneralInformation_formOfItem, " +
"008continuing24_GeneralInformation_natureOfEntireWork, " +
"008continuing25_GeneralInformation_natureOfContents, " +
"008continuing28_GeneralInformation_governmentPublication, " +
"008continuing29_GeneralInformation_conferencePublication, " +
"008continuing33_GeneralInformation_originalAlphabetOrScriptOfTitle, " +
"008continuing34_GeneralInformation_entryConvention, " +
"0162_IdIntifiedByLocal_source, " +
"016ind1_IdIntifiedByLocal_agency, " +
"016a_IdIntifiedByLocal, " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ public void test() {
boolean isValid = record.validate(MarcVersion.MARC21);
assertFalse(isValid);
assertEquals(6, record.getValidationErrors().size());
assertEquals("006/01-04 (tag006book01)", record.getValidationErrors().get(0).getMarcPath());
assertEquals("006/01-04 (006book01)", record.getValidationErrors().get(0).getMarcPath());
assertEquals(ValidationErrorType.CONTROL_POSITION_INVALID_CODE, record.getValidationErrors().get(0).getType());
assertEquals("006/01-04 (tag006book01)", record.getValidationErrors().get(1).getMarcPath());
assertEquals("006/01-04 (006book01)", record.getValidationErrors().get(1).getMarcPath());
assertEquals(ValidationErrorType.CONTROL_POSITION_INVALID_CODE, record.getValidationErrors().get(1).getType());
assertEquals("006/12 (tag006book12)", record.getValidationErrors().get(2).getMarcPath());
assertEquals("006/12 (006book12)", record.getValidationErrors().get(2).getMarcPath());
assertEquals(ValidationErrorType.CONTROL_POSITION_INVALID_VALUE, record.getValidationErrors().get(2).getType());
assertEquals("006/13 (tag006book13)", record.getValidationErrors().get(3).getMarcPath());
assertEquals("006/13 (006book13)", record.getValidationErrors().get(3).getMarcPath());
assertEquals(ValidationErrorType.CONTROL_POSITION_INVALID_VALUE, record.getValidationErrors().get(3).getType());
assertEquals("006/14 (tag006book14)", record.getValidationErrors().get(4).getMarcPath());
assertEquals("006/14 (006book14)", record.getValidationErrors().get(4).getMarcPath());
assertEquals(ValidationErrorType.CONTROL_POSITION_INVALID_VALUE, record.getValidationErrors().get(4).getType());
assertEquals("006/16 (tag006book16)", record.getValidationErrors().get(5).getMarcPath());
assertEquals("006/16 (006book16)", record.getValidationErrors().get(5).getMarcPath());
assertEquals(ValidationErrorType.CONTROL_POSITION_INVALID_VALUE, record.getValidationErrors().get(5).getType());

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,23 @@ public void testMixed() {
assertNotNull(generator.forTag());
assertEquals("008_GeneralInformation", generator.forTag());

assertEquals("008_00-05_GeneralInformation_dateEnteredOnFile", generator.forSubfield(Tag008all00.getInstance()));
assertEquals("008_06_GeneralInformation_typeOfDateOrPublicationStatus", generator.forSubfield(Tag008all06.getInstance()));
assertEquals("008_07-10_GeneralInformation_date1", generator.forSubfield(Tag008all07.getInstance()));
assertEquals("008_11-14_GeneralInformation_date2", generator.forSubfield(Tag008all11.getInstance()));
assertEquals("008_15-17_GeneralInformation_placeOfPublicationProductionOrExecution", generator.forSubfield(Tag008all15.getInstance()));
assertEquals("008_35-37_GeneralInformation_language", generator.forSubfield(Tag008all35.getInstance()));
assertEquals("008_38_GeneralInformation_modifiedRecord", generator.forSubfield(Tag008all38.getInstance()));
assertEquals("008_39_GeneralInformation_catalogingSource", generator.forSubfield(Tag008all39.getInstance()));
assertEquals("008_18-21_GeneralInformation_illustrations", generator.forSubfield(Tag008book18.getInstance()));
assertEquals("008_22_GeneralInformation_targetAudience", generator.forSubfield(Tag008book22.getInstance()));
assertEquals("008_23_GeneralInformation_formOfItem", generator.forSubfield(Tag008book23.getInstance()));
assertEquals("008_24-27_GeneralInformation_natureOfContents", generator.forSubfield(Tag008book24.getInstance()));
assertEquals("008_28_GeneralInformation_governmentPublication", generator.forSubfield(Tag008book28.getInstance()));
assertEquals("008_29_GeneralInformation_conferencePublication", generator.forSubfield(Tag008book29.getInstance()));
assertEquals("008_30_GeneralInformation_festschrift", generator.forSubfield(Tag008book30.getInstance()));
assertEquals("008_31_GeneralInformation_index", generator.forSubfield(Tag008book31.getInstance()));
assertEquals("008_33_GeneralInformation_literaryForm", generator.forSubfield(Tag008book33.getInstance()));
assertEquals("008_34_GeneralInformation_biography", generator.forSubfield(Tag008book34.getInstance()));
assertEquals("008all00_GeneralInformation_dateEnteredOnFile", generator.forSubfield(Tag008all00.getInstance()));
assertEquals("008all06_GeneralInformation_typeOfDateOrPublicationStatus", generator.forSubfield(Tag008all06.getInstance()));
assertEquals("008all07_GeneralInformation_date1", generator.forSubfield(Tag008all07.getInstance()));
assertEquals("008all11_GeneralInformation_date2", generator.forSubfield(Tag008all11.getInstance()));
assertEquals("008all15_GeneralInformation_placeOfPublicationProductionOrExecution", generator.forSubfield(Tag008all15.getInstance()));
assertEquals("008all35_GeneralInformation_language", generator.forSubfield(Tag008all35.getInstance()));
assertEquals("008all38_GeneralInformation_modifiedRecord", generator.forSubfield(Tag008all38.getInstance()));
assertEquals("008all39_GeneralInformation_catalogingSource", generator.forSubfield(Tag008all39.getInstance()));
assertEquals("008book18_GeneralInformation_illustrations", generator.forSubfield(Tag008book18.getInstance()));
assertEquals("008book22_GeneralInformation_targetAudience", generator.forSubfield(Tag008book22.getInstance()));
assertEquals("008book23_GeneralInformation_formOfItem", generator.forSubfield(Tag008book23.getInstance()));
assertEquals("008book24_GeneralInformation_natureOfContents", generator.forSubfield(Tag008book24.getInstance()));
assertEquals("008book28_GeneralInformation_governmentPublication", generator.forSubfield(Tag008book28.getInstance()));
assertEquals("008book29_GeneralInformation_conferencePublication", generator.forSubfield(Tag008book29.getInstance()));
assertEquals("008book30_GeneralInformation_festschrift", generator.forSubfield(Tag008book30.getInstance()));
assertEquals("008book31_GeneralInformation_index", generator.forSubfield(Tag008book31.getInstance()));
assertEquals("008book33_GeneralInformation_literaryForm", generator.forSubfield(Tag008book33.getInstance()));
assertEquals("008book34_GeneralInformation_biography", generator.forSubfield(Tag008book34.getInstance()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import net.minidev.json.parser.JSONParser;
import net.minidev.json.parser.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.junit.Test;

import java.io.*;
Expand Down Expand Up @@ -183,7 +182,6 @@ private boolean directoryContains(Map<String, List<PicaTagDefinition>> schemaDir
return false;
}

@NotNull
private Map<String, List<PicaTagDefinition>> readSchema(JSONParser parser, String fileName) throws IOException, URISyntaxException, ParseException {
Map<String, List<PicaTagDefinition>> map = new HashMap<>();

Expand Down

0 comments on commit 665c19a

Please sign in to comment.