-
Notifications
You must be signed in to change notification settings - Fork 17
/
validate
executable file
·135 lines (120 loc) · 7.49 KB
/
validate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Calling Validator
#
# Load shared settings from ./common-variables (resolved against the current
# working directory). NOTE(review): JAR, which is used for the Java classpath
# further down, is not assigned anywhere in this script, so it is presumably
# defined in that file - confirm.
. ./common-variables
# Script name without its directory part; shown in the usage text and passed
# to getopt as the program name for its error messages. "$0" is quoted (and
# preceded by --) so that a path containing spaces or starting with a dash is
# handed to basename as one operand instead of being split into name + suffix.
ME=$(basename -- "$0")
#######################################
# Print the command-line help of this script and terminate.
# Globals:   ME (read) - script name shown in the usage line
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   never returns; always ends the script with exit status 1
#######################################
show_usage() { # display help message
  # Unquoted heredoc delimiter: ${ME} is expanded, everything else is literal.
  cat <<EOF
QA catalogue validation
usage:
${ME} [options] <files>
options:
-m, --marcVersion <arg> MARC version ('OCLC' or 'DNB')
-h, --help display help
-n, --nolog do not display log messages
-l, --limit <arg> limit the number of records to process
-o, --offset <arg> the first record to process
-i, --id <arg> the MARC identifier (content of 001)
-d, --defaultRecordType <arg> the default record type if the record's type is undetectable
-q, --fixAlephseq fix the known issues of Alephseq format
-a, --fixAlma fix the known issues of Alma format
-b, --fixKbr fix the known issues of KBR format
-p, --alephseq the source is in Alephseq format
-x, --marcxml the source is in MARCXML format
-y, --lineSeparated the source is in line separated MARC format
-t, --outputDir <arg> output directory
-r, --trimId remove spaces from the end of record IDs
-z, --ignorableFields <arg> ignore fields from the analysis
-v, --ignorableRecords <arg> ignore records from the analysis
-f, --marcFormat <arg> MARC format (like 'ISO' or 'MARCXML')
-s, --dataSource <arg> data source (file or stream)
-g, --defaultEncoding <arg> default character encoding
-1, --alephseqLineType <arg> Alephseq line type
-2, --picaIdField <arg> PICA id field
-u, --picaSubfieldSeparator <arg> PICA subfield separator
-j, --picaSchemaFile <arg> Avram PICA schema file
-w, --schemaType <arg> metadata schema type ('MARC21', 'UNIMARC', or 'PICA')
-k, --picaRecordType <arg> picaRecordType
-c, --allowableRecords <arg> allow records for the analysis
-e, --groupBy <arg> group the results by the value of this data element (e.g. the ILN of library)
-3, --groupListFile <arg> the file which contains a list of ILN codes
-4, --solrForScoresUrl <arg> the URL of the Solr server used to store scores
-G, --summaryFileName <arg> the summary file name (provides a summary of issues, such as the number of instance and number of records having the particular issue)
-S, --summary show summary instead of record level display
-H, --details show record level display
-F, --detailsFileName <arg> the report file name (default is 'issue-details.csv')
-R, --format <arg> specify a format
-W, --emptyLargeCollectors empty large collectors
-T, --collectAllErrors collect all errors (useful only for validating small number of records)
-I, --ignorableIssueTypes <arg> comma separated list of issue types not to collect
more info: https://github.com/pkiraly/qa-catalogue#validating-marc-records
EOF
  # Status 1 is kept for both the "no arguments" and the "--help" call sites.
  exit 1
}
# Invoked without any argument: there is nothing to validate, so print the
# help text instead (show_usage ends the script itself).
[ $# -ne 0 ] || show_usage
# Option tables for getopt(1). A trailing ':' marks an option that takes a
# value; options without it are plain flags.
SHORT_OPTIONS="m:hnl:o:i:d:qabpxyt:rz:v:f:s:g:1:2:u:j:w:k:c:e:3:4:G:SHF:R:WTI:"
LONG_OPTIONS="marcVersion:,help,nolog,limit:,offset:,id:,defaultRecordType:,fixAlephseq,fixAlma,fixKbr,alephseq,marcxml,lineSeparated,outputDir:,trimId,ignorableFields:,ignorableRecords:,marcFormat:,dataSource:,defaultEncoding:,alephseqLineType:,picaIdField:,picaSubfieldSeparator:,picaSchemaFile:,schemaType:,picaRecordType:,allowableRecords:,groupBy:,groupListFile:,solrForScoresUrl:,summaryFileName:,summary,details,detailsFileName:,format:,emptyLargeCollectors,collectAllErrors,ignorableIssueTypes:"
echo "@: $*"
# Let getopt reorder and shell-quote the command line. Its exit status is
# checked: on an unknown option or a missing option value getopt has already
# printed a message (prefixed with the -n name) on stderr, and continuing
# would silently drop the offending option.
if ! GETOPT=$(getopt \
  -o "${SHORT_OPTIONS}" \
  --long "${LONG_OPTIONS}" \
  -n "${ME}" -- "$@"); then
  echo "Try '${ME} --help' for more information." >&2
  exit 1
fi
# getopt emits a shell-quoted word list; eval is what turns it back into the
# positional parameters (options first, then '--', then the file arguments).
eval set -- "${GETOPT}"
echo "GETOPT: ${GETOPT}"
# Translate the normalised options into the argument list of the Java CLI.
# Every short/long pair is mapped to its long form. The arguments are kept in
# an array (not a flat string) so that a value containing whitespace or glob
# characters - e.g. an output directory with a space in its name - reaches
# the JVM as one argument.
PARAMS=()
HELP=0
while true ; do
  echo "process $1"
  case "$1" in
    -m|--marcVersion)           PARAMS+=(--marcVersion "$2") ; shift 2 ;;
    -h|--help)                  PARAMS+=(--help) ; HELP=1 ; shift ;;
    -n|--nolog)                 PARAMS+=(--nolog) ; shift ;;
    -l|--limit)                 PARAMS+=(--limit "$2") ; shift 2 ;;
    -o|--offset)                PARAMS+=(--offset "$2") ; shift 2 ;;
    -i|--id)                    PARAMS+=(--id "$2") ; shift 2 ;;
    -d|--defaultRecordType)     PARAMS+=(--defaultRecordType "$2") ; shift 2 ;;
    -q|--fixAlephseq)           PARAMS+=(--fixAlephseq) ; shift ;;
    -a|--fixAlma)               PARAMS+=(--fixAlma) ; shift ;;
    -b|--fixKbr)                PARAMS+=(--fixKbr) ; shift ;;
    -p|--alephseq)              PARAMS+=(--alephseq) ; shift ;;
    -x|--marcxml)               PARAMS+=(--marcxml) ; shift ;;
    -y|--lineSeparated)         PARAMS+=(--lineSeparated) ; shift ;;
    -t|--outputDir)             PARAMS+=(--outputDir "$2") ; shift 2 ;;
    -r|--trimId)                PARAMS+=(--trimId) ; shift ;;
    -z|--ignorableFields)       PARAMS+=(--ignorableFields "$2") ; shift 2 ;;
    -v|--ignorableRecords)      PARAMS+=(--ignorableRecords "$2") ; shift 2 ;;
    -f|--marcFormat)            PARAMS+=(--marcFormat "$2") ; shift 2 ;;
    -s|--dataSource)            PARAMS+=(--dataSource "$2") ; shift 2 ;;
    -g|--defaultEncoding)       PARAMS+=(--defaultEncoding "$2") ; shift 2 ;;
    -1|--alephseqLineType)      PARAMS+=(--alephseqLineType "$2") ; shift 2 ;;
    -2|--picaIdField)           PARAMS+=(--picaIdField "$2") ; shift 2 ;;
    -u|--picaSubfieldSeparator) PARAMS+=(--picaSubfieldSeparator "$2") ; shift 2 ;;
    -j|--picaSchemaFile)        PARAMS+=(--picaSchemaFile "$2") ; shift 2 ;;
    -w|--schemaType)            PARAMS+=(--schemaType "$2") ; shift 2 ;;
    -k|--picaRecordType)        PARAMS+=(--picaRecordType "$2") ; shift 2 ;;
    -c|--allowableRecords)      PARAMS+=(--allowableRecords "$2") ; shift 2 ;;
    -e|--groupBy)               PARAMS+=(--groupBy "$2") ; shift 2 ;;
    -3|--groupListFile)         PARAMS+=(--groupListFile "$2") ; shift 2 ;;
    -4|--solrForScoresUrl)      PARAMS+=(--solrForScoresUrl "$2") ; shift 2 ;;
    -G|--summaryFileName)       PARAMS+=(--summaryFileName "$2") ; shift 2 ;;
    -S|--summary)               PARAMS+=(--summary) ; shift ;;
    -H|--details)               PARAMS+=(--details) ; shift ;;
    -F|--detailsFileName)       PARAMS+=(--detailsFileName "$2") ; shift 2 ;;
    -R|--format)                PARAMS+=(--format "$2") ; shift 2 ;;
    -W|--emptyLargeCollectors)  PARAMS+=(--emptyLargeCollectors) ; shift ;;
    -T|--collectAllErrors)      PARAMS+=(--collectAllErrors) ; shift ;;
    -I|--ignorableIssueTypes)   PARAMS+=(--ignorableIssueTypes "$2") ; shift 2 ;;
    # '--' marks the end of the options; what is left are the input files.
    --) shift ; break ;;
    # Anything else means the case arms and the getopt tables are out of sync.
    *) echo "Internal error!: $1" >&2 ; exit 1 ;;
  esac
done
# Help was requested: show_usage prints the text and ends the script.
if [[ $HELP -eq 1 ]]; then
  show_usage
fi
# JVM invocation, also kept as an array so "$JAR" stays a single -cp argument.
CMD=(/usr/bin/java -Xmx8g -cp "$JAR" de.gwdg.metadataqa.marc.cli.ValidatorCli)
echo 'CMD: ' "${CMD[@]}"
echo 'PARAMS: ' "${PARAMS[@]}"
echo 'REST: ' "$@"
echo "${CMD[@]}" "${PARAMS[@]}" "$@"
# Run the validator. A stray 'exit' used to sit in front of this line, which
# made the script print the command and stop without ever starting Java.
"${CMD[@]}" "${PARAMS[@]}" "$@"