Use GETOPT for the command line scripts #326
pkiraly committed Oct 19, 2023
1 parent 41d9d5e commit 2b70a55
Showing 22 changed files with 1,032 additions and 36 deletions.
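The change applies one pattern to every wrapper script: instead of handing `$@` straight to the Java CLI class, each script now declares its short and long option specifications, lets getopt(1) normalize the command line, rebuilds the parameter string in a case loop, and only then calls Java. Below is a condensed sketch of that pattern, reduced to two options for brevity; the full scripts later in this commit declare the complete option sets, and `$JAR` is assumed to be set by `common-variables`.

. ./common-variables
ME=$(basename "$0")

# condensed illustration of the pattern introduced in this commit;
# only two options are shown, the real scripts define many more
GETOPT=$(getopt -o m:h --long marcVersion:,help -n "$ME" -- "$@")
eval set -- "${GETOPT}"

PARAMS=""
while true ; do
  case "$1" in
    -m|--marcVersion) PARAMS="$PARAMS --marcVersion $2" ; shift 2 ;;
    -h|--help) PARAMS="$PARAMS --help" ; shift ;;
    --) shift ; break ;;
    *) echo "Internal error!: $1" ; exit 1 ;;
  esac
done

CMD="/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.AuthorityAnalysis"
echo $CMD $PARAMS "$@"
$CMD $PARAMS "$@"
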
README.md: 7 changes (4 additions & 3 deletions)
@@ -57,6 +57,7 @@ Screenshot from the web UI of the QA catalogue
* [Export mapping table](#export-mapping-table)
* [to Avram JSON](#to-avram-json)
* [to HTML](#to-html)
* [Shacl4Bib](#Shacl4Bib)
* [Extending the functionalities](#extending-the-functionalities)
* [User interface](#user-interface)
* Appendices
@@ -1238,7 +1239,7 @@ or

options:
* [general parameters](#general-parameters)
* `-f <file>`, `--fileName <file>`: the name of the report the program produces.
* `-F <file>`, `--fileName <file>`: the name of the report the program produces.
Default is `tt-completeness.csv`.

It produces a CSV file like this:
@@ -1298,7 +1299,7 @@ or

options:
* [general parameters](#general-parameters)
* `-f <file>`, `--fileName <file>`: the report file name (default is
* `-F <file>`, `--fileName <file>`: the report file name (default is
`shelf-ready-completeness.csv`)

### Serial score analysis
@@ -1332,7 +1333,7 @@ or

options:
* [general parameters](#general-parameters)
* `-f <file>`, `--fileName <file>`: the report file name. Default is
* `-F <file>`, `--fileName <file>`: the report file name. Default is
`shelf-ready-completeness.csv`.

### Classification analysis
authorities: 106 changes (105 additions & 1 deletion)
@@ -1,4 +1,108 @@
# Calling Validator
. ./common-variables

/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.AuthorityAnalysis $@
ME=$(basename $0)

show_usage() { # display help message
cat <<EOF
QA catalogue authority name analysis

usage:
${ME} [options] <files>

options:
-m, --marcVersion <arg> MARC version ('OCLC' or 'DNB')
-h, --help display help
-n, --nolog do not display log messages
-l, --limit <arg> limit the number of records to process
-o, --offset <arg> the first record to process
-i, --id <arg> the MARC identifier (content of 001)
-d, --defaultRecordType <arg> the default record type if the record's type is undetectable
-q, --fixAlephseq fix the known issues of Alephseq format
-a, --fixAlma fix the known issues of Alma format
 -b, --fixKbr                     fix the known issues of KBR format
-p, --alephseq the source is in Alephseq format
-x, --marcxml the source is in MARCXML format
-y, --lineSeparated the source is in line separated MARC format
-t, --outputDir <arg> output directory
-r, --trimId remove spaces from the end of record IDs
-z, --ignorableFields <arg> ignore fields from the analysis
-v, --ignorableRecords <arg> ignore records from the analysis
-f, --marcFormat <arg> MARC format (like 'ISO' or 'MARCXML')
 -s, --dataSource <arg>           data source (file or stream)
-g, --defaultEncoding <arg> default character encoding
-1, --alephseqLineType <arg> Alephseq line type
-2, --picaIdField <arg> PICA id field
-u, --picaSubfieldSeparator <arg> PICA subfield separator
-j, --picaSchemaFile <arg> Avram PICA schema file
-w, --schemaType <arg> metadata schema type ('MARC21', 'UNIMARC', or 'PICA')
 -k, --picaRecordType <arg>       PICA record type
-c, --allowableRecords <arg> allow records for the analysis
 -e, --groupBy <arg>              group the results by the value of this data element (e.g. the ILN of the library)
-3, --groupListFile <arg> the file which contains a list of ILN codes

more info: https://github.com/pkiraly/qa-catalogue#authority-name-analysis

EOF
exit 1
}

if [ $# -eq 0 ]; then
show_usage
fi

SHORT_OPTIONS="m:hnl:o:i:d:qabpxyt:rz:v:f:s:g:1:2:u:j:w:k:c:e:3:"
LONG_OPTIONS="marcVersion:,help,nolog,limit:,offset:,id:,defaultRecordType:,fixAlephseq,fixAlma,fixKbr,alephseq,marcxml,lineSeparated,outputDir:,trimId,ignorableFields:,ignorableRecords:,marcFormat:,dataSource:,defaultEncoding:,alephseqLineType:,picaIdField:,picaSubfieldSeparator:,picaSchemaFile:,schemaType:,picaRecordType:,allowableRecords:,groupBy:,groupListFile:"

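# getopt(1) validates the arguments against the short and long option
# specifications above and prints them back in a normalized order;
# 'eval set --' then replaces the positional parameters with that list.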
GETOPT=$(getopt \
-o ${SHORT_OPTIONS} \
--long ${LONG_OPTIONS} \
-n ${ME} -- "$@")
eval set -- "${GETOPT}"

PARAMS=""
HELP=0
while true ; do
case "$1" in
-m|--marcVersion) PARAMS="$PARAMS --marcVersion $2" ; shift 2 ;;
-h|--help) PARAMS="$PARAMS --help" ; HELP=1; shift ;;
-n|--nolog) PARAMS="$PARAMS --nolog" ; shift ;;
-l|--limit) PARAMS="$PARAMS --limit $2" ; shift 2 ;;
-o|--offset) PARAMS="$PARAMS --offset $2" ; shift 2 ;;
-i|--id) PARAMS="$PARAMS --id $2" ; shift 2 ;;
-d|--defaultRecordType) PARAMS="$PARAMS --defaultRecordType $2" ; shift 2 ;;
-q|--fixAlephseq) PARAMS="$PARAMS --fixAlephseq" ; shift ;;
-a|--fixAlma) PARAMS="$PARAMS --fixAlma" ; shift ;;
-b|--fixKbr) PARAMS="$PARAMS --fixKbr" ; shift ;;
-p|--alephseq) PARAMS="$PARAMS --alephseq" ; shift ;;
-x|--marcxml) PARAMS="$PARAMS --marcxml" ; shift ;;
-y|--lineSeparated) PARAMS="$PARAMS --lineSeparated" ; shift ;;
-t|--outputDir) PARAMS="$PARAMS --outputDir $2" ; shift 2 ;;
-r|--trimId) PARAMS="$PARAMS --trimId" ; shift ;;
-z|--ignorableFields) PARAMS="$PARAMS --ignorableFields $2" ; shift 2 ;;
-v|--ignorableRecords) PARAMS="$PARAMS --ignorableRecords $2" ; shift 2 ;;
-f|--marcFormat) PARAMS="$PARAMS --marcFormat $2" ; shift 2 ;;
-s|--dataSource) PARAMS="$PARAMS --dataSource $2" ; shift 2 ;;
-g|--defaultEncoding) PARAMS="$PARAMS --defaultEncoding $2" ; shift 2 ;;
-1|--alephseqLineType) PARAMS="$PARAMS --alephseqLineType $2" ; shift 2 ;;
-2|--picaIdField) PARAMS="$PARAMS --picaIdField $2" ; shift 2 ;;
-u|--picaSubfieldSeparator) PARAMS="$PARAMS --picaSubfieldSeparator $2" ; shift 2 ;;
-j|--picaSchemaFile) PARAMS="$PARAMS --picaSchemaFile $2" ; shift 2 ;;
-w|--schemaType) PARAMS="$PARAMS --schemaType $2" ; shift 2 ;;
-k|--picaRecordType) PARAMS="$PARAMS --picaRecordType $2" ; shift 2 ;;
-c|--allowableRecords) PARAMS="$PARAMS --allowableRecords $2" ; shift 2 ;;
-e|--groupBy) PARAMS="$PARAMS --groupBy $2" ; shift 2 ;;
-3|--groupListFile) PARAMS="$PARAMS --groupListFile $2" ; shift 2 ;;
--) shift ; break ;;
*) echo "Internal error!: $1" ; exit 1 ;;
esac
done

if [[ $HELP -eq 1 ]]; then
show_usage
fi

CMD="/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.AuthorityAnalysis"

echo $CMD $PARAMS "$@"
$CMD $PARAMS "$@"
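
A minimal invocation sketch, assuming the script is run as an executable from the repository root; the input file and output directory are hypothetical:

# hypothetical example: run the authority name analysis on a MARCXML export
./authorities --marcVersion DNB --marcxml \
  --outputDir output/authorities \
  data/records.xml
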
catalogues/gent.sh: 2 changes (1 addition & 1 deletion)
@@ -6,7 +6,7 @@ NAME=gent
# TYPE_PARAMS="--marcVersion GENT"
# MARC_DIR=${BASE_INPUT_DIR}/gent/marc/2019-06-05
# MASK=*.mrc
TYPE_PARAMS="--marcVersion GENT --alephseq --with-delete"
TYPE_PARAMS="--marcVersion GENT --alephseq"
# MARC_DIR=${BASE_INPUT_DIR}/gent/marc/2020-05-27
# MASK=*.export
MARC_DIR=${BASE_INPUT_DIR}/gent/marc/2021-01-02
classifications: 108 changes (107 additions & 1 deletion)
@@ -1,4 +1,110 @@
# Calling Validator
. ./common-variables

/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.ClassificationAnalysis $@
ME=$(basename $0)

show_usage() { # display help message
cat <<EOF
QA catalogue subject analysis

usage:
${ME} [options] <files>

options:
-m, --marcVersion <arg> MARC version ('OCLC' or 'DNB')
-h, --help display help
-n, --nolog do not display log messages
-l, --limit <arg> limit the number of records to process
-o, --offset <arg> the first record to process
-i, --id <arg> the MARC identifier (content of 001)
-d, --defaultRecordType <arg> the default record type if the record's type is undetectable
-q, --fixAlephseq fix the known issues of Alephseq format
-a, --fixAlma fix the known issues of Alma format
 -b, --fixKbr                     fix the known issues of KBR format
-p, --alephseq the source is in Alephseq format
-x, --marcxml the source is in MARCXML format
-y, --lineSeparated the source is in line separated MARC format
-t, --outputDir <arg> output directory
-r, --trimId remove spaces from the end of record IDs
-z, --ignorableFields <arg> ignore fields from the analysis
-v, --ignorableRecords <arg> ignore records from the analysis
-f, --marcFormat <arg> MARC format (like 'ISO' or 'MARCXML')
 -s, --dataSource <arg>           data source (file or stream)
-g, --defaultEncoding <arg> default character encoding
-1, --alephseqLineType <arg> Alephseq line type
-2, --picaIdField <arg> PICA id field
-u, --picaSubfieldSeparator <arg> PICA subfield separator
-j, --picaSchemaFile <arg> Avram PICA schema file
-w, --schemaType <arg> metadata schema type ('MARC21', 'UNIMARC', or 'PICA')
 -k, --picaRecordType <arg>       PICA record type
-c, --allowableRecords <arg> allow records for the analysis
 -e, --groupBy <arg>              group the results by the value of this data element (e.g. the ILN of the library)
-3, --groupListFile <arg> the file which contains a list of ILN codes
 -A, --collectCollocations        collect collocation of schemas

more info: https://github.com/pkiraly/qa-catalogue#classification-analysis

EOF
exit 1
}

if [ $# -eq 0 ]; then
show_usage
fi

SHORT_OPTIONS="m:hnl:o:i:d:qabpxyt:rz:v:f:s:g:1:2:u:j:w:k:c:e:3:A"
LONG_OPTIONS="marcVersion:,help,nolog,limit:,offset:,id:,defaultRecordType:,fixAlephseq,fixAlma,fixKbr,alephseq,marcxml,lineSeparated,outputDir:,trimId,ignorableFields:,ignorableRecords:,marcFormat:,dataSource:,defaultEncoding:,alephseqLineType:,picaIdField:,picaSubfieldSeparator:,picaSchemaFile:,schemaType:,picaRecordType:,allowableRecords:,groupBy:,groupListFile:,collectCollocations"

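# getopt(1) validates the arguments against the short and long option
# specifications above and prints them back in a normalized order;
# 'eval set --' then replaces the positional parameters with that list.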
GETOPT=$(getopt \
-o ${SHORT_OPTIONS} \
--long ${LONG_OPTIONS} \
-n ${ME} -- "$@")
eval set -- "${GETOPT}"

PARAMS=""
HELP=0
while true ; do
case "$1" in
-m|--marcVersion) PARAMS="$PARAMS --marcVersion $2" ; shift 2 ;;
-h|--help) PARAMS="$PARAMS --help" ; HELP=1; shift ;;
-n|--nolog) PARAMS="$PARAMS --nolog" ; shift ;;
-l|--limit) PARAMS="$PARAMS --limit $2" ; shift 2 ;;
-o|--offset) PARAMS="$PARAMS --offset $2" ; shift 2 ;;
-i|--id) PARAMS="$PARAMS --id $2" ; shift 2 ;;
-d|--defaultRecordType) PARAMS="$PARAMS --defaultRecordType $2" ; shift 2 ;;
-q|--fixAlephseq) PARAMS="$PARAMS --fixAlephseq" ; shift ;;
-a|--fixAlma) PARAMS="$PARAMS --fixAlma" ; shift ;;
-b|--fixKbr) PARAMS="$PARAMS --fixKbr" ; shift ;;
-p|--alephseq) PARAMS="$PARAMS --alephseq" ; shift ;;
-x|--marcxml) PARAMS="$PARAMS --marcxml" ; shift ;;
-y|--lineSeparated) PARAMS="$PARAMS --lineSeparated" ; shift ;;
-t|--outputDir) PARAMS="$PARAMS --outputDir $2" ; shift 2 ;;
-r|--trimId) PARAMS="$PARAMS --trimId" ; shift ;;
-z|--ignorableFields) PARAMS="$PARAMS --ignorableFields $2" ; shift 2 ;;
-v|--ignorableRecords) PARAMS="$PARAMS --ignorableRecords $2" ; shift 2 ;;
-f|--marcFormat) PARAMS="$PARAMS --marcFormat $2" ; shift 2 ;;
-s|--dataSource) PARAMS="$PARAMS --dataSource $2" ; shift 2 ;;
-g|--defaultEncoding) PARAMS="$PARAMS --defaultEncoding $2" ; shift 2 ;;
-1|--alephseqLineType) PARAMS="$PARAMS --alephseqLineType $2" ; shift 2 ;;
-2|--picaIdField) PARAMS="$PARAMS --picaIdField $2" ; shift 2 ;;
-u|--picaSubfieldSeparator) PARAMS="$PARAMS --picaSubfieldSeparator $2" ; shift 2 ;;
-j|--picaSchemaFile) PARAMS="$PARAMS --picaSchemaFile $2" ; shift 2 ;;
-w|--schemaType) PARAMS="$PARAMS --schemaType $2" ; shift 2 ;;
-k|--picaRecordType) PARAMS="$PARAMS --picaRecordType $2" ; shift 2 ;;
-c|--allowableRecords) PARAMS="$PARAMS --allowableRecords $2" ; shift 2 ;;
-e|--groupBy) PARAMS="$PARAMS --groupBy $2" ; shift 2 ;;
-3|--groupListFile) PARAMS="$PARAMS --groupListFile $2" ; shift 2 ;;
-A|--collectCollocations) PARAMS="$PARAMS --collectCollocations" ; shift ;;
--) shift ; break ;;
*) echo "Internal error!: $1" ; exit 1 ;;
esac
done

if [[ $HELP -eq 1 ]]; then
show_usage
fi

CMD="/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.ClassificationAnalysis"

echo $CMD $PARAMS "$@"
$CMD $PARAMS "$@"
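
A minimal invocation sketch for this script as well, again with hypothetical paths:

# hypothetical example: run the subject analysis on a MARCXML export
# and collect the collocations of classification schemas
./classifications --marcxml --collectCollocations \
  --outputDir output/classifications \
  data/records.xml
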
