Use GETOPT for the command line scripts #326
pkiraly committed Oct 19, 2023
1 parent 41d9d5e commit 2b70a55
Showing 22 changed files with 1,032 additions and 36 deletions.
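The change applies one pattern to every wrapper script: instead of handing `$@` straight to the Java CLI class, each script now declares its short and long option specifications, lets getopt(1) normalize the command line, rebuilds the parameter string in a case loop, and only then calls Java. Below is a condensed sketch of that pattern, reduced to two options for brevity; the full scripts later in this commit declare the complete option sets, and `$JAR` is assumed to be set by `common-variables`.

. ./common-variables
ME=$(basename "$0")

# condensed illustration of the pattern introduced in this commit;
# only two options are shown, the real scripts define many more
GETOPT=$(getopt -o m:h --long marcVersion:,help -n "$ME" -- "$@")
eval set -- "${GETOPT}"

PARAMS=""
while true ; do
  case "$1" in
    -m|--marcVersion) PARAMS="$PARAMS --marcVersion $2" ; shift 2 ;;
    -h|--help) PARAMS="$PARAMS --help" ; shift ;;
    --) shift ; break ;;
    *) echo "Internal error!: $1" ; exit 1 ;;
  esac
done

CMD="/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.AuthorityAnalysis"
echo $CMD $PARAMS "$@"
$CMD $PARAMS "$@"
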
README.md: 7 changes (4 additions & 3 deletions)
@@ -57,6 +57,7 @@ Screenshot from the web UI of the QA catalogue
* [Export mapping table](#export-mapping-table)
* [to Avram JSON](#to-avram-json)
* [to HTML](#to-html)
* [Shacl4Bib](#Shacl4Bib)
* [Extending the functionalities](#extending-the-functionalities)
* [User interface](#user-interface)
* Appendices
@@ -1238,7 +1239,7 @@ or

options:
* [general parameters](#general-parameters)
* `-f <file>`, `--fileName <file>`: the name of the report the program produces.
* `-F <file>`, `--fileName <file>`: the name of the report the program produces.
Default is `tt-completeness.csv`.

It produces a CSV file like this:
@@ -1298,7 +1299,7 @@ or

options:
* [general parameters](#general-parameters)
* `-f <file>`, `--fileName <file>`: the report file name (default is
* `-F <file>`, `--fileName <file>`: the report file name (default is
`shelf-ready-completeness.csv`)

### Serial score analysis
@@ -1332,7 +1333,7 @@ or

options:
* [general parameters](#general-parameters)
* `-f <file>`, `--fileName <file>`: the report file name. Default is
* `-F <file>`, `--fileName <file>`: the report file name. Default is
`shelf-ready-completeness.csv`.

### Classification analysis
authorities: 106 changes (105 additions & 1 deletion)
@@ -1,4 +1,108 @@
# Calling Validator
. ./common-variables

/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.AuthorityAnalysis $@
ME=$(basename $0)

show_usage() { # display help message
cat <<EOF
QA catalogue authority name analysis

usage:
${ME} [options] <files>

options:
-m, --marcVersion <arg> MARC version ('OCLC' or 'DNB')
-h, --help display help
-n, --nolog do not display log messages
-l, --limit <arg> limit the number of records to process
-o, --offset <arg> the first record to process
-i, --id <arg> the MARC identifier (content of 001)
-d, --defaultRecordType <arg> the default record type if the record's type is undetectable
-q, --fixAlephseq fix the known issues of Alephseq format
-a, --fixAlma fix the known issues of Alma format
 -b, --fixKbr                     fix the known issues of KBR format
-p, --alephseq the source is in Alephseq format
-x, --marcxml the source is in MARCXML format
-y, --lineSeparated the source is in line separated MARC format
-t, --outputDir <arg> output directory
-r, --trimId remove spaces from the end of record IDs
-z, --ignorableFields <arg> ignore fields from the analysis
-v, --ignorableRecords <arg> ignore records from the analysis
-f, --marcFormat <arg> MARC format (like 'ISO' or 'MARCXML')
 -s, --dataSource <arg>           data source (file or stream)
-g, --defaultEncoding <arg> default character encoding
-1, --alephseqLineType <arg> Alephseq line type
-2, --picaIdField <arg> PICA id field
-u, --picaSubfieldSeparator <arg> PICA subfield separator
-j, --picaSchemaFile <arg> Avram PICA schema file
-w, --schemaType <arg> metadata schema type ('MARC21', 'UNIMARC', or 'PICA')
 -k, --picaRecordType <arg>       PICA record type
-c, --allowableRecords <arg> allow records for the analysis
 -e, --groupBy <arg>              group the results by the value of this data element (e.g. the ILN of the library)
-3, --groupListFile <arg> the file which contains a list of ILN codes

more info: https://github.com/pkiraly/qa-catalogue#authority-name-analysis

EOF
exit 1
}

if [ $# -eq 0 ]; then
show_usage
fi

SHORT_OPTIONS="m:hnl:o:i:d:qabpxyt:rz:v:f:s:g:1:2:u:j:w:k:c:e:3:"
LONG_OPTIONS="marcVersion:,help,nolog,limit:,offset:,id:,defaultRecordType:,fixAlephseq,fixAlma,fixKbr,alephseq,marcxml,lineSeparated,outputDir:,trimId,ignorableFields:,ignorableRecords:,marcFormat:,dataSource:,defaultEncoding:,alephseqLineType:,picaIdField:,picaSubfieldSeparator:,picaSchemaFile:,schemaType:,picaRecordType:,allowableRecords:,groupBy:,groupListFile:"

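# getopt(1) validates the arguments against the short and long option
# specifications above and prints them back in a normalized order;
# 'eval set --' then replaces the positional parameters with that list.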
GETOPT=$(getopt \
-o ${SHORT_OPTIONS} \
--long ${LONG_OPTIONS} \
-n ${ME} -- "$@")
eval set -- "${GETOPT}"

PARAMS=""
HELP=0
while true ; do
case "$1" in
-m|--marcVersion) PARAMS="$PARAMS --marcVersion $2" ; shift 2 ;;
-h|--help) PARAMS="$PARAMS --help" ; HELP=1; shift ;;
-n|--nolog) PARAMS="$PARAMS --nolog" ; shift ;;
-l|--limit) PARAMS="$PARAMS --limit $2" ; shift 2 ;;
-o|--offset) PARAMS="$PARAMS --offset $2" ; shift 2 ;;
-i|--id) PARAMS="$PARAMS --id $2" ; shift 2 ;;
-d|--defaultRecordType) PARAMS="$PARAMS --defaultRecordType $2" ; shift 2 ;;
-q|--fixAlephseq) PARAMS="$PARAMS --fixAlephseq" ; shift ;;
-a|--fixAlma) PARAMS="$PARAMS --fixAlma" ; shift ;;
-b|--fixKbr) PARAMS="$PARAMS --fixKbr" ; shift ;;
-p|--alephseq) PARAMS="$PARAMS --alephseq" ; shift ;;
-x|--marcxml) PARAMS="$PARAMS --marcxml" ; shift ;;
-y|--lineSeparated) PARAMS="$PARAMS --lineSeparated" ; shift ;;
-t|--outputDir) PARAMS="$PARAMS --outputDir $2" ; shift 2 ;;
-r|--trimId) PARAMS="$PARAMS --trimId" ; shift ;;
-z|--ignorableFields) PARAMS="$PARAMS --ignorableFields $2" ; shift 2 ;;
-v|--ignorableRecords) PARAMS="$PARAMS --ignorableRecords $2" ; shift 2 ;;
-f|--marcFormat) PARAMS="$PARAMS --marcFormat $2" ; shift 2 ;;
-s|--dataSource) PARAMS="$PARAMS --dataSource $2" ; shift 2 ;;
-g|--defaultEncoding) PARAMS="$PARAMS --defaultEncoding $2" ; shift 2 ;;
-1|--alephseqLineType) PARAMS="$PARAMS --alephseqLineType $2" ; shift 2 ;;
-2|--picaIdField) PARAMS="$PARAMS --picaIdField $2" ; shift 2 ;;
-u|--picaSubfieldSeparator) PARAMS="$PARAMS --picaSubfieldSeparator $2" ; shift 2 ;;
-j|--picaSchemaFile) PARAMS="$PARAMS --picaSchemaFile $2" ; shift 2 ;;
-w|--schemaType) PARAMS="$PARAMS --schemaType $2" ; shift 2 ;;
-k|--picaRecordType) PARAMS="$PARAMS --picaRecordType $2" ; shift 2 ;;
-c|--allowableRecords) PARAMS="$PARAMS --allowableRecords $2" ; shift 2 ;;
-e|--groupBy) PARAMS="$PARAMS --groupBy $2" ; shift 2 ;;
-3|--groupListFile) PARAMS="$PARAMS --groupListFile $2" ; shift 2 ;;
--) shift ; break ;;
*) echo "Internal error!: $1" ; exit 1 ;;
esac
done

if [[ $HELP -eq 1 ]]; then
show_usage
fi

CMD="/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.AuthorityAnalysis"

echo $CMD $PARAMS "$@"
$CMD $PARAMS "$@"
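
A minimal invocation sketch, assuming the script is run as an executable from the repository root; the input file and output directory are hypothetical:

# hypothetical example: run the authority name analysis on a MARCXML export
./authorities --marcVersion DNB --marcxml \
  --outputDir output/authorities \
  data/records.xml
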
catalogues/gent.sh: 2 changes (1 addition & 1 deletion)
@@ -6,7 +6,7 @@ NAME=gent
# TYPE_PARAMS="--marcVersion GENT"
# MARC_DIR=${BASE_INPUT_DIR}/gent/marc/2019-06-05
# MASK=*.mrc
TYPE_PARAMS="--marcVersion GENT --alephseq --with-delete"
TYPE_PARAMS="--marcVersion GENT --alephseq"
# MARC_DIR=${BASE_INPUT_DIR}/gent/marc/2020-05-27
# MASK=*.export
MARC_DIR=${BASE_INPUT_DIR}/gent/marc/2021-01-02
classifications: 108 changes (107 additions & 1 deletion)
@@ -1,4 +1,110 @@
# Calling Validator
. ./common-variables

/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.ClassificationAnalysis $@
ME=$(basename $0)

show_usage() { # display help message
cat <<EOF
QA catalogue subject analysis

usage:
${ME} [options] <files>

options:
-m, --marcVersion <arg> MARC version ('OCLC' or 'DNB')
-h, --help display help
-n, --nolog do not display log messages
-l, --limit <arg> limit the number of records to process
-o, --offset <arg> the first record to process
-i, --id <arg> the MARC identifier (content of 001)
-d, --defaultRecordType <arg> the default record type if the record's type is undetectable
-q, --fixAlephseq fix the known issues of Alephseq format
-a, --fixAlma fix the known issues of Alma format
 -b, --fixKbr                     fix the known issues of KBR format
-p, --alephseq the source is in Alephseq format
-x, --marcxml the source is in MARCXML format
-y, --lineSeparated the source is in line separated MARC format
-t, --outputDir <arg> output directory
-r, --trimId remove spaces from the end of record IDs
-z, --ignorableFields <arg> ignore fields from the analysis
-v, --ignorableRecords <arg> ignore records from the analysis
-f, --marcFormat <arg> MARC format (like 'ISO' or 'MARCXML')
 -s, --dataSource <arg>           data source (file or stream)
-g, --defaultEncoding <arg> default character encoding
-1, --alephseqLineType <arg> Alephseq line type
-2, --picaIdField <arg> PICA id field
-u, --picaSubfieldSeparator <arg> PICA subfield separator
-j, --picaSchemaFile <arg> Avram PICA schema file
-w, --schemaType <arg> metadata schema type ('MARC21', 'UNIMARC', or 'PICA')
 -k, --picaRecordType <arg>       PICA record type
-c, --allowableRecords <arg> allow records for the analysis
 -e, --groupBy <arg>              group the results by the value of this data element (e.g. the ILN of the library)
-3, --groupListFile <arg> the file which contains a list of ILN codes
 -A, --collectCollocations        collect collocation of schemas

more info: https://github.com/pkiraly/qa-catalogue#classification-analysis

EOF
exit 1
}

if [ $# -eq 0 ]; then
show_usage
fi

SHORT_OPTIONS="m:hnl:o:i:d:qabpxyt:rz:v:f:s:g:1:2:u:j:w:k:c:e:3:A"
LONG_OPTIONS="marcVersion:,help,nolog,limit:,offset:,id:,defaultRecordType:,fixAlephseq,fixAlma,fixKbr,alephseq,marcxml,lineSeparated,outputDir:,trimId,ignorableFields:,ignorableRecords:,marcFormat:,dataSource:,defaultEncoding:,alephseqLineType:,picaIdField:,picaSubfieldSeparator:,picaSchemaFile:,schemaType:,picaRecordType:,allowableRecords:,groupBy:,groupListFile:,collectCollocations"

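# getopt(1) validates the arguments against the short and long option
# specifications above and prints them back in a normalized order;
# 'eval set --' then replaces the positional parameters with that list.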
GETOPT=$(getopt \
-o ${SHORT_OPTIONS} \
--long ${LONG_OPTIONS} \
-n ${ME} -- "$@")
eval set -- "${GETOPT}"

PARAMS=""
HELP=0
while true ; do
case "$1" in
-m|--marcVersion) PARAMS="$PARAMS --marcVersion $2" ; shift 2 ;;
-h|--help) PARAMS="$PARAMS --help" ; HELP=1; shift ;;
-n|--nolog) PARAMS="$PARAMS --nolog" ; shift ;;
-l|--limit) PARAMS="$PARAMS --limit $2" ; shift 2 ;;
-o|--offset) PARAMS="$PARAMS --offset $2" ; shift 2 ;;
-i|--id) PARAMS="$PARAMS --id $2" ; shift 2 ;;
-d|--defaultRecordType) PARAMS="$PARAMS --defaultRecordType $2" ; shift 2 ;;
-q|--fixAlephseq) PARAMS="$PARAMS --fixAlephseq" ; shift ;;
-a|--fixAlma) PARAMS="$PARAMS --fixAlma" ; shift ;;
-b|--fixKbr) PARAMS="$PARAMS --fixKbr" ; shift ;;
-p|--alephseq) PARAMS="$PARAMS --alephseq" ; shift ;;
-x|--marcxml) PARAMS="$PARAMS --marcxml" ; shift ;;
-y|--lineSeparated) PARAMS="$PARAMS --lineSeparated" ; shift ;;
-t|--outputDir) PARAMS="$PARAMS --outputDir $2" ; shift 2 ;;
-r|--trimId) PARAMS="$PARAMS --trimId" ; shift ;;
-z|--ignorableFields) PARAMS="$PARAMS --ignorableFields $2" ; shift 2 ;;
-v|--ignorableRecords) PARAMS="$PARAMS --ignorableRecords $2" ; shift 2 ;;
-f|--marcFormat) PARAMS="$PARAMS --marcFormat $2" ; shift 2 ;;
-s|--dataSource) PARAMS="$PARAMS --dataSource $2" ; shift 2 ;;
-g|--defaultEncoding) PARAMS="$PARAMS --defaultEncoding $2" ; shift 2 ;;
-1|--alephseqLineType) PARAMS="$PARAMS --alephseqLineType $2" ; shift 2 ;;
-2|--picaIdField) PARAMS="$PARAMS --picaIdField $2" ; shift 2 ;;
-u|--picaSubfieldSeparator) PARAMS="$PARAMS --picaSubfieldSeparator $2" ; shift 2 ;;
-j|--picaSchemaFile) PARAMS="$PARAMS --picaSchemaFile $2" ; shift 2 ;;
-w|--schemaType) PARAMS="$PARAMS --schemaType $2" ; shift 2 ;;
-k|--picaRecordType) PARAMS="$PARAMS --picaRecordType $2" ; shift 2 ;;
-c|--allowableRecords) PARAMS="$PARAMS --allowableRecords $2" ; shift 2 ;;
-e|--groupBy) PARAMS="$PARAMS --groupBy $2" ; shift 2 ;;
-3|--groupListFile) PARAMS="$PARAMS --groupListFile $2" ; shift 2 ;;
-A|--collectCollocations) PARAMS="$PARAMS --collectCollocations" ; shift ;;
--) shift ; break ;;
*) echo "Internal error!: $1" ; exit 1 ;;
esac
done

if [[ $HELP -eq 1 ]]; then
show_usage
fi

CMD="/usr/bin/java -Xmx2g -cp $JAR de.gwdg.metadataqa.marc.cli.ClassificationAnalysis"

echo $CMD $PARAMS "$@"
$CMD $PARAMS "$@"
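
A minimal invocation sketch for this script as well, again with hypothetical paths:

# hypothetical example: run the subject analysis on a MARCXML export
# and collect the collocations of classification schemas
./classifications --marcxml --collectCollocations \
  --outputDir output/classifications \
  data/records.xml
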
