Use GETOPT for the command line scripts #326

pkiraly · Nov 1, 2023 · 1207a33
1 parent 3a6e67f
commit 1207a33
Show file tree

Hide file tree

Showing 11 changed files with 11 additions and 241 deletions.
diff --git a/.gitignore b/.gitignore
@@ -29,6 +29,7 @@ qa-catalogue.iml
 temp
 scala/project
 scala/target
+scripts/cli-generator/*.sh.txt
 
 # default input and output base directory
 input

diff --git a/scripts/cli-generator/authorities.txt b/scripts/cli-generator/authorities.txt
@@ -1,30 +0,0 @@
-      options.addOption("m", "marcVersion", true, "MARC version ('OCLC' or 'DNB')");
-      options.addOption("h", "help", false, "display help");
-      options.addOption("n", "nolog", false, "do not display log messages");
-      options.addOption("l", "limit", true, "limit the number of records to process");
-      options.addOption("o", "offset", true, "the first record to process");
-      options.addOption("i", "id", true, "the MARC identifier (content of 001)");
-      options.addOption("d", "defaultRecordType", true, "the default record type if the record's type is undetectable");
-      options.addOption("q", "fixAlephseq", false, "fix the known issues of Alephseq format");
-      options.addOption("a", "fixAlma", false, "fix the known issues of Alma format");
-      options.addOption("b", "fixKbr", false, "fix the known issues of Alma format");
-      options.addOption("p", "alephseq", false, "the source is in Alephseq format");
-      options.addOption("x", "marcxml", false, "the source is in MARCXML format");
-      options.addOption("y", "lineSeparated", false, "the source is in line separated MARC format");
-      options.addOption("t", "outputDir", true, "output directory");
-      options.addOption("r", "trimId", false, "remove spaces from the end of record IDs");
-      options.addOption("z", "ignorableFields", true, "ignore fields from the analysis");
-      options.addOption("v", "ignorableRecords", true, "ignore records from the analysis");
-      options.addOption("f", "marcFormat", true, "MARC format (like 'ISO' or 'MARCXML')");
-      options.addOption("s", "dataSource", true, "data source (file of stream)");
-      options.addOption("g", "defaultEncoding", true, "default character encoding");
-      options.addOption("1", "alephseqLineType", true, "Alephseq line type");
-      options.addOption("2", "picaIdField", true, "PICA id field");
-      options.addOption("u", "picaSubfieldSeparator", true, "PICA subfield separator");
-      options.addOption("j", "picaSchemaFile", true, "Avram PICA schema file");
-      options.addOption("w", "schemaType", true, "metadata schema type ('MARC21', 'UNIMARC', or 'PICA')");
-      options.addOption("k", "picaRecordType", true, "picaRecordType");
-      options.addOption("c", "allowableRecords", true, "allow records for the analysis");
-      options.addOption("e", "groupBy", true, "group the results by the value of this data element (e.g. the ILN of  library)");
-      options.addOption("3", "groupListFile", true, "the file which contains a list of ILN codes");
-      options.addOption("4", "solrForScoresUrl", true, "the URL of the Solr server used to store scores");

diff --git a/scripts/cli-generator/classifications.txt b/scripts/cli-generator/classifications.txt
@@ -1,31 +1 @@
-      options.addOption("m", "marcVersion", true, "MARC version ('OCLC' or 'DNB')");
-      options.addOption("h", "help", false, "display help");
-      options.addOption("n", "nolog", false, "do not display log messages");
-      options.addOption("l", "limit", true, "limit the number of records to process");
-      options.addOption("o", "offset", true, "the first record to process");
-      options.addOption("i", "id", true, "the MARC identifier (content of 001)");
-      options.addOption("d", "defaultRecordType", true, "the default record type if the record's type is undetectable");
-      options.addOption("q", "fixAlephseq", false, "fix the known issues of Alephseq format");
-      options.addOption("a", "fixAlma", false, "fix the known issues of Alma format");
-      options.addOption("b", "fixKbr", false, "fix the known issues of Alma format");
-      options.addOption("p", "alephseq", false, "the source is in Alephseq format");
-      options.addOption("x", "marcxml", false, "the source is in MARCXML format");
-      options.addOption("y", "lineSeparated", false, "the source is in line separated MARC format");
-      options.addOption("t", "outputDir", true, "output directory");
-      options.addOption("r", "trimId", false, "remove spaces from the end of record IDs");
-      options.addOption("z", "ignorableFields", true, "ignore fields from the analysis");
-      options.addOption("v", "ignorableRecords", true, "ignore records from the analysis");
-      options.addOption("f", "marcFormat", true, "MARC format (like 'ISO' or 'MARCXML')");
-      options.addOption("s", "dataSource", true, "data source (file of stream)");
-      options.addOption("g", "defaultEncoding", true, "default character encoding");
-      options.addOption("1", "alephseqLineType", true, "Alephseq line type");
-      options.addOption("2", "picaIdField", true, "PICA id field");
-      options.addOption("u", "picaSubfieldSeparator", true, "PICA subfield separator");
-      options.addOption("j", "picaSchemaFile", true, "Avram PICA schema file");
-      options.addOption("w", "schemaType", true, "metadata schema type ('MARC21', 'UNIMARC', or 'PICA')");
-      options.addOption("k", "picaRecordType", true, "picaRecordType");
-      options.addOption("c", "allowableRecords", true, "allow records for the analysis");
-      options.addOption("e", "groupBy", true, "group the results by the value of this data element (e.g. the ILN of  library)");
-      options.addOption("3", "groupListFile", true, "the file which contains a list of ILN codes");
-      options.addOption("4", "solrForScoresUrl", true, "the URL of the Solr server used to store scores");
       options.addOption("A", "collectCollocations", false, "collect collocatiion of schemas");
diff --git a/scripts/cli-generator/completeness.txt b/scripts/cli-generator/completeness.txt
@@ -1,33 +1,3 @@
-      options.addOption("m", "marcVersion", true, "MARC version ('OCLC' or 'DNB')");
-      options.addOption("h", "help", false, "display help");
-      options.addOption("n", "nolog", false, "do not display log messages");
-      options.addOption("l", "limit", true, "limit the number of records to process");
-      options.addOption("o", "offset", true, "the first record to process");
-      options.addOption("i", "id", true, "the MARC identifier (content of 001)");
-      options.addOption("d", "defaultRecordType", true, "the default record type if the record's type is undetectable");
-      options.addOption("q", "fixAlephseq", false, "fix the known issues of Alephseq format");
-      options.addOption("a", "fixAlma", false, "fix the known issues of Alma format");
-      options.addOption("b", "fixKbr", false, "fix the known issues of Alma format");
-      options.addOption("p", "alephseq", false, "the source is in Alephseq format");
-      options.addOption("x", "marcxml", false, "the source is in MARCXML format");
-      options.addOption("y", "lineSeparated", false, "the source is in line separated MARC format");
-      options.addOption("t", "outputDir", true, "output directory");
-      options.addOption("r", "trimId", false, "remove spaces from the end of record IDs");
-      options.addOption("z", "ignorableFields", true, "ignore fields from the analysis");
-      options.addOption("v", "ignorableRecords", true, "ignore records from the analysis");
-      options.addOption("f", "marcFormat", true, "MARC format (like 'ISO' or 'MARCXML')");
-      options.addOption("s", "dataSource", true, "data source (file of stream)");
-      options.addOption("g", "defaultEncoding", true, "default character encoding");
-      options.addOption("1", "alephseqLineType", true, "Alephseq line type");
-      options.addOption("2", "picaIdField", true, "PICA id field");
-      options.addOption("u", "picaSubfieldSeparator", true, "PICA subfield separator");
-      options.addOption("j", "picaSchemaFile", true, "Avram PICA schema file");
-      options.addOption("w", "schemaType", true, "metadata schema type ('MARC21', 'UNIMARC', or 'PICA')");
-      options.addOption("k", "picaRecordType", true, "picaRecordType");
-      options.addOption("c", "allowableRecords", true, "allow records for the analysis");
-      options.addOption("e", "groupBy", true, "group the results by the value of this data element (e.g. the ILN of  library)");
-      options.addOption("3", "groupListFile", true, "the file which contains a list of ILN codes");
-      options.addOption("4", "solrForScoresUrl", true, "the URL of the Solr server used to store scores");
       options.addOption("R", "format", true, "specify a format");
       options.addOption("V", "advanced", false, "advanced mode (not yet implemented)");
       options.addOption("P", "onlyPackages", false, "only packages (not yet implemented)");
diff --git a/scripts/cli-generator/functions.txt b/scripts/cli-generator/functions.txt
@@ -1,30 +0,0 @@
-      options.addOption("m", "marcVersion", true, "MARC version ('OCLC' or 'DNB')");
-      options.addOption("h", "help", false, "display help");
-      options.addOption("n", "nolog", false, "do not display log messages");
-      options.addOption("l", "limit", true, "limit the number of records to process");
-      options.addOption("o", "offset", true, "the first record to process");
-      options.addOption("i", "id", true, "the MARC identifier (content of 001)");
-      options.addOption("d", "defaultRecordType", true, "the default record type if the record's type is undetectable");
-      options.addOption("q", "fixAlephseq", false, "fix the known issues of Alephseq format");
-      options.addOption("a", "fixAlma", false, "fix the known issues of Alma format");
-      options.addOption("b", "fixKbr", false, "fix the known issues of Alma format");
-      options.addOption("p", "alephseq", false, "the source is in Alephseq format");
-      options.addOption("x", "marcxml", false, "the source is in MARCXML format");
-      options.addOption("y", "lineSeparated", false, "the source is in line separated MARC format");
-      options.addOption("t", "outputDir", true, "output directory");
-      options.addOption("r", "trimId", false, "remove spaces from the end of record IDs");
-      options.addOption("z", "ignorableFields", true, "ignore fields from the analysis");
-      options.addOption("v", "ignorableRecords", true, "ignore records from the analysis");
-      options.addOption("f", "marcFormat", true, "MARC format (like 'ISO' or 'MARCXML')");
-      options.addOption("s", "dataSource", true, "data source (file of stream)");
-      options.addOption("g", "defaultEncoding", true, "default character encoding");
-      options.addOption("1", "alephseqLineType", true, "Alephseq line type");
-      options.addOption("2", "picaIdField", true, "PICA id field");
-      options.addOption("u", "picaSubfieldSeparator", true, "PICA subfield separator");
-      options.addOption("j", "picaSchemaFile", true, "Avram PICA schema file");
-      options.addOption("w", "schemaType", true, "metadata schema type ('MARC21', 'UNIMARC', or 'PICA')");
-      options.addOption("k", "picaRecordType", true, "picaRecordType");
-      options.addOption("c", "allowableRecords", true, "allow records for the analysis");
-      options.addOption("e", "groupBy", true, "group the results by the value of this data element (e.g. the ILN of  library)");
-      options.addOption("3", "groupListFile", true, "the file which contains a list of ILN codes");
-      options.addOption("4", "solrForScoresUrl", true, "the URL of the Solr server used to store scores");

diff --git a/scripts/cli-generator/generate.php b/scripts/cli-generator/generate.php
@@ -46,7 +46,7 @@
 $maxLong = 0;
 $index = (object)['longs' => [], 'shorts' => []];
 $options = readOptions('common.txt', $index);
-$options = readOptions($fileName, $index);
+$options = array_merge($options, readOptions($fileName, $index));
 
 createHelp($options);
 echo LN;

diff --git a/scripts/cli-generator/run-all.sh b/scripts/cli-generator/run-all.sh
@@ -0,0 +1,9 @@
+php generate.php validate.txt > validate.sh.txt
+php generate.php completeness.txt > completeness.sh.txt
+php generate.php classifications.txt > classifications.sh.txt
+php generate.php authorities.txt > authorities.sh.txt
+php generate.php serials.txt > serials.sh.txt
+php generate.php tt-completeness.txt > tt-completeness.sh.txt
+php generate.php shelf-ready-completeness.txt > shelf-ready-completeness.sh.txt
+php generate.php functions.txt > functions.sh.txt
+php generate.php shacl4bib.txt > shacl4bib.sh.txt
diff --git a/scripts/cli-generator/serials.txt b/scripts/cli-generator/serials.txt
@@ -1,31 +1 @@
-      options.addOption("m", "marcVersion", true, "MARC version ('OCLC' or 'DNB')");
-      options.addOption("h", "help", false, "display help");
-      options.addOption("n", "nolog", false, "do not display log messages");
-      options.addOption("l", "limit", true, "limit the number of records to process");
-      options.addOption("o", "offset", true, "the first record to process");
-      options.addOption("i", "id", true, "the MARC identifier (content of 001)");
-      options.addOption("d", "defaultRecordType", true, "the default record type if the record's type is undetectable");
-      options.addOption("q", "fixAlephseq", false, "fix the known issues of Alephseq format");
-      options.addOption("a", "fixAlma", false, "fix the known issues of Alma format");
-      options.addOption("b", "fixKbr", false, "fix the known issues of Alma format");
-      options.addOption("p", "alephseq", false, "the source is in Alephseq format");
-      options.addOption("x", "marcxml", false, "the source is in MARCXML format");
-      options.addOption("y", "lineSeparated", false, "the source is in line separated MARC format");
-      options.addOption("t", "outputDir", true, "output directory");
-      options.addOption("r", "trimId", false, "remove spaces from the end of record IDs");
-      options.addOption("z", "ignorableFields", true, "ignore fields from the analysis");
-      options.addOption("v", "ignorableRecords", true, "ignore records from the analysis");
-      options.addOption("f", "marcFormat", true, "MARC format (like 'ISO' or 'MARCXML')");
-      options.addOption("s", "dataSource", true, "data source (file of stream)");
-      options.addOption("g", "defaultEncoding", true, "default character encoding");
-      options.addOption("1", "alephseqLineType", true, "Alephseq line type");
-      options.addOption("2", "picaIdField", true, "PICA id field");
-      options.addOption("u", "picaSubfieldSeparator", true, "PICA subfield separator");
-      options.addOption("j", "picaSchemaFile", true, "Avram PICA schema file");
-      options.addOption("w", "schemaType", true, "metadata schema type ('MARC21', 'UNIMARC', or 'PICA')");
-      options.addOption("k", "picaRecordType", true, "picaRecordType");
-      options.addOption("c", "allowableRecords", true, "allow records for the analysis");
-      options.addOption("e", "groupBy", true, "group the results by the value of this data element (e.g. the ILN of  library)");
-      options.addOption("3", "groupListFile", true, "the file which contains a list of ILN codes");
-      options.addOption("4", "solrForScoresUrl", true, "the URL of the Solr server used to store scores");
       options.addOption("F", "fileName", true, "the report file name (default is serial-score.csv)");
diff --git a/scripts/cli-generator/shacl4bib.txt b/scripts/cli-generator/shacl4bib.txt
@@ -1,33 +1,3 @@
-      options.addOption("m", "marcVersion", true, "MARC version ('OCLC' or 'DNB')");
-      options.addOption("h", "help", false, "display help");
-      options.addOption("n", "nolog", false, "do not display log messages");
-      options.addOption("l", "limit", true, "limit the number of records to process");
-      options.addOption("o", "offset", true, "the first record to process");
-      options.addOption("i", "id", true, "the MARC identifier (content of 001)");
-      options.addOption("d", "defaultRecordType", true, "the default record type if the record's type is undetectable");
-      options.addOption("q", "fixAlephseq", false, "fix the known issues of Alephseq format");
-      options.addOption("a", "fixAlma", false, "fix the known issues of Alma format");
-      options.addOption("b", "fixKbr", false, "fix the known issues of Alma format");
-      options.addOption("p", "alephseq", false, "the source is in Alephseq format");
-      options.addOption("x", "marcxml", false, "the source is in MARCXML format");
-      options.addOption("y", "lineSeparated", false, "the source is in line separated MARC format");
-      options.addOption("t", "outputDir", true, "output directory");
-      options.addOption("r", "trimId", false, "remove spaces from the end of record IDs");
-      options.addOption("z", "ignorableFields", true, "ignore fields from the analysis");
-      options.addOption("v", "ignorableRecords", true, "ignore records from the analysis");
-      options.addOption("f", "marcFormat", true, "MARC format (like 'ISO' or 'MARCXML')");
-      options.addOption("s", "dataSource", true, "data source (file of stream)");
-      options.addOption("g", "defaultEncoding", true, "default character encoding");
-      options.addOption("1", "alephseqLineType", true, "Alephseq line type");
-      options.addOption("2", "picaIdField", true, "PICA id field");
-      options.addOption("u", "picaSubfieldSeparator", true, "PICA subfield separator");
-      options.addOption("j", "picaSchemaFile", true, "Avram PICA schema file");
-      options.addOption("w", "schemaType", true, "metadata schema type ('MARC21', 'UNIMARC', or 'PICA')");
-      options.addOption("k", "picaRecordType", true, "picaRecordType");
-      options.addOption("c", "allowableRecords", true, "allow records for the analysis");
-      options.addOption("e", "groupBy", true, "group the results by the value of this data element (e.g. the ILN of  library)");
-      options.addOption("3", "groupListFile", true, "the file which contains a list of ILN codes");
-      options.addOption("4", "solrForScoresUrl", true, "the URL of the Solr server used to store scores");
       options.addOption("C", "shaclConfigurationFile", true, "specify the configuration file");
       options.addOption("O", "shaclOutputFile", true, "output file");
       options.addOption("P", "shaclOutputType", true, "output type (STATUS: status only, SCORE: score only, BOTH: status and score");