Merge pull request #123 from unfoldingWord/RJHimprovements

Next set of improvements
unfoldingWord · Jan 25, 2021 · 06ae824 · 06ae824
2 parents 23b76b2 + d369a41
commit 06ae824
Show file tree

Hide file tree

Showing 44 changed files with 2,884 additions and 2,236 deletions.
diff --git a/README.md b/README.md
@@ -50,6 +50,8 @@ The top-level checking demonstrations return:
 1. A list of things that were checked (successList)
 1. Typically a list of (higher-priority) errors and a list of (lower-priority) warnings, but other formats for display of messages are also demonstrated.
 
+Note that it's quite possible for the package to give multiple different notices for a single mistake. For example, a punctuation abnormality in a quoted text might advise about the bad punctuation as well as advising about not being able to match the quote. The package errs on the side that having additional warnings is better than missing a warning (and then later being confused about why a data file won't load). These consequential warnings mean that, if possible, it's good to rerun the checks from time to time on your latest files as you're fixing multiple errors.
+
 ### Notice Objects (in noticeList)
 
 However, the lower-level checking functions provide only the list of success message strings and one list of `notices` (i.e., warnings/errors combined) typically consisting of an object with some or all of the following fields (as available/relevant):
@@ -76,7 +78,9 @@ All of the following fields may be missing or undefined, i.e., they’re all opt
 1. `location`: A string indicating the context of the notice, e.g., "in line 17 of 'someBook.usfm'". (Still not completely sure what should be left in this string now that we have added optional `repoName`, `filename`, `rowID`, `lineNumber`, `fieldName` fields.)
 1. `extra`: for a check that looks in multiple repos, this contains extra identifying information (typically the `repoCode`) to help the user determine what resource/repo/file that the notice applies to (which, in the demos, is then often prepended to the `message`).
 
-Keeping our notices in this format, rather than the simplicity of just saving an array of single strings, allows the above *notice components* to be processed at a higher level, e.g., to allow user-controlled filtering, sorting, etc. The default is to funnel them all through the supplied `processNoticesToErrorsWarnings` function (in demos/notice-processing-functions.fs) which does the following:
+Keeping our notices in this format, rather than the simplicity of just saving an array of single strings, allows the above *notice components* to be processed at a higher level, e.g., to allow user-controlled filtering, sorting, etc. For example, in a rush it might be good to display the highest priority messages first, and fix a few of those. On the other hand, if working systematically through, it might be good to sort by filename and line-number so that warnings about the same part of a file can be viewed together irrespective of their different priority numbers.
+
+The default in the demos is to funnel all the raw notices through the supplied `processNoticesToErrorsWarnings` function (in demos/notice-processing-functions.fs) which does the following:
 
 1. Removes excess repeated errors. For example, if there’s a systematic error in a file, say with unneeded leading spaces in every field, rather than returning with hundreds of errors, only the first several errors will be returned, followed by an "errors suppressed" message. (The number of each error displayed is settable as an option—zero means display all errors with no suppression.)
 1. Separates notices into error and warning lists based on the priority number. (The switch-over point is settable as an option.)
@@ -174,7 +178,7 @@ Once you have this codebase forked and cloned to your local machine, you can sta
 1. Ensure that the Styleguide is running by visiting `localhost:6060` on your web browser. (for Chromebooks see note below)
 1. Modify the code and documentation in your code editor and check out the Styleguide.
     - Update the styleguide.config.js to match your new component names.
-1. See debug `console.log()` output in browser console—in chrome, CTRL-SHIFT-J to open.
+1. See debug `userLog()` output in browser console—in chrome, CTRL-SHIFT-J to open.
 
 ### Setting up NPM Publishing
 

diff --git a/noticeList.txt b/noticeList.txt
diff --git a/package.json b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "uw-content-validation",
   "description": "Functions for Checking Door43.org Scriptural Content/Resources.",
-  "version": "1.0.3",
+  "version": "1.1.0",
   "private": false,
   "homepage": "https://unfoldingword.github.io/uw-content-validation/",
   "repository": {

diff --git a/src/__tests__/__snapshots__/book-package-check.test.js.snap b/src/__tests__/__snapshots__/book-package-check.test.js.snap
@@ -187,6 +187,22 @@ Object {
       "repoName": "hbo_uhb",
       "username": "unfoldingWord",
     },
+    Object {
+      "C": "4",
+      "V": "11",
+      "bookID": "RUT",
+      "details": "Expected 3-4 attributes but only found 2",
+      "extra": "UHB",
+      "extract": "\\\\w שֵׁ֖ם|lemma=\\"שֵׁם\\" strong=\\"H8034\\"\\\\w*",
+      "filename": "08-RUT.usfm",
+      "lineNumber": 1195,
+      "location": " in en RUT book package from unfoldingWord master branch",
+      "message": "Seems too few original \\\\w attributes",
+      "priority": 837,
+      "repoCode": "UHB",
+      "repoName": "hbo_uhb",
+      "username": "unfoldingWord",
+    },
     Object {
       "C": "4",
       "V": "11",

diff --git a/src/__tests__/__snapshots__/tn-table-row-check.test.js.snap b/src/__tests__/__snapshots__/tn-table-row-check.test.js.snap
@@ -200,7 +200,7 @@ Object {
       "fieldName": "OrigQuote",
       "location": " that was supplied",
       "message": "Seems original language quote might not start at the beginning of a word",
-      "priority": 620,
+      "priority": 909,
       "rowID": "e7qw",
     },
     Object {

diff --git a/src/core/BCS-usfm-grammar-check.js b/src/core/BCS-usfm-grammar-check.js
@@ -1,6 +1,7 @@
 import grammar from 'usfm-grammar';
 import * as books from '../core/books/books';
 import { DEFAULT_EXTRACT_LENGTH } from './text-handling-functions'
+import { userLog, parameterAssert } from './utilities';
 
 
 // const USFM_GRAMMAR_VALIDATOR_VERSION_STRING = '0.3.2';
@@ -9,22 +10,22 @@ import { DEFAULT_EXTRACT_LENGTH } from './text-handling-functions'
 export function runBCSGrammarCheck(strictnessString, fileText, filename, givenLocation, checkingOptions) {
     // Runs the BCS USFM Grammar checker
     //  which can be quite time-consuming on large, complex USFM files
-    // console.log(`Running ${strictnessString} BCS USFM grammar check${givenLocation} (can take quite a while for a large book)…`);
-    console.assert(strictnessString === 'strict' || strictnessString === 'relaxed', `Unexpected strictnessString='${strictnessString}'`);
+    // debugLog(`Running ${strictnessString} BCS USFM grammar check${givenLocation} (can take quite a while for a large book)…`);
+    parameterAssert(strictnessString === 'strict' || strictnessString === 'relaxed', `Unexpected strictnessString='${strictnessString}'`);
 
     let extractLength;
     try {
         extractLength = checkingOptions?.extractLength;
     } catch (usfmELerror) { }
     if (typeof extractLength !== 'number' || isNaN(extractLength)) {
         extractLength = DEFAULT_EXTRACT_LENGTH;
-        // console.log(`Using default extractLength=${extractLength}`);
+        // debugLog(`Using default extractLength=${extractLength}`);
     }
     // else
-    // console.log(`Using supplied extractLength=${extractLength} cf. default=${DEFAULT_EXTRACT_LENGTH}`);
+    // debugLog(`Using supplied extractLength=${extractLength} cf. default=${DEFAULT_EXTRACT_LENGTH}`);
     const halfLength = Math.floor(extractLength / 2); // rounded down
     const halfLengthPlus = Math.floor((extractLength + 1) / 2); // rounded up
-    // console.log(`Using halfLength=${halfLength}`, `halfLengthPlus=${halfLengthPlus}`);
+    // debugLog(`Using halfLength=${halfLength}`, `halfLengthPlus=${halfLengthPlus}`);
 
     // Now create the parser and run the check
     const ourUsfmParser = new grammar.USFMParser(fileText,
@@ -35,21 +36,21 @@ export function runBCSGrammarCheck(strictnessString, fileText, filename, givenLo
     const parserResult = ourUsfmParser.toJSON()
     let parserMessages;
     parserMessages = parserResult._messages; // Throw away the JSON (if any)
-    // console.log(`  Finished BCS USFM grammar check with messages: ${JSON.stringify(parserResult)}\n and warnings: ${JSON.stringify(ourUsfmParser.warnings)}.`);
+    // debugLog(`  Finished BCS USFM grammar check with messages: ${JSON.stringify(parserResult)}\n and warnings: ${JSON.stringify(ourUsfmParser.warnings)}.`);
     let parseError;
     parseError = parserMessages._error;
-    // console.log(`  parseError: ${parseError}`);
+    // debugLog(`  parseError: ${parseError}`);
     let ourErrorMessage, lineNumberString, characterIndex, extract;
     // NOTE: The following code is quite fragile
     //  as it depends on the precise format of the error message return from USFMParser
     let ourErrorObject = {};
     if (parseError) {
         const contextRE = /(\d+?)\s\|\s(.+)/g;
         for (const errorLine of parseError.split('\n')) {
-            // console.log(`BCS errorLine=${errorLine}`);
+            // debugLog(`BCS errorLine=${errorLine}`);
             if (errorLine.startsWith('>')) {
                 const regexResult = contextRE.exec(errorLine.substring(1).trim());
-                // console.log(`  regexResult: ${JSON.stringify(regexResult)}`);
+                // debugLog(`  regexResult: ${JSON.stringify(regexResult)}`);
                 if (regexResult) {
                     lineNumberString = regexResult[1];
                     extract = regexResult[2];
@@ -63,7 +64,7 @@ export function runBCSGrammarCheck(strictnessString, fileText, filename, givenLo
             }
             else ourErrorMessage = errorLine; // We only want the last one
         }
-        // console.log(`  ourErrorMessage: '${ourErrorMessage}' lineNumberString=${lineNumberString} characterIndex=${characterIndex} extract='${extract}'`);
+        // debugLog(`  ourErrorMessage: '${ourErrorMessage}' lineNumberString=${lineNumberString} characterIndex=${characterIndex} extract='${extract}'`);
 
         // Some of these "errors" need to be degraded in priority
 
@@ -92,7 +93,7 @@ export function runBCSGrammarCheck(strictnessString, fileText, filename, givenLo
                 if (!lines[n - 1]) {
                     lineNumber += 1; // Increment error line number for each blank line
                     if (!notified) {
-                        console.log("Temporarily adjusting BCS grammar error line number to account for blank lines");
+                        userLog("Temporarily adjusting BCS grammar error line number to account for blank lines");
                         notified = true;
                     }
                 }
@@ -104,10 +105,10 @@ export function runBCSGrammarCheck(strictnessString, fileText, filename, givenLo
     }
 
     const parseWarnings = parserResult._warnings ? parserResult._warnings : ourUsfmParser.warnings;
-    // console.log(`  Warnings: ${JSON.stringify(parseWarnings)}`);
+    // debugLog(`  Warnings: ${JSON.stringify(parseWarnings)}`);
     let ourWarnings = [];
     for (const warningString of parseWarnings) {
-        // console.log(`warningString: '${warningString}'`);
+        // debugLog(`warningString: '${warningString}'`);
         // Clean up their warnings a little: Remove trailing spaces and periods
         let adjustedString = warningString.trim(); // Removes the trailing space
         if (adjustedString.endsWith('.')) adjustedString = adjustedString.substring(0, adjustedString.length - 1);
@@ -129,8 +130,8 @@ export function checkUSFMGrammar(bookID, strictnessString, filename, givenText,
 
      Returns a result object containing a successList and a noticeList
      */
-    console.log(`checkUSFMGrammar(${givenText.length.toLocaleString()} chars, '${givenLocation}')…`);
-    console.assert(strictnessString === 'strict' || strictnessString === 'relaxed', `Unexpected strictnessString='${strictnessString}'`);
+    userLog(`checkUSFMGrammar(${givenText.length.toLocaleString()} chars, '${givenLocation}')…`);
+    parameterAssert(strictnessString === 'strict' || strictnessString === 'relaxed', `Unexpected strictnessString='${strictnessString}'`);
 
     let ourLocation = givenLocation;
     if (ourLocation && ourLocation[0] !== ' ') ourLocation = ` ${ourLocation}`;
@@ -139,7 +140,7 @@ export function checkUSFMGrammar(bookID, strictnessString, filename, givenText,
     const cugResult = { successList: [], noticeList: [] };
 
     function addSuccessMessage(successString) {
-        // console.log(`checkUSFMGrammar success: ${successString}`);
+        // debugLog(`checkUSFMGrammar success: ${successString}`);
         cugResult.successList.push(successString);
     }
     function addNotice6to7(noticeObject) {
@@ -151,17 +152,17 @@ export function checkUSFMGrammar(bookID, strictnessString, filename, givenText,
         * @param {String} extract - short extract from the line centred on the problem (if available)
         * @param {String} location - description of where the issue is located
         */
-        // console.log(`checkUSFMGrammar notice: (priority=${priority}) ${message}${characterIndex > 0 ? ` (at character ${characterIndex})` : ""}${extract ? ` ${extract}` : ""}${location}`);
-        console.assert(noticeObject.priority !== undefined, "cUSFMgr addNotice6to7: 'priority' parameter should be defined");
-        console.assert(typeof noticeObject.priority === 'number', `cUSFMgr addNotice6to7: 'priority' parameter should be a number not a '${typeof noticeObject.priority}': ${noticeObject.priority}`);
-        console.assert(noticeObject.message !== undefined, "cUSFMgr addNotice6to7: 'message' parameter should be defined");
-        console.assert(typeof noticeObject.message === 'string', `cUSFMgr addNotice6to7: 'message' parameter should be a string not a '${typeof noticeObject.message}': ${noticeObject.message}`);
-        // console.assert(characterIndex !== undefined, "cUSFMgr addNotice6to7: 'characterIndex' parameter should be defined");
-        if (noticeObject.characterIndex) console.assert(typeof noticeObject.characterIndex === 'number', `cUSFMgr addNotice6to7: 'characterIndex' parameter should be a number not a '${typeof noticeObject.characterIndex}': ${noticeObject.characterIndex}`);
-        // console.assert(extract !== undefined, "cUSFMgr addNotice6to7: 'extract' parameter should be defined");
-        if (noticeObject.extract) console.assert(typeof noticeObject.extract === 'string', `cUSFMgr addNotice6to7: 'extract' parameter should be a string not a '${typeof extract}': ${noticeObject.extract}`);
-        console.assert(noticeObject.location !== undefined, "cUSFMgr addNotice6to7: 'location' parameter should be defined");
-        console.assert(typeof noticeObject.location === 'string', `cUSFMgr addNotice6to7: 'location' parameter should be a string not a '${typeof noticeObject.location}': ${noticeObject.location}`);
+        // debugLog(`checkUSFMGrammar notice: (priority=${priority}) ${message}${characterIndex > 0 ? ` (at character ${characterIndex})` : ""}${extract ? ` ${extract}` : ""}${location}`);
+        parameterAssert(noticeObject.priority !== undefined, "cUSFMgr addNotice6to7: 'priority' parameter should be defined");
+        parameterAssert(typeof noticeObject.priority === 'number', `cUSFMgr addNotice6to7: 'priority' parameter should be a number not a '${typeof noticeObject.priority}': ${noticeObject.priority}`);
+        parameterAssert(noticeObject.message !== undefined, "cUSFMgr addNotice6to7: 'message' parameter should be defined");
+        parameterAssert(typeof noticeObject.message === 'string', `cUSFMgr addNotice6to7: 'message' parameter should be a string not a '${typeof noticeObject.message}': ${noticeObject.message}`);
+        // parameterAssert(characterIndex !== undefined, "cUSFMgr addNotice6to7: 'characterIndex' parameter should be defined");
+        if (noticeObject.characterIndex) parameterAssert(typeof noticeObject.characterIndex === 'number', `cUSFMgr addNotice6to7: 'characterIndex' parameter should be a number not a '${typeof noticeObject.characterIndex}': ${noticeObject.characterIndex}`);
+        // parameterAssert(extract !== undefined, "cUSFMgr addNotice6to7: 'extract' parameter should be defined");
+        if (noticeObject.extract) parameterAssert(typeof noticeObject.extract === 'string', `cUSFMgr addNotice6to7: 'extract' parameter should be a string not a '${typeof extract}': ${noticeObject.extract}`);
+        parameterAssert(noticeObject.location !== undefined, "cUSFMgr addNotice6to7: 'location' parameter should be defined");
+        parameterAssert(typeof noticeObject.location === 'string', `cUSFMgr addNotice6to7: 'location' parameter should be a string not a '${typeof noticeObject.location}': ${noticeObject.location}`);
         cugResult.noticeList.push({ ...noticeObject, bookID, filename });
     }
 
@@ -171,7 +172,7 @@ export function checkUSFMGrammar(bookID, strictnessString, filename, givenText,
         return cugResult;
 
     const grammarCheckResult = runBCSGrammarCheck(strictnessString, givenText, filename, ourLocation, checkingOptions);
-    // console.log(`grammarCheckResult=${JSON.stringify(grammarCheckResult)}`);
+    // debugLog(`grammarCheckResult=${JSON.stringify(grammarCheckResult)}`);
 
     if (!grammarCheckResult.isValidUSFM)
         addNotice6to7({ priority: 944, message: `USFM3 Grammar Check (${strictnessString} mode) doesn’t pass`, filename, location: ourLocation });
@@ -185,8 +186,8 @@ export function checkUSFMGrammar(bookID, strictnessString, filename, givenText,
         addNotice6to7({ priority: 101, message: `USFMGrammar: ${warningString}`, filename, location: ourLocation });
 
     addSuccessMessage(`Checked USFM Grammar (${strictnessString} mode) ${grammarCheckResult.isValidUSFM ? "without errors" : " (but the USFM DIDN’T validate)"}`);
-    // console.log(`  checkUSFMGrammar returning with ${result.successList.length.toLocaleString()} success(es) and ${result.noticeList.length.toLocaleString()} notice(s).`);
-    // console.log(`checkUSFMGrammar result is ${JSON.stringify(result)}`);
+    // debugLog(`  checkUSFMGrammar returning with ${result.successList.length.toLocaleString()} success(es) and ${result.noticeList.length.toLocaleString()} notice(s).`);
+    // debugLog(`checkUSFMGrammar result is ${JSON.stringify(result)}`);
     return cugResult;
 }
 // end of checkUSFMGrammar function