From 039cfbfd9c13e9259ed1b7e5c87d607b6a1d64c9 Mon Sep 17 00:00:00 2001
From: Hatton <hattonjohn@gmail.com>
Date: Thu, 19 Dec 2024 14:46:11 -0700
Subject: [PATCH] Enforce sentence casing when writing IMDI

---
 .github/copilot-instructions.md               |  5 +++
 e2e/localization.e2e.ts                       |  5 +--
 package.json                                  |  2 +-
 .../session/FieldOpenChoiceChooser.tsx        |  4 +--
 src/export/ImdiGenerator-vocab.spec.ts        | 35 +++++++++++++++++++
 src/export/ImdiGenerator.ts                   | 15 ++++----
 src/export/sessionImdi-custom.spec.ts         |  3 +-
 src/export/sessionImdi-edolo.spec.ts          |  2 +-
 src/model/Project/Project.ts                  |  2 +-
 src/model/field/FieldDefinition.ts            |  2 +-
 src/other/case.spec.ts                        | 34 +++++++++---------
 src/other/case.ts                             | 17 ++++++---
 12 files changed, 87 insertions(+), 39 deletions(-)
 create mode 100644 .github/copilot-instructions.md
 create mode 100644 src/export/ImdiGenerator-vocab.spec.ts
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 00000000..5b59db19
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,5 @@
+Use vitest, not jest.
+Always use arrow functions and function components in React.
+Avoid removing existing comments.
+Avoid adding a comment like "// add this line".
+If you think you might need access to another file, stop and ask me for it.
diff --git a/e2e/localization.e2e.ts b/e2e/localization.e2e.ts
index d2153a49..9291ef62 100644
--- a/e2e/localization.e2e.ts
+++ b/e2e/localization.e2e.ts
@@ -62,10 +62,7 @@ test.describe("Localization", () => {
     // tabs of the selected file
     await shouldSeeExactlyOnce(["Catatan"]);
     // some fields
-    await shouldSeeExactlyOnce([
-      /*"Deskripsi"*/ "Keterangan",
-      "Kotak-kotak Khusus"
-    ]);
+    await shouldSeeExactlyOnce(["Deskripsi", "Kotak-kotak Khusus"]);
 
     await shouldHaveMultiple("Tanggal", 2); // date
 
diff --git a/package.json b/package.json
index d27642df..86d5d06a 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
   "main": "dist/src/mainProcess/main.js",
   "name": "lameta",
   "productName": "lameta",
-  "version": "2.3.13-beta",
+  "version": "2.3.14-beta",
   "author": {
     "name": "lameta",
     "email": "sorryno@email.org"
diff --git a/src/components/session/FieldOpenChoiceChooser.tsx b/src/components/session/FieldOpenChoiceChooser.tsx
index 9c2c1cee..5bd11bd7 100644
--- a/src/components/session/FieldOpenChoiceChooser.tsx
+++ b/src/components/session/FieldOpenChoiceChooser.tsx
@@ -3,7 +3,7 @@ import { Field } from "../../model/field/Field";
 import { observer } from "mobx-react";
 import CreatableSelect from "react-select/creatable";
 import { lameta_orange } from "../../containers/theme";
-import { CapitalCase } from "../../other/case";
+import { capitalCase } from "../../other/case";
 import { OptionWithTooltip } from "../OptionWithTooltip";
 
 //const Choices = new Dictionary<string, Array<string>>();
@@ -105,7 +105,7 @@ const FieldOpenChoiceChooser: React.FunctionComponent<{
         }}
         onChange={(s: any) => {
           props.field.setValueFromString(
-            CapitalCase(s && s.value ? s.value : "")
+            capitalCase(s && s.value ? s.value : "")
           );
         }}
         components={{ Option: OptionWithTooltip }}
diff --git a/src/export/ImdiGenerator-vocab.spec.ts b/src/export/ImdiGenerator-vocab.spec.ts
new file mode 100644
index 00000000..5aaee95f
--- /dev/null
+++ b/src/export/ImdiGenerator-vocab.spec.ts
@@ -0,0 +1,35 @@
+import ImdiGenerator, { IMDIMode } from "./ImdiGenerator";
+import { Project } from "../model/Project/Project";
+import temp from "temp";
+import { setResultXml, value } from "../other/xmlUnitTestUtils";
+
+let project: Project;
+describe("ImdiGenerator genre handling", () => {
+  beforeAll(async () => {
+    temp.track(); // enable before mkdirSync so the directory is removed on exit
+    const projectDir = temp.mkdirSync("lameta imdi generator vocab test");
+    project = Project.fromDirectory(projectDir);
+    await project.descriptionFolder.addFileForTestAsync(randomFileName());
+    await project.otherDocsFolder.addFileForTestAsync(randomFileName());
+  });
+  it("should convert genre to sentence case", () => {
+    const session = project.addSession();
+    session.properties.setText("genre", "procedural_discourse");
+    const imdi = ImdiGenerator.generateSession(
+      IMDIMode.RAW_IMDI,
+      session,
+      project,
+      true
+    );
+    setResultXml(imdi);
+
+    // The stored token "procedural_discourse" must be exported with only its first letter capitalized
+    expect(value("//Session/MDGroup/Content/Genre")).toBe(
+      "Procedural discourse"
+    );
+  });
+});
+
+function randomFileName() {
+  return `${Math.random().toString(36).substring(7)}.test.txt`; // random base-36 stem
+}
diff --git a/src/export/ImdiGenerator.ts b/src/export/ImdiGenerator.ts
index 6710960e..f641743b 100644
--- a/src/export/ImdiGenerator.ts
+++ b/src/export/ImdiGenerator.ts
@@ -21,7 +21,11 @@ import { sentryBreadCrumb } from "../other/errorHandling";
 import { stringify } from "flatted";
 import { NotifyWarning } from "../components/Notify";
 import { getStatusOfFile } from "../model/file/FileStatus";
-import { CapitalCase, safeSentenceCase } from "../other/case";
+import {
+  capitalCase,
+  sentenceCase,
+  sentenceCaseUnlessAcronym
+} from "../other/case";
 
 export enum IMDIMode {
   OPEX, // wrap in OPEX elements, name .opex
@@ -453,12 +457,12 @@ export default class ImdiGenerator {
               // Hanna asks that we not do this to topic and keyword
               if (["status" /*, "keyword", "topic",*/].indexOf(key) > -1) {
-                // capitalize the first letter of each word
+                // sentence-case the value, leaving acronyms and known mixed-case words untouched
-                v = safeSentenceCase(v);
+                v = sentenceCaseUnlessAcronym(v);
               }
 
               this.tail = this.tail.element("Key", v);
               this.mostRecentElement = this.tail;
-              this.attributeLiteral("Name", CapitalCase(key)); //https://trello.com/c/GXxtRimV/68-topic-and-keyword-in-the-imdi-output-should-start-with-upper-case
+              this.attributeLiteral("Name", capitalCase(key)); //https://trello.com/c/GXxtRimV/68-topic-and-keyword-in-the-imdi-output-should-start-with-upper-case
               this.tail = this.tail.up();
             });
           }
@@ -968,8 +972,7 @@ export default class ImdiGenerator {
 
     if (["genre", "subgenre", "socialContext"].indexOf(fieldName) > -1) {
       // For genre in IMDI export, ELAR doesn't want "formulaic_discourse",
-      // they want "Formulaic Discourse"
-      //https://trello.com/c/3H1oJsWk/66-imdi-save-genre-as-the-full-ui-form-not-the-underlying-token
+      // they want "Formulaic discourse" (sentence case: only the first word is capitalized)
 
       // for some, we may have access to an explicit label. Probably makes no difference, the keys are always just snake case of the label.
       const label = f.properties
@@ -977,7 +980,7 @@ export default class ImdiGenerator {
         // this probably isn't used. The idea is that if we didn't get new label, just replace the underscores
         .replace(/_/g, " ");
 
-      value = titleCase(label);
+      value = sentenceCase(label);
     }
     if (projectFallbackFieldName && (!value || value.length === 0)) {
       value = this.project.properties.getTextStringOrEmpty(
diff --git a/src/export/sessionImdi-custom.spec.ts b/src/export/sessionImdi-custom.spec.ts
index 7da5a885..409e684e 100644
--- a/src/export/sessionImdi-custom.spec.ts
+++ b/src/export/sessionImdi-custom.spec.ts
@@ -55,9 +55,8 @@ describe("session imdi export", () => {
         true /*omit namespace*/
       )
     );
-    // expect there to be a <Genre> element with text "Academic Output"
     expect(count("//Genre")).toBe(1);
-    expect("//Genre").toHaveText("Academic Output");
+    expect("//Genre").toHaveText("Academic output");
   });
 
   // notion issue #239
diff --git a/src/export/sessionImdi-edolo.spec.ts b/src/export/sessionImdi-edolo.spec.ts
index 06974cb4..f2f269f8 100644
--- a/src/export/sessionImdi-edolo.spec.ts
+++ b/src/export/sessionImdi-edolo.spec.ts
@@ -95,7 +95,7 @@ it("should contain Actors", () => {
     "1960"
   );
   expect("METATRANSCRIPT/Session/MDGroup/Actors/Actor[2]/Role").toMatch(
-    "participant"
+    "Participant"
   );
   expect("METATRANSCRIPT/Session/MDGroup/Actors/Actor[3]/Name").toMatch(
     "Hatton"
diff --git a/src/model/Project/Project.ts b/src/model/Project/Project.ts
index 6b86247d..78284707 100644
--- a/src/model/Project/Project.ts
+++ b/src/model/Project/Project.ts
@@ -35,7 +35,7 @@ import {
   NotifyWarning
 } from "../../components/Notify";
 import { setCurrentProjectId } from "./MediaFolderAccess";
-import { CapitalCase } from "../../other/case";
+import { capitalCase } from "../../other/case";
 import { IChoice } from "../field/Field";
 import { sanitizeForArchive } from "../../other/sanitizeForArchive";
 import { initializeSanitizeForArchive } from "../../other/sanitizeForArchive";
diff --git a/src/model/field/FieldDefinition.ts b/src/model/field/FieldDefinition.ts
index 50b59631..ffceaeb0 100644
--- a/src/model/field/FieldDefinition.ts
+++ b/src/model/field/FieldDefinition.ts
@@ -1,4 +1,4 @@
-import { CapitalCase } from "../../other/case";
+import { capitalCase } from "../../other/case";
 import { IChoice } from "./Field";
 
 export class FieldDefinition {
diff --git a/src/other/case.spec.ts b/src/other/case.spec.ts
index aa074753..7bd77ad7 100644
--- a/src/other/case.spec.ts
+++ b/src/other/case.spec.ts
@@ -1,33 +1,35 @@
 import { vi, describe, it, beforeAll, beforeEach, expect } from "vitest";
-import { CapitalCase, safeSentenceCase } from "./case";
+import { capitalCase, sentenceCaseUnlessAcronym } from "./case";
 
-describe("CapitalCase tests", () => {
+describe("capitalCase tests", () => {
   it("capitalizes", () => {
-    expect(CapitalCase("hello")).toBe("Hello");
-    expect(CapitalCase("hello world")).toBe("Hello World");
-    expect(CapitalCase("HELlo WoRld")).toBe("Hello World");
+    expect(capitalCase("hello")).toBe("Hello");
+    expect(capitalCase("hello world")).toBe("Hello World");
+    expect(capitalCase("HELlo WoRld")).toBe("Hello World");
     // Not totally clear what the right answer would be here:
-    expect(CapitalCase(" hello world")).toBe(" Hello World");
-    expect(CapitalCase("explicación")).toBe("Explicación");
-    expect(CapitalCase("")).toBe("");
+    expect(capitalCase(" hello world")).toBe(" Hello World");
+    expect(capitalCase("explicación")).toBe("Explicación");
+    expect(capitalCase("")).toBe("");
   });
 });
 
 describe("safeSentenceCase tests", () => {
   it("capitalizes", () => {
-    expect(safeSentenceCase("hello")).toBe("Hello");
-    expect(safeSentenceCase("hello world")).toBe("Hello world");
-    expect(safeSentenceCase("HELlo WoRld")).toBe("Hello world");
+    expect(sentenceCaseUnlessAcronym("hello")).toBe("Hello");
+    expect(sentenceCaseUnlessAcronym("hello world")).toBe("Hello world");
+    expect(sentenceCaseUnlessAcronym("HELlo WoRld")).toBe("Hello world");
     // Not totally clear what the right answer would be here:
-    expect(safeSentenceCase("")).toBe("");
+    expect(sentenceCaseUnlessAcronym("")).toBe("");
   });
   it("does not capitalize some known mixed-case things", () => {
-    expect(safeSentenceCase("FLEx")).toBe("FLEx");
-    expect(safeSentenceCase("FLEx project")).toBe("FLEx project");
-    expect(safeSentenceCase("the FLEx Project")).toBe("The FLEx project");
+    expect(sentenceCaseUnlessAcronym("FLEx")).toBe("FLEx");
+    expect(sentenceCaseUnlessAcronym("FLEx project")).toBe("FLEx project");
+    expect(sentenceCaseUnlessAcronym("the FLEx Project")).toBe(
+      "The FLEx project"
+    );
   });
 
   it("does not capitalize acronyms", () => {
-    expect(safeSentenceCase("on AWS")).toBe("On AWS");
+    expect(sentenceCaseUnlessAcronym("on AWS")).toBe("On AWS");
   });
 });
diff --git a/src/other/case.ts b/src/other/case.ts
index 8b3e7f05..4ea3a303 100644
--- a/src/other/case.ts
+++ b/src/other/case.ts
@@ -1,4 +1,4 @@
-export function CapitalCase(s: string) {
+export function capitalCase(s: string) {
   const allLower = s.toLowerCase();
   const words = allLower.split(" ");
   const capitalizedWords = words
@@ -7,11 +7,9 @@ export function CapitalCase(s: string) {
   return capitalizedWords;
 }
 
-// "SentenceCase" from https://github.com/blakeembrey/change-case has
-// the problem that it first splits the strings, so acronyms get totally hosed.
-// So our algorithm is to leave words alone if they are all caps, or if they are
+// Leave words alone if they are all caps, or if they are
 // in a list of words that we know should not be changed.
-export function safeSentenceCase(s: string): string {
+export function sentenceCaseUnlessAcronym(s: string): string {
   const doNotChangeWords = ["FLEx"];
   // Split the string into words
   const words = s.split(" ");
@@ -38,3 +36,12 @@ export function safeSentenceCase(s: string): string {
   // Join the words back into a string
   return words.join(" ");
 }
+
+/** Lowercases `s` entirely, then capitalizes only its first character. */
+export function sentenceCase(s: string): string {
+  if (!s) return "";
+  // Unlike sentenceCaseUnlessAcronym, acronyms are NOT preserved here:
+  // "on AWS" becomes "On aws". Leading/trailing whitespace is trimmed.
+  const lowered = s.trim().toLowerCase();
+  return lowered.charAt(0).toUpperCase() + lowered.slice(1);
+}