From 039cfbfd9c13e9259ed1b7e5c87d607b6a1d64c9 Mon Sep 17 00:00:00 2001 From: Hatton Date: Thu, 19 Dec 2024 14:46:11 -0700 Subject: [PATCH] Enforce sentence casing when writing IMDI --- .github/copilot-instructions.md | 5 +++ e2e/localization.e2e.ts | 5 +-- package.json | 2 +- .../session/FieldOpenChoiceChooser.tsx | 4 +-- src/export/ImdiGenerator-vocab.spec.ts | 35 +++++++++++++++++++ src/export/ImdiGenerator.ts | 15 ++++---- src/export/sessionImdi-custom.spec.ts | 3 +- src/export/sessionImdi-edolo.spec.ts | 2 +- src/model/Project/Project.ts | 2 +- src/model/field/FieldDefinition.ts | 2 +- src/other/case.spec.ts | 34 +++++++++--------- src/other/case.ts | 17 ++++++--- 12 files changed, 87 insertions(+), 39 deletions(-) create mode 100644 .github/copilot-instructions.md create mode 100644 src/export/ImdiGenerator-vocab.spec.ts diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 00000000..5b59db19 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,5 @@ +Use vitest, not jest. +Always use arrow functions and function components in React. +Avoid removing existing comments. +Avoid adding a comment like "// add this line". +If you think you might need access to another file, stop and ask me for it. 
diff --git a/e2e/localization.e2e.ts b/e2e/localization.e2e.ts index d2153a49..9291ef62 100644 --- a/e2e/localization.e2e.ts +++ b/e2e/localization.e2e.ts @@ -62,10 +62,7 @@ test.describe("Localization", () => { // tabs of the selected file await shouldSeeExactlyOnce(["Catatan"]); // some fields - await shouldSeeExactlyOnce([ - /*"Deskripsi"*/ "Keterangan", - "Kotak-kotak Khusus" - ]); + await shouldSeeExactlyOnce(["Deskripsi", "Kotak-kotak Khusus"]); await shouldHaveMultiple("Tanggal", 2); // date diff --git a/package.json b/package.json index d27642df..86d5d06a 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "main": "dist/src/mainProcess/main.js", "name": "lameta", "productName": "lameta", - "version": "2.3.13-beta", + "version": "2.3.14-beta", "author": { "name": "lameta", "email": "sorryno@email.org" diff --git a/src/components/session/FieldOpenChoiceChooser.tsx b/src/components/session/FieldOpenChoiceChooser.tsx index 9c2c1cee..5bd11bd7 100644 --- a/src/components/session/FieldOpenChoiceChooser.tsx +++ b/src/components/session/FieldOpenChoiceChooser.tsx @@ -3,7 +3,7 @@ import { Field } from "../../model/field/Field"; import { observer } from "mobx-react"; import CreatableSelect from "react-select/creatable"; import { lameta_orange } from "../../containers/theme"; -import { CapitalCase } from "../../other/case"; +import { capitalCase } from "../../other/case"; import { OptionWithTooltip } from "../OptionWithTooltip"; //const Choices = new Dictionary>(); @@ -105,7 +105,7 @@ const FieldOpenChoiceChooser: React.FunctionComponent<{ }} onChange={(s: any) => { props.field.setValueFromString( - CapitalCase(s && s.value ? s.value : "") + capitalCase(s && s.value ? 
s.value : "") ); }} components={{ Option: OptionWithTooltip }} diff --git a/src/export/ImdiGenerator-vocab.spec.ts b/src/export/ImdiGenerator-vocab.spec.ts new file mode 100644 index 00000000..5aaee95f --- /dev/null +++ b/src/export/ImdiGenerator-vocab.spec.ts @@ -0,0 +1,35 @@ +import ImdiGenerator, { IMDIMode } from "./ImdiGenerator"; +import { Project } from "../model/Project/Project"; +import temp from "temp"; +import { setResultXml, value } from "../other/xmlUnitTestUtils"; + +let project: Project; +const projectDir = temp.mkdirSync("lameta imdi generator vocab test"); +describe("ImdiGenerator genre handling", () => { + beforeAll(async () => { + temp.track(); + project = Project.fromDirectory(projectDir); + await project.descriptionFolder.addFileForTestAsync(randomFileName()); + await project.otherDocsFolder.addFileForTestAsync(randomFileName()); + }); + it("should convert genre to sentence case", () => { + const session = project.addSession(); + session.properties.setText("genre", "procedural_discourse"); + const imdi = ImdiGenerator.generateSession( + IMDIMode.RAW_IMDI, + session, + project, + true + ); + setResultXml(imdi); + + // Check for correct genre case in the output + expect(value("//Session/MDGroup/Content/Genre")).toBe( + "Procedural discourse" + ); + }); +}); + +function randomFileName() { + return Math.random().toString(36).substring(7) + ".test.txt"; +} diff --git a/src/export/ImdiGenerator.ts b/src/export/ImdiGenerator.ts index 6710960e..f641743b 100644 --- a/src/export/ImdiGenerator.ts +++ b/src/export/ImdiGenerator.ts @@ -21,7 +21,11 @@ import { sentryBreadCrumb } from "../other/errorHandling"; import { stringify } from "flatted"; import { NotifyWarning } from "../components/Notify"; import { getStatusOfFile } from "../model/file/FileStatus"; -import { CapitalCase, safeSentenceCase } from "../other/case"; +import { + capitalCase, + sentenceCase, + sentenceCaseUnlessAcronym +} from "../other/case"; export enum IMDIMode { OPEX, // wrap in OPEX 
elements, name .opex @@ -453,12 +457,12 @@ export default class ImdiGenerator { // Hanna asks that we not do this to topic and keyword if (["status" /*, "keyword", "topic",*/].indexOf(key) > -1) { // capitalize the first letter of each word - v = safeSentenceCase(v); + v = sentenceCaseUnlessAcronym(v); } this.tail = this.tail.element("Key", v); this.mostRecentElement = this.tail; - this.attributeLiteral("Name", CapitalCase(key)); //https://trello.com/c/GXxtRimV/68-topic-and-keyword-in-the-imdi-output-should-start-with-upper-case + this.attributeLiteral("Name", capitalCase(key)); //https://trello.com/c/GXxtRimV/68-topic-and-keyword-in-the-imdi-output-should-start-with-upper-case this.tail = this.tail.up(); }); } @@ -968,8 +972,7 @@ export default class ImdiGenerator { if (["genre", "subgenre", "socialContext"].indexOf(fieldName) > -1) { // For genre in IMDI export, ELAR doesn't want "formulaic_discourse", - // they want "Formulaic Discourse" - //https://trello.com/c/3H1oJsWk/66-imdi-save-genre-as-the-full-ui-form-not-the-underlying-token + // they want "Formulaic discourse" (sentence case) // for some, we may have access to an explicit label. Probably makes no difference, the keys are always just snake case of the label. const label = f.properties @@ -977,7 +980,7 @@ export default class ImdiGenerator { // this probably isn't used. 
The idea is that if we didn't get new label, just replace the underscores .replace(/_/g, " "); - value = titleCase(label); + value = sentenceCase(label); } if (projectFallbackFieldName && (!value || value.length === 0)) { value = this.project.properties.getTextStringOrEmpty( diff --git a/src/export/sessionImdi-custom.spec.ts b/src/export/sessionImdi-custom.spec.ts index 7da5a885..409e684e 100644 --- a/src/export/sessionImdi-custom.spec.ts +++ b/src/export/sessionImdi-custom.spec.ts @@ -55,9 +55,8 @@ describe("session imdi export", () => { true /*omit namespace*/ ) ); - // expect there to be a element with text "Academic Output" expect(count("//Genre")).toBe(1); - expect("//Genre").toHaveText("Academic Output"); + expect("//Genre").toHaveText("Academic output"); }); // notion issue #239 diff --git a/src/export/sessionImdi-edolo.spec.ts b/src/export/sessionImdi-edolo.spec.ts index 06974cb4..f2f269f8 100644 --- a/src/export/sessionImdi-edolo.spec.ts +++ b/src/export/sessionImdi-edolo.spec.ts @@ -95,7 +95,7 @@ it("should contain Actors", () => { "1960" ); expect("METATRANSCRIPT/Session/MDGroup/Actors/Actor[2]/Role").toMatch( - "participant" + "Participant" ); expect("METATRANSCRIPT/Session/MDGroup/Actors/Actor[3]/Name").toMatch( "Hatton" diff --git a/src/model/Project/Project.ts b/src/model/Project/Project.ts index 6b86247d..78284707 100644 --- a/src/model/Project/Project.ts +++ b/src/model/Project/Project.ts @@ -35,7 +35,7 @@ import { NotifyWarning } from "../../components/Notify"; import { setCurrentProjectId } from "./MediaFolderAccess"; -import { CapitalCase } from "../../other/case"; +import { capitalCase } from "../../other/case"; import { IChoice } from "../field/Field"; import { sanitizeForArchive } from "../../other/sanitizeForArchive"; import { initializeSanitizeForArchive } from "../../other/sanitizeForArchive"; diff --git a/src/model/field/FieldDefinition.ts b/src/model/field/FieldDefinition.ts index 50b59631..ffceaeb0 100644 --- 
a/src/model/field/FieldDefinition.ts +++ b/src/model/field/FieldDefinition.ts @@ -1,4 +1,4 @@ -import { CapitalCase } from "../../other/case"; +import { capitalCase } from "../../other/case"; import { IChoice } from "./Field"; export class FieldDefinition { diff --git a/src/other/case.spec.ts b/src/other/case.spec.ts index aa074753..7bd77ad7 100644 --- a/src/other/case.spec.ts +++ b/src/other/case.spec.ts @@ -1,33 +1,35 @@ import { vi, describe, it, beforeAll, beforeEach, expect } from "vitest"; -import { CapitalCase, safeSentenceCase } from "./case"; +import { capitalCase, sentenceCaseUnlessAcronym } from "./case"; -describe("CapitalCase tests", () => { +describe("capitalCase tests", () => { it("capitalizes", () => { - expect(CapitalCase("hello")).toBe("Hello"); - expect(CapitalCase("hello world")).toBe("Hello World"); - expect(CapitalCase("HELlo WoRld")).toBe("Hello World"); + expect(capitalCase("hello")).toBe("Hello"); + expect(capitalCase("hello world")).toBe("Hello World"); + expect(capitalCase("HELlo WoRld")).toBe("Hello World"); // Not totally clear what the right answer would be here: - expect(CapitalCase(" hello world")).toBe(" Hello World"); - expect(CapitalCase("explicación")).toBe("Explicación"); - expect(CapitalCase("")).toBe(""); + expect(capitalCase(" hello world")).toBe(" Hello World"); + expect(capitalCase("explicación")).toBe("Explicación"); + expect(capitalCase("")).toBe(""); }); }); describe("safeSentenceCase tests", () => { it("capitalizes", () => { - expect(safeSentenceCase("hello")).toBe("Hello"); - expect(safeSentenceCase("hello world")).toBe("Hello world"); - expect(safeSentenceCase("HELlo WoRld")).toBe("Hello world"); + expect(sentenceCaseUnlessAcronym("hello")).toBe("Hello"); + expect(sentenceCaseUnlessAcronym("hello world")).toBe("Hello world"); + expect(sentenceCaseUnlessAcronym("HELlo WoRld")).toBe("Hello world"); // Not totally clear what the right answer would be here: - expect(safeSentenceCase("")).toBe(""); + 
expect(sentenceCaseUnlessAcronym("")).toBe(""); }); it("does not capitalize some known mixed-case things", () => { - expect(safeSentenceCase("FLEx")).toBe("FLEx"); - expect(safeSentenceCase("FLEx project")).toBe("FLEx project"); - expect(safeSentenceCase("the FLEx Project")).toBe("The FLEx project"); + expect(sentenceCaseUnlessAcronym("FLEx")).toBe("FLEx"); + expect(sentenceCaseUnlessAcronym("FLEx project")).toBe("FLEx project"); + expect(sentenceCaseUnlessAcronym("the FLEx Project")).toBe( + "The FLEx project" + ); }); it("does not capitalize acronyms", () => { - expect(safeSentenceCase("on AWS")).toBe("On AWS"); + expect(sentenceCaseUnlessAcronym("on AWS")).toBe("On AWS"); }); }); diff --git a/src/other/case.ts b/src/other/case.ts index 8b3e7f05..4ea3a303 100644 --- a/src/other/case.ts +++ b/src/other/case.ts @@ -1,4 +1,4 @@ -export function CapitalCase(s: string) { +export function capitalCase(s: string) { const allLower = s.toLowerCase(); const words = allLower.split(" "); const capitalizedWords = words @@ -7,11 +7,9 @@ export function CapitalCase(s: string) { return capitalizedWords; } -// "SentenceCase" from https://github.com/blakeembrey/change-case has -// the problem that it first splits the strings, so acronyms get totally hosed. -// So our algorithm is to leave words alone if they are all caps, or if they are +// Leave words alone if they are all caps, or if they are // in a list of words that we know should not be changed. 
-export function safeSentenceCase(s: string): string { +export function sentenceCaseUnlessAcronym(s: string): string { const doNotChangeWords = ["FLEx"]; // Split the string into words const words = s.split(" "); @@ -38,3 +36,12 @@ export function safeSentenceCase(s: string): string { // Join the words back into a string return words.join(" "); } + +export function sentenceCase(s: string): string { + if (!s) return ""; + const words = s.trim().toLowerCase().split(" "); + const firstWord = words[0].charAt(0).toUpperCase() + words[0].slice(1); + return words.length === 1 + ? firstWord + : `${firstWord} ${words.slice(1).join(" ")}`; +}