Skip to content

Commit

Permalink
Enforce sentence casing when writing IMDI
Browse files Browse the repository at this point in the history
  • Loading branch information
hatton committed Dec 19, 2024
1 parent df563b5 commit 039cfbf
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 39 deletions.
5 changes: 5 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Use vitest, not jest.
Always use arrow functions and function components in React.
Avoid removing existing comments.
Avoid adding a comment like "// add this line".
If you think you might need access to another file, stop and ask me for it.
5 changes: 1 addition & 4 deletions e2e/localization.e2e.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,7 @@ test.describe("Localization", () => {
// tabs of the selected file
await shouldSeeExactlyOnce(["Catatan"]);
// some fields
await shouldSeeExactlyOnce([
/*"Deskripsi"*/ "Keterangan",
"Kotak-kotak Khusus"
]);
await shouldSeeExactlyOnce(["Deskripsi", "Kotak-kotak Khusus"]);

await shouldHaveMultiple("Tanggal", 2); // date

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"main": "dist/src/mainProcess/main.js",
"name": "lameta",
"productName": "lameta",
"version": "2.3.13-beta",
"version": "2.3.14-beta",
"author": {
"name": "lameta",
"email": "sorryno@email.org"
Expand Down
4 changes: 2 additions & 2 deletions src/components/session/FieldOpenChoiceChooser.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Field } from "../../model/field/Field";
import { observer } from "mobx-react";
import CreatableSelect from "react-select/creatable";
import { lameta_orange } from "../../containers/theme";
import { CapitalCase } from "../../other/case";
import { capitalCase } from "../../other/case";
import { OptionWithTooltip } from "../OptionWithTooltip";

//const Choices = new Dictionary<string, Array<string>>();
Expand Down Expand Up @@ -105,7 +105,7 @@ const FieldOpenChoiceChooser: React.FunctionComponent<{
}}
onChange={(s: any) => {
props.field.setValueFromString(
CapitalCase(s && s.value ? s.value : "")
capitalCase(s && s.value ? s.value : "")
);
}}
components={{ Option: OptionWithTooltip }}
Expand Down
35 changes: 35 additions & 0 deletions src/export/ImdiGenerator-vocab.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import ImdiGenerator, { IMDIMode } from "./ImdiGenerator";
import { Project } from "../model/Project/Project";
import temp from "temp";
import { setResultXml, value } from "../other/xmlUnitTestUtils";

// Project shared by every test in this file; created once in beforeAll.
let project: Project;

// Verifies that vocabulary-backed fields (here: genre) are written to IMDI in
// sentence case rather than as the underlying snake_case token.
describe("ImdiGenerator genre handling", () => {
  beforeAll(async () => {
    // Enable tracking BEFORE creating the directory: temp only registers paths
    // for automatic cleanup if tracking is already on when they are created.
    // Creating the directory at module load (before track()) would leak it.
    temp.track();
    const projectDir = temp.mkdirSync("lameta imdi generator vocab test");
    project = Project.fromDirectory(projectDir);
    await project.descriptionFolder.addFileForTestAsync(randomFileName());
    await project.otherDocsFolder.addFileForTestAsync(randomFileName());
  });

  it("should convert genre to sentence case", () => {
    const session = project.addSession();
    // Store the raw snake_case token, as it would be stored by the UI.
    session.properties.setText("genre", "procedural_discourse");
    const imdi = ImdiGenerator.generateSession(
      IMDIMode.RAW_IMDI,
      session,
      project,
      true
    );
    setResultXml(imdi);

    // Check for correct genre case in the output
    expect(value("//Session/MDGroup/Content/Genre")).toBe(
      "Procedural discourse"
    );
  });
});

/**
 * Builds a throwaway file name for test fixtures.
 *
 * The name is a short base-36 fragment derived from `Math.random()` followed
 * by the fixed suffix ".test.txt". The fragment may be very short (or even
 * empty) when the random number has few base-36 digits.
 */
function randomFileName(): string {
  // "0.xxxxxxxx" in base 36; drop the leading "0." and the first few digits.
  const token = Math.random().toString(36).slice(7);
  return `${token}.test.txt`;
}
15 changes: 9 additions & 6 deletions src/export/ImdiGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ import { sentryBreadCrumb } from "../other/errorHandling";
import { stringify } from "flatted";
import { NotifyWarning } from "../components/Notify";
import { getStatusOfFile } from "../model/file/FileStatus";
import { CapitalCase, safeSentenceCase } from "../other/case";
import {
capitalCase,
sentenceCase,
sentenceCaseUnlessAcronym
} from "../other/case";

export enum IMDIMode {
OPEX, // wrap in OPEX elements, name .opex
Expand Down Expand Up @@ -453,12 +457,12 @@ export default class ImdiGenerator {
// Hanna asks that we not do this to topic and keyword
if (["status" /*, "keyword", "topic",*/].indexOf(key) > -1) {
// sentence-case the value (capitalize only the first word), leaving acronyms and known mixed-case words like "FLEx" alone
v = safeSentenceCase(v);
v = sentenceCaseUnlessAcronym(v);
}

this.tail = this.tail.element("Key", v);
this.mostRecentElement = this.tail;
this.attributeLiteral("Name", CapitalCase(key)); //https://trello.com/c/GXxtRimV/68-topic-and-keyword-in-the-imdi-output-should-start-with-upper-case
this.attributeLiteral("Name", capitalCase(key)); //https://trello.com/c/GXxtRimV/68-topic-and-keyword-in-the-imdi-output-should-start-with-upper-case
this.tail = this.tail.up();
});
}
Expand Down Expand Up @@ -968,16 +972,15 @@ export default class ImdiGenerator {

if (["genre", "subgenre", "socialContext"].indexOf(fieldName) > -1) {
// For genre in IMDI export, ELAR doesn't want "formulaic_discourse",
// they want "Formulaic Discourse"
//https://trello.com/c/3H1oJsWk/66-imdi-save-genre-as-the-full-ui-form-not-the-underlying-token
// they want "Formulaic discourse" (sentence case)

// for some, we may have access to an explicit label. Probably makes no difference, the keys are always just snake case of the label.
const label = f.properties
.getLabelOfValue(fieldName)
// This probably isn't used. The idea is that if we didn't get a new label, we just replace the underscores.
.replace(/_/g, " ");

value = titleCase(label);
value = sentenceCase(label);
}
if (projectFallbackFieldName && (!value || value.length === 0)) {
value = this.project.properties.getTextStringOrEmpty(
Expand Down
3 changes: 1 addition & 2 deletions src/export/sessionImdi-custom.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,8 @@ describe("session imdi export", () => {
true /*omit namespace*/
)
);
// expect there to be a <Genre> element with text "Academic Output"
expect(count("//Genre")).toBe(1);
expect("//Genre").toHaveText("Academic Output");
expect("//Genre").toHaveText("Academic output");
});

// notion issue #239
Expand Down
2 changes: 1 addition & 1 deletion src/export/sessionImdi-edolo.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ it("should contain Actors", () => {
"1960"
);
expect("METATRANSCRIPT/Session/MDGroup/Actors/Actor[2]/Role").toMatch(
"participant"
"Participant"
);
expect("METATRANSCRIPT/Session/MDGroup/Actors/Actor[3]/Name").toMatch(
"Hatton"
Expand Down
2 changes: 1 addition & 1 deletion src/model/Project/Project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import {
NotifyWarning
} from "../../components/Notify";
import { setCurrentProjectId } from "./MediaFolderAccess";
import { CapitalCase } from "../../other/case";
import { capitalCase } from "../../other/case";
import { IChoice } from "../field/Field";
import { sanitizeForArchive } from "../../other/sanitizeForArchive";
import { initializeSanitizeForArchive } from "../../other/sanitizeForArchive";
Expand Down
2 changes: 1 addition & 1 deletion src/model/field/FieldDefinition.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { CapitalCase } from "../../other/case";
import { capitalCase } from "../../other/case";
import { IChoice } from "./Field";

export class FieldDefinition {
Expand Down
34 changes: 18 additions & 16 deletions src/other/case.spec.ts
Original file line number Diff line number Diff line change
@@ -1,33 +1,35 @@
import { vi, describe, it, beforeAll, beforeEach, expect } from "vitest";
import { CapitalCase, safeSentenceCase } from "./case";
import { capitalCase, sentenceCaseUnlessAcronym } from "./case";

describe("CapitalCase tests", () => {
describe("capitalCase tests", () => {
it("capitalizes", () => {
expect(CapitalCase("hello")).toBe("Hello");
expect(CapitalCase("hello world")).toBe("Hello World");
expect(CapitalCase("HELlo WoRld")).toBe("Hello World");
expect(capitalCase("hello")).toBe("Hello");
expect(capitalCase("hello world")).toBe("Hello World");
expect(capitalCase("HELlo WoRld")).toBe("Hello World");
// Not totally clear what the right answer would be here:
expect(CapitalCase(" hello world")).toBe(" Hello World");
expect(CapitalCase("explicación")).toBe("Explicación");
expect(CapitalCase("")).toBe("");
expect(capitalCase(" hello world")).toBe(" Hello World");
expect(capitalCase("explicación")).toBe("Explicación");
expect(capitalCase("")).toBe("");
});
});

describe("safeSentenceCase tests", () => {
it("capitalizes", () => {
expect(safeSentenceCase("hello")).toBe("Hello");
expect(safeSentenceCase("hello world")).toBe("Hello world");
expect(safeSentenceCase("HELlo WoRld")).toBe("Hello world");
expect(sentenceCaseUnlessAcronym("hello")).toBe("Hello");
expect(sentenceCaseUnlessAcronym("hello world")).toBe("Hello world");
expect(sentenceCaseUnlessAcronym("HELlo WoRld")).toBe("Hello world");
// Not totally clear what the right answer would be here:
expect(safeSentenceCase("")).toBe("");
expect(sentenceCaseUnlessAcronym("")).toBe("");
});
it("does not capitalize some known mixed-case things", () => {
expect(safeSentenceCase("FLEx")).toBe("FLEx");
expect(safeSentenceCase("FLEx project")).toBe("FLEx project");
expect(safeSentenceCase("the FLEx Project")).toBe("The FLEx project");
expect(sentenceCaseUnlessAcronym("FLEx")).toBe("FLEx");
expect(sentenceCaseUnlessAcronym("FLEx project")).toBe("FLEx project");
expect(sentenceCaseUnlessAcronym("the FLEx Project")).toBe(
"The FLEx project"
);
});

it("does not capitalize acronyms", () => {
expect(safeSentenceCase("on AWS")).toBe("On AWS");
expect(sentenceCaseUnlessAcronym("on AWS")).toBe("On AWS");
});
});
17 changes: 12 additions & 5 deletions src/other/case.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export function CapitalCase(s: string) {
export function capitalCase(s: string) {
const allLower = s.toLowerCase();
const words = allLower.split(" ");
const capitalizedWords = words
Expand All @@ -7,11 +7,9 @@ export function CapitalCase(s: string) {
return capitalizedWords;
}

// "SentenceCase" from https://github.com/blakeembrey/change-case has
// the problem that it first splits the strings, so acronyms get totally hosed.
// So our algorithm is to leave words alone if they are all caps, or if they are
// Leave words alone if they are all caps, or if they are
// in a list of words that we know should not be changed.
export function safeSentenceCase(s: string): string {
export function sentenceCaseUnlessAcronym(s: string): string {
const doNotChangeWords = ["FLEx"];
// Split the string into words
const words = s.split(" ");
Expand All @@ -38,3 +36,12 @@ export function safeSentenceCase(s: string): string {
// Join the words back into a string
return words.join(" ");
}

/**
 * Converts a string to sentence case: surrounding whitespace is trimmed,
 * everything is lower-cased, and then the first character is upper-cased.
 *
 * Unlike `sentenceCaseUnlessAcronym`, this makes no attempt to preserve
 * acronyms or mixed-case words; e.g. "Academic Output" becomes
 * "Academic output". Interior spacing is left untouched.
 *
 * @param s - the text to convert
 * @returns the sentence-cased text, or "" when `s` is empty
 */
export function sentenceCase(s: string): string {
  if (!s) {
    return "";
  }
  const lowered = s.trim().toLowerCase();
  // After trimming, the first character of the string is the first character
  // of the first word, so there is no need to split into words and re-join.
  return lowered.charAt(0).toUpperCase() + lowered.slice(1);
}

0 comments on commit 039cfbf

Please sign in to comment.