diff --git a/.eslintrc.yml b/.eslintrc.yml index 390523f..da98b50 100644 --- a/.eslintrc.yml +++ b/.eslintrc.yml @@ -1,2 +1,4 @@ extends: - - '@digitallinguistics/eslint-config' \ No newline at end of file + - '@digitallinguistics/eslint-config' +env: + mocha: true \ No newline at end of file diff --git a/README.md b/README.md index 21518de..0a5348d 100644 --- a/README.md +++ b/README.md @@ -95,13 +95,14 @@ A JavaScript library that converts linguistic texts in [scription format][script ## Options -Option | Default | Description ---------------------|-------------|---------------------------------------------------------------------------------------------------------------------- -`codes` | `{}` | This option allows you to use custom backslash codes in your interlinear glosses. It should be a hash containing the scription code as a key (without a leading backslash), and the custom code as the value; ex: `"txn": "t"` will allow you to write `\t` instead of `\txn` for transcription lines. -`errors` | `"warn"` | This option allows you to specify how to handle errors. If set to `"warn""` (the default), an utterance which throws an error is skipped and a warning is logged to the console. If set to `"object"`, an error object with information is returned in the results array. If set to `false`, utterances with errors will be skipped silently. If set to `true`, utterances with errors will throw and stop further processing. -`orthography` | `"default"` | An abbreviation for the default orthography to use for transcriptions when one is not specified. -`parser` | `undefined` | A YAML parser to use in parsing the header of a scription document. If none is present, the header will be provided as a string in the `header` property of the returned object. -`utteranceMetadata` | `true` | Whether to parse the utterance metadata line (the first line when it begins with `#`). If set to `true`, a `metadata` property will be added to each utterance that has it. 
+| Option | Default | Description | +| ------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `codes` | `{}` | This option allows you to use custom backslash codes in your interlinear glosses. It should be a hash containing the scription code as a key (without a leading backslash), and the custom code as the value; ex: `"txn": "t"` will allow you to write `\t` instead of `\txn` for transcription lines. | +| `emphasis` | `true` | This option specifies whether emphasis should be passed through as-is (`true`, default), or stripped from the data (`false`). +| `errors` | `"warn"` | This option allows you to specify how to handle errors. If set to `"warn""` (the default), an utterance which throws an error is skipped and a warning is logged to the console. If set to `"object"`, an error object with information is returned in the results array. If set to `false`, utterances with errors will be skipped silently. If set to `true`, utterances with errors will throw and stop further processing. | +| `orthography` | `"default"` | An abbreviation for the default orthography to use for transcriptions when one is not specified. | +| `parser` | `undefined` | A YAML parser to use in parsing the header of a scription document. If none is present, the header will be provided as a string in the `header` property of the returned object. | +| `utteranceMetadata` | `true` | Whether to parse the utterance metadata line (the first line when it begins with `#`). If set to `true`, a `metadata` property will be added to each utterance that has it. 
| [actions]: https://github.com/digitallinguistics/scription2dlx/actions/ [AJV]: https://www.npmjs.com/package/ajv diff --git a/src/index.js b/src/index.js index c2dc9e1..5ebe4e3 100644 --- a/src/index.js +++ b/src/index.js @@ -11,6 +11,7 @@ import parseUtterances from './parseUtterances.js' */ export default function scription2dlx(scription = ``, { codes = {}, + emphasis = true, errors = `warn`, orthography = `default`, parser, @@ -26,6 +27,7 @@ export default function scription2dlx(scription = ``, { } const options = { + emphasis, errors, orthography, utteranceMetadata, diff --git a/src/parseUtterance/index.js b/src/parseUtterance/index.js index 71af1fc..6533437 100644 --- a/src/parseUtterance/index.js +++ b/src/parseUtterance/index.js @@ -26,12 +26,6 @@ export default function parseUtterance(rawLines, schema, codesHash, options) { const utterance = {} let lines = [...rawLines] - const { - errors, - orthography, - utteranceMetadata, - } = options - try { // metadata @@ -42,7 +36,7 @@ export default function parseUtterance(rawLines, schema, codesHash, options) { const rawMetadata = lines.shift() - if (utteranceMetadata === true) { + if (options.utteranceMetadata === true) { const metadata = parseMetadata(rawMetadata) if (metadata) utterance.metadata = metadata } @@ -86,24 +80,24 @@ export default function parseUtterance(rawLines, schema, codesHash, options) { // Transcript if (types.includes(`trs`)) { - utterance.transcript = parseTranscript(codesHash.trs, lines, orthography) + utterance.transcript = parseTranscript(codesHash.trs, lines, options) } // Transcription - utterance.transcription = parseTranscription(codesHash.txn, lines, orthography) + utterance.transcription = parseTranscription(codesHash.txn, lines, options) // Phonetic if (types.includes(`phon`) && schema.includes(`phon`)) { - utterance.phonetic = parsePhonetic(lines[codesHash.phon]) + utterance.phonetic = parsePhonetic(lines[codesHash.phon], options) } // Literal Translation if 
(types.includes(`lit`)) { - utterance.literal = parseLiteral(codesHash.lit, lines) + utterance.literal = parseLiteral(codesHash.lit, lines, options) } // Free Translation - utterance.translation = parseTranslation(codesHash.tln, lines) || `` + utterance.translation = parseTranslation(codesHash.tln, lines, options) || `` // Source if (types.includes(`s`)) { @@ -118,7 +112,7 @@ export default function parseUtterance(rawLines, schema, codesHash, options) { } // Words - const words = parseWords(codesHash, lines, orthography) + const words = parseWords(codesHash, lines, options) if (words.length) utterance.words = words // Notes @@ -137,23 +131,23 @@ export default function parseUtterance(rawLines, schema, codesHash, options) { } if (typeof utterance.transcription === `string`) { - utterance.transcription = { [orthography]: utterance.transcription } + utterance.transcription = { [options.orthography]: utterance.transcription } } return utterance } catch (e) { - if (!errors) return + if (!options.errors) return e.text = rawLines.join(`\n`) - if (errors === `warn`) { + if (options.errors === `warn`) { console.warn(e) return } - if (errors === `object`) { + if (options.errors === `object`) { return e } diff --git a/src/parseUtterance/parseLiteral.js b/src/parseUtterance/parseLiteral.js index 50ffbb9..3813862 100644 --- a/src/parseUtterance/parseLiteral.js +++ b/src/parseUtterance/parseLiteral.js @@ -4,14 +4,16 @@ import removeEmphasis from '../utilities/removeEmphasis.js' /** * Parses, validates, and cleans the literal translation lines - * @param {String} lineCode The line code to use for literal lines - * @param {Object} lines The lines hash + * @param {String} lineCode The line code to use for literal lines + * @param {Object} lines The lines hash + * @param {Object} options The options hash + * @param {Boolean} options.emphasis Whether to retain emphasis in the output * @return {String|Object} */ -export default function parseLiteral(lineCode, lines) { +export default 
function parseLiteral(lineCode, lines, options) { let data = groupLines(lineCode, lines) if (!data) return null - data = removeEmphasis(data) + if (options.emphasis === false) data = removeEmphasis(data) // NB: Do not use the lineCode variable as the first argument to removeBrackets // removeBrackets accept an abstract type, not a line code, as its first argument. // Use both `lit` and `tln` to remove square brackets and multiple types of single quotes diff --git a/src/parseUtterance/parseMorphemes/index.js b/src/parseUtterance/parseMorphemes/index.js index 2f9df44..2d3e79b 100644 --- a/src/parseUtterance/parseMorphemes/index.js +++ b/src/parseUtterance/parseMorphemes/index.js @@ -49,12 +49,13 @@ function tokenizeWord(string) { /** * Accepts an lines hash for a word (morpheme and gloss lines) and returns an array of DLx Morpheme objects - * @param {Object} codes The hash of line codes - * @param {Object} wordHash The lines hash for the word - * @param {String} orthogrpahy The abbreviation to use for the default orthography, if none is provided. - * @return {Array} Returns an array of DLx Morpheme objeccts + * @param {Object} codes The hash of line codes + * @param {Object} wordHash The lines hash for the word + * @param {Object} options The options hash + * @param {String} options.orthography The abbreviation to use for the default orthography, if none is provided. 
+ * @return {Array} Returns an array of DLx Morpheme objects */ -export default function parseMorphemes(codes, wordHash, orthography) { +export default function parseMorphemes(codes, wordHash, options) { const morphemeLines = getLines([codes.gl, codes.m], wordHash) @@ -72,7 +73,7 @@ export default function parseMorphemes(codes, wordHash, orthography) { let transcription = groupLines(codes.m, data) || `` transcription = removeBrackets(`infix`, transcription) - if (typeof transcription === `string`) transcription = { [orthography]: transcription } + if (typeof transcription === `string`) transcription = { [options.orthography]: transcription } return { transcription, diff --git a/src/parseUtterance/parsePhonetic.js b/src/parseUtterance/parsePhonetic.js index 3811f4d..33b3051 100644 --- a/src/parseUtterance/parsePhonetic.js +++ b/src/parseUtterance/parsePhonetic.js @@ -3,12 +3,15 @@ import removeEmphasis from '../utilities/removeEmphasis.js' /** * Cleans and validates the phonetic line - * @param {String} line The phonetic line + * @param {String} line The phonetic line + * @param {Object} options The options hash + * @param {Boolean} options.emphasis Whether to retain emphasis in the output * @return {String} */ -export default function parsePhonetic(line) { +export default function parsePhonetic(line, options) { if (!line) return null // NB: Do not use the lineCode variable as the first argument to removeBrackets // removeBrackets accept an abstract type, not a line code, as its first argument - return removeBrackets(`phon`, removeEmphasis(line)) + if (options.emphasis === false) line = removeEmphasis(line) + return removeBrackets(`phon`, line) } diff --git a/src/parseUtterance/parseTranscript.js b/src/parseUtterance/parseTranscript.js index 1059959..93d146b 100644 --- a/src/parseUtterance/parseTranscript.js +++ b/src/parseUtterance/parseTranscript.js @@ -2,16 +2,17 @@ import groupLines from '../utilities/groupLines.js' /** * Extracts, validates, and cleans the 
transcript lines from the lines hash - * @param {String} lineCode The line code for the transcript lines - * @param {Object} lines The lines hash - * @param {String} orthography The abbreviation to use for the transcript orthography if none is present + * @param {String} lineCode The line code for the transcript lines + * @param {Object} lines The lines hash + * @param {Object} options The options hash + * @param {String} options.orthography The abbreviation to use for the transcript orthography if none is present * @return {String|Object} */ -export default function parseTranscript(lineCode, lines, orthography) { +export default function parseTranscript(lineCode, lines, options) { const data = groupLines(lineCode, lines) - if (typeof data === `string`) return { [orthography]: data } + if (typeof data === `string`) return { [options.orthography]: data } return data || null diff --git a/src/parseUtterance/parseTranscription.js b/src/parseUtterance/parseTranscription.js index 820cf27..101291c 100644 --- a/src/parseUtterance/parseTranscription.js +++ b/src/parseUtterance/parseTranscription.js @@ -4,12 +4,13 @@ import removeEmphasis from '../utilities/removeEmphasis.js' /** * Extracts, validates, and cleans the transcription lines from the lines hash - * @param {String} lineCode The code to use for transcription lines - * @param {Object} lines The lines hash - * @param {String} orthography An abbreviation to use for the orthography if one is not specified + * @param {String} lineCode The code to use for transcription lines + * @param {Object} lines The lines hash + * @param {Object} options The options hash + * @param {String} options.orthography An abbreviation to use for the orthography if one is not specified * @return {String|Object} */ -export default function parseTranscription(lineCode, lines, orthography) { +export default function parseTranscription(lineCode, lines, options) { let data = groupLines(lineCode, lines) @@ -17,10 +18,10 @@ export default function 
parseTranscription(lineCode, lines, orthography) { // NB: Do not use lineCode for removeBrackets here // removeBrackets takes an abstract type, not a line code, as its first argument - data = removeEmphasis(data) + if (options.emphasis === false) data = removeEmphasis(data) data = removeBrackets(`txn`, data) - if (typeof data === `string`) return { [orthography]: data } + if (typeof data === `string`) return { [options.orthography]: data } return data diff --git a/src/parseUtterance/parseTranslation.js b/src/parseUtterance/parseTranslation.js index ffc1773..fdf373e 100644 --- a/src/parseUtterance/parseTranslation.js +++ b/src/parseUtterance/parseTranslation.js @@ -4,14 +4,16 @@ import removeEmphasis from '../utilities/removeEmphasis.js' /** * Extracts, validates, and cleans the translation lines from the lines hash - * @param {String} lineCode The line code to use for translations lines - * @param {Object} lines The lines hash + * @param {String} lineCode The line code to use for translations lines + * @param {Object} lines The lines hash + * @param {Object} options The options hash + * @param {Boolean} options.emphasis Whether to retain emphasis in the output * @return {String|Object} */ -export default function parseTranslation(lineCode, lines) { +export default function parseTranslation(lineCode, lines, options) { let data = groupLines(lineCode, lines) if (!data) return null - data = removeEmphasis(data) + if (options.emphasis === false) data = removeEmphasis(data) // NB: removeBrackets accepts an abstract line type, not a specific line code // as its first argument data = removeBrackets(`tln`, data) diff --git a/src/parseUtterance/parseWords.js b/src/parseUtterance/parseWords.js index 7613818..e283da6 100644 --- a/src/parseUtterance/parseWords.js +++ b/src/parseUtterance/parseWords.js @@ -8,23 +8,25 @@ import zip from '../utilities/js/zip.js' /** * Parses the word hash into a DLx Word object - * @param {Object} codes The hash of line codes - * @param {Object} 
data The word hash - * @param {String} orthography The orthography abbreviation to use if one is not specified - * @return {Object} Returns a DLx Word object + * @param {Object} codes The hash of line codes + * @param {Object} data The word hash + * @param {Object} options The options hash + * @param {Boolean} options.emphasis Whether to retain emphasis in the output + * @param {String} options.orthography The orthography abbreviation to use if one is not specified + * @return {Object} Returns a DLx Word object */ -function parseWord(codes, data, orthography) { +function parseWord(codes, data, options) { - data = removeEmphasis(data) // eslint-disable-line no-param-reassign + if (options.emphasis === false) data = removeEmphasis(data) // eslint-disable-line no-param-reassign let transcription = groupLines(codes.w, data) || `` let analysis = groupLines(codes.m, data) const gloss = groupLines(codes.gl, data) const literal = groupLines(codes.wlt, data) - const morphemes = parseMorphemes(codes, data, orthography) + const morphemes = parseMorphemes(codes, data, options) - if (typeof transcription === `string`) transcription = { [orthography]: transcription } - if (typeof analysis === `string`) analysis = { [orthography]: analysis } + if (typeof transcription === `string`) transcription = { [options.orthography]: transcription } + if (typeof analysis === `string`) analysis = { [options.orthography]: analysis } return { transcription, @@ -52,12 +54,13 @@ function tokenizeLine(string) { /** * Extracts word-specific lines from the lines hash and converts them into an array of DLx Word objects - * @param {Object} codes The line codes hash - * @param {Object} lines The lines hash - * @param {String} orthography The abbreviation to use for the orthography if one is not specified - * @return {Array} Returns an array of DLx Word objects + * @param {Object} codes The line codes hash + * @param {Object} lines The lines hash + * @param {Object} options The options hash + * @param 
{String} options.orthography The abbreviation to use for the orthography if one is not specified + * @return {Array} Returns an array of DLx Word objects */ -export default function parseWords(codesHash, lines, orthography) { +export default function parseWords(codesHash, lines, options) { const wordLineCodes = wordTypes.map(type => codesHash[type]) const wordLines = getLines(wordLineCodes, lines) @@ -74,6 +77,6 @@ export default function parseWords(codesHash, lines, orthography) { validateNumItems(wordsHash) return zip(wordsHash) - .map(wordData => parseWord(codesHash, wordData, orthography)) + .map(wordData => parseWord(codesHash, wordData, options)) } diff --git a/test/lines/glosses.test.js b/test/lines/glosses.test.js index 9157aaf..55369a5 100644 --- a/test/lines/glosses.test.js +++ b/test/lines/glosses.test.js @@ -216,7 +216,7 @@ describe(`glosses`, () => { }) - + it(`tokenizes words and morphemes correctly`, () => { const text = ` @@ -270,7 +270,7 @@ describe(`glosses`, () => { expect(w4m.gloss).to.equal(`null`) }) - + it(`populates both word and morpheme glosses`, () => { @@ -289,18 +289,35 @@ describe(`glosses`, () => { }) - it(`is stripped of emphasis`, () => { + it(`retains emphasis`, function() { - const wordGloss = `waxt-qungu` + const withEmphasis = `*waxt*-qungu` + const withoutEmphasis = `waxt-qungu` const text = ` - \\m ${ wordGloss } qasi - \\gl *waxt*-qungu qasi + \\m ${ withoutEmphasis } qasi + \\gl ${ withEmphasis } qasi ` - const { utterances: [{ words: [word] }] } = convert(text) + const { utterances: [{ words: [word] }] } = convert(text, { emphasis: true }) + + expect(word.gloss).to.equal(withEmphasis) + + }) + + it(`strips emphasis`, () => { + + const withEmphasis = `*waxt*-qungu` + const withoutEmphasis = `waxt-qungu` + + const text = ` + \\m ${ withoutEmphasis } qasi + \\gl ${ withEmphasis } qasi + ` + + const { utterances: [{ words: [word] }] } = convert(text, { emphasis: false }) - expect(word.gloss).to.equal(wordGloss) + 
expect(word.gloss).to.equal(withoutEmphasis) }) diff --git a/test/lines/literal.test.js b/test/lines/literal.test.js index cff1e19..ba2f945 100644 --- a/test/lines/literal.test.js +++ b/test/lines/literal.test.js @@ -85,18 +85,27 @@ describe(`literal translation (utterance: "\\lit")`, () => { }) - it(`is stripped of emphasis`, () => { - - const literal = `one day a man` + it(`retains emphasis`, function() { const text = ` - \\txn waxdungu qasi - \\lit ${ literal } - ` + \\txn *waxdungu* qasi + \\lit *one day* a man` const { utterances: [utterance] } = convert(text) - expect(utterance.literal).to.equal(literal) + expect(utterance.literal).to.equal(`*one day* a man`) + + }) + + it(`strips emphasis`, function() { + + const text = ` + \\txn *waxdungu* qasi + \\lit *one day* a man` + + const { utterances: [utterance] } = convert(text, { emphasis: false }) + + expect(utterance.literal).to.equal(`one day a man`) }) diff --git a/test/lines/meta.test.js b/test/lines/meta.test.js index d2595ae..a960ad4 100644 --- a/test/lines/meta.test.js +++ b/test/lines/meta.test.js @@ -92,7 +92,7 @@ describe(`utterance metadata`, () => { }) - it(`is not stripped of emphasis`, () => { + it(`retains emphasis`, () => { const meta = `*Chitimacha*` diff --git a/test/lines/morphemes.test.js b/test/lines/morphemes.test.js index c422df3..009b523 100644 --- a/test/lines/morphemes.test.js +++ b/test/lines/morphemes.test.js @@ -91,7 +91,7 @@ describe(`morphemes`, () => { }) - it(`is stripped of emphasis`, () => { + it(`retains emphasis`, function() { const text = ` \\m *waxt*-qungu qasi @@ -100,6 +100,19 @@ describe(`morphemes`, () => { const { utterances: [{ words: [{ morphemes: [morpheme] }] }] } = convert(text) + expect(morpheme.transcription.default).to.equal(`*waxt*`) + + }) + + it(`strips emphasis`, function() { + + const text = ` + \\m *waxt*-qungu qasi + \\gl day-one man + ` + + const { utterances: [{ words: [{ morphemes: [morpheme] }] }] } = convert(text, { emphasis: false }) + 
expect(morpheme.transcription.default).to.equal(`waxt`) }) diff --git a/test/lines/note.test.js b/test/lines/note.test.js index 0baec20..56bc4cc 100644 --- a/test/lines/note.test.js +++ b/test/lines/note.test.js @@ -162,7 +162,7 @@ describe(`note`, () => { }) - it(`is not stripped of emphasis`, () => { + it(`retains emphasis`, () => { const noteText = `This note has *emphasis*.` diff --git a/test/lines/phonetic.test.js b/test/lines/phonetic.test.js index 8467982..74bd85c 100644 --- a/test/lines/phonetic.test.js +++ b/test/lines/phonetic.test.js @@ -37,7 +37,7 @@ describe(`phonetic transcription`, () => { }) - it(`is stripped of emphasis`, () => { + it(`retains emphasis`, function() { const phonetic = `waʃtʼunkʼu *ʔasi*` @@ -47,7 +47,21 @@ describe(`phonetic transcription`, () => { ` const { utterances: [utterance] } = convert(text) - expect(utterance.phonetic.includes(`*`)).to.equal(false) + expect(utterance.phonetic).to.include(`*ʔasi*`) + + }) + + it(`strips emphasis`, function() { + + const phonetic = `waʃtʼunkʼu *ʔasi*` + + const text = ` + \\phon ${ phonetic } + \\tln one day a man + ` + + const { utterances: [utterance] } = convert(text, { emphasis: false }) + expect(utterance.phonetic).not.to.include(`*`) }) diff --git a/test/lines/transcript.test.js b/test/lines/transcript.test.js index ad8c451..99e55ab 100644 --- a/test/lines/transcript.test.js +++ b/test/lines/transcript.test.js @@ -26,7 +26,7 @@ describe(`transcript`, () => { }) - it(`is not stripped of emphasis`, () => { + it(`retains emphasis`, () => { const transcript = `*waxdungu* qasi` diff --git a/test/lines/transcription.test.js b/test/lines/transcription.test.js index 76f7a22..8dbae54 100644 --- a/test/lines/transcription.test.js +++ b/test/lines/transcription.test.js @@ -72,17 +72,29 @@ describe(`phomemic transcription (utterance)`, () => { }) - it(`is stripped of emphasis`, () => { - - const transcription = `*waxdungu* qasi` + it(`retains emphasis`, function() { const text = ` - ${ 
transcription } + *waxdungu* qasi one day a man ` const { utterances: [utterance] } = convert(text) - expect(utterance.transcription.default.includes(`*`)).to.equal(false) + + expect(utterance.transcription.default).to.equal(`*waxdungu* qasi`) + + }) + + it(`retains emphasis`, function() { + + const text = ` + *waxdungu* qasi + one day a man + ` + + const { utterances: [utterance] } = convert(text, { emphasis: false }) + + expect(utterance.transcription.default).to.not.include(`*`) }) diff --git a/test/lines/translation.test.js b/test/lines/translation.test.js index 95bb5e8..7281f18 100644 --- a/test/lines/translation.test.js +++ b/test/lines/translation.test.js @@ -71,7 +71,7 @@ describe(`free translation`, () => { }) - it(`is stripped of emphasis`, () => { + it(`retains emphasis`, function() { const text = ` waxdungu qasi @@ -80,6 +80,19 @@ describe(`free translation`, () => { const { utterances: [utterance] } = convert(text) + expect(utterance.translation).to.equal(`*one* day a man`) + + }) + + it(`strips emphasis`, function() { + + const text = ` + waxdungu qasi + *one* day a man + ` + + const { utterances: [utterance] } = convert(text, { emphasis: false }) + expect(utterance.translation).to.equal(`one day a man`) }) diff --git a/test/lines/word-literal.test.js b/test/lines/word-literal.test.js index 48e1204..7b554a9 100644 --- a/test/lines/word-literal.test.js +++ b/test/lines/word-literal.test.js @@ -68,18 +68,29 @@ describe(`literal word translation (word: \\wlt)`, () => { }) - it(`is stripped of emphasis`, () => { - - const literal = `day.one` + it(`retains emphasis`, function() { const text = ` \\w waxdungu qasi - \\wlt ${ literal } a.man + \\wlt day.*one* a.man ` const { utterances: [{ words: [word] }] } = convert(text) - expect(word.literal).to.equal(literal) + expect(word.literal).to.equal(`day.*one*`) + + }) + + it(`strips emphasis`, function() { + + const text = ` + \\w waxdungu qasi + \\wlt day.*one* a.man + ` + + const { utterances: [{ words: [word] 
}] } = convert(text, { emphasis: false }) + + expect(word.literal).to.equal(`day.one`) })