From 86f2190a456ed8ac65a70ab5fe065c47754c2415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Marc=C3=A8=20i=20Igual?= Date: Sat, 19 Feb 2022 21:31:34 +0100 Subject: [PATCH 1/4] Copy italian to catalan --- server/lib/validation/languages/ca.js | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 server/lib/validation/languages/ca.js diff --git a/server/lib/validation/languages/ca.js b/server/lib/validation/languages/ca.js new file mode 100644 index 00000000..a0b9423a --- /dev/null +++ b/server/lib/validation/languages/ca.js @@ -0,0 +1,35 @@ +// According to Mozilla Italia guidelines, we count chars to validate instead of words. +const MIN_LENGTH = 1; +const MAX_LENGTH = 125; + +const INVALIDATIONS = [{ + fn: (sentence) => { + return sentence.length < MIN_LENGTH || sentence.length > MAX_LENGTH; + }, + error: `Number of characters must be between ${MIN_LENGTH} and ${MAX_LENGTH} (inclusive)`, +}, { + regex: /[0-9]+/, + error: 'Sentence should not contain numbers', +}, { + // This could mean multiple sentences per line. + regex: /[?!.].+/, + error: 'Sentence should not contain sentence punctuation inside a sentence', +}, { + // Italian: Simboli non permessi, aggiungere anche qui sotto oltre che nella regex: + // < > + * \ # @ ^ “ ” ‘ ’ ( ) É [ ] / { } + // doppio " " e più di un "." nella stessa frase. + regex: /[<>+*\\#@^“”‘’(){}É[\]/]|\s{2,}|!{2,}/, + error: 'Sentence should not contain symbols or multiple spaces/exclamation marks', +}, { + // Any words consisting of uppercase letters or uppercase letters with a period + // inbetween are considered abbreviations or acronyms. + // This currently also matches fooBAR but we most probably don't want that either + // as users wouldn't know how to pronounce the uppercase letters. 
+ // Versione italiana: dag7dev + regex: /[A-Z]{2,}|[A-Z][a-z]+\.*[A-Z]+/, + error: 'Sentence should not contain abbreviations', +}]; + +module.exports = { + INVALIDATIONS, +}; From 29b91bcf53ccee876e380fa0af9fd566752fc869 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Marc=C3=A8=20i=20Igual?= Date: Sat, 19 Feb 2022 22:02:10 +0100 Subject: [PATCH 2/4] feat: add catalan validator --- server/lib/validation/index.js | 2 ++ server/lib/validation/languages/ca.js | 25 ++++++++++++++----------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/server/lib/validation/index.js b/server/lib/validation/index.js index ddadfbd8..ea8bf81f 100644 --- a/server/lib/validation/index.js +++ b/server/lib/validation/index.js @@ -13,6 +13,7 @@ const th = require('./languages/th'); const ur = require('./languages/ur'); const uz = require('./languages/uz'); const yue = require('./languages/yue'); +const ca = require('./languages/ca'); const VALIDATORS = { bas, @@ -29,6 +30,7 @@ const VALIDATORS = { ur, uz, yue, + ca }; module.exports = { diff --git a/server/lib/validation/languages/ca.js b/server/lib/validation/languages/ca.js index a0b9423a..f323fa5c 100644 --- a/server/lib/validation/languages/ca.js +++ b/server/lib/validation/languages/ca.js @@ -1,12 +1,17 @@ -// According to Mozilla Italia guidelines, we count chars to validate instead of words. -const MIN_LENGTH = 1; -const MAX_LENGTH = 125; +const tokenizeWords = require('talisman/tokenizers/words/gersam'); + +// Minimum of words that qualify as a sentence. +const MIN_WORDS = 1; + +// Maximum of words allowed per sentence to keep recordings in a manageable duration. 
+const MAX_WORDS = 14; const INVALIDATIONS = [{ fn: (sentence) => { - return sentence.length < MIN_LENGTH || sentence.length > MAX_LENGTH; + const words = tokenizeWords('ca', sentence); + return words.length < MIN_WORDS || words.length > MAX_WORDS; }, - error: `Number of characters must be between ${MIN_LENGTH} and ${MAX_LENGTH} (inclusive)`, + error: `Number of words must be between ${MIN_WORDS} and ${MAX_WORDS} (inclusive)`, }, { regex: /[0-9]+/, error: 'Sentence should not contain numbers', @@ -15,18 +20,16 @@ const INVALIDATIONS = [{ regex: /[?!.].+/, error: 'Sentence should not contain sentence punctuation inside a sentence', }, { - // Italian: Simboli non permessi, aggiungere anche qui sotto oltre che nella regex: - // < > + * \ # @ ^ “ ” ‘ ’ ( ) É [ ] / { } - // doppio " " e più di un "." nella stessa frase. - regex: /[<>+*\\#@^“”‘’(){}É[\]/]|\s{2,}|!{2,}/, + // Symbols not allowed, also add them below as well to the regex: + // < > + * \ # @ ^ “ ” ‘ ’ ( ) [ ] / { } + regex: /[<>+*\\#@^“”‘’(){}[\]/]|\s{2,}|!{2,}/, error: 'Sentence should not contain symbols or multiple spaces/exclamation marks', }, { // Any words consisting of uppercase letters or uppercase letters with a period // inbetween are considered abbreviations or acronyms. // This currently also matches fooBAR but we most probably don't want that either // as users wouldn't know how to pronounce the uppercase letters. 
- // Versione italiana: dag7dev - regex: /[A-Z]{2,}|[A-Z][a-z]+\.*[A-Z]+/, + regex: /[A-Z]{2,}|[A-Z]+\.*[A-Z]+/, error: 'Sentence should not contain abbreviations', }]; From abc9bb4e6288c9e1404806cf34af23b673b5a311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Marc=C3=A8=20i=20Igual?= Date: Sat, 19 Feb 2022 23:20:47 +0100 Subject: [PATCH 3/4] fix: translate errors to Catalan --- server/lib/validation/languages/ca.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/lib/validation/languages/ca.js b/server/lib/validation/languages/ca.js index f323fa5c..bf3c34e9 100644 --- a/server/lib/validation/languages/ca.js +++ b/server/lib/validation/languages/ca.js @@ -11,26 +11,26 @@ const INVALIDATIONS = [{ const words = tokenizeWords('ca', sentence); return words.length < MIN_WORDS || words.length > MAX_WORDS; }, - error: `Number of words must be between ${MIN_WORDS} and ${MAX_WORDS} (inclusive)`, + error: `El nombre de paraules ha de ser entre ${MIN_WORDS} i ${MAX_WORDS} (inclòs)`, }, { regex: /[0-9]+/, - error: 'Sentence should not contain numbers', + error: 'La frase no pot contenir nombres', }, { // This could mean multiple sentences per line. regex: /[?!.].+/, - error: 'Sentence should not contain sentence punctuation inside a sentence', + error: 'La frase no pot contenir signes de puntuació al mig', }, { // Symbols not allowed, also add them below as well to the regex: // < > + * \ # @ ^ “ ” ‘ ’ ( ) [ ] / { } regex: /[<>+*\\#@^“”‘’(){}[\]/]|\s{2,}|!{2,}/, - error: 'Sentence should not contain symbols or multiple spaces/exclamation marks', + error: 'La frase no pot contenir simbols o multiples espais o exclamacions', }, { // Any words consisting of uppercase letters or uppercase letters with a period // inbetween are considered abbreviations or acronyms. // This currently also matches fooBAR but we most probably don't want that either // as users wouldn't know how to pronounce the uppercase letters. 
regex: /[A-Z]{2,}|[A-Z]+\.*[A-Z]+/, - error: 'Sentence should not contain abbreviations', + error: 'La frase no pot contenir abreviacions o acrònims', }]; module.exports = { From b939a8535d35f6f51b178deabcf468013a2d5780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20Marc=C3=A8=20i=20Igual?= Date: Sat, 19 Feb 2022 23:21:53 +0100 Subject: [PATCH 4/4] fix: sort includes alphabetically --- server/lib/validation/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/lib/validation/index.js b/server/lib/validation/index.js index ea8bf81f..e063bf4a 100644 --- a/server/lib/validation/index.js +++ b/server/lib/validation/index.js @@ -1,5 +1,6 @@ const defaultValidator = require('./languages/default'); const bas = require('./languages/bas'); +const ca = require('./languages/ca'); const ckb = require('./languages/ckb'); const en = require('./languages/en'); const eo = require('./languages/eo'); @@ -13,10 +14,10 @@ const th = require('./languages/th'); const ur = require('./languages/ur'); const uz = require('./languages/uz'); const yue = require('./languages/yue'); -const ca = require('./languages/ca'); const VALIDATORS = { bas, + ca, ckb, en, eo, @@ -30,7 +31,6 @@ const VALIDATORS = { ur, uz, yue, - ca }; module.exports = {