From 18fa4b29cd78742dbee90606a7e2397bb6d42354 Mon Sep 17 00:00:00 2001 From: laubonghaudoi Date: Tue, 15 Feb 2022 14:04:43 -0500 Subject: [PATCH] fix: add sentence validator for Cantonese (#605) --- server/lib/validation/index.js | 2 ++ server/lib/validation/languages/yue.js | 34 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 server/lib/validation/languages/yue.js diff --git a/server/lib/validation/index.js b/server/lib/validation/index.js index d2157581..ddadfbd8 100644 --- a/server/lib/validation/index.js +++ b/server/lib/validation/index.js @@ -12,6 +12,7 @@ const ru = require('./languages/ru'); const th = require('./languages/th'); const ur = require('./languages/ur'); const uz = require('./languages/uz'); +const yue = require('./languages/yue'); const VALIDATORS = { bas, @@ -27,6 +28,7 @@ const VALIDATORS = { th, ur, uz, + yue, }; module.exports = { diff --git a/server/lib/validation/languages/yue.js b/server/lib/validation/languages/yue.js new file mode 100644 index 00000000..617d325d --- /dev/null +++ b/server/lib/validation/languages/yue.js @@ -0,0 +1,34 @@ +// Minimum of characters that qualify as a sentence. +const MIN_LENGTH = 3; + +// Maximum of characters allowed per sentence to keep recordings in a manageable duration. +const MAX_LENGTH = 50; + +const INVALIDATIONS = [{ + fn: (sentence) => { + return sentence.length < MIN_LENGTH || sentence.length > MAX_LENGTH; + }, + error: `字數必須要喺 ${MIN_LENGTH} 同 ${MAX_LENGTH} 之間`, +}, { + regex: /[0-9]+/, + error: "句子唔可以包含阿拉伯數字", +}, { + regex: /[<>+*#@%^[\]()/]/, + error: "句子唔可以有特殊符號", +}, { + // 7 or more repeating characters in a row is likely a non-formal spelling or difficult to read. + regex: /(.)\1{6}/, + error: "唔可以有連續 7 個或以上重複字元", +}, { + // Emoji range from https://www.regextester.com/106421 and + // https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript + regex: /(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/, + error: "句子唔可以含有 emoji 或者其他特殊 Unicode 符號", +}, { + regex: /[\u5427\u5504\u5436](\s|\u3002|\u002E|\uFF0C|\u002C|$)/, + error: '句子唔可以有官話語氣詞(例如吧、唄、吶)', +}]; + +module.exports = { + INVALIDATIONS, +};