common-voice · MichaelKohler · Feb 15, 2022 · Feb 13, 2022 · Feb 13, 2022 · Feb 13, 2022
diff --git a/server/lib/validation/index.js b/server/lib/validation/index.js
@@ -12,6 +12,7 @@ const ru = require('./languages/ru');
 const th = require('./languages/th');
 const ur = require('./languages/ur');
 const uz = require('./languages/uz');
+const yue = require('./languages/yue');
 
 const VALIDATORS = {
   bas,
@@ -27,6 +28,7 @@ const VALIDATORS = {
   th,
   ur,
   uz,
+  yue,
 };
 
 module.exports = {

diff --git a/server/lib/validation/languages/yue.js b/server/lib/validation/languages/yue.js
@@ -0,0 +1,45 @@
+// Validation rules for `yue` (Cantonese) sentences. Each entry of INVALIDATIONS
+// carries either `fn` (a predicate over the sentence) or `regex`, plus the
+// `error` text for that rule. How the rules are applied is decided by the caller.
+
+// Minimum number of characters that qualify as a sentence.
+const MIN_LENGTH = 3;
+
+// Maximum number of characters allowed per sentence to keep recordings in a manageable duration.
+const MAX_LENGTH = 50;
+
+const INVALIDATIONS = [{
+  // Count code points rather than UTF-16 code units, so that characters outside
+  // the Basic Multilingual Plane (e.g. CJK Extension B) count as one character.
+  fn: (sentence) => {
+    const characterCount = [...sentence].length;
+    return characterCount < MIN_LENGTH || characterCount > MAX_LENGTH;
+  },
+  error: `Number of characters must be between ${MIN_LENGTH} and ${MAX_LENGTH} (inclusive)`,
+}, {
+  regex: /[0-9]+/,
+  error: "Sentence should not contain numbers",
+}, {
+  regex: /[<>+*#@%^[\]()\/]/,
+  error: "Sentence should not contain symbols",
+}, {
+  // 7 or more repeating characters in a row is likely a non-formal spelling or difficult to read.
+  // The `u` flag makes `.` match a whole code point, so repeated non-BMP characters are caught too.
+  regex: /(.)\1{6}/u,
+  error: "Sentence should not contain 7 or more of the same character in a row",
+}, {
+  // Emoji range from https://www.regextester.com/106421 and
+  // https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
+  // NOTE(review): [\u2000-\u3300] also covers CJK punctuation such as U+3002 (。);
+  // confirm that rejecting sentences containing it is intended.
+  regex: /(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/,
+  error: "Sentence should not contain emojis or other special Unicode symbols",
+}, {
+  // U+5427 (吧) directly followed by whitespace or the end of the sentence.
+  regex: /[\u5427](\s|$)/,
+  error: 'Sentence should not end with Mandarin particles',
+}];
+
+module.exports = {
+  INVALIDATIONS,
+};