From a88acab06117107fc24b4f8c2f59785bfc5903fc Mon Sep 17 00:00:00 2001 From: devformatters2 <177856586+devformatters2@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:28:03 +0800 Subject: [PATCH 1/2] fix(notice): fix incorrect word counting of `truncateNoticeTitle` --- src/common/utils/text/notice.test.ts | 212 +++++------------- src/common/utils/text/notice.ts | 196 ++++------------ .../Notice/NoticeCollectionTitle.tsx | 5 +- src/components/Notice/NoticeMomentTitle.tsx | 8 +- 4 files changed, 101 insertions(+), 320 deletions(-) diff --git a/src/common/utils/text/notice.test.ts b/src/common/utils/text/notice.test.ts index c51d491a6b..a00efd1fb7 100644 --- a/src/common/utils/text/notice.test.ts +++ b/src/common/utils/text/notice.test.ts @@ -1,164 +1,74 @@ import { describe, expect, it } from 'vitest' -import { UserLanguage } from '~/gql/graphql' - import { truncateNoticeTitle } from './notice' -describe.concurrent('utils/text/collection/truncateNoticeTitle', () => { - describe('for Chinese', () => { - it('should truncate the title to the specified maximum number of words', () => { - const title = '这是一个标题这是一个标题这是一个标题' - const maxLength = 3 - const expected = '这是一...' - const result = truncateNoticeTitle(title, { - locale: UserLanguage.ZhHans, - maxLength, - }) - // Assert - expect(result).toEqual(expected) - }) - - it('should return the title as is if it has fewer words than the maximum', () => { - const title = '这是一个标题' - const maxLength = 7 - const result = truncateNoticeTitle(title, { - locale: UserLanguage.ZhHans, - maxLength, - }) - // Assert - expect(result).toEqual(title) - }) - - it('should return the title for the default length of 10 words', () => { - const title = '这是一个标题这是一个标题这是一个标题' - const expected = '这是一个标题这是一个...' 
- const result = truncateNoticeTitle(title, { locale: UserLanguage.ZhHans }) - // Assert - expect(result).toEqual(expected) - }) - }) - - describe('for English', () => { - it('should return the title as is if it has fewer words than the maximum', () => { - const title = 'The birds are chirping and the sun is shining' - const maxLength = 50 - const result = truncateNoticeTitle(title, { - locale: UserLanguage.En, - maxLength, - }) - // Assert - expect(result).toEqual(title) - }) +const CHINESE_ONLY = '这是一个标题这是一个标题这是一个标题这是一个标题这是一个' +const CHINESE_WITH_NUMBERS_AND_PUNCTUATION = + '看起來 10 拍,快樂喜歡如其實也是我於有我的部分' +const ENGLISH_ONLY = + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.' +const ENGLISH_WITH_NUMBERS_AND_PUNCTUATION = + 'Lorem ipsum 10 dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.' +const MIXED = + '看起來 10 拍,consectetur Lorem ipsum dolor sit amet, adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.' +const MENTIONS = + '看起來 10 拍 @用戶 @user Lorem ipsum dolor,快樂喜歡如其實也是我於有我的部分' - it('should truncate the title to the specified maximum number of words', () => { - const title = 'The birds are chirping and the sun is shining' - const maxLength = 27 - const expected = 'The birds are chirping and...' - const result = truncateNoticeTitle(title, { - locale: UserLanguage.En, - maxLength, - }) - // Assert - expect(result).toEqual(expected) - }) +describe.concurrent('utils/text/collection/truncateNoticeTitle', () => { + it('should return the title for the default length of 10 words', () => { + expect(truncateNoticeTitle(CHINESE_ONLY)).toEqual('这是一个标题这是一个...') + expect(truncateNoticeTitle(CHINESE_WITH_NUMBERS_AND_PUNCTUATION)).toEqual( + '看起來 10 拍,快樂喜歡如...' + ) + expect(truncateNoticeTitle(ENGLISH_ONLY)).toEqual( + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do...' 
+ ) + expect(truncateNoticeTitle(ENGLISH_WITH_NUMBERS_AND_PUNCTUATION)).toEqual( + 'Lorem ipsum 10 dolor sit amet, consectetur adipiscing elit, sed...' + ) + expect(truncateNoticeTitle(MIXED)).toEqual( + '看起來 10 拍,consectetur Lorem ipsum dolor sit...' + ) + expect(truncateNoticeTitle(MENTIONS)).toEqual( + '看起來 10 拍 @用戶 @user Lorem ipsum dolor...' + ) }) - describe('for English with tagged users', () => { - it('should truncate characters to under 10 words for english', () => { - expect( - truncateNoticeTitle('This is a very long sentence.', { - includeAtSign: true, - }) - ).toBe('This is a...') - expect( - truncateNoticeTitle('Hello, world.', { includeAtSign: true }) - ).toBe('Hello,...') - }) + it('should truncate the title to the specified maximum number of words', () => { + const maxLength = 6 - it('should truncate if over 10 characters with tagged users and remaining length is 0 while having english characters', () => { - expect( - truncateNoticeTitle('This is a craaaazy article here! @user1 @user2', { - includeAtSign: true, - }) - ).toBe('This is a...@user1 @user2') - }) + expect(truncateNoticeTitle(CHINESE_ONLY, maxLength)).toEqual( + '这是一个标题...' + ) + expect( + truncateNoticeTitle(CHINESE_WITH_NUMBERS_AND_PUNCTUATION, maxLength) + ).toEqual('看起來 10 拍,快...') + expect(truncateNoticeTitle(ENGLISH_ONLY, maxLength)).toEqual( + 'Lorem ipsum dolor sit amet, consectetur...' + ) + expect( + truncateNoticeTitle(ENGLISH_WITH_NUMBERS_AND_PUNCTUATION, maxLength) + ).toEqual('Lorem ipsum 10 dolor sit amet...') + expect(truncateNoticeTitle(MIXED, maxLength)).toEqual( + '看起來 10 拍,consectetur...' + ) + expect(truncateNoticeTitle(MENTIONS, maxLength)).toEqual( + '看起來 10 拍 @用戶...' 
+ ) }) - describe('for Chinese with tagged users', () => { - it('should not truncate if under 10 characters', () => { - expect( - truncateNoticeTitle('這篇文章真的很厲害!', { - locale: UserLanguage.ZhHant, - maxLength: 10, - includeAtSign: true, - }) - ).toBe('這篇文章真的很厲害!') - expect( - truncateNoticeTitle('很厲害!', { - locale: UserLanguage.ZhHant, - maxLength: 10, - includeAtSign: true, - }) - ).toBe('很厲害!') - }) - - it('should truncate if over 10 characters', () => { - expect( - truncateNoticeTitle('這篇文章真的很厲害,大家應該都來看一下!', { - locale: UserLanguage.ZhHant, - maxLength: 10, - includeAtSign: true, - }) - ).toBe('這篇文章真的很厲害,...') - }) - - it('should truncate when the title is over 10 characters and the mentions are at the end', () => { - expect( - truncateNoticeTitle( - '這篇文章真的很厲害,大家應該都來看一下 @user1 @user2', - { locale: UserLanguage.ZhHant, maxLength: 10, includeAtSign: true } - ) - ).toBe('這篇文章真的很厲害,...@user1 @user2') - expect( - truncateNoticeTitle( - '這篇文章真的很厲害,大家應該都來看一下! @user1 @user2', - { locale: UserLanguage.ZhHant, maxLength: 10, includeAtSign: true } - ) - ).toBe('這篇文章真的很厲害,...@user1 @user2') - expect( - truncateNoticeTitle('這是一個時刻!!!!!!!@jj', { - locale: UserLanguage.ZhHant, - maxLength: 10, - includeAtSign: true, - }) - ).toBe('這是一個時刻!!!!...@jj') - }) - - it('should truncate if over 10 characters with tagged users in the middle or the beginning', () => { - expect( - truncateNoticeTitle('我和 @zhangsan 在台北一起去吃吃吃!', { - locale: UserLanguage.ZhHans, - maxLength: 10, - includeAtSign: true, - }) - ).toBe('我和 @zhangsan 在台北一起去...') - expect( - truncateNoticeTitle('@zhangsan 和我在台北一起去吃吃吃!', { - locale: UserLanguage.ZhHans, - maxLength: 10, - includeAtSign: true, - }) - ).toBe('@zhangsan 和我在台北一起去吃...') - }) + it('should return the title as is if it has fewer words than the maximum', () => { + const maxLength = 100 - it('should truncate characters to when the mention is a bit spread out', () => { - expect( - truncateNoticeTitle('我和 @zhangsan 還有 @yp 在台北一起去吃吃吃!', { - locale: 
UserLanguage.ZhHans, - maxLength: 10, - includeAtSign: true, - }) - ).toBe('我和 @zhangsan 還有 @yp 在台...') - }) + expect(truncateNoticeTitle(CHINESE_ONLY, maxLength)).toEqual(CHINESE_ONLY) + expect( + truncateNoticeTitle(CHINESE_WITH_NUMBERS_AND_PUNCTUATION, maxLength) + ).toEqual(CHINESE_WITH_NUMBERS_AND_PUNCTUATION) + expect(truncateNoticeTitle(ENGLISH_ONLY, maxLength)).toEqual(ENGLISH_ONLY) + expect( + truncateNoticeTitle(ENGLISH_WITH_NUMBERS_AND_PUNCTUATION, maxLength) + ).toEqual(ENGLISH_WITH_NUMBERS_AND_PUNCTUATION) + expect(truncateNoticeTitle(MIXED, maxLength)).toEqual(MIXED) + expect(truncateNoticeTitle(MENTIONS, maxLength)).toEqual(MENTIONS) }) }) diff --git a/src/common/utils/text/notice.ts b/src/common/utils/text/notice.ts index db101e79c2..58db112f75 100644 --- a/src/common/utils/text/notice.ts +++ b/src/common/utils/text/notice.ts @@ -1,182 +1,62 @@ -import { UserLanguage } from '~/gql/graphql' - -type TruncateNoticeTitleOptions = { - locale?: UserLanguage - maxLength?: number - includeAtSign?: boolean -} - /** * Truncates a title to a specified maximum length, while preserving tagged users. * * @param title - The title to truncate. * @param maxLength - The maximum length of the truncated title. - * @param locale - The locale to determine the truncation rules. Defaults to 'en'. + * - Each CJK character is counted as 1 unit. + * - Each latin word is counted as 1 unit. + * - Each tagged user is counted as 1 unit. + * - Ignore spaces and punctuation. + * * @returns The truncated title with preserved tagged users. */ -export const truncateNoticeTitle = ( - title: string, - options: TruncateNoticeTitleOptions = {} -) => { - const DEFAULTS = { - locale: UserLanguage.En, - includeAtSign: false, - maxLength: 10, - } - let localOptions = { ...DEFAULTS, ...options } +const REGEXP_CJK = + '[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff66-\uff9f]' - if (/^zh/.test(localOptions.locale)) { - return localOptions.includeAtSign - ? 
truncateTitleForChineseWithAtSign(title, localOptions) - : truncateTitleForChinese(title, localOptions) - } else { - return localOptions.includeAtSign - ? truncateTitleForEnglishWithAtSign(title, localOptions) - : truncateTitleForEnglish(title, localOptions) +function countUnits(word: string) { + // Tagged user (mention) + if (/^@\w+/.test(word) || new RegExp(`^@${REGEXP_CJK}+`).test(word)) { + return 1 } -} - -/** - * Truncates a title to a specified maximum length for Chinese (Simplified or traditional) text. - * - * @param text - The title to truncate. - * @param maxWords - The maximum number of words in the truncated title. Defaults to 10. - * @returns The truncated title. - */ -export function truncateTitleForChinese( - text: string, - { - maxLength, - }: { maxLength: NonNullable } -): string { - const chineseRegex = /[\u4e00-\u9fa5]/g - const chineseWords = text.match(chineseRegex) - if (chineseWords && chineseWords.length > maxLength) { - return chineseWords.slice(0, maxLength).join('') + '...' + // CJK + else if (new RegExp(REGEXP_CJK, 'g').test(word)) { + return 1 } - return text -} - -/** - * Truncates a title to a specified maximum length for English text. - * - * @param text - The title to truncate. - * @param maxLength - The maximum length of the truncated title. Defaults to 50. - * @returns The truncated title. - */ -export function truncateTitleForEnglish( - text: string, - { - maxLength, - }: { maxLength: NonNullable } -): string { - if (text.length > maxLength) { - const words = text.split(' ') - let truncatedText = '' - let count = 0 - for (const word of words) { - if (count + word.length <= maxLength) { - truncatedText += word + ' ' - count += word.length + 1 - } else { - break - } - } - return truncatedText.trim() + '...' + // Latin + else if (/^\w+/.test(word)) { + return 1 } - return text -} -/** - * Truncates a title in English to a specified maximum length, while preserving tagged users. - * - * @param title - The title to truncate. 
- * @param maxLength - The maximum length of the truncated title. - * @returns The truncated title with preserved tagged users. - */ -const truncateTitleForEnglishWithAtSign = ( - title: string, - { - maxLength, - }: { maxLength: NonNullable } -) => { - const words = title.split(/\s+/) - let hasTag = words.some((word) => word.startsWith('@')) - let truncated = '' - let count = 0 + // Ignore spaces and punctuations + return 0 +} - for (const word of words) { - if (word.startsWith('@')) { - truncated += `${word} ` - continue - } - if (count + word.length + 1 > maxLength) { - break - } - truncated += `${word} ` - count += word.length + 1 - } +function trimSpacesAndPunctuations(str: string) { + return str.replace(/^[\s\p{P}]+|[\s\p{P}]+$/gu, '') +} - let base = truncated.trim() + (title.length > count ? '...' : '') - if (hasTag && !base.includes('@')) { - for (const word of words) { - if (word.startsWith('@')) { - base += `${word} ` - } - } - } +export const truncateNoticeTitle = (title: string, maxLength: number = 10) => { + const components = + title.match( + new RegExp(`(@\\w+|@${REGEXP_CJK}+|\\w+|${REGEXP_CJK}|[^\w\s])`, 'g') + ) || [] - return base.trim() -} + let truncatedTitle = '' + let currentLength = 0 -/** - * Truncates a title in CJK (Chinese, Japanese, Korean) to a specified maximum length, while preserving tagged users. - * - * @param title - The title to truncate. - * @param maxLength - The maximum length of the truncated title. - * @returns The truncated title with preserved tagged users. 
- */ -const truncateTitleForChineseWithAtSign = ( - title: string, - { - maxLength, - }: { maxLength: NonNullable } -) => { - const pattern = /(@\w+|[^\x00-\x7F]|\s)/gu - const phrases = title.match(pattern)?.filter((s) => s !== ' ') || [] - let hasTag = phrases.some((p) => p.startsWith('@')) - let count = 0 - let truncated = '' + for (const [index, component] of components.entries()) { + const componentUnits = countUnits(component) - for (const [idx, p] of phrases.entries()) { - if (p.startsWith('@')) { - if (idx + 1 == phrases.length) { - truncated += ` ${p}` - count += 1 - } else if (idx === 0) { - truncated += `${p} ` - count += 1 - } else { - truncated += ` ${p} ` - count += 2 + if (currentLength + componentUnits > maxLength) { + if (index < components.length - 1) { + truncatedTitle = trimSpacesAndPunctuations(truncatedTitle) + '...' } - continue - } - if (count + 1 > maxLength) { break } - truncated += p - count++ - } - let base = truncated.trim() + (title.length > count ? '...' : '') - if (hasTag && !base.includes('@')) { - for (const p of phrases) { - if (p.startsWith('@')) { - base += `${p} ` - } - } + truncatedTitle += component + currentLength += componentUnits } - return base.trim() + return truncatedTitle } diff --git a/src/components/Notice/NoticeCollectionTitle.tsx b/src/components/Notice/NoticeCollectionTitle.tsx index 05534f2777..b06404b318 100644 --- a/src/components/Notice/NoticeCollectionTitle.tsx +++ b/src/components/Notice/NoticeCollectionTitle.tsx @@ -1,13 +1,11 @@ import gql from 'graphql-tag' import Link from 'next/link' -import { useContext } from 'react' import { TEST_ID } from '~/common/enums' import { toPath } from '~/common/utils' import { truncateNoticeTitle } from '~/common/utils/text/notice' import { CollectionNoticeFragment } from '~/gql/graphql' -import { LanguageContext } from '../Context' import styles from './styles.module.css' const NoticeCollectionTitle = ({ @@ -16,7 +14,6 @@ const NoticeCollectionTitle = ({ notice: 
CollectionNoticeFragment | null }) => { const userName = notice?.collection?.author.userName - const { lang } = useContext(LanguageContext) if (!notice || !userName) { return null @@ -34,7 +31,7 @@ const NoticeCollectionTitle = ({ className={styles.noticeArticleTitle} data-test-id={TEST_ID.NOTICE_COLLECTION_TITLE} > - {truncateNoticeTitle(notice.collection.title, { locale: lang })} + {truncateNoticeTitle(notice.collection.title)} ) diff --git a/src/components/Notice/NoticeMomentTitle.tsx b/src/components/Notice/NoticeMomentTitle.tsx index 99dd343478..928511d383 100644 --- a/src/components/Notice/NoticeMomentTitle.tsx +++ b/src/components/Notice/NoticeMomentTitle.tsx @@ -1,11 +1,9 @@ import gql from 'graphql-tag' import Link from 'next/link' -import { useContext } from 'react' import { useIntl } from 'react-intl' import { TEST_ID } from '~/common/enums' import { stripHtml, toPath, truncateNoticeTitle } from '~/common/utils' -import { LanguageContext } from '~/components' import { NoticeMomentTitleFragment } from '~/gql/graphql' import styles from './styles.module.css' @@ -15,7 +13,6 @@ const NoticeMomentTitle = ({ }: { moment: NoticeMomentTitleFragment }) => { - const { lang } = useContext(LanguageContext) const intl = useIntl() const path = toPath({ @@ -23,10 +20,7 @@ const NoticeMomentTitle = ({ moment, }) - const title = truncateNoticeTitle(stripHtml(moment.content || ''), { - maxLength: 10, - locale: lang, - }) + const title = truncateNoticeTitle(stripHtml(moment.content || '')) const images = moment.assets.length ? 
intl .formatMessage({ defaultMessage: `[image]`, id: 'W3tqQO' }) From 185c185ea8d949c8f85105ac9eeefabc993039d1 Mon Sep 17 00:00:00 2001 From: devformatters2 <177856586+devformatters2@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:39:42 +0800 Subject: [PATCH 2/2] fix(notice): use explicit punctuations instead --- src/common/utils/form/validate.ts | 6 +++--- src/common/utils/text/notice.ts | 10 +++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/common/utils/form/validate.ts b/src/common/utils/form/validate.ts index 756092522b..7ac71cc36c 100644 --- a/src/common/utils/form/validate.ts +++ b/src/common/utils/form/validate.ts @@ -28,10 +28,10 @@ import { import { hasUpperCase, isValidPaymentPointer } from '../validator' -const PUNCTUATION_CHINESE = +export const PUNCTUATION_CHINESE = '\u3002\uff1f\uff01\uff0c\u3001\uff1b\uff1a\u201c\u201d\u2018\u2019\uff08\uff09\u300a\u300b\u3008\u3009\u3010\u3011\u300e\u300f\u300c\u300d\ufe43\ufe44\u3014\u3015\u2026\u2014\uff5e\ufe4f\uffe5' -const PUNCTUATION_ASCII = '\x00-\x2f\x3a-\x40\x5b-\x60\x7a-\x7f' -const REGEXP_ALL_PUNCTUATIONS = new RegExp( +export const PUNCTUATION_ASCII = '\x00-\x2f\x3a-\x40\x5b-\x60\x7a-\x7f' +export const REGEXP_ALL_PUNCTUATIONS = new RegExp( `^[${PUNCTUATION_CHINESE}${PUNCTUATION_ASCII}]*$` ) diff --git a/src/common/utils/text/notice.ts b/src/common/utils/text/notice.ts index 58db112f75..244589df57 100644 --- a/src/common/utils/text/notice.ts +++ b/src/common/utils/text/notice.ts @@ -1,3 +1,5 @@ +import { PUNCTUATION_ASCII, PUNCTUATION_CHINESE } from '../form' + /** * Truncates a title to a specified maximum length, while preserving tagged users. 
* @@ -32,7 +34,13 @@ function countUnits(word: string) { } function trimSpacesAndPunctuations(str: string) { - return str.replace(/^[\s\p{P}]+|[\s\p{P}]+$/gu, '') + return str.replace( + new RegExp( + `^[${PUNCTUATION_CHINESE}${PUNCTUATION_ASCII}]+|[${PUNCTUATION_CHINESE}${PUNCTUATION_ASCII}]+$`, + 'g' + ), + '' + ) } export const truncateNoticeTitle = (title: string, maxLength: number = 10) => {