diff --git a/packages/core/src/appendSkinToneIndex.ts b/packages/core/src/appendSkinToneIndex.ts new file mode 100644 index 00000000..0a7af948 --- /dev/null +++ b/packages/core/src/appendSkinToneIndex.ts @@ -0,0 +1,8 @@ +import { SkinTone } from './types'; + +export default function appendSkinToneIndex( + shortcode: string, + emoji: { tone?: SkinTone | SkinTone[] }, +): string { + return `${shortcode}_${Array.isArray(emoji.tone) ? emoji.tone.join('-') : emoji.tone}`; +} diff --git a/packages/core/src/fetchEmojis.ts b/packages/core/src/fetchEmojis.ts index 10f61064..3ae6bf3f 100644 --- a/packages/core/src/fetchEmojis.ts +++ b/packages/core/src/fetchEmojis.ts @@ -1,9 +1,11 @@ -import { ShortcodePreset, CompactEmoji, Emoji } from './types'; +import { ShortcodePreset, CompactEmoji, Emoji, ShortcodesDataset } from './types'; import fetchFromCDN, { FetchFromCDNOptions } from './fetchFromCDN'; import fetchShortcodes from './fetchShortcodes'; +import flattenEmojiData from './flattenEmojiData'; export interface FetchEmojisOptions extends FetchFromCDNOptions { compact?: boolean; + flat?: boolean; shortcodes?: ShortcodePreset[]; } @@ -20,14 +22,24 @@ async function fetchEmojis( ): Promise; async function fetchEmojis(locale: string, options: FetchEmojisOptions = {}) { - const { compact = false, shortcodes: presets = [], ...opts } = options; - const emojis = await fetchFromCDN(`${locale}/${compact ? 'compact' : 'data'}.json`, opts); + const { compact = false, flat = false, shortcodes: presets = [], ...opts } = options; + const emojis = await fetchFromCDN( + `${locale}/${compact ? 'compact' : 'data'}.json`, + opts, + ); + const shortcodes: ShortcodesDataset[] = []; if (presets.length > 0) { - await Promise.all(presets.map((preset) => fetchShortcodes(locale, preset, opts))); + try { + shortcodes.push( + ...(await Promise.all(presets.map((preset) => fetchShortcodes(locale, preset, opts)))), + ); + } catch { + // Ignore + } } - return emojis; + return flat ? 
flattenEmojiData(emojis as Emoji[], shortcodes) : emojis; } export default fetchEmojis; diff --git a/packages/core/src/flattenEmojiData.ts b/packages/core/src/flattenEmojiData.ts index 23417d73..42cf3af2 100644 --- a/packages/core/src/flattenEmojiData.ts +++ b/packages/core/src/flattenEmojiData.ts @@ -1,30 +1,34 @@ -import { Emoji } from './types'; +import { Emoji, ShortcodesDataset, CompactEmoji } from './types'; +import joinShortcodesToEmoji from './joinShortcodesToEmoji'; -export default function flattenEmojiData(data: Emoji[]): Emoji[] { +export default function flattenEmojiData( + data: T[], + shortcodeDatasets: ShortcodesDataset[] = [], +): T[] { const emojis: Emoji[] = []; - data.forEach((emoji) => { + (data as Emoji[]).forEach((emoji) => { if (emoji.skins) { - const { skins, ...restEmoji } = emoji; - // Dont include nested skins array - emojis.push(restEmoji); + const { skins, ...baseEmoji } = emoji; + + emojis.push(joinShortcodesToEmoji(baseEmoji, shortcodeDatasets)); // Push each skin modification into the root list skins.forEach((skin) => { const skinEmoji = { ...skin }; // Inherit tags from parent if they exist - if (emoji.tags) { - skinEmoji.tags = [...emoji.tags]; + if (baseEmoji.tags) { + skinEmoji.tags = [...baseEmoji.tags]; } - emojis.push(skinEmoji); + emojis.push(joinShortcodesToEmoji(skinEmoji, shortcodeDatasets)); }); } else { - emojis.push(emoji); + emojis.push(joinShortcodesToEmoji(emoji, shortcodeDatasets)); } }); - return emojis; + return emojis as T[]; } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3db645cb..0d5a5b49 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -3,6 +3,7 @@ * @license https://opensource.org/licenses/MIT */ +import appendSkinToneIndex from './appendSkinToneIndex'; import fetchEmojis from './fetchEmojis'; import fetchFromCDN from './fetchFromCDN'; import fetchShortcodes from './fetchShortcodes'; @@ -11,9 +12,12 @@ import fromCodepointToUnicode from 
'./fromCodepointToUnicode'; import fromHexcodeToCodepoint from './fromHexcodeToCodepoint'; import fromUnicodeToHexcode from './fromUnicodeToHexcode'; import generateEmoticonPermutations from './generateEmoticonPermutations'; +import joinShortcodes from './joinShortcodes'; +import joinShortcodesToEmoji from './joinShortcodesToEmoji'; import stripHexcode from './stripHexcode'; export { + appendSkinToneIndex, fetchEmojis, fetchFromCDN, fetchShortcodes, @@ -22,6 +26,8 @@ export { fromHexcodeToCodepoint, fromUnicodeToHexcode, generateEmoticonPermutations, + joinShortcodes, + joinShortcodesToEmoji, stripHexcode, }; diff --git a/packages/core/src/joinShortcodes.ts b/packages/core/src/joinShortcodes.ts new file mode 100644 index 00000000..69734c96 --- /dev/null +++ b/packages/core/src/joinShortcodes.ts @@ -0,0 +1,17 @@ +import { Emoji, CompactEmoji, ShortcodesDataset } from './types'; +import joinShortcodesToEmoji from './joinShortcodesToEmoji'; + +export default function joinShortcodes( + emojis: T[], + shortcodeDatasets: ShortcodesDataset[], +): T[] { + emojis.forEach((emoji) => { + joinShortcodesToEmoji(emoji, shortcodeDatasets); + + if (emoji.skins) { + joinShortcodes(emoji.skins as Emoji[], shortcodeDatasets); + } + }); + + return emojis; +} diff --git a/packages/core/src/joinShortcodesToEmoji.ts b/packages/core/src/joinShortcodesToEmoji.ts new file mode 100644 index 00000000..a673faab --- /dev/null +++ b/packages/core/src/joinShortcodesToEmoji.ts @@ -0,0 +1,26 @@ +import { Emoji, CompactEmoji, ShortcodesDataset } from './types'; + +export default function joinShortcodesToEmoji( + emoji: T, + shortcodeDatasets: ShortcodesDataset[], +): T { + if (shortcodeDatasets.length === 0) { + return emoji; + } + + const list = new Set(emoji.shortcodes); + + shortcodeDatasets.forEach((dataset) => { + const shortcodes = dataset[emoji.hexcode]; + + if (Array.isArray(shortcodes)) { + shortcodes.forEach((code) => list.add(code)); + } else if (shortcodes) { + list.add(shortcodes); + } 
+ }); + + emoji.shortcodes = Array.from(list); + + return emoji; +} diff --git a/packages/core/tests/fromUnicodeToHexcode.test.ts b/packages/core/tests/fromUnicodeToHexcode.test.ts index 6ebe1095..10491756 100644 --- a/packages/core/tests/fromUnicodeToHexcode.test.ts +++ b/packages/core/tests/fromUnicodeToHexcode.test.ts @@ -1,6 +1,7 @@ import fromUnicodeToHexcode from '../src/fromUnicodeToHexcode'; import flattenEmojiData from '../src/flattenEmojiData'; import { TEXT } from '../src/constants'; +import { Emoji } from '../lib/types'; const SEQUENCE_HEXCODE_PATTERN = /-(200D|FE0E|FE0F)/g; @@ -8,7 +9,7 @@ describe('fromUnicodeToHexcode()', () => { // eslint-disable-next-line global-require const emojiData = require('../../data/en/raw.json'); - flattenEmojiData(emojiData).forEach((emoji) => { + flattenEmojiData(emojiData).forEach((emoji) => { const unicode = emoji.type === TEXT ? emoji.text : emoji.emoji; // The `hexcode` does not include variation selectors, diff --git a/packages/core/tests/joinShortcodesToEmoji.test.ts b/packages/core/tests/joinShortcodesToEmoji.test.ts new file mode 100644 index 00000000..f34ba84b --- /dev/null +++ b/packages/core/tests/joinShortcodesToEmoji.test.ts @@ -0,0 +1,40 @@ +import joinShortcodesToEmoji from '../src/joinShortcodesToEmoji'; +import { Emoji } from '../src/types'; + +describe('joinShortcodesToEmoji()', () => { + const info: Emoji = { + annotation: 'information', + name: 'INFORMATION SOURCE', + hexcode: '2139', + tags: ['i'], + emoji: 'ℹ️', + text: '', + type: 0, + order: 3821, + group: 8, + subgroup: 95, + version: 0.6, + }; + + it('adds all matching hexcodes to shortcodes list', () => { + const emoji = joinShortcodesToEmoji({ ...info }, [ + { '2139': 'information' }, + { '2139': 'info' }, + { '2139': ['info_source', 'info'] }, + ]); + + expect(emoji.shortcodes).toEqual(['information', 'info', 'info_source']); + }); + + it('sets empty list if no matching datasets', () => { + const emoji = joinShortcodesToEmoji({ ...info }, [{ 
'0000': 'shortcode' }]); + + expect(emoji.shortcodes).toEqual([]); + }); + + it('doesnt set property if no datasets', () => { + const emoji = joinShortcodesToEmoji({ ...info }, []); + + expect(emoji.shortcodes).toBeUndefined(); + }); +}); diff --git a/packages/generator/src/generators/generateShortcodes.ts b/packages/generator/src/generators/generateShortcodes.ts index 151af846..7c1b008b 100644 --- a/packages/generator/src/generators/generateShortcodes.ts +++ b/packages/generator/src/generators/generateShortcodes.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-unsafe-assignment, unicorn/better-regex */ -import { SUPPORTED_LOCALES, NON_LATIN_LOCALES } from 'emojibase'; +import { SUPPORTED_LOCALES, NON_LATIN_LOCALES, appendSkinToneIndex } from 'emojibase'; import Kuroshiro from 'kuroshiro'; import KuromojiAnalyzer from 'kuroshiro-analyzer-kuromoji'; import { transliterate } from 'transliteration'; @@ -9,7 +9,7 @@ import buildAnnotationData from '../builders/buildAnnotationData'; import writeDataset from '../helpers/writeDataset'; import filterData from '../helpers/filterData'; import log from '../helpers/log'; -import { ShortcodeDataMap, EmojiModification } from '../types'; +import { ShortcodeDataMap } from '../types'; const CUSTOM_SHORTCODES: { [key: string]: string } = { e_mail: 'email', @@ -50,10 +50,6 @@ async function slugify(value: string, locale: string, transform: boolean = false return CUSTOM_SHORTCODES[slug] || slug; } -function appendToneIndex(shortcode: string, mod: EmojiModification): string { - return `${shortcode}_${Array.isArray(mod.tone) ? 
mod.tone.join('-') : mod.tone}`; -} - export default async function generateShortcodes(): Promise { log.title('data', 'Generating shortcode datasets'); @@ -95,11 +91,14 @@ export default async function generateShortcodes(): Promise { // eslint-disable-next-line no-loop-func Object.values(emoji.modifications).forEach((mod) => { if (hasLatin) { - cldr[mod.hexcode] = appendToneIndex(String(cldr[emoji.hexcode]), mod); + cldr[mod.hexcode] = appendSkinToneIndex(String(cldr[emoji.hexcode]), mod); } if (hasNonLatin) { - cldrNonLatin[mod.hexcode] = appendToneIndex(String(cldrNonLatin[emoji.hexcode]), mod); + cldrNonLatin[mod.hexcode] = appendSkinToneIndex( + String(cldrNonLatin[emoji.hexcode]), + mod, + ); } }); } diff --git a/website/docs/api.mdx b/website/docs/api.mdx index d7319d5e..ef297683 100644 --- a/website/docs/api.mdx +++ b/website/docs/api.mdx @@ -58,6 +58,8 @@ await fetchFromCDN('ja/compact.json', { version: '2.1.3' }); ## `fetchEmojis` +> fetchEmojis(locale: string, options?: FetchEmojisOptions): Promise + Fetches and returns a localized list of emojis from our [CDN][cdn]. Uses [`fetchFromCDN`](#fetchfromcdn) and [`fetchShortcodes`](#fetchshortcodes) under the hood. @@ -76,6 +78,9 @@ await fetchEmojis('ja', { compact: true, version: '2.1.3' }); ## `fetchShortcodes` +> fetchShortcodes(locale: string, preset: ShortcodePreset, options?: FetchFromCDNOptions): +> Promise + Fetches and returns localized shortcodes for the defined preset from our [CDN][cdn]. The response is a mapping of emoji hexcodes to shortcodes (either a string or array of strings). Uses [`fetchFromCDN`](#fetchfromcdn) under the hood. @@ -102,12 +107,15 @@ await fetchShortcodes('ja', 'cldr', { version: '2.1.3' }); ## `flattenEmojiData` -> flattenEmojiData(data: Emoji[]): Emoji[] +> flattenEmojiData(data: Emoji[], shortcodeDatasets?: ShortcodesDataset[]): Emoji[] By default, emoji [skin modifications are nested](./datasets.mdx#data-structure) under the base neutral skin tone emoji. 
To flatten the data into a single dimension array, use the `flattenEmojiData` function. +If `shortcodeDatasets` is defined, it will join the shortcodes to the emoji object using +[`joinShortcodesToEmoji`](#joinshortcodestoemoji). + ```ts import { flattenEmojiData } from 'emojibase'; @@ -214,6 +222,54 @@ generateEmoticonPermutations(':)', { withNose: false }); // =}, =], =), :}, :], generateEmoticonPermutations('\\m/', { isFace: false }); // \m/, \M/ ``` +## `joinShortcodesToEmoji` + +> joinShortcodesToEmoji<T extends Emoji | CompactEmoji>(emoji: T, shortcodeDatasets: +> ShortcodesDataset[]): T + +Will join shortcodes from multiple shortcode datasets into a single emoji object using its hexcode. +Will remove duplicates in the process. + +```ts +import { joinShortcodesToEmoji } from 'emojibase'; + +joinShortcodesToEmoji( + { + annotation: 'information', + name: 'INFORMATION SOURCE', + hexcode: '2139', + tags: ['i'], + emoji: 'ℹ️', + // ... + }, + [ + { '2139': 'information' /* ... */ }, + { '2139': 'info' /* ... */ }, + { '2139': 'info_source' /* ... */ }, + { '2139': 'info' /* ... */ }, + ], +); + +/* +{ + annotation: 'information', + name: 'INFORMATION SOURCE', + hexcode: '2139', + tags: ['i'], + emoji: 'ℹ️', + shortcodes: ['information', 'info', 'info_source'], + // ... +} +*/ +``` + +## `joinShortcodes` + +> joinShortcodes<T extends Emoji | CompactEmoji>(emojis: T[], shortcodeDatasets: +> ShortcodesDataset[]): T[] + +Like `joinShortcodesToEmoji` but joins shortcodes to a list of emoji objects. + ## `stripHexcode` > stripHexcode(hexcode: Hexcode): Hexcode