Skip to content

Commit

Permalink
pronouns: generate shortest possible prefixes for linking
Browse files Browse the repository at this point in the history
Inspiration for the actual prefix-finding:
https://www.techiedelight.com/shortest-unique-prefix/
  • Loading branch information
dgw committed May 10, 2022
1 parent 41b7a13 commit 14a7d4d
Showing 1 changed file with 56 additions and 8 deletions.
64 changes: 56 additions & 8 deletions sopel/modules/pronouns.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,23 +65,71 @@ def setup(bot):
r = requests.get(
'https://github.com/witch-house/pronoun.is/raw/master/resources/pronouns.tab')
r.raise_for_status()
fetched_pairs = _process_pronoun_sets(r.text.splitlines())
except requests.exceptions.RequestException:
# don't do anything, just log the failure and use the hard-coded set
LOGGER.exception("Couldn't fetch full pronouns list; using default set.")
return

fetched_sets = {}
try:
for line in r.text.splitlines():
split_set = line.split('\t')
short = '{}/.../{}'.format(split_set[0], split_set[-1])
fetched_sets[short] = '/'.join(split_set)
except Exception:
# don't care what failed, honestly, since we aren't trying to fix it
LOGGER.exception("Couldn't parse fetched pronouns; using default set.")
return
else:
bot.memory['pronoun_sets'] = dict(fetched_pairs)


def _process_pronoun_sets(set_list):
    """Yield ``(prefix, full/pronoun/set)`` pairs for the given pronoun sets.

    Every entry of ``set_list`` is inserted into a fresh :class:`PronounTrie`,
    and whatever pairs the trie reports are passed through to the caller.
    This is a generator: nothing is processed until iteration begins.
    """
    prefix_trie = PronounTrie()
    for pronoun_set in set_list:
        prefix_trie.insert(pronoun_set)
    yield from prefix_trie.get_pairs()


class PronounTrieNode:
    """A single node in a trie of pronoun sets.

    ``source`` is the full pronoun set being inserted when this node was
    created; it defaults to the empty string.
    """

    def __init__(self, source=''):
        self.children = {}
        """Child nodes are stored here."""

        self.freq = 0
        """Store how many times this node is visited during insertion."""

        # NOTE: a stray ``bot.memory['pronoun_sets'] = fetched_sets`` statement
        # used to sit here; neither name exists in this scope, so it has been
        # dropped to keep node construction from raising ``NameError``.
        self.source = source
        """The full pronoun set that caused this node's creation."""


class PronounTrie:
    """Trie of pronoun sets, used to find a short unique prefix for each set.

    Each level of the trie holds one pronoun of a set. Every node counts how
    many inserted sets passed through it, so the first node on a path with a
    count of exactly one marks the shortest prefix that identifies its set.
    """

    def __init__(self):
        self.root = PronounTrieNode()
        """A Trie needs a root entry."""

    def insert(self, pronoun_set):
        """Insert a single pronoun set.

        ``pronoun_set`` is a string of pronouns separated by tabs or slashes.
        Empty or whitespace-only entries are ignored.
        """
        # A blank line would otherwise split into [''] and register an
        # empty-key node, producing a bogus ('', '') pair in get_pairs().
        if not pronoun_set.strip():
            return

        pronoun_set = pronoun_set.replace('\t', '/')
        current_node = self.root
        for pronoun in pronoun_set.split('/'):
            # create a new node only if the path doesn't exist yet,
            # then use the child as the current node
            child = current_node.children.get(pronoun)
            if child is None:
                child = PronounTrieNode(pronoun_set)
                current_node.children[pronoun] = child
            current_node = child

            # increment frequency
            current_node.freq += 1

    def insert_list(self, set_list):
        """Load a list of pronoun sets all at once."""
        for item in set_list:
            self.insert(item)

    def get_pairs(self, root=None, prefix=''):
        """Yield tuples of ``(prefix, full/pronoun/set)``.

        ``root`` is the node to start from (the trie's own root if omitted),
        and ``prefix`` is the slash-joined path that leads to it.

        A set that was inserted more than once, or that is in its entirety a
        prefix of another set, never reaches a node visited exactly once and
        is therefore not yielded.
        """
        if root is None:
            root = self.root

        if root.freq == 1:
            # Only one set passed through here: the path so far is unique.
            yield prefix, root.source
        else:
            if prefix:
                prefix += '/'
            for word, node in root.children.items():
                yield from self.get_pairs(node, prefix + word)


@plugin.command('pronouns')
Expand Down

0 comments on commit 14a7d4d

Please sign in to comment.