Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add a config option to prioritise local users in user directory search results #9383

Merged
merged 7 commits into from
Feb 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/9383.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a configuration option, `user_directory.prefer_local_users`, which when enabled will make it more likely for users on the same server as you to appear above other users.
5 changes: 5 additions & 0 deletions docs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2542,9 +2542,14 @@ spam_checker:
# rebuild the user_directory search indexes, see
# https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
#
# 'prefer_local_users' defines whether to prioritise local users in
# search query results. If True, local users are more likely to appear above
# remote users when searching the user directory. Defaults to false.
#
#user_directory:
# enabled: true
# search_all_users: false
# prefer_local_users: false


# User Consent configuration
Expand Down
9 changes: 9 additions & 0 deletions synapse/config/user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class UserDirectoryConfig(Config):
def read_config(self, config, **kwargs):
self.user_directory_search_enabled = True
self.user_directory_search_all_users = False
self.user_directory_search_prefer_local_users = False
user_directory_config = config.get("user_directory", None)
if user_directory_config:
self.user_directory_search_enabled = user_directory_config.get(
Expand All @@ -34,6 +35,9 @@ def read_config(self, config, **kwargs):
self.user_directory_search_all_users = user_directory_config.get(
"search_all_users", False
)
self.user_directory_search_prefer_local_users = user_directory_config.get(
"prefer_local_users", False
)

def generate_config_section(self, config_dir_path, server_name, **kwargs):
return """
Expand All @@ -49,7 +53,12 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs):
# rebuild the user_directory search indexes, see
# https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md
#
# 'prefer_local_users' defines whether to prioritise local users in
# search query results. If True, local users are more likely to appear above
# remote users when searching the user directory. Defaults to false.
#
#user_directory:
# enabled: true
# search_all_users: false
# prefer_local_users: false
"""
65 changes: 52 additions & 13 deletions synapse/storage/databases/main/user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,7 @@ def _get_next_batch(txn):
return len(users_to_work_on)

async def is_room_world_readable_or_publicly_joinable(self, room_id):
"""Check if the room is either world_readable or publically joinable
"""
"""Check if the room is either world_readable or publically joinable"""

# Create a state filter that only queries join and history state event
types_to_filter = (
Expand Down Expand Up @@ -516,8 +515,7 @@ async def add_users_in_public_rooms(
)

async def delete_all_from_user_dir(self) -> None:
"""Delete the entire user directory
"""
"""Delete the entire user directory"""

def _delete_all_from_user_dir_txn(txn):
txn.execute("DELETE FROM user_directory")
Expand Down Expand Up @@ -558,6 +556,11 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)

self._prefer_local_users_in_search = (
hs.config.user_directory_search_prefer_local_users
)
self._server_name = hs.config.server_name

async def remove_from_user_dir(self, user_id: str) -> None:
def _remove_from_user_dir_txn(txn):
self.db_pool.simple_delete_txn(
Expand Down Expand Up @@ -750,9 +753,24 @@ async def search_user_dir(self, user_id, search_term, limit):
)
"""

# We allow manipulating the ranking algorithm by injecting statements
# based on config options.
additional_ordering_statements = []
ordering_arguments = ()

if isinstance(self.database_engine, PostgresEngine):
full_query, exact_query, prefix_query = _parse_query_postgres(search_term)

# If enabled, this config option will rank local users higher than those on
# remote instances.
if self._prefer_local_users_in_search:
# This statement checks whether a given user's user ID contains a server name
# that matches the local server
statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)"
additional_ordering_statements.append(statement)

ordering_arguments += ("%:" + self._server_name,)

# We order by rank and then if they have profile info
# The ranking algorithm is hand tweaked for "best" results. Broadly
# the idea is we give a higher weight to exact matches.
Expand All @@ -763,7 +781,7 @@ async def search_user_dir(self, user_id, search_term, limit):
FROM user_directory_search as t
INNER JOIN user_directory AS d USING (user_id)
WHERE
%s
%(where_clause)s
AND vector @@ to_tsquery('simple', ?)
ORDER BY
(CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
Expand All @@ -783,33 +801,54 @@ async def search_user_dir(self, user_id, search_term, limit):
8
)
)
%(order_case_statements)s
DESC,
display_name IS NULL,
avatar_url IS NULL
LIMIT ?
""" % (
where_clause,
""" % {
"where_clause": where_clause,
"order_case_statements": " ".join(additional_ordering_statements),
}
args = (
join_args
+ (full_query, exact_query, prefix_query)
+ ordering_arguments
+ (limit + 1,)
)
args = join_args + (full_query, exact_query, prefix_query, limit + 1)
elif isinstance(self.database_engine, Sqlite3Engine):
search_query = _parse_query_sqlite(search_term)

# If enabled, this config option will rank local users higher than those on
# remote instances.
if self._prefer_local_users_in_search:
# This statement checks whether a given user's user ID contains a server name
# that matches the local server
#
# Note that we need to include a comma at the end for valid SQL
statement = "user_id LIKE ? DESC,"
additional_ordering_statements.append(statement)

ordering_arguments += ("%:" + self._server_name,)

sql = """
SELECT d.user_id AS user_id, display_name, avatar_url
FROM user_directory_search as t
INNER JOIN user_directory AS d USING (user_id)
WHERE
%s
%(where_clause)s
AND value MATCH ?
ORDER BY
rank(matchinfo(user_directory_search)) DESC,
%(order_statements)s
display_name IS NULL,
avatar_url IS NULL
LIMIT ?
""" % (
where_clause,
)
args = join_args + (search_query, limit + 1)
""" % {
"where_clause": where_clause,
"order_statements": " ".join(additional_ordering_statements),
}
args = join_args + (search_query,) + ordering_arguments + (limit + 1,)
else:
# This should be unreachable.
raise Exception("Unrecognized database engine")
Expand Down
94 changes: 94 additions & 0 deletions tests/handlers/test_user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import synapse.rest.admin
from synapse.api.constants import EventTypes, RoomEncryptionAlgorithms, UserTypes
from synapse.api.room_versions import RoomVersion, RoomVersions
from synapse.rest.client.v1 import login, room
from synapse.rest.client.v2_alpha import user_directory
from synapse.storage.roommember import ProfileInfo
Expand Down Expand Up @@ -46,6 +47,8 @@ def make_homeserver(self, reactor, clock):
def prepare(self, reactor, clock, hs):
self.store = hs.get_datastore()
self.handler = hs.get_user_directory_handler()
self.event_builder_factory = self.hs.get_event_builder_factory()
self.event_creation_handler = self.hs.get_event_creation_handler()

def test_handle_local_profile_change_with_support_user(self):
support_user_id = "@support:test"
Expand Down Expand Up @@ -541,6 +544,97 @@ def test_initial_share_all_users(self):
s = self.get_success(self.handler.search_users(u1, u4, 10))
self.assertEqual(len(s["results"]), 1)

@override_config(
{
"user_directory": {
"enabled": True,
"search_all_users": True,
"prefer_local_users": True,
}
}
)
def test_prefer_local_users(self):
"""Tests that local users are shown higher in search results when
user_directory.prefer_local_users is True.
"""
# Create a room and few users to test the directory with
searching_user = self.register_user("searcher", "password")
searching_user_tok = self.login("searcher", "password")

room_id = self.helper.create_room_as(
searching_user,
room_version=RoomVersions.V1.identifier,
tok=searching_user_tok,
)

# Create a few local users and join them to the room
local_user_1 = self.register_user("user_xxxxx", "password")
local_user_2 = self.register_user("user_bbbbb", "password")
local_user_3 = self.register_user("user_zzzzz", "password")

self._add_user_to_room(room_id, RoomVersions.V1, local_user_1)
self._add_user_to_room(room_id, RoomVersions.V1, local_user_2)
self._add_user_to_room(room_id, RoomVersions.V1, local_user_3)

# Create a few "remote" users and join them to the room
remote_user_1 = "@user_aaaaa:remote_server"
remote_user_2 = "@user_yyyyy:remote_server"
remote_user_3 = "@user_ccccc:remote_server"
self._add_user_to_room(room_id, RoomVersions.V1, remote_user_1)
self._add_user_to_room(room_id, RoomVersions.V1, remote_user_2)
self._add_user_to_room(room_id, RoomVersions.V1, remote_user_3)

local_users = [local_user_1, local_user_2, local_user_3]
remote_users = [remote_user_1, remote_user_2, remote_user_3]

# Populate the user directory via background update
self._add_background_updates()
while not self.get_success(
self.store.db_pool.updates.has_completed_background_updates()
):
self.get_success(
self.store.db_pool.updates.do_next_background_update(100), by=0.1
)

# The local searching user searches for the term "user", which other users have
# in their user id
results = self.get_success(
self.handler.search_users(searching_user, "user", 20)
)["results"]
received_user_id_ordering = [result["user_id"] for result in results]

# Typically we'd expect Synapse to return users in lexicographical order,
# assuming they have similar User IDs/display names, and profile information.

# Check that the order of returned results using our module is as we expect,
# i.e our local users show up first, despite all users having lexographically mixed
# user IDs.
[self.assertIn(user, local_users) for user in received_user_id_ordering[:3]]
[self.assertIn(user, remote_users) for user in received_user_id_ordering[3:]]

def _add_user_to_room(
self, room_id: str, room_version: RoomVersion, user_id: str,
):
# Add a user to the room.
builder = self.event_builder_factory.for_room_version(
room_version,
{
"type": "m.room.member",
"sender": user_id,
"state_key": user_id,
"room_id": room_id,
"content": {"membership": "join"},
},
)

event, context = self.get_success(
self.event_creation_handler.create_new_client_event(builder)
)

self.get_success(
self.hs.get_storage().persistence.persist_event(event, context)
)


class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
user_id = "@test:test"
Expand Down