Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moved metadata_store into database and content_discovery #7773

Merged
merged 1 commit into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/tribler/core/components/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from tribler.core.components.libtorrent.download_manager.download_manager import DownloadManager
from tribler.core.components.libtorrent.settings import LibtorrentSettings
from tribler.core.components.libtorrent.torrentdef import TorrentDef
from tribler.core.components.metadata_store.db.store import MetadataStore
from tribler.core.components.database.db.store import MetadataStore
from tribler.core.config.tribler_config import TriblerConfig
from tribler.core.tests.tools.common import TESTS_DATA_DIR
from tribler.core.utilities.path_util import Path
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from asyncio import Future

from ipv8.requestcache import RandomNumberCache
from tribler.core.components.metadata_store.utils import RequestTimeoutException


class SelectRequest(RandomNumberCache):
Expand All @@ -25,6 +24,10 @@ def on_timeout(self):
self.timeout_callback(self)


class RequestTimeoutException(Exception):
pass


class EvaSelectRequest(SelectRequest):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
from tribler.core.components.ipv8.eva.protocol import EVAProtocol
from tribler.core.components.ipv8.eva.result import TransferResult
from tribler.core.components.knowledge.community.knowledge_validator import is_valid_resource
from tribler.core.components.metadata_store.db.orm_bindings.torrent_metadata import LZ4_EMPTY_ARCHIVE, entries_to_chunk
from tribler.core.components.metadata_store.db.store import ObjState
from tribler.core.components.database.db.orm_bindings.torrent_metadata import LZ4_EMPTY_ARCHIVE, entries_to_chunk
from tribler.core.components.database.db.store import ObjState
from tribler.core.components.torrent_checker.torrent_checker.dataclasses import HealthInfo
from tribler.core.upgrade.tags_to_knowledge.previous_dbs.knowledge_db import ResourceType
from tribler.core.utilities.pony_utils import run_threaded
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from ipv8.community import CommunitySettings
from tribler.core.components.database.db.tribler_database import TriblerDatabase
from tribler.core.components.metadata_store.db.store import MetadataStore
from tribler.core.components.database.db.store import MetadataStore
from tribler.core.components.torrent_checker.torrent_checker.torrent_checker import TorrentChecker
from tribler.core.utilities.notifier import Notifier

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from asyncio import Future
from binascii import hexlify
from operator import attrgetter
from random import choices, randint, random
from random import choices, randint
from typing import List
from unittest.mock import AsyncMock, Mock, patch

Expand All @@ -30,9 +30,9 @@
from tribler.core.components.database.db.layers.tests.test_knowledge_data_access_layer_base import \
Resource, TestKnowledgeAccessLayerBase
from tribler.core.components.database.db.tribler_database import TriblerDatabase
from tribler.core.components.metadata_store.db.orm_bindings.torrent_metadata import LZ4_EMPTY_ARCHIVE, NEW
from tribler.core.components.metadata_store.db.serialization import CHANNEL_THUMBNAIL, NULL_KEY, REGULAR_TORRENT
from tribler.core.components.metadata_store.db.store import MetadataStore
from tribler.core.components.database.db.orm_bindings.torrent_metadata import LZ4_EMPTY_ARCHIVE, NEW
from tribler.core.components.database.db.serialization import CHANNEL_THUMBNAIL, NULL_KEY, REGULAR_TORRENT
from tribler.core.components.database.db.store import MetadataStore
from tribler.core.components.content_discovery.community.content_discovery_community import ContentDiscoveryCommunity
from tribler.core.components.torrent_checker.torrent_checker.torrent_checker import TorrentChecker
from tribler.core.components.torrent_checker.torrent_checker.torrentchecker_session import HealthInfo
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from ipv8.peerdiscovery.network import Network
from tribler.core.components.component import Component
from tribler.core.components.content_discovery.community.content_discovery_community import ContentDiscoveryCommunity
from tribler.core.components.database.database_component import DatabaseComponent
from tribler.core.components.ipv8.ipv8_component import INFINITE, Ipv8Component
from tribler.core.components.metadata_store.metadata_store_component import MetadataStoreComponent
from tribler.core.components.reporter.reporter_component import ReporterComponent
from tribler.core.components.torrent_checker.torrent_checker_component import TorrentCheckerComponent

Expand All @@ -18,15 +18,15 @@ async def run(self):
await self.get_component(ReporterComponent)

self._ipv8_component = await self.require_component(Ipv8Component)
metadata_store_component = await self.require_component(MetadataStoreComponent)
database_component = await self.require_component(DatabaseComponent)
torrent_checker_component = await self.require_component(TorrentCheckerComponent)

self.community = ContentDiscoveryCommunity(ContentDiscoveryCommunity.settings_class(
my_peer = self._ipv8_component.peer,
endpoint = self._ipv8_component.ipv8.endpoint,
network = Network(),
maximum_payload_size = self.session.config.content_discovery_community.maximum_payload_size,
metadata_store=metadata_store_component.mds,
metadata_store=database_component.mds,
torrent_checker=torrent_checker_component.torrent_checker,
notifier=self.session.notifier
))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from marshmallow.fields import Integer, String

from tribler.core.components.database.restapi.schema import MetadataParameters


class RemoteQueryParameters(MetadataParameters):
uuid = String()
channel_pk = String(description='Channel to query, must also define origin_id')
origin_id = Integer(default=None, description='Peer id to query, must also define channel_pk')
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from binascii import hexlify, unhexlify

from aiohttp import web
from aiohttp_apispec import docs, querystring_schema
from marshmallow.fields import List, String

from ipv8.REST.schema import schema
from tribler.core.components.content_discovery.community.content_discovery_community import ContentDiscoveryCommunity
from tribler.core.components.content_discovery.restapi.schema import RemoteQueryParameters
from tribler.core.components.restapi.rest.rest_endpoint import HTTP_BAD_REQUEST, MAX_REQUEST_SIZE, RESTEndpoint, \
RESTResponse
from tribler.core.utilities.utilities import froze_it


@froze_it
class SearchEndpoint(RESTEndpoint):
"""
This endpoint is responsible for searching in channels and torrents present in the local Tribler database.
"""
path = '/search'

def __init__(self,
popularity_community: ContentDiscoveryCommunity,
middlewares=(),
client_max_size=MAX_REQUEST_SIZE):
super().__init__(middlewares, client_max_size)
self.popularity_community = popularity_community

def setup_routes(self):
self.app.add_routes([web.put('/remote', self.remote_search)])

@classmethod
def sanitize_parameters(cls, parameters):
sanitized = dict(parameters)
if "max_rowid" in parameters:
sanitized["max_rowid"] = int(parameters["max_rowid"])
if "channel_pk" in parameters:
sanitized["channel_pk"] = unhexlify(parameters["channel_pk"])
if "origin_id" in parameters:
sanitized["origin_id"] = int(parameters["origin_id"])
return sanitized


@docs(
tags=['Metadata'],
summary="Perform a search for a given query.",
responses={200: {
'schema': schema(RemoteSearchResponse={'request_uuid': String(), 'peers': List(String())})},
"examples": {
'Success': {
"request_uuid": "268560c0-3f28-4e6e-9d85-d5ccb0269693",
"peers": ["50e9a2ce646c373985a8e827e328830e053025c6", "107c84e5d9636c17b46c88c3ddb54842d80081b0"]
}
}
},
)
@querystring_schema(RemoteQueryParameters)
async def remote_search(self, request):
self._logger.info('Create remote search request')
# Query remote results from the GigaChannel Community.
# Results are returned over the Events endpoint.
try:
sanitized = self.sanitize_parameters(request.query)
except (ValueError, KeyError) as e:
return RESTResponse({"error": f"Error processing request parameters: {e}"}, status=HTTP_BAD_REQUEST)
self._logger.info(f'Parameters: {sanitized}')

request_uuid, peers_list = self.popularity_community.send_search_request(**sanitized)
peers_mid_list = [hexlify(p.mid).decode() for p in peers_list]

return RESTResponse({"request_uuid": str(request_uuid), "peers": peers_mid_list})
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import uuid
from unittest.mock import Mock

import pytest

from tribler.core.components.content_discovery.restapi.search_endpoint import SearchEndpoint
from tribler.core.components.restapi.rest.base_api_test import do_request
from tribler.core.utilities.unicode import hexlify


@pytest.fixture(name="mock_content_discovery_community")
def fixture_mock_content_discovery_community():
return Mock()


@pytest.fixture(name="endpoint")
def fixture_endpoint(mock_content_discovery_community):
return SearchEndpoint(mock_content_discovery_community)


async def test_create_remote_search_request(rest_api, mock_content_discovery_community):
"""
Test that remote search call is sent on a REST API search request
"""
sent = {}
peers = []
request_uuid = uuid.uuid4()

def mock_send(**kwargs):
sent.update(kwargs)
return request_uuid, peers

# Test querying for keywords
mock_content_discovery_community.send_search_request = mock_send
search_txt = "foo"
await do_request(
rest_api,
f'search/remote?txt_filter={search_txt}&max_rowid=1',
request_type="PUT",
expected_code=200,
expected_json={"request_uuid": str(request_uuid), "peers": peers},
)
assert sent['txt_filter'] == search_txt
sent.clear()

# Test querying channel data by public key, e.g. for channel preview purposes
channel_pk = "ff"
await do_request(
rest_api, f'search/remote?channel_pk={channel_pk}&metadata_type=torrent', request_type="PUT", expected_code=200
)
assert hexlify(sent['channel_pk']) == channel_pk


async def test_create_remote_search_request_illegal(rest_api):
"""
Test that remote search call is sent on a REST API search request
"""
response = await do_request(
rest_api,
'search/remote?origin_id=a',
request_type="PUT",
expected_code=400
)
assert "error" in response
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from tribler.core.components.key.key_component import KeyComponent
from tribler.core.components.knowledge.knowledge_component import KnowledgeComponent
from tribler.core.components.libtorrent.libtorrent_component import LibtorrentComponent
from tribler.core.components.metadata_store.metadata_store_component import MetadataStoreComponent
from tribler.core.components.content_discovery.content_discovery_component import ContentDiscoveryComponent
from tribler.core.components.session import Session
from tribler.core.components.socks_servers.socks_servers_component import SocksServersComponent
Expand All @@ -15,8 +14,7 @@

async def test_content_discovery_component(tribler_config):
components = [DatabaseComponent(), SocksServersComponent(), LibtorrentComponent(), TorrentCheckerComponent(),
KnowledgeComponent(), MetadataStoreComponent(), KeyComponent(), Ipv8Component(),
ContentDiscoveryComponent()]
KnowledgeComponent(), KeyComponent(), Ipv8Component(), ContentDiscoveryComponent()]
async with Session(tribler_config, components) as session:
comp = session.get_instance(ContentDiscoveryComponent)
assert comp.community
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
import re
from functools import cmp_to_key

from tribler.core.components.metadata_store.category_filter.family_filter import default_xxx_filter
from tribler.core.components.metadata_store.category_filter.init_category import getCategoryInfo
from tribler.core.components.database.category_filter.family_filter import default_xxx_filter
from tribler.core.components.database.category_filter.init_category import getCategoryInfo
from tribler.core.utilities.install_dir import get_lib_path
from tribler.core.utilities.unicode import recursive_unicode

CATEGORY_CONFIG_FILE = get_lib_path() / 'components/metadata_store/category_filter/category.conf'
CATEGORY_CONFIG_FILE = get_lib_path() / 'components/database/category_filter/category.conf'


def cmp_rank(a, b):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

WORDS_REGEXP = re.compile('[a-zA-Z0-9]+')

termfilename = get_lib_path() / 'components' / 'metadata_store' / 'category_filter' / 'filter_terms.filter'
termfilename = get_lib_path() / 'components' / 'database' / 'category_filter' / 'filter_terms.filter'


def initTerms(filename):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# !ACHTUNG! We must first read the line into a file, then release the lock, and only then pass it to regex compiler.
# Otherwise, there is an annoying race condition that reads in an empty file!
with open(get_lib_path() / 'components' / 'metadata_store' / 'category_filter' / 'level2.regex', encoding="utf-8") as f:
with open(get_lib_path() / 'components' / 'database' / 'category_filter' / 'level2.regex', encoding="utf-8") as f:
regex = f.read().strip()
stoplist_expression = re.compile(regex, re.IGNORECASE)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from tribler.core.components.metadata_store.category_filter.category import Category, cmp_rank
from tribler.core.components.metadata_store.category_filter.family_filter import XXXFilter
from tribler.core.components.database.category_filter.category import Category, cmp_rank
from tribler.core.components.database.category_filter.family_filter import XXXFilter


@pytest.fixture(name="xxx_filter")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from tribler.core.components.metadata_store.category_filter.family_filter import XXXFilter
from tribler.core.components.metadata_store.category_filter.l2_filter import is_forbidden
from tribler.core.components.database.category_filter.family_filter import XXXFilter
from tribler.core.components.database.category_filter.l2_filter import is_forbidden


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from tribler.core.components.metadata_store.category_filter.category import CATEGORY_CONFIG_FILE
from tribler.core.components.metadata_store.category_filter.init_category import INIT_FUNC_DICT, getCategoryInfo
from tribler.core.components.database.category_filter.category import CATEGORY_CONFIG_FILE
from tribler.core.components.database.category_filter.init_category import INIT_FUNC_DICT, getCategoryInfo


def test_split_list():
Expand Down
48 changes: 46 additions & 2 deletions src/tribler/core/components/database/database_component.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,67 @@
from tribler.core import notifications
from tribler.core.components.component import Component
from tribler.core.components.database.db.store import MetadataStore
from tribler.core.components.database.db.tribler_database import TriblerDatabase
from tribler.core.components.key.key_component import KeyComponent
from tribler.core.components.knowledge.rules.knowledge_rules_processor import KnowledgeRulesProcessor
from tribler.core.utilities.simpledefs import STATEDIR_DB_DIR


class DatabaseComponent(Component):
tribler_should_stop_on_component_error = True

db: TriblerDatabase = None
mds: MetadataStore = None # TODO: legacy, should be merged into ``db``

async def run(self):
await super().run()

db_path = self.session.config.state_dir / STATEDIR_DB_DIR / "tribler.db"
if self.session.config.gui_test_mode:
config = self.session.config
db_path = config.state_dir / STATEDIR_DB_DIR / "tribler.db"
if config.gui_test_mode:
db_path = ":memory:"

self.db = TriblerDatabase(str(db_path))

# TODO: merge the code below into the TriblerDatabase
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a TODO for the next PR, or is it a forgotten TODO for the current PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for the next PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of the developers and I in this group consider it bad, or let's say "not good," practice to merge changes with TODOs into the main branch.

I'm not stopping you from doing this by not giving acceptance to the PR. I'm just highlighting that there are many points of view on this practice, some of which consider it to be a bad practice that leads to code quality degradation.

https://www.reddit.com/r/learnprogramming/comments/r25bj7/are_is_committing_code_to_github_with_todos_bad/?rdt=46532

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with them. However, our current linters are configured to allow this. Notably, I did not have to disable any warnings to pass our PR checks.

Once we fix #1722 we can adopt a more strict stance on this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you saying that if you understand there is a bad practice, but there are no configured linter issues, then it is okay for you to commit and merge the code despite its quality?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even though I disagree with the practice personally, this is the approach that the team has chosen and I adapt my workflow to it.

If I were the only developer, I would make make own standards and conform to what I would like to see. Sadly, I am not the only developer and I have to conform to the existing rules and linter settings.


channels_dir = config.chant.get_path_as_absolute('channels_dir', config.state_dir)
chant_testnet = config.general.testnet or config.chant.testnet

metadata_db_name = 'metadata.db'
if chant_testnet:
metadata_db_name = 'metadata_testnet.db'

Check warning on line 33 in src/tribler/core/components/database/database_component.py

View check run for this annotation

Codecov / codecov/patch

src/tribler/core/components/database/database_component.py#L33

Added line #L33 was not covered by tests
elif config.gui_test_mode: # Avoid interfering with the main database in test mode
# Note we don't use in-memory database in core test mode, because MDS uses threads,
# and SQLite creates a different in-memory DB for each connection by default.
# To change this behaviour, we have to use url-based SQLite initialization syntax,
# which is not supported by PonyORM yet.
metadata_db_name = 'metadata_gui_test.db'

Check warning on line 39 in src/tribler/core/components/database/database_component.py

View check run for this annotation

Codecov / codecov/patch

src/tribler/core/components/database/database_component.py#L39

Added line #L39 was not covered by tests

database_path = config.state_dir / STATEDIR_DB_DIR / metadata_db_name

# Make sure that we start with a clean metadata database when in GUI mode every time.
if config.gui_test_mode and database_path.exists():
self.logger.info("Wiping metadata database in GUI test mode")
database_path.unlink(missing_ok=True)

Check warning on line 46 in src/tribler/core/components/database/database_component.py

View check run for this annotation

Codecov / codecov/patch

src/tribler/core/components/database/database_component.py#L45-L46

Added lines #L45 - L46 were not covered by tests

key_component = await self.require_component(KeyComponent)

metadata_store = MetadataStore(
database_path,
channels_dir,
key_component.primary_key,
notifier=self.session.notifier,
disable_sync=config.gui_test_mode,
tag_processor_version=KnowledgeRulesProcessor.version
)
self.mds = metadata_store
self.session.notifier.add_observer(notifications.torrent_metadata_added,
metadata_store.TorrentMetadata.add_ffa_from_dict)

async def shutdown(self):
await super().shutdown()
if self.db:
self.db.shutdown()
if self.mds:
self.mds.shutdown()
Loading
Loading