From 1e1ddd276e0c5d9a6fa9db3cd1aeee7103495b4c Mon Sep 17 00:00:00 2001 From: Heinz Wiesinger Date: Sat, 27 Jan 2018 18:16:09 +0100 Subject: [PATCH 1/4] Small performance optimization when fetching items from library. Fetch flexible attributes once for all items instead of once per item. For queries with larger result sets this drastically cuts down the number of issued sqlite queries. --- beets/dbcore/db.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 71810ead20..cc619b29ba 100755 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -565,6 +565,13 @@ def _get_objects(self): a `Results` object a second time should be much faster than the first. """ + + # First fetch all flexible attributes for all items in the result. + # Doing the per-item filtering in python is faster than issuing + # one query per item to sqlite. + item_ids = [row['id'] for row in self._rows] + flex_attrs = self._get_flex_attrs(item_ids) + index = 0 # Position in the materialized objects. while index < len(self._objects) or self._rows: # Are there previously-materialized objects to produce? @@ -577,7 +584,10 @@ def _get_objects(self): else: while self._rows: row = self._rows.pop(0) - obj = self._make_model(row) + if flex_attrs: + obj = self._make_model(row, flex_attrs[row['id']]) + else: + obj = self._make_model(row) # If there is a slow-query predicate, ensurer that the # object passes it. if not self.query or self.query.match(obj): @@ -599,20 +609,31 @@ def __iter__(self): # Objects are pre-sorted (i.e., by the database). return self._get_objects() - def _make_model(self, row): - # Get the flexible attributes for the object. + def _get_flex_attrs(self, ids): + # Get the flexible attributes for all ids. with self.db.transaction() as tx: + id_list = ', '.join(str(id) for id in ids) flex_rows = tx.query( - 'SELECT * FROM {0} WHERE entity_id=?'.format( - self.model_class._flex_table - ), - (row['id'],) + 'SELECT * FROM {0} WHERE entity_id IN ({1})'.format( + self.model_class._flex_table, + id_list + ) ) + # Index flexible attributes by the entity id they belong to + flex_values = dict() + for row in flex_rows: + if row['entity_id'] not in flex_values: + flex_values[row['entity_id']] = dict() + + flex_values[row['entity_id']][row['key']] = row['value'] + + return flex_values + + def _make_model(self, row, flex_values={}): cols = dict(row) values = dict((k, v) for (k, v) in cols.items() if not k[:4] == 'flex') - flex_values = dict((row['key'], row['value']) for row in flex_rows) # Construct the Python object obj = self.model_class._awaken(self.db, values, flex_values) From 31ec222e0e205dc1956a13529a3a3f382aa3ab39 Mon Sep 17 00:00:00 2001 From: Heinz Wiesinger Date: Tue, 27 Nov 2018 17:39:09 +0100 Subject: [PATCH 2/4] Query flexible attributes already in _fetch() This resolves the query length and potential security problems, while keeping the performance benefits. --- beets/dbcore/db.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index cc619b29ba..87587d0579 100755 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -32,7 +32,6 @@ from .query import MatchQuery, NullSort, TrueQuery import six - class DBAccessError(Exception): """The SQLite database became inaccessible. @@ -524,7 +523,8 @@ class Results(object): """An item query result set. Iterating over the collection lazily constructs LibModel objects that reflect database rows. """ - def __init__(self, model_class, rows, db, query=None, sort=None): + def __init__(self, model_class, rows, db, flex_rows, + query=None, sort=None): """Create a result set that will construct objects of type `model_class`. @@ -544,6 +544,7 @@ def __init__(self, model_class, rows, db, query=None, sort=None): self.db = db self.query = query self.sort = sort + self.flex_rows = flex_rows # We keep a queue of rows we haven't yet consumed for # materialization. We preserve the original total number of @@ -569,8 +570,7 @@ def _get_objects(self): # First fetch all flexible attributes for all items in the result. # Doing the per-item filtering in python is faster than issuing # one query per item to sqlite. - item_ids = [row['id'] for row in self._rows] - flex_attrs = self._get_flex_attrs(item_ids) + flex_attrs = self._get_indexed_flex_attrs() index = 0 # Position in the materialized objects. while index < len(self._objects) or self._rows: @@ -609,20 +609,11 @@ def __iter__(self): # Objects are pre-sorted (i.e., by the database). return self._get_objects() - def _get_flex_attrs(self, ids): - # Get the flexible attributes for all ids. - with self.db.transaction() as tx: - id_list = ', '.join(str(id) for id in ids) - flex_rows = tx.query( - 'SELECT * FROM {0} WHERE entity_id IN ({1})'.format( - self.model_class._flex_table, - id_list - ) - ) - - # Index flexible attributes by the entity id they belong to + def _get_indexed_flex_attrs(self): + """ Index flexible attributes by the entity id they belong to + """ flex_values = dict() - for row in flex_rows: + for row in self.flex_rows: if row['entity_id'] not in flex_values: flex_values[row['entity_id']] = dict() @@ -631,6 +622,8 @@ def _get_flex_attrs(self, ids): return flex_values def _make_model(self, row, flex_values={}): + """ Create a Model object for the given row + """ cols = dict(row) values = dict((k, v) for (k, v) in cols.items() if not k[:4] == 'flex') @@ -920,11 +913,23 @@ def _fetch(self, model_cls, query=None, sort=None): "ORDER BY {0}".format(order_by) if order_by else '', ) + # Fetch flexible attributes for items matching the main query + flex_sql = (""" + SELECT * FROM {0} WHERE entity_id IN + (SELECT id FROM {1} WHERE {2}); + """.format( + model_cls._flex_table, + model_cls._table, + where or '1', + ) + ) + with self.transaction() as tx: rows = tx.query(sql, subvals) + flex_rows = tx.query(flex_sql, subvals) return Results( - model_cls, rows, self, + model_cls, rows, self, flex_rows, None if where else query, # Slow query component. sort if sort.is_slow() else None, # Slow sort component. ) From 5494c8b2609de2443c12b2da1fff1c67bfc7bf2b Mon Sep 17 00:00:00 2001 From: Heinz Wiesinger Date: Wed, 28 Nov 2018 11:06:21 +0100 Subject: [PATCH 3/4] Improve documentation --- beets/dbcore/db.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 87587d0579..14b0553f1c 100755 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -32,6 +32,7 @@ from .query import MatchQuery, NullSort, TrueQuery import six + class DBAccessError(Exception): """The SQLite database became inaccessible. @@ -567,9 +568,7 @@ def _get_objects(self): first. """ - # First fetch all flexible attributes for all items in the result. - # Doing the per-item filtering in python is faster than issuing - # one query per item to sqlite. + # Index flexible attributes by the item ID, so we have easier access flex_attrs = self._get_indexed_flex_attrs() index = 0 # Position in the materialized objects. @@ -913,7 +912,9 @@ def _fetch(self, model_cls, query=None, sort=None): "ORDER BY {0}".format(order_by) if order_by else '', ) - # Fetch flexible attributes for items matching the main query + # Fetch flexible attributes for items matching the main query. + # Doing the per-item filtering in python is faster than issuing + # one query per item to sqlite. flex_sql = (""" SELECT * FROM {0} WHERE entity_id IN (SELECT id FROM {1} WHERE {2}); From 7c568aa52894eb78c987ecbc0a72e68ce06d435f Mon Sep 17 00:00:00 2001 From: Heinz Wiesinger Date: Sun, 2 Dec 2018 11:50:12 +0100 Subject: [PATCH 4/4] Account for items that do not have flexible attributes --- beets/dbcore/db.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/beets/dbcore/db.py b/beets/dbcore/db.py index 14b0553f1c..24c20ef1aa 100755 --- a/beets/dbcore/db.py +++ b/beets/dbcore/db.py @@ -583,10 +583,7 @@ def _get_objects(self): else: while self._rows: row = self._rows.pop(0) - if flex_attrs: - obj = self._make_model(row, flex_attrs[row['id']]) - else: - obj = self._make_model(row) + obj = self._make_model(row, flex_attrs.get(row['id'], {})) # If there is a slow-query predicate, ensurer that the # object passes it. if not self.query or self.query.match(obj):