From 2829c78fa8d4e1e50567929a1aa7365a3c61b82e Mon Sep 17 00:00:00 2001 From: "A. Murat Eren" Date: Sat, 27 Jun 2020 08:44:45 -0500 Subject: [PATCH 1/3] every direct SQL call double checks the existence of table --- anvio/db.py | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/anvio/db.py b/anvio/db.py index a5cc297a09..40d241e45a 100644 --- a/anvio/db.py +++ b/anvio/db.py @@ -72,6 +72,8 @@ def __init__(self, db_path, client_version, new_database=False, ignore_version=F self.cursor = self.conn.cursor() + self.table_names_in_db = self.get_table_names() + if new_database: self.create_self() self.set_version(client_version) @@ -89,7 +91,7 @@ def __init__(self, db_path, client_version, new_database=False, ignore_version=F f"wants to work with v{client_version}). You can migrate your database without losing any data using the " f"program `anvi-migrate` with either of the flags `--migrate-dbs-safely` or `--migrate-dbs-quickly`.") - bad_tables = [table_name for table_name in self.get_table_names() if table_name not in tables.requires_unique_entry_id] + bad_tables = [table_name for table_name in self.table_names_in_db if table_name not in tables.requires_unique_entry_id] if len(bad_tables): raise ConfigError("You better be a programmer tinkering with anvi'o databases adding new tables or something. Otherwise we " "have quite a serious problem :/ Each table in a given anvi'o database must have an entry in the " @@ -333,11 +335,7 @@ def insert_rows_from_dataframe(self, table_name, dataframe, raise_if_no_columns= return next_available_id """ - if table_name not in self.get_table_names(): - raise ConfigError("insert_rows_from_dataframe :: A table with the name %s does " - "not exist in the database you requested. 
%s are the tables " - "existent in the database" \ - % (table_name, ", ".join(self.get_table_names()))) + self.is_table_exists(table_name) if not list(dataframe.columns) and not raise_if_no_columns: # if the dataframe has no colums, we just return @@ -362,31 +360,47 @@ def insert_rows_from_dataframe(self, table_name, dataframe, raise_if_no_columns= self.insert_many(table_name, entries=entries) + def is_table_exists(self, table_name): + if table_name not in self.table_names_in_db: + raise ConfigError(f"The database at {self.db_path} does not seem to have a table `{table_name}` :/ " + f"Here is a list of table names this database knows: {', '.join(self.table_names_in_db)}") + + def get_all_rows_from_table(self, table_name): + self.is_table_exists(table_name) + response = self._exec('''SELECT %s FROM %s''' % (self.PROPER_SELECT_STATEMENT(table_name), table_name)) return response.fetchall() def get_some_rows_from_table(self, table_name, where_clause): + self.is_table_exists(table_name) + response = self._exec('''SELECT %s FROM %s WHERE %s''' % (self.PROPER_SELECT_STATEMENT(table_name), table_name, where_clause)) return response.fetchall() - def get_row_counts_from_table(self, table, where_clause=None): + def get_row_counts_from_table(self, table_name, where_clause=None): + self.is_table_exists(table_name) + if where_clause: - response = self._exec('''SELECT COUNT(*) FROM %s WHERE %s''' % (table, where_clause)) + response = self._exec('''SELECT COUNT(*) FROM %s WHERE %s''' % (table_name, where_clause)) else: - response = self._exec('''SELECT COUNT(*) FROM %s''' % (table)) + response = self._exec('''SELECT COUNT(*) FROM %s''' % (table_name)) return response.fetchall()[0][0] def remove_some_rows_from_table(self, table_name, where_clause): + self.is_table_exists(table_name) + self._exec('''DELETE FROM %s WHERE %s''' % (table_name, where_clause)) self.commit() def get_single_column_from_table(self, table, column, unique=False, where_clause=None): + self.is_table_exists(table) + 
if where_clause: response = self._exec('''SELECT %s %s FROM %s WHERE %s''' % ('DISTINCT' if unique else '', column, table, where_clause)) else: @@ -395,6 +409,8 @@ def get_single_column_from_table(self, table, column, unique=False, where_clause def get_some_columns_from_table(self, table, comma_separated_column_names, unique=False, where_clause=None): + self.is_table_exists(table) + if where_clause: response = self._exec('''SELECT %s %s FROM %s WHERE %s''' % ('DISTINCT' if unique else '', comma_separated_column_names, table, where_clause)) else: @@ -403,17 +419,23 @@ def get_some_columns_from_table(self, table, comma_separated_column_names, uniqu def get_frequencies_of_values_from_a_column(self, table_name, column_name): + self.is_table_exists(table_name) + response = self._exec('''select %s, COUNT(*) from %s group by %s''' % (column_name, table_name, column_name)) return response.fetchall() def get_table_column_types(self, table_name): + self.is_table_exists(table_name) + response = self._exec('PRAGMA TABLE_INFO(%s)' % table_name) return [t[2] for t in response.fetchall()] def get_table_structure(self, table_name): + self.is_table_exists(table_name) + response = self._exec('''SELECT * FROM %s''' % table_name) return [t[0] for t in response.description] From 7551699177933543f748a460baef7163304ee04c Mon Sep 17 00:00:00 2001 From: "A. 
Murat Eren" Date: Sat, 27 Jun 2020 08:45:10 -0500 Subject: [PATCH 2/3] a new function to learn about table columns and types as a dict --- anvio/db.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/anvio/db.py b/anvio/db.py index 40d241e45a..8c81b01265 100644 --- a/anvio/db.py +++ b/anvio/db.py @@ -433,6 +433,13 @@ def get_table_column_types(self, table_name): return [t[2] for t in response.fetchall()] + def get_table_columns_and_types(self, table_name): + self.is_table_exists(table_name) + + response = self._exec('PRAGMA TABLE_INFO(%s)' % table_name) + return dict([(t[1], t[2]) for t in response.fetchall()]) + + def get_table_structure(self, table_name): self.is_table_exists(table_name) From 0a144d9fd93e4e08ba40b4d097380a7d76e19c4b Mon Sep 17 00:00:00 2001 From: "A. Murat Eren" Date: Sat, 27 Jun 2020 08:45:28 -0500 Subject: [PATCH 3/3] if table column is numeric, don't treat it as if it is str. --- anvio/db.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/anvio/db.py b/anvio/db.py index 8c81b01265..9b6ad1c871 100644 --- a/anvio/db.py +++ b/anvio/db.py @@ -494,8 +494,16 @@ def func(items_of_interest=None): A set of item names of interest. If the set is empty, the function will return the entire content of `table_name` """ + table_columns_and_types = self.get_table_columns_and_types(table_name) + + if column not in table_columns_and_types: + raise ConfigError(f"The column name `{column}` is not in table `{table_name}` :/") + if column and data: - items = ','.join(['"%s"' % d for d in data]) + if table_columns_and_types[column] in ["numeric", "integer"]: + items = ','.join([str(d) for d in data]) + else: + items = ','.join(['"%s"' % d for d in data]) if progress: progress.update(f'Reading **SOME** data from `{table_name.replace("_", " ")}` table :)')