diff --git a/conda/webapp.yaml b/conda/webapp.yaml index e2cf13ffd..6710b1000 100644 --- a/conda/webapp.yaml +++ b/conda/webapp.yaml @@ -22,5 +22,6 @@ dependencies: - bibtexparser - blast>=2.9.0 - gxx + - django-autocomplete-light - pip: - scoary-2 diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index e07da51c3..75d69bd36 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -9,6 +9,8 @@ to [Common Changelog](https://common-changelog.org) ### Changed +- Handle groups in hit extraction view. ([#84](https://github.com/metagenlab/zDB/pull/84)) (Niklaus Johner) +- Allow using groups to define phenotype in GWAS view. ([#82](https://github.com/metagenlab/zDB/pull/82)) (Niklaus Johner) - Display form validation errors next to the corresponding fields. ([#83](https://github.com/metagenlab/zDB/pull/83)) (Niklaus Johner) - Filter VF hits by SeqID and coverage and keep one hit per locus. ([#77](https://github.com/metagenlab/zDB/pull/77)) (Niklaus Johner) - Improve layout for various views, making better use of available space. 
([#70](https://github.com/metagenlab/zDB/pull/70)) (Niklaus Johner) diff --git a/testing/webapp/test_autocomplete_views.py b/testing/webapp/test_autocomplete_views.py new file mode 100644 index 000000000..38f8f9347 --- /dev/null +++ b/testing/webapp/test_autocomplete_views.py @@ -0,0 +1,141 @@ +import json +from contextlib import contextmanager + +from django.conf import settings +from django.test import SimpleTestCase +from lib.db_utils import DB + + +class BaseAutocompleteTestCase(SimpleTestCase): + + def make_request(self, **kwargs): + url = f'{self.base_url}?forward={json.dumps(kwargs)}' + return self.client.get(url) + + +class TestAutocompleteTaxid(BaseAutocompleteTestCase): + + base_url = '/autocomplete_taxid/' + + taxons = [ + {'id': '1', 'text': 'Klebsiella pneumoniae R6724_16313'}, + {'id': '2', 'text': 'Klebsiella pneumoniae R6726_16314'}, + {'id': '3', 'text': 'Klebsiella pneumoniae R6728_16315'}] + + groups = [ + {'id': 'group:positive', 'text': 'positive'}, + {'id': 'group:negative', 'text': 'negative'}, + {'id': 'group:all', 'text': 'all'}] + + plasmids = [ + {'id': 'plasmid:1', 'text': 'Klebsiella pneumoniae R6724_16313 plasmid'}, + {'id': 'plasmid:2', 'text': 'Klebsiella pneumoniae R6726_16314 plasmid'}, + {'id': 'plasmid:3', 'text': 'Klebsiella pneumoniae R6728_16315 plasmid'}] + + @contextmanager + def add_plasmid_for_taxids(self, taxids): + """We need to commit this change so that it is picked up + by the autocomplete view, so we need to cleanup afterwards. + I guess we could also have isolated the DB by making a backup in setUp + and restoring it in tearDown, but seemed like overkill for now. 
+ """ + plasmid_term_id = self.db.server.adaptor.execute_one( + "SELECT term_id FROM term WHERE name='plasmid'")[0] + try: + for taxid in taxids: + self.db.server.adaptor.execute( + f"UPDATE bioentry_qualifier_value SET value=1 " + f"WHERE bioentry_id={taxid} AND term_id={plasmid_term_id};") + self.db.server.commit() + yield + finally: + for taxid in taxids: + self.db.server.adaptor.execute( + f"UPDATE bioentry_qualifier_value SET value=0 " + f"WHERE bioentry_id={taxid} AND term_id={plasmid_term_id};") + self.db.server.commit() + + def assertItemsEqual(self, expected, actual): + self.assertEqual(sorted(expected, key=lambda x: x["id"]), + sorted(actual, key=lambda x: x["id"])) + + def setUp(self): + biodb_path = settings.BIODB_DB_PATH + self.db = DB.load_db_from_name(biodb_path) + + def tearDown(self): + self.db.server.close() + + def test_handles_include_plasmids(self): + with self.add_plasmid_for_taxids([1, 2]): + resp = self.make_request() + self.assertItemsEqual( + self.taxons + self.groups, + resp.json()["results"]) + + resp = self.make_request(include_plasmids=True) + self.assertItemsEqual( + self.taxons + self.groups + self.plasmids[:2], + resp.json()["results"]) + + def test_handles_exclude(self): + resp = self.make_request(exclude=["3"]) + self.assertItemsEqual( + [self.taxons[0], self.taxons[1], self.groups[0]], + resp.json()["results"]) + + resp = self.make_request(exclude=["group:positive"]) + self.assertItemsEqual( + [self.taxons[2], self.groups[1]], + resp.json()["results"]) + + resp = self.make_request(exclude=["3", "group:positive"]) + self.assertItemsEqual( + [], + resp.json()["results"]) + + def test_handles_exclude_taxids_in_groups(self): + # ignored because these are not groups + resp = self.make_request(exclude_taxids_in_groups=["1", "3"]) + self.assertItemsEqual( + self.taxons + self.groups, + resp.json()["results"]) + + resp = self.make_request(exclude_taxids_in_groups=["group:positive"]) + self.assertItemsEqual( + [self.taxons[2]] +
self.groups, + resp.json()["results"]) + + resp = self.make_request(exclude_taxids_in_groups=["group:positive", + "group:negative"]) + self.assertItemsEqual( + self.groups, + resp.json()["results"]) + + +class TestAutocompleteNMissing(BaseAutocompleteTestCase): + + base_url = '/autocomplete_n_missing/' + + @staticmethod + def get_expected_response(n): + return [{"id": i, "text": i} for i in range(n)] + + def test_handles_include_plasmids(self): + included = ["1", "2", "plasmid:2", "plasmid:3"] + resp = self.make_request(included=included) + self.assertEqual(self.get_expected_response(2), + resp.json()["results"]) + + resp = self.make_request(included=included, include_plasmids=True) + self.assertEqual(self.get_expected_response(4), + resp.json()["results"]) + + def test_handles_groups(self): + resp = self.make_request(included=["group:positive"]) + self.assertEqual(self.get_expected_response(2), + resp.json()["results"]) + + resp = self.make_request(included=["group:positive", "group:negative"]) + self.assertEqual(self.get_expected_response(3), + resp.json()["results"]) diff --git a/testing/webapp/test_utils.py b/testing/webapp/test_utils.py new file mode 100644 index 000000000..54a588f74 --- /dev/null +++ b/testing/webapp/test_utils.py @@ -0,0 +1,133 @@ +from django.test import SimpleTestCase + +from webapp.views.utils import AccessionFieldHandler + + +class TestAccessionFieldHandler(SimpleTestCase): + + taxons = [('1', 'Klebsiella pneumoniae R6724_16313'), + ('2', 'Klebsiella pneumoniae R6726_16314'), + ('3', 'Klebsiella pneumoniae R6728_16315')] + + plasmids = [('plasmid:1', 'Klebsiella pneumoniae R6724_16313 plasmid'), + ('plasmid:2', 'Klebsiella pneumoniae R6726_16314 plasmid'), + ('plasmid:3', 'Klebsiella pneumoniae R6728_16315 plasmid')] + + groups = [('group:all', 'all'), + ('group:negative', 'negative'), + ('group:positive', 'positive')] + + def setUp(self): + self.handler = AccessionFieldHandler() + # Because we will not commit, we need to make all 
modification + # on the handler's database + self.db = self.handler.db + + def tearDown(self): + self.db.server.close() + + def add_plasmid_for_taxids(self, taxids): + plasmid_term_id = self.db.server.adaptor.execute_one( + "SELECT term_id FROM term WHERE name='plasmid'")[0] + for taxid in taxids: + self.db.server.adaptor.execute( + f"UPDATE bioentry_qualifier_value SET value=1 " + f"WHERE bioentry_id={taxid} AND term_id={plasmid_term_id};") + + def assertItemsEqual(self, expected, choices): + self.assertEqual(sorted(expected), sorted(choices)) + + def test_get_choices_handles_plasmids(self): + self.assertItemsEqual( + self.taxons + self.groups, + self.handler.get_choices()) + + self.add_plasmid_for_taxids([1, 3]) + self.assertItemsEqual( + self.taxons + self.groups + self.plasmids[::2], + self.handler.get_choices()) + + self.assertItemsEqual( + self.taxons + self.groups, + self.handler.get_choices(with_plasmids=False)) + + def test_get_choices_handles_groups(self): + self.assertItemsEqual( + self.taxons + self.groups, + self.handler.get_choices()) + + def test_get_choices_handles_taxid_exclusion(self): + exclude = [el[0] for el in self.taxons[1:]] + self.assertItemsEqual( + [self.taxons[0]], + self.handler.get_choices(exclude=exclude)) + + self.add_plasmid_for_taxids([1, 2, 3]) + self.assertItemsEqual(self.taxons + self.groups + self.plasmids, + self.handler.get_choices()) + + exclude = [self.taxons[-1][0]] + self.assertItemsEqual(self.taxons[:-1] + [self.groups[2]] + self.plasmids, + self.handler.get_choices(exclude=exclude)) + + self.assertItemsEqual(self.taxons[:-1] + [self.groups[2]], + self.handler.get_choices(exclude=exclude, + with_plasmids=False)) + + def test_get_choices_handles_plasmid_exclusion(self): + self.add_plasmid_for_taxids([1, 2, 3]) + + exclude = [self.plasmids[-1][0]] + self.assertItemsEqual(self.taxons + self.groups + self.plasmids[:-1], + self.handler.get_choices(exclude=exclude)) + + exclude = [el[0] for el in self.plasmids] + 
self.assertItemsEqual(self.taxons + self.groups, + self.handler.get_choices(exclude=exclude)) + + def test_get_choices_handles_group_exclusion(self): + exclude = [self.groups[1][0]] + self.assertItemsEqual(self.taxons[:-1] + [self.groups[2]], + self.handler.get_choices(exclude=exclude)) + + exclude.append(self.groups[2][0]) + self.assertItemsEqual([], + self.handler.get_choices(exclude=exclude)) + + def test_get_choices_handles_exclude_taxids_in_groups(self): + exclude = [self.groups[1][0]] + self.assertItemsEqual( + self.taxons[:-1] + self.groups, + self.handler.get_choices(exclude_taxids_in_groups=exclude)) + + exclude.append(self.groups[2][0]) + self.assertItemsEqual( + self.groups, + self.handler.get_choices(exclude_taxids_in_groups=exclude)) + + def test_extract_choices_returns_none_when_include_plasmids_is_false(self): + self.assertEqual( + ([1, 3], None), + self.handler.extract_choices(["1", "3"], False)) + + self.assertEqual( + ([1, 3], []), + self.handler.extract_choices(["1", "3"], True)) + + def test_extract_choices_handles_plasmids(self): + self.assertEqual( + ([2], [1, 3]), + self.handler.extract_choices(["plasmid:1", "2", "plasmid:3"], True)) + + self.assertEqual( + ([2], None), + self.handler.extract_choices(["plasmid:1", "2", "plasmid:3"], False)) + + def test_extract_choices_handles_groups(self): + self.assertEqual( + ([3], []), + self.handler.extract_choices(["group:negative"], True)) + + self.assertEqual( + ([2, 3], [1]), + self.handler.extract_choices(["plasmid:1", "2", "group:negative"], True)) diff --git a/testing/webapp/test_views.py b/testing/webapp/test_views.py index 228e2970f..49d879e73 100644 --- a/testing/webapp/test_views.py +++ b/testing/webapp/test_views.py @@ -24,6 +24,8 @@ urls = [ '/about', '/amr_comparison', + '/autocomplete_n_missing/', + '/autocomplete_taxid/', '/blast/', '/circos/', '/circos_main/', @@ -150,9 +152,17 @@ def test_all_urlpatterns_are_tested(self): "Some patterns are not covered in the tests: please add them to "
"untested_patterns or urls") - def test_all_views_render_valid_html(self): + def test_all_views_render_valid_html_or_json(self): for url in urls: resp = self.client.get(url) + if resp.get("Content-Type") == 'application/json': + try: + resp.json() + except Exception as exc: + print(f"\n\nInvalid json for {url}") + raise exc + else: + continue try: self.assertContains(resp, "", html=True) except Exception as exc: diff --git a/webapp/chlamdb/forms.py b/webapp/chlamdb/forms.py index fdd0d8d1e..2676a094b 100644 --- a/webapp/chlamdb/forms.py +++ b/webapp/chlamdb/forms.py @@ -7,14 +7,16 @@ from Bio.SeqRecord import SeqRecord from crispy_forms.helper import FormHelper from crispy_forms.layout import Column, Fieldset, Layout, Row, Submit +from dal import forward +from dal.autocomplete import ListSelect2, Select2Multiple from django import forms from django.core.exceptions import ValidationError from django.core.validators import MaxLengthValidator, MinLengthValidator -from views.utils import EntryIdParser +from views.utils import AccessionFieldHandler, EntryIdParser def get_accessions(db, all=False, plasmid=False): - result = db.get_genomes_description(lst_plasmids=plasmid) + result = db.get_genomes_description() accession_choices = [] index = 0 reverse_index = [] @@ -307,7 +309,8 @@ def get_ref_taxid(self): def make_extract_form(db, action, plasmid=False, label="Orthologs"): - accession_choices, rev_index = get_accessions(db, plasmid=plasmid) + + accession_choices = AccessionFieldHandler().get_choices() class ExtractForm(forms.Form): checkbox_accessions = forms.BooleanField( @@ -321,52 +324,43 @@ class ExtractForm(forms.Form): orthologs_in = forms.MultipleChoiceField( label=f"{label} conserved in", choices=accession_choices, - widget=forms.SelectMultiple(attrs={'size': '%s' % "17", - "class": "selectpicker", - "data-live-search": "true"}), + widget=Select2Multiple( + url="autocomplete_taxid", + forward=(forward.Field("no_orthologs_in", "exclude"), +
forward.Field("orthologs_in", "exclude_taxids_in_groups"), + forward.Field("checkbox_accessions", "include_plasmids")), + attrs={"data-close-on-select": "false", + "data-placeholder": "Nothing selected"}), required=True) no_orthologs_in = forms.MultipleChoiceField( label="%s absent from (optional)" % label, choices=accession_choices, - widget=forms.SelectMultiple(attrs={'size': '%s' % "17", - "class": "selectpicker remove-example", - "data-live-search": "true"}), + widget=Select2Multiple( + url="autocomplete_taxid", + forward=(forward.Field("orthologs_in", "exclude"), + forward.Field("no_orthologs_in", "exclude_taxids_in_groups"), + forward.Field("checkbox_accessions", "include_plasmids")), + attrs={"data-close-on-select": "false", + "data-placeholder": "Nothing selected"}), required=False) - new_choices = [['None', 'None']] + accession_choices - - frequency_choices = ((i, i) for i in range(len(accession_choices))) - frequency = forms.ChoiceField( - choices=frequency_choices, + _n_missing = forms.ChoiceField( label='Missing data (optional)', + choices=zip(range(len(accession_choices)), + range(len(accession_choices))), + widget=ListSelect2( + url="autocomplete_n_missing", + forward=(forward.Field("orthologs_in", "included"), + forward.Field("checkbox_accessions", "include_plasmids"))), required=False) - def extract_choices(self, indices): - keep_plasmids = self.cleaned_data["checkbox_accessions"] - taxids = [] - plasmids = None - if keep_plasmids: - plasmids = [] - - for index in indices: - taxid, is_plasmid = rev_index[index] - if keep_plasmids and is_plasmid: - plasmids.append(taxid) - elif is_plasmid: - continue - else: - taxids.append(taxid) - return taxids, plasmids + def extract_choices(self, indices, include_plasmids): + return AccessionFieldHandler().extract_choices( + indices, include_plasmids) def get_n_missing(self): - return int(self.cleaned_data["frequency"]) - - def get_include_choices(self): - return self.extract_choices((int(i) for i in 
self.cleaned_data["orthologs_in"])) - - def get_exclude_choices(self): - return self.extract_choices((int(i) for i in self.cleaned_data["no_orthologs_in"])) + return int(self.cleaned_data.get("_n_missing") or 0) def __init__(self, *args, **kwargs): self.helper = FormHelper() @@ -386,7 +380,7 @@ def __init__(self, *args, **kwargs): css_class='form-group col-lg-6 col-md-6 col-sm-12') ), Column( - Row('frequency'), + Row('_n_missing'), Submit('submit', 'Compare %s' % label, style="margin-top:15px"), @@ -401,8 +395,14 @@ def __init__(self, *args, **kwargs): def clean(self): cleaned_data = super(ExtractForm, self).clean() - self.included_taxids, self.included_plasmids = self.get_include_choices() - self.excluded_taxids, self.excluded_plasmids = self.get_exclude_choices() + + self.included_taxids, self.included_plasmids = self.extract_choices( + self.cleaned_data["orthologs_in"], + self.cleaned_data["checkbox_accessions"]) + self.excluded_taxids, self.excluded_plasmids = self.extract_choices( + self.cleaned_data["no_orthologs_in"], + self.cleaned_data["checkbox_accessions"]) + self.n_missing = self.get_n_missing() self.n_included = len(self.included_taxids) if self.included_plasmids is not None: @@ -411,7 +411,7 @@ def clean(self): err = ValidationError( "This must be smaller than the number of included genomes.", code="invalid") - self.add_error("frequency", err) + self.add_error("_n_missing", err) return cleaned_data return ExtractForm diff --git a/webapp/chlamdb/urls.py b/webapp/chlamdb/urls.py index 0ea75808d..31f9f4c52 100644 --- a/webapp/chlamdb/urls.py +++ b/webapp/chlamdb/urls.py @@ -2,8 +2,8 @@ from django.urls import re_path from django.views.generic import TemplateView from django.views.generic.base import RedirectView -from views import (custom_plots, entry_lists, fam, gwas, hits_extraction, - locus, tabular_comparison, venn, views) +from views import (autocomplete, custom_plots, entry_lists, fam, gwas, + hits_extraction, locus, tabular_comparison, venn, 
views) favicon_view = RedirectView.as_view(url='/assets/favicon.ico', permanent=True) @@ -73,7 +73,7 @@ re_path(r'^entry_list_ko$', entry_lists.KoEntryListView.as_view(), name="entry_list_ko"), # noqa re_path(r'^entry_list_cog$', entry_lists.CogEntryListView.as_view(), name="entry_list_cog"), # noqa re_path(r'^entry_list_amr$', entry_lists.AmrEntryListView.as_view(), name="entry_list_amr"), # noqa - re_path(r'^custom_plots/$', custom_plots.CusomPlotsView.as_view(), name="custom_plots"), + re_path(r'^custom_plots/$', custom_plots.CusomPlotsView.as_view(), name="custom_plots"), # noqa re_path(r'^cog_venn_subset/([A-Z])$', venn.VennCogSubsetView.as_view(), name="cog_venn_subset"), # noqa re_path(r'^cog_phylo_heatmap/([a-zA-Z0-9_\-]+)', views.CogPhyloHeatmap.as_view(), name="cog_phylo_heatmap"), # noqa re_path(r'^cog_comparison', tabular_comparison.CogComparisonView.as_view(), name="cog_comparison"), # noqa @@ -81,6 +81,8 @@ re_path(r'^circos_main/$', views.circos_main, name="circos_main"), re_path(r'^circos/$', views.circos, name="circos"), re_path(r'^blast/$', views.blast, name="blast"), + re_path(r'^autocomplete_taxid/$', autocomplete.AutocompleteTaxid.as_view(), name="autocomplete_taxid"), # noqa + re_path(r'^autocomplete_n_missing/$', autocomplete.AutocompleteNMissing.as_view(), name="autocomplete_n_missing"), # noqa re_path(r'^amr_comparison', tabular_comparison.AmrComparisonView.as_view(), name="amr_comparison"), # noqa re_path(r'^about$', views.about, name="about"), re_path(r'^.*$', views.home, name="home"), diff --git a/webapp/lib/db_utils.py b/webapp/lib/db_utils.py index d890f1395..e6cc2e105 100644 --- a/webapp/lib/db_utils.py +++ b/webapp/lib/db_utils.py @@ -1270,12 +1270,11 @@ def get_term_id(self, term, create_if_absent=False, ontology="Annotation Tags"): gc_term_id = result[0][0] return gc_term_id - def get_genomes_description(self, lst_plasmids=True): + def get_genomes_description(self): """ Returns the description of the genome as it has been read from 
the genbank files, indexed by taxon_id. The output also contains a flag - has_plasmid indicating whether the genome contains a plasmid or not, - if the lst_plasmid flag has been set. + has_plasmid indicating whether the genome contains a plasmid or not. """ has_plasmid_query = ( @@ -1488,6 +1487,13 @@ def get_taxids_for_groups(self, group_names): results = self.server.adaptor.execute_and_fetchall(query, group_names) return (el[0] for el in results) + def get_groups_containing_taxids(self, taxids): + plchd = self.gen_placeholder_string(taxids) + query = f"SELECT DISTINCT group_name FROM taxon_in_group "\ + f"WHERE taxon_id IN ({plchd});" + results = self.server.adaptor.execute_and_fetchall(query, taxids) + return (el[0] for el in results) + def load_genomes_info(self, data): sql = ( "CREATE TABLE genome_summary (taxon_id INTEGER, completeness FLOAT, " @@ -2538,7 +2544,7 @@ def get_amr_hit_counts(self, ids, indexing="taxid", search_on="taxid", ) plasmid_join = ( "INNER JOIN bioentry_qualifier_value AS is_plasmid ON " - " is_plasmid.bioentry_id=entry.bioentry_id " + " is_plasmid.bioentry_id=bioentry.bioentry_id " "INNER JOIN term AS plasmid_term ON plasmid_term.term_id=is_plasmid.term_id " " AND plasmid_term.name=\"plasmid\"" ) diff --git a/webapp/lib/queries.py b/webapp/lib/queries.py index d6df5d30f..d274fae3e 100644 --- a/webapp/lib/queries.py +++ b/webapp/lib/queries.py @@ -76,7 +76,7 @@ def get_hit_counts(self, ids, indexing="taxid", search_on="taxid", ) plasmid_join = ( "INNER JOIN bioentry_qualifier_value AS is_plasmid ON " - " is_plasmid.bioentry_id=entry.bioentry_id " + " is_plasmid.bioentry_id=bioentry.bioentry_id " "INNER JOIN term AS plasmid_term ON plasmid_term.term_id=is_plasmid.term_id " " AND plasmid_term.name=\"plasmid\"" ) @@ -93,7 +93,7 @@ def get_hit_counts(self, ids, indexing="taxid", search_on="taxid", ) all_ids = ids - if plasmids is not None: + if plasmids: all_ids += plasmids results = self.server.adaptor.execute_and_fetchall(query, all_ids) 
diff --git a/webapp/settings/settings.py b/webapp/settings/settings.py index 14edcd79f..00ab48bcf 100755 --- a/webapp/settings/settings.py +++ b/webapp/settings/settings.py @@ -41,6 +41,8 @@ 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', + 'dal', + 'dal_select2', 'chlamdb', 'gunicorn', 'templatetags', diff --git a/webapp/templates/chlamdb/extract_hits.html b/webapp/templates/chlamdb/extract_hits.html index 695562ed6..e7afeff90 100644 --- a/webapp/templates/chlamdb/extract_hits.html +++ b/webapp/templates/chlamdb/extract_hits.html @@ -97,7 +97,6 @@