Skip to content

Commit

Permalink
Merge pull request #83 from metagenlab/nj/forms
Browse files Browse the repository at this point in the history
Display form validation errors next to the corresponding fields.
  • Loading branch information
njohner committed Apr 26, 2024
2 parents 929e3a8 + 47c3e27 commit cda062a
Show file tree
Hide file tree
Showing 14 changed files with 359 additions and 395 deletions.
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ to [Common Changelog](https://common-changelog.org)

### Changed

- Display form validation errors next to the corresponding fields. ([#83](https://github.com/metagenlab/zDB/pull/83)) (Niklaus Johner)
- Filter VF hits by SeqID and coverage and keep one hit per locus. ([#77](https://github.com/metagenlab/zDB/pull/77)) (Niklaus Johner)
- Improve layout for various views, making better use of available space. ([#70](https://github.com/metagenlab/zDB/pull/70)) (Niklaus Johner)
- Configure repository to publish the docs to [readthedocs](https://zdb.readthedocs.io)
Expand Down
5 changes: 2 additions & 3 deletions webapp/assets/css/style_FILE_zDB.css
Original file line number Diff line number Diff line change
Expand Up @@ -976,10 +976,9 @@ background: #81F7F3;
}

#id_blast_input {
width: 300px;
height: 200px;
word-wrap: break-word;
width: 100%;
}

#id_motif_input {
width: 300px;
height: 20px;
Expand Down
2 changes: 1 addition & 1 deletion webapp/assets/js/genomic_region.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// strand: either +1 or -1
// locus_tag
// highlight: a list of locus tag to highlight in red
function createGenomicRegion(div, div_width, svg_id, regions, connections, highlight, window_size, ident_range) {
function createGenomicRegion(div, div_width, svg_id, regions, connections, highlight, ident_range) {
const text_field_size = 40;
const margin = { top:5, right: 5, bottom:5, left:5 };
const max_arrow_size = 350;
Expand Down
203 changes: 170 additions & 33 deletions webapp/chlamdb/forms.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
# -*- coding: utf-8 -*-
from collections import namedtuple
from io import StringIO

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from crispy_forms.helper import FormHelper
from crispy_forms.layout import Column, Fieldset, Layout, Row, Submit
from django import forms
from django.core.exceptions import ValidationError
from django.core.validators import MaxLengthValidator
from django.core.validators import MaxLengthValidator, MinLengthValidator
from views.utils import EntryIdParser


def get_accessions(db, all=False, plasmid=False):
Expand Down Expand Up @@ -53,8 +60,11 @@ class PlotForm(forms.Form):
accession = forms.CharField(max_length=100,
required=True,
label=f"locus_tag (e.g. {locus})")
region_size = forms.CharField(max_length=5,
label="Region size (bp)", initial=8000, required=False)
region_size = forms.IntegerField(min_value=5000,
max_value=100000,
label="Region size (bp)",
initial=8000,
required=True)
genomes = forms.MultipleChoiceField(choices=accession_choices,
widget=forms.SelectMultiple(attrs={
'size': '1',
Expand Down Expand Up @@ -90,8 +100,18 @@ def __init__(self, *args, **kwargs):
css_class="col-lg-8 col-md-8 col-sm-12")
)
)

def get_accession(self):
self.db = db

def clean_accession(self):
    """Validate the submitted locus tag and swap it for its sequence id.

    The value of the ``accession`` field is looked up as a locus tag via
    ``db.get_proteins_info``.

    Returns:
        int: the first index label of the lookup result (the sequence id,
        per the original author's note).

    Raises:
        ValidationError: if the lookup returns an empty frame.
    """
    locus_tag = self.cleaned_data["accession"]
    matches = db.get_proteins_info(
        ids=[locus_tag],
        search_on="locus_tag",
        as_df=True,
    )
    if not matches.empty:
        # The returned value replaces the locus tag as the cleaned
        # "accession" value.
        return int(matches.index[0])
    raise ValidationError("Accession not found", code="invalid")

def get_seqid(self):
    """Return the cleaned value stored under the ``accession`` field."""
    seqid = self.cleaned_data["accession"]
    return seqid

def get_all_homologs(self):
Expand All @@ -111,7 +131,7 @@ def get_genomes(self):
return PlotForm


def make_metabo_from(db, add_box=False, type_choices=None):
def make_metabo_from(db, type_choices=None):

accession_choices, rev_index = get_accessions(db)

Expand All @@ -124,13 +144,9 @@ class MetaboForm(forms.Form):
"data-live-search": "true",
"multiple data-actions-box": "true"}
),
required=False
required=True
)

if add_box:
input_box = forms.CharField(
widget=forms.Textarea(attrs={'cols': 10, 'rows': 10}))

if type_choices:
comp_type = forms.ChoiceField(
choices=type_choices,
Expand All @@ -144,8 +160,6 @@ def __init__(self, *args, **kwargs):
self.helper.label_class = 'col-lg-1 col-md-6 col-sm-6'
self.helper.field_class = 'col-lg-4 col-md-6 col-sm-6'
rows = [Row('targets')]
if add_box:
rows.append(Row('input_box'))
if type_choices:
rows.append(Row('comp_type'))

Expand Down Expand Up @@ -173,7 +187,7 @@ def get_choices(self):


def make_venn_from(db, plasmid=False, label="Orthologs", limit=None,
action=""):
limit_type="upper", action=""):

accession_choices, rev_index = get_accessions(db, plasmid=plasmid)

Expand All @@ -183,13 +197,20 @@ class VennForm(forms.Form):
"data-actions-box": "true"}
help_text = ""
targets_validators = []
if limit is not None:
if limit is not None and limit_type == "upper":
attrs["data-max-options"] = f"{limit}"
help_text = f"Select a maximum of {limit} genomes"
targets_validators.append(
MaxLengthValidator(
limit,
message=f"Select a maximum of {limit} genomes")
message=f"Please select at most {limit} genomes")
)
elif limit is not None and limit_type == "lower":
help_text = f"Select a minimum of {limit} genomes"
targets_validators.append(
MinLengthValidator(
limit,
message=f"Please select at least {limit} genomes")
)

targets = forms.MultipleChoiceField(
Expand Down Expand Up @@ -377,6 +398,22 @@ def __init__(self, *args, **kwargs):
)

super(ExtractForm, self).__init__(*args, **kwargs)

def clean(self):
    """Run form-wide validation and cache the include/exclude selections.

    Stores the included/excluded taxids and plasmids, the number of
    allowed missing genomes (``n_missing``) and the number of included
    genomes (``n_included``) on the form. Attaches an error to the
    ``frequency`` field when ``n_missing`` is not strictly smaller than
    ``n_included``.

    Returns:
        The cleaned data returned by the parent ``clean()``.
    """
    cleaned_data = super(ExtractForm, self).clean()

    included = self.get_include_choices()
    self.included_taxids, self.included_plasmids = included
    excluded = self.get_exclude_choices()
    self.excluded_taxids, self.excluded_plasmids = excluded
    self.n_missing = self.get_n_missing()

    # Plasmids only count towards the total when a plasmid selection exists.
    self.n_included = len(self.included_taxids)
    if self.included_plasmids is not None:
        self.n_included = self.n_included + len(self.included_plasmids)

    if self.n_missing < self.n_included:
        return cleaned_data

    self.add_error(
        "frequency",
        ValidationError(
            "This must be smaller than the number of included genomes.",
            code="invalid"),
    )
    return cleaned_data

return ExtractForm


Expand Down Expand Up @@ -453,24 +490,82 @@ class BlastForm(forms.Form):
"data-live-search": "true"})
)

input_help = "This can be either an amino-acid or a nucleotide "\
"sequence, or a set (one or more) of fasta sequences."
blast_input = forms.CharField(
widget=forms.Textarea(attrs={'cols': 50, 'rows': 5}))
widget=forms.Textarea(attrs={"placeholder": input_help, "rows": 10}))

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.helper = FormHelper()
self.helper.form_method = 'post'
self.helper.label_class = 'col-lg-4 col-md-6 col-sm-6'
self.helper.field_class = 'col-lg-6 col-md-6 col-sm-6'
self.helper.layout = Layout(
Fieldset(
Row("BLAST"),
Row('target'),
Row('blast_input'),
css_class="col-lg-5 col-md-6 col-sm-6")
"",
Row(
Column("blast", css_class='col-lg-4 col-md-4 col-sm-12'),
Column("max_number_of_hits", css_class='col-lg-4 col-md-4 col-sm-12'),
Column('target', css_class='col-lg-4 col-md-4 col-sm-12'),
),
Row(Column('blast_input', css_class='col-lg-12 col-md-12 col-sm-12')),
Submit('submit', 'Submit',
style="padding-left:15px; margin-top:1em; margin-bottom:15px "),
css_class="col-lg-10 col-md-10 col-sm-12")
)
super(BlastForm, self).__init__(*args, **kwargs)

def _get_records(self):
    """Parse the ``blast_input`` field into a list of sequence records.

    Input containing ``>`` is parsed as FASTA. Anything else is treated as
    a single bare sequence: whitespace is removed, the text is upper-cased
    and wrapped in an unnamed ``SeqRecord``. ``self.no_query_name`` is set
    to ``True`` for the bare-sequence case and ``False`` for FASTA.

    Returns:
        list: the parsed records (never empty).

    Raises:
        ValidationError: if the FASTA text cannot be parsed or yields no
            record, or if any parsed record has an empty sequence.
    """
    input_sequence = self.cleaned_data['blast_input']

    if '>' not in input_sequence:
        self.no_query_name = True
        input_sequence = "".join(input_sequence.split()).upper()
        return [SeqRecord(Seq(input_sequence))]

    self.no_query_name = False
    # Only the parsing itself is guarded: the empty-sequence error raised
    # further down must reach the user unchanged instead of being caught
    # here and re-labelled as a parsing failure.
    try:
        records = list(SeqIO.parse(StringIO(input_sequence), 'fasta'))
    except Exception as err:
        raise ValidationError(
            "Error while parsing the fasta query", code="invalid") from err

    if not records:
        # A '>' somewhere in the text but no parsable record: report it
        # rather than passing an empty query on.
        raise ValidationError(
            "Error while parsing the fasta query", code="invalid")

    if any(len(record.seq) == 0 for record in records):
        raise ValidationError("Empty sequence in input", code="invalid")
    return records

def _check_sequence_contents(self, records):
    """Reject queries whose characters do not fit the selected BLAST program.

    The upper-cased characters of all records are pooled and compared with
    the amino-acid alphabet for ``blastp``/``tblastn``, or with the
    nucleotide alphabet for the nucleotide-query programs. Any other (or
    missing) program name is left unchecked.

    Args:
        records: iterable of objects exposing ``.seq`` (e.g. ``SeqRecord``).

    Raises:
        ValidationError: listing the unexpected characters, sorted so the
            message is stable between requests.
    """
    dna = set("ATGCNRYKMSWBDHV")
    prot = set('ACDEFGHIKLMNPQRSTVWYXZJOU')

    sequence_set = set()
    for rec in records:
        sequence_set.update(rec.seq.upper())

    # Inside a field-level clean, "blast" is absent from cleaned_data when
    # that field failed its own validation; .get avoids a KeyError there.
    blast_type = self.cleaned_data.get("blast")
    if blast_type in ("blastp", "tblastn"):
        unexpected = sequence_set - prot
        query_kind = "amino-acid"
    # NOTE(review): the original list had "blast_fna", which looks like a
    # typo for "blastn_fna" (cf. "blastn_ffn"). Both are accepted here;
    # confirm against the choices of the "blast" field.
    elif blast_type in ("blastn", "blastn_ffn", "blastn_fna",
                        "blast_fna", "blastx"):
        unexpected = sequence_set - dna
        query_kind = "nucleotide"
    else:
        return

    if unexpected:
        wrong_chars = ", ".join(sorted(unexpected))
        plural = "s" if len(unexpected) > 1 else ""
        errmsg = (f"Unexpected character{plural}"
                  f" in {query_kind} query: {wrong_chars}")
        raise ValidationError(errmsg, code="invalid")

def clean_blast_input(self):
    """Validate the ``blast_input`` field.

    Parses the input into records (kept on ``self.records``), checks their
    characters against the selected BLAST program, and returns the field's
    cleaned text unchanged.
    """
    parsed = self._get_records()
    self.records = parsed
    self._check_sequence_contents(parsed)
    return self.cleaned_data['blast_input']

def get_target(self):
target = self.cleaned_data["target"]
if target == "all":
Expand All @@ -485,6 +580,7 @@ def get_groups(db):


def make_gwas_form(biodb):
import pandas as pd

group_choices = get_groups(biodb)

Expand Down Expand Up @@ -540,18 +636,50 @@ def __init__(self, *args, **kwargs):
"margin-bottom:15px "),
css_class="col-lg-5 col-md-6 col-sm-6")
)
self.db = biodb
super(GwasForm, self).__init__(*args, **kwargs)

def clean(self):
cleaned_data = super(GwasForm, self).clean()
self.phenotype_file_or_groups()
self.phenotype = self.get_phenotype()
return cleaned_data

def phenotype_file_or_groups(self):
def get_phenotype(self):
has_groups = bool(self.cleaned_data["groups"])
has_file = bool(self.cleaned_data["phenotype_file"])
if (has_groups and has_file) or not (has_groups or has_file):
raise ValidationError('You have to provide either "Groups" or "Phenotype file" but not both.')
msg = 'You have to provide either "Groups" or '\
'"Phenotype file" but not both.'
raise ValidationError(msg, code="invalid")

genomes = self.db.get_genomes_description()
if self.cleaned_data["phenotype_file"]:
phenotype = pd.read_csv(self.cleaned_data["phenotype_file"],
header=None, names=["taxids", "trait"])
phenotype["trait"] = phenotype["trait"].astype(bool)
if all(phenotype.taxids.isin(genomes.index)):
return phenotype
elif all(phenotype.taxids.isin(genomes.description)):
mapping = {genome.description: taxid
for taxid, genome in genomes.iterrows()}
phenotype.taxids = phenotype.taxids.apply(lambda x: mapping[x])
else:
err = ValidationError(
"File could not be parsed and matched to genomes.",
code="invalid")
self.add_error("phenotype_file", err)
return phenotype
else:
taxids_with_phenotype = set(self.db.get_taxids_for_groups(
self.cleaned_data["groups"]))
if not set(genomes.index).difference(taxids_with_phenotype):
err = ValidationError(
"Your selection is invalid as it contains all genomes.",
code="invalid")
self.add_error("groups", err)
phenotype = [[taxid, 1 if taxid in taxids_with_phenotype else 0]
for taxid in genomes.index]
return pd.DataFrame(phenotype, columns=["taxids", "trait"])

return GwasForm

Expand All @@ -567,8 +695,11 @@ class CustomPlotsForm(forms.Form):
widget=forms.Textarea(attrs={'cols': 50, 'rows': 5}),
required=True, label="Entry IDs", help_text=help_text)

def __init__(self, *args, **kwargs):
Entry = namedtuple("Entry", "id label type")

def __init__(self, db, *args, **kwargs):
super().__init__(*args, **kwargs)
self.db = db
self.helper = FormHelper()

self.helper.form_method = 'post'
Expand All @@ -577,22 +708,28 @@ def __init__(self, *args, **kwargs):
Column(
Row(Column(
"entries",
css_class='form-group col-lg-6 col-md-6 col-sm-12'),
css_class='form-group col-lg-12 col-md-12 col-sm-12'),
),
Row(Submit('submit', 'Make plot'),
css_class='form-group col-lg-12 col-md-12 col-sm-12'),
css_class="col-lg-8 col-md-8 col-sm-12")
)
)

def get_entries(self):
def clean_entries(self):
raw_entries = self.cleaned_data["entries"].split(",")
parser = EntryIdParser(self.db)
entries = []
entry2label = {}
for entry in raw_entries:
entry = entry.strip()
if ":" in entry:
entry, label = entry.split(":", 1)
entry2label[entry] = label
entries.append(entry)
return entries, entry2label
else:
label = entry
try:
object_type, entry_id = parser.id_to_object_type(entry)
except Exception:
raise ValidationError(f'Invalid identifier "{entry}".',
code="invalid")
entries.append(self.Entry(entry_id, label, object_type))
return entries
22 changes: 22 additions & 0 deletions webapp/lib/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2631,6 +2631,28 @@ def get_number_of_ko_entries(self):
query = "SELECT COUNT(*) FROM (SELECT DISTINCT ko_id FROM ko_hits)"
return self.server.adaptor.execute_and_fetchall(query)[0][0]

def check_entry_existence(self, entry_id, entry_col, table):
    """Return whether ``table`` has a row where ``entry_col`` equals ``entry_id``.

    ``entry_id`` may come from user input, so it is bound as a query
    parameter instead of being interpolated into the SQL text. ``table``
    and ``entry_col`` are identifiers and cannot be bound: callers must
    pass trusted constants for them.

    Args:
        entry_id: value to look for.
        entry_col: name of the column to compare against.
        table: name of the table to search.

    Returns:
        bool: truthiness of what the adaptor's ``execute_one`` returns.
    """
    query = (f"SELECT 1 FROM {table} "
             f"WHERE {entry_col}={self.placeholder} LIMIT 1")
    return bool(self.server.adaptor.execute_one(query, [entry_id]))

def check_og_entry_id(self, entry_id):
    """Check ``entry_id`` against column ``orthogroup`` of table ``og_hits``."""
    column, table = "orthogroup", "og_hits"
    return self.check_entry_existence(entry_id, column, table)

def check_ko_entry_id(self, entry_id):
    """Check ``entry_id`` against column ``ko_id`` of table ``ko_def``."""
    column, table = "ko_id", "ko_def"
    return self.check_entry_existence(entry_id, column, table)

def check_cog_entry_id(self, entry_id):
    """Check ``entry_id`` against column ``cog_id`` of table ``cog_names``."""
    column, table = "cog_id", "cog_names"
    return self.check_entry_existence(entry_id, column, table)

def check_pfam_entry_id(self, entry_id):
    """Check ``entry_id`` against column ``pfam_id`` of table ``pfam_table``."""
    column, table = "pfam_id", "pfam_table"
    return self.check_entry_existence(entry_id, column, table)

def check_vf_entry_id(self, entry_id):
    """Check ``entry_id`` against column ``vf_gene_id`` of table ``vf_defs``."""
    column, table = "vf_gene_id", "vf_defs"
    return self.check_entry_existence(entry_id, column, table)

def check_amr_entry_id(self, entry_id):
    """Check ``entry_id`` against column ``gene`` of table ``amr_hits``."""
    column, table = "gene", "amr_hits"
    return self.check_entry_existence(entry_id, column, table)

def gen_placeholder_string(self, args):
return ",".join(self.placeholder for _ in args)

Expand Down
Loading

0 comments on commit cda062a

Please sign in to comment.