Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix case sensitiveness on autocomplete endpoint #67

Merged
merged 2 commits into from
Jul 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ psycopg2-binary==2.8.5
requests==2.24.0
SQLAlchemy==1.3.17
Werkzeug==1.0.1
logzero==1.5.0
114 changes: 64 additions & 50 deletions views/autocomplete.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from logzero import logger

from views import *
from views.exceptions import PhenopolisException


@application.route('/<language>/autocomplete/<query_type>/<query>')
Expand All @@ -7,66 +10,77 @@
@application.route('/autocomplete/<query>')
@requires_auth
def autocomplete(query, query_type=''):
if query_type: query_type=query_type+':'
print(query_type, query)
patient_results=[]
gene_results=[]
hpo_results=[]
regex="%"+re.escape(query)+"%"
c=postgres_cursor()
if query_type in ['gene:','']:
c.execute("select * from genes where gene_name_upper like '%s' limit 20"%regex)
headers=[h[0] for h in c.description]
gene_hits=[dict(zip(headers,r)) for r in c.fetchall()]
if query_type=='gene:':
gene_results = [x['gene_name_upper'] for x in gene_hits]
else:
gene_results = ['gene:'+x['gene_name_upper'] for x in gene_hits]
if query_type in ['phenotype:','']:
c.execute("select * from hpo where hpo_name like '%s' limit 20"%regex)
headers=[h[0] for h in c.description]
hpo_hits=[dict(zip(headers,r)) for r in c.fetchall()]
if query_type=='phenotype:':
hpo_results = [x['hpo_name'] for x in hpo_hits]
else:
hpo_results = ['phenotype:'+x['hpo_name'] for x in hpo_hits]
if query_type in ['patient:','']:
c.execute("""

logger.debug("Autocomplete query '{}' and query type '{}'".format(query, query_type))

cursor = postgres_cursor()
regex_query = "%" + re.escape(query.upper()) + "%"
if query_type == 'gene':
results = _search_genes(cursor, regex_query)
elif query_type == 'phenotype':
results = _search_phenotypes(cursor, regex_query)
elif query_type == 'patient':
results = _search_patients(cursor, regex_query)
elif query_type == '':
results = ["gene:" + x for x in _search_genes(cursor, regex_query)] + \
["phenotype:" + x for x in _search_phenotypes(cursor, regex_query)] + \
["patient:" + x for x in _search_patients(cursor, regex_query)]
else:
message = "Autocomplete request with unsupported query type '{}'".format(query_type)
logger.error(message)
raise PhenopolisException(message)
cursor.close()

# removes possible duplicates and chooses 20 suggestions
suggestions = list(itertools.islice(list(set(results)), 0, 20))
return Response(json.dumps(suggestions), mimetype='application/json')


def _search_patients(cursor, query):
cursor.execute("""
select
i.external_id, i.internal_id
from individuals i,
users_individuals ui
where
ui.internal_id=i.internal_id
and
ui.user='%s'
ui.user='{user}'
and
i.internal_id like '%s' limit 20
"""%(session['user'],regex,))
patient_hits=[dict(zip([h[0] for h in c.description],r)) for r in c.fetchall()]
if query_type=='patient:':
patient_results = [x['internal_id'] for x in patient_hits]
else:
patient_results = ['patient:'+x['internal_id'] for x in patient_hits]
#c.execute('select * from variants where "#CHROM"=? and POS=? and REF=? and ALT=? limit 20',regex.split('-'))
#headers=[h[0] for h in c.description]
#variant_hits=[dict(zip(headers,r)) for r in c.fetchall()]
#variant_results = ['variant:'+x['variant_id'] for x in variant_hits]
results = list(set(patient_results+gene_results+hpo_results))
suggestions = list(itertools.islice(results, 0, 20))
return Response(json.dumps(suggestions), mimetype='application/json')
i.internal_id like '{query}' limit 20
""".format(user=session['user'], query=query))
patient_hits = [dict(zip([h[0] for h in cursor.description], r)) for r in cursor.fetchall()]
return [x['internal_id'] for x in patient_hits]

@application.route('/best_guess/<query>')
@requires_auth
def best_guess(query=''):
    """Map a free-text search string to the page it most likely refers to.

    The query is matched against a fixed set of leading tags; the first match
    produces a JSON body ``{"redirect": <path>}``. A query made of exactly four
    dash-separated parts is treated as a variant identifier. Anything else
    yields a JSON error message with HTTP status 420.

    :param query: search string taken from the URL
    :return: a ``jsonify`` response, or a ``(response, 420)`` tuple
    """
    print(query)

    # (accepted leading strings, tag removed from the query, redirect template)
    tagged_routes = (
        (('gene:',), 'gene:', '/gene/{}'),
        (('patient:', 'PH'), 'patient:', '/individual/{}'),
        (('phenotype:',), 'phenotype:', '/hpo/{}'),
        (('variant:',), 'variant:', '/variant/{}'),
    )
    for leading, tag, template in tagged_routes:
        if query.startswith(leading):
            identifier = query.replace(tag, '')
            return jsonify(redirect=template.format(identifier))

    # Three dashes means exactly four dash-separated parts.
    if query.count('-') == 3:
        identifier = query.replace('variant:', '')
        return jsonify(redirect='/variant/{}'.format(identifier))

    return jsonify(message='Could not find search query'), 420

def _search_phenotypes(cursor, query):
cursor.execute("select * from hpo where UPPER(hpo_name) like '{query}' limit 20".format(query=query))
headers = [h[0] for h in cursor.description]
hpo_hits = [dict(zip(headers, r)) for r in cursor.fetchall()]
return [x['hpo_name'] for x in hpo_hits]


def _search_genes(cursor, query):
cursor.execute("select * from genes where gene_name_upper like '{query}' or other_names like '{query}' limit 20".
format(query=query))
headers = [h[0] for h in cursor.description]
gene_hits = [dict(zip(headers, r)) for r in cursor.fetchall()]
# while the search is performed on the upper cased gene name, it returns the original gene name
return [x['gene_name'] for x in gene_hits]


@application.route('/best_guess/<query>')
@requires_auth
def best_guess(query=''):
    """Resolve a search string to the page it most likely refers to.

    Recognised forms, checked in this order:

    * ``gene:<name>``       -> ``/gene/<name>``
    * ``patient:<id>``      -> ``/individual/<id>``
    * ``phenotype:<name>``  -> ``/hpo/<name>``
    * ``variant:<id>``      -> ``/variant/<id>``
    * a string starting with ``PH`` -> ``/individual/<string>``
      (presumably the prefix of patient identifiers -- confirm)
    * exactly four dash-separated parts -> ``/variant/<string>``

    :param query: search string taken from the URL
    :return: JSON ``{"redirect": <path>}`` on a match, otherwise a JSON error
        message together with HTTP status 420
    """
    # Log through the module logger, as ``autocomplete`` does, instead of
    # printing to stdout.
    logger.debug("Best guess query '{}'".format(query))

    tagged_routes = (
        ('gene:', '/gene/{}'),
        ('patient:', '/individual/{}'),
        ('phenotype:', '/hpo/{}'),
        ('variant:', '/variant/{}'),
    )
    for tag, template in tagged_routes:
        if query.startswith(tag):
            # Strip only the leading tag. ``str.replace`` would also delete
            # any later occurrence of the tag inside the identifier itself.
            return jsonify(redirect=template.format(query[len(tag):]))

    if query.startswith('PH'):
        return jsonify(redirect='/individual/{}'.format(query))

    # Three dashes means exactly four dash-separated parts.
    if query.count('-') == 3:
        return jsonify(redirect='/variant/{}'.format(query))

    return jsonify(message='Could not find search query'), 420
5 changes: 5 additions & 0 deletions views/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@


class PhenopolisException(Exception):
    """Application-specific error type; adds no behaviour to ``Exception``."""