# -*- coding: utf-8 -*-
import os
import re
import shutil
import subprocess
import shlex
import magic
import math
import sqlite3
import collections
import cairosvg
import cherrypy as cp
# import webbrowser as wb
from cherrypy.lib.static import serve_file
from genshi.template import TemplateLoader
# set file size limits for upload
MB = 1024 ** 2
MAX_PEPFILE_SIZE = MB * 500
MAX_PROTFILE_SIZE = MB * 50
MAX_FASTA_SIZE = MB * 100
MAX_UPLOAD_SIZE = MAX_PEPFILE_SIZE + MAX_PROTFILE_SIZE + MAX_FASTA_SIZE
# set file paths and suffixes
EXE_PATH = os.path.dirname(os.path.abspath(__file__))
DBSCHEMA_PATH = os.path.join(EXE_PATH, 'dbschema')
PARSER_PATH = os.path.join(EXE_PATH, 'parsers')
DBFILE_SFX = 'sqlite'
DATFILE_SFX = 'dat'
FASTA_SFX = ('fa', 'fasta', 'FASTA')
# URL = 'http://{host}:{port}/'.format(host=cp.server.socket_host,
# port=cp.server.socket_port)
loader = TemplateLoader(os.path.join(os.path.dirname(__file__), 'templates'),
auto_reload=True)
__author__ = 'Arnold Kuzniar'
__version__ = '1.0.1'
class Piqmie(object):
"""
This is the base application.
"""
sql_fetch_expnames = "SELECT DISTINCT(exp_name) FROM EXPERIMENT"
sql_fetch_nratios_pval = """
SELECT
COUNT(DISTINCT CASE WHEN quant_type LIKE '%NORMALIZED' THEN quant_type END) AS n_ratios,
CASE
WHEN COUNT(CASE WHEN quant_type LIKE '%SIGNIFICANCE B' AND quant_value IS NOT NULL THEN quant_type END) > 0 THEN 1 ELSE 0
END AS has_pvalue
FROM PGROUP_QUANT
"""
def check_content_length():
if cp.request.method == 'POST':
if int(cp.request.headers['Content-Length']) >= MAX_UPLOAD_SIZE:
raise cp.HTTPError(400, "Uploaded data exceeded the size limit of %d bytes!" % MAX_UPLOAD_SIZE)
cp.tools.check_content_length = cp.Tool('on_start_resource', check_content_length)
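# The tool above hooks 'on_start_resource', i.e. it runs before the request
# body is processed, so oversized uploads are rejected from the Content-Length
# header alone; upload() re-checks the size while streaming as a second guard.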
def _getSessionDir(self, session_id):
return os.path.join(cp.session.storage_path, session_id)
def _getFastaFile(self, session_dir):
for fname in os.listdir(session_dir):
if fname.endswith(FASTA_SFX):
return os.path.join(session_dir, fname)
def _getDbfile(self, session_id):
session_dir = self._getSessionDir(session_id)
try:
for fname in os.listdir(session_dir):
if fname.endswith(DBFILE_SFX):
return os.path.join(session_dir, fname)
except Exception as e:
cp.log(e)
raise cp.HTTPError(400, "Requested JobID: {0} was not found.".format(session_id))
def _fetchRatioTypes(self, exp_states):
ratio_types = []
if exp_states == 2: # for duplex SILAC
ratio_types.append('H/L')
ratio_types.append('L/H')
elif exp_states == 3: # for triplex SILAC
ratio_types.append('H/L')
ratio_types.append('L/H')
ratio_types.append('H/M')
ratio_types.append('M/H')
ratio_types.append('M/L')
ratio_types.append('L/M')
else:
# shutil.rmtree(session_dir)
raise cp.HTTPError(400, "No SILAC protein ratios found!")
return ratio_types
@cp.expose
def index(self):
"""
Serve HTML home page.
"""
tmpl = loader.load('home.html')
return tmpl.generate(max_pepfile_size=(MAX_PEPFILE_SIZE / MB),
max_protfile_size=(MAX_PROTFILE_SIZE / MB),
max_fasta_size=(MAX_FASTA_SIZE / MB))\
.render('html', doctype='html5')
@cp.expose
def results(self, session_id):
"""
Show post-processed MaxQuant data in an HTML page.
"""
db_file = self._getDbfile(session_id)
with sqlite3.connect(db_file) as conn:
cur = conn.cursor()
# cur.execute(Piqmie.sql_fetch_expnames)
# exp_names = [str(r[0]) for r in cur.fetchall()]
cur.execute(Piqmie.sql_fetch_nratios_pval)
(n_ratios, has_pvalue) = cur.fetchone()
cur.close()
if n_ratios == 1: # duplex SILAC
exp_states = n_ratios + 1
else: # triplex SILAC
exp_states = n_ratios
# generate HTML from template
tmpl = loader.load('results.html')
return tmpl.generate(session_id=session_id,
exp_states=exp_states,
db_file=os.path.basename(db_file))\
.render('html', doctype='html5')
@cp.expose
@cp.tools.check_content_length()
def upload(self, **kw):
"""
Upload MaxQuant result files to the server and Extract-Transform-Load (ETL)
them into a database.
"""
dts_name = None
dts_name_key = 'dts_name'
pep_file = None
pep_file_key = 'pep_file'
prot_file = None
prot_file_key = 'prot_file'
fasta_file = None
fasta_file_key = 'fasta_file'
# by default, the session ID is passed in a cookie,
# so cookies must be enabled in the client’s browser
session_id = cp.session.id
session_dir = self._getSessionDir(session_id)
# server-side error handling for user input
if dts_name_key in kw and kw[dts_name_key]:
dts_name = kw[dts_name_key].strip().replace(' ', '_')
if pep_file_key in kw and kw[pep_file_key]:
pep_file = kw[pep_file_key].filename
if prot_file_key in kw and kw[prot_file_key]:
prot_file = kw[prot_file_key].filename
if fasta_file_key in kw and kw[fasta_file_key]:
fasta_file = kw[fasta_file_key].filename
# return error messages for incorrect/incomplete input data
if not dts_name:
raise cp.HTTPError(400, "Provide a short description of your experiment.")
if not pep_file or pep_file != 'evidence.txt':
raise cp.HTTPError(400, "Provide the peptide IDs/quantitations file 'evidence.txt'.")
if not prot_file or prot_file != 'proteinGroups.txt':
raise cp.HTTPError(400, "Provide the protein IDs/quantitations file 'proteinGroups.txt'.")
if not fasta_file or not fasta_file.endswith(FASTA_SFX):
raise cp.HTTPError(400, "Provide the FASTA sequence library (.fa|.fasta|.FASTA) used for the MaxQuant/Andromeda search.")
# populate a session directory for storing input/output files
os.mkdir(session_dir)
# copy input files to the server
data_size = 0
for fobj in [kw[prot_file_key], kw[pep_file_key], kw[fasta_file_key]]:
fname = fobj.filename
filepath = os.path.join(session_dir, fname)
with open(filepath, 'w+') as fh:
for line in fobj.file: # read data from input stream
line = line.rstrip('\r\n') + '\n' # change to Unix LF
data_size += len(line)
# set size limit on uploaded data
if data_size > MAX_UPLOAD_SIZE:
shutil.rmtree(session_dir)
raise cp.HTTPError(400, "Uploaded data exceeded the size limit of {0} bytes!".format(MAX_UPLOAD_SIZE))
else:
fh.write(line)
# check the size and type of uploaded file
file_size = os.path.getsize(filepath)
if file_size == 0: # file is empty
shutil.rmtree(session_dir)
raise cp.HTTPError(400, "Uploaded '{0}' file is empty!".format(fname))
elif fname == pep_file and file_size > MAX_PEPFILE_SIZE:
shutil.rmtree(session_dir)
raise cp.HTTPError(400, "Uploaded peptide list '{0}' is too large!".format(fname))
elif fname == prot_file and file_size > MAX_PROTFILE_SIZE:
shutil.rmtree(session_dir)
raise cp.HTTPError(400, "Uploaded protein list '{0}' is too large!".format(fname))
elif fname == fasta_file and file_size > MAX_FASTA_SIZE:
shutil.rmtree(session_dir)
raise cp.HTTPError(400, "Uploaded FASTA sequence library '{0}' is too large!".format(fname))
elif magic.from_file(filepath)[:5] != 'ASCII':
shutil.rmtree(session_dir)
raise cp.HTTPError(400, "Uploaded file '{0}' is not text/ASCII!".format(fname))
else:
pass
cp.log("Uploading dataset '{0}' completed for session '{1}'.".format(dts_name, session_id))
# parse FASTA sequence file
self._parseFastaFile(session_id)
cp.log("Parsing FASTA file completed for session '{0}'.".format(session_id))
# parse MaxQuant files
self._parseProteinList(session_id)
cp.log("Parsing protein list completed for session '{0}.".format(session_id))
self._parsePeptideList(session_id)
cp.log("Parsing peptide list completed for session '{0}'.".format(session_id))
# create SQLite database
self._createSQLiteDb(dts_name, session_id)
cp.log("Processing/uploading data into database completed for session '{0}'.".format(session_id))
# expire the current session cookie
# cp.lib.sessions.expire()
# return session (job) ID
return session_id
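# A hypothetical client call matching the form fields above (host and port
# are examples only):
#
#   curl -F 'dts_name=my_experiment' \
#        -F 'pep_file=@evidence.txt' \
#        -F 'prot_file=@proteinGroups.txt' \
#        -F 'fasta_file=@library.fasta' \
#        http://localhost:8080/upload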
def _parsePeptideList(self, session_id):
"""
Method to parse MaxQuant output file 'evidence.txt'.
"""
session_dir = self._getSessionDir(session_id)
cmd = os.path.join(PARSER_PATH, "parse_evidence.pl {0}"
.format(session_dir))
args = shlex.split(str(cmd))
proc = subprocess.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(stdout, stderr) = proc.communicate()
if proc.wait() != 0: # parsing error
# shutil.rmtree(session_dir)
raise cp.HTTPError(400, stderr)
def _parseProteinList(self, session_id):
"""
Method to parse MaxQuant output file 'proteinGroups.txt'.
"""
session_dir = self._getSessionDir(session_id)
cmd = os.path.join(
PARSER_PATH, "parse_proteinGroups.pl {0}".format(session_dir))
args = shlex.split(str(cmd))
proc = subprocess.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(stdout, stderr) = proc.communicate()
if proc.wait() != 0: # parsing error
# shutil.rmtree(session_dir)
raise cp.HTTPError(400, stderr)
def _parseFastaFile(self, session_id):
"""
Method to parse FASTA sequence file.
"""
session_dir = self._getSessionDir(session_id)
fasta_file = self._getFastaFile(session_dir)
cmd = os.path.join(PARSER_PATH, "parse_fasta.pl {0}".format(
os.path.join(session_dir, fasta_file)))
args = shlex.split(str(cmd))
proc = subprocess.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(stdout, stderr) = proc.communicate()
if proc.wait() != 0: # parsing error
# shutil.rmtree(session_dir)
raise cp.HTTPError(400, stderr)
def _createSQLiteDb(self, dts_name, session_id):
"""
Populate a database from parsed MaxQuant files.
"""
session_dir = self._getSessionDir(session_id)
db_file = os.path.join(session_dir, '{0}.{1}'.format(dts_name, DBFILE_SFX))
sql_schema_file = os.path.join(DBSCHEMA_PATH, 'create_schema.sql')
sql_index_file = os.path.join(DBSCHEMA_PATH, 'create_indices.sql')
log_file = os.path.join(session_dir, 'LOG')
sql_create_tables = None
sql_create_indices = None
sql_update_tables = None
tables = []
sql_check_integrity_expnames = """
SELECT exp_name FROM EXPERIMENT
EXCEPT
SELECT exp_name FROM PGROUP_QUANT
"""
sql_check_integrity_mappings = """
SELECT
n_prot_acc,
n_prot_acc_link_pgrp_ids,
n_prot_acc_unlink_pgrp_ids,
ROUND(100.0 * n_prot_acc_unlink_pgrp_ids / n_prot_acc_link_pgrp_ids, 2) AS perc_prot_unlink_grps,
n_prot_acc_link_pep_ids,
n_prot_acc_unlink_pep_ids,
ROUND(100.0 * n_prot_acc_unlink_pep_ids / n_prot_acc_link_pep_ids, 2) AS perc_prot_unlink_peps
FROM V_UNMAPPED_PROTEIN_STAT
"""
sql_get_tables = "SELECT name FROM sqlite_master WHERE type = 'table'"
# populate db schema from an SQL script
if os.path.getsize(sql_schema_file) == 0:
raise cp.HTTPError(500, "The SQL batch script '{0}' is empty!"
.format(sql_schema_file))
with open(sql_schema_file) as fin:
lines = fin.readlines()
sql_create_tables = ''.join(lines)
with sqlite3.connect(db_file) as conn: # connect to db
cur = conn.cursor()
cur.executescript(sql_create_tables) # execute multiple statements
cur.execute(sql_get_tables) # fetch table names
tables = cur.fetchall() # store table names
# import data into db tables
sep = '\\t' # record separator
meta_stmt = "PRAGMA synchronous = OFF;\n"
meta_stmt += "PRAGMA journal_mode = OFF;\n"
meta_stmt += "PRAGMA cache_size = 10000;\n"
meta_stmt += ".separator {0}\n".format(sep)
sql_import_file = os.path.join(session_dir, 'import_data.sql')
for t in tables:
tname = t[0]
meta_stmt += ".import '{0}.{1}' {2}\n".format(
os.path.join(session_dir, tname), DATFILE_SFX, tname)
with open(sql_import_file, 'w') as fout: # write meta-commands
fout.write(meta_stmt)
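# For illustration, the generated import_data.sql holds sqlite3 meta-commands
# of the form (one .import line per table):
#   PRAGMA synchronous = OFF;
#   .separator \t
#   .import '<session_dir>/PEPTIDE.dat' PEPTIDE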
cmd = "sqlite3 {0} < {1}".format(db_file, sql_import_file)
if os.system(cmd) != 0: # sqlite error
# shutil.rmtree(session_dir)
raise cp.HTTPError(500, "Failed to import data into the database!")
# create table indices: source create index statements from *.sql file
with open(sql_index_file) as fin: # read SQL statements into a string
lines = fin.readlines()
sql_create_indices = ''.join(lines)
cur.executescript(sql_create_indices) # execute multiple statements
# update tables and check their integrity:
# replace '\N' values with NULL => this will raise IntegrityError
# on mandatory columns (see parse_*.pl scripts for details)
sql_update_tables = """
BEGIN;
UPDATE PEPTIDE SET raw_file = NULLIF(raw_file, '{cur}'),
seq = NULLIF(seq, '{cur}'),
mods = NULLIF(mods, '{cur}'),
charge = NULLIF(charge, '{cur}'),
mass = NULLIF(mass, '{cur}'),
retime = NULLIF(retime, '{cur}'),
pep_score = NULLIF(pep_score, '{cur}'),
res_fwhm = NULLIF(res_fwhm, '{cur}'),
is_decoy = NULLIF(is_decoy, '{cur}'),
is_cont = NULLIF(is_cont, '{cur}');
UPDATE PEPTIDE_QUANT SET exp_name = REPLACE(exp_name, '{cur}', '{new}'),
quant_type = NULLIF(quant_type, '{cur}'),
quant_value = NULLIF(quant_value, '{cur}');
UPDATE PROTEIN SET id = NULLIF(id, '{cur}'),
evidence = NULLIF(evidence, '{cur}'),
gene = NULLIF(gene, '{cur}'),
db = NULLIF(db, '{cur}'),
des = NULLIF(des, '{cur}'),
org = NULLIF(org, '{cur}'),
seq = NULLIF(seq, '{cur}');
UPDATE PGROUP SET pep_score = NULLIF(pep_score, '{cur}'),
id_by_site = NULLIF(id_by_site, '{cur}'),
is_decoy = NULLIF(is_decoy, '{cur}'),
is_cont = NULLIF(is_cont, '{cur}');
UPDATE PGROUP_QUANT SET exp_name = REPLACE(exp_name, '{cur}', '{new}'),
quant_type = NULLIF(quant_type, '{cur}'),
quant_value = NULLIF(quant_value, '{cur}');
UPDATE EXPERIMENT SET exp_name = REPLACE(exp_name, '{cur}', '{new}');
END;
""".format(cur='\N', new=dts_name)
try:
cur.executescript(sql_update_tables)
except sqlite3.IntegrityError as e:
cp.log(e)
raise cp.HTTPError(400, "Data integrity error: values missing in table.column(s).")
# check the integrity of peptide/protein mappings
cur.execute(sql_check_integrity_mappings)
row = cur.fetchone()
cutoff_unmapped = 10 # max. percent of unmapped proteins
if (row[3] > cutoff_unmapped):
raise cp.HTTPError(400, "Data integrity error: %d out of %d protein accessions (%.2f%%) in the protein list ('proteinGroups.txt') were not found in the FASTA sequence library used (%d accessions). Make sure to use the same FASTA sequence library from UniProtKB as used for the MaxQuant/Andromeda search!" % (row[2], row[1], row[3], row[0]))
elif (row[6] > cutoff_unmapped):
raise cp.HTTPError(400, "Data integrity error: %d out of %d protein accessions (%.2f%%) in the peptide list ('evidence.txt') were not found in the FASTA sequence library used (%d accessions). Make sure to use the same FASTA sequence library from UniProtKB as used for the MaxQuant/Andromeda search!" % (row[5], row[4], row[6], row[0]))
else:
pass
# check the integrity of experiment names
cur.execute(sql_check_integrity_expnames)
row = cur.fetchone()
if row is not None:
raise cp.HTTPError(400, "Data integrity error: experiment name(s) do not match between the peptide and protein lists. Upload MaxQuant files obtained from the same run!")
# add a view with all experiments x ratio type combinations
cur.execute(Piqmie.sql_fetch_expnames)
exp_names = [str(r[0]) for r in cur.fetchall()]
cur.execute(Piqmie.sql_fetch_nratios_pval)
(n_ratios, has_pvalue) = cur.fetchone()
if n_ratios == 1: # duplex SILAC
exp_states = n_ratios + 1
else: # triplex SILAC
exp_states = n_ratios
# set ratio types
ratio_types = self._fetchRatioTypes(exp_states)
sql_create_pgrp_view = "CREATE VIEW VVV_PGROUP_QUANT AS\n"
sql_create_pgrp_view += "-- view to show protein (group) quantitations for all experiments and ratios.\n"
sql_create_pgrp_view += "SELECT\n\tgrp_id,\n"
comma = ','
n = len(exp_names) * len(ratio_types)
i = 0
for e in exp_names:
for r in ratio_types:
i += 1
if i == n:
comma = ''
r = r.replace("/", "")
c = 'norm_ratio_%s' % r
sql_create_pgrp_view += "\tCAST(GROUP_CONCAT(CASE WHEN exp_name = '{exp_name}' THEN {col} ELSE NULL END) AS NUMERIC) AS '{exp_name}_{col}',\n".format(exp_name=e, col=c)
sql_create_pgrp_view += "\tCAST(GROUP_CONCAT(CASE WHEN exp_name = '{exp_name}' THEN sig_ratio_{ratio} ELSE NULL END) AS NUMERIC) AS '{exp_name}_sig_ratio_{ratio}',\n".format(exp_name=e, ratio=r)
sql_create_pgrp_view += "\tCAST(GROUP_CONCAT(CASE WHEN exp_name = '{exp_name}' THEN sd_ratio_{ratio} ELSE NULL END) AS NUMERIC) AS '{exp_name}_sd_ratio_{ratio}',\n".format(exp_name=e, ratio=r)
sql_create_pgrp_view += "\tCAST(GROUP_CONCAT(CASE WHEN exp_name = '{exp_name}' THEN n_pep_qts_{ratio} ELSE NULL END) AS NUMERIC) AS '{exp_name}_n_pep_qts_{ratio}'{comma}\n".format(exp_name=e, ratio=r, comma=comma)
sql_create_pgrp_view += "FROM VV_PGROUP_QUANT GROUP BY grp_id"
cur.execute(sql_create_pgrp_view)
cur.close()
# write LOG file upon successful job completion
with open(log_file, 'w') as fout:
fout.write(db_file)
@cp.expose
def download(self, session_id, filename):
"""
Serve static files for download.
"""
filepath = os.path.join(cp.session.storage_path, session_id, filename)
return serve_file(filepath, 'application/x-download', 'attachment')
@cp.expose
def saveImage(self, session_id, query_str, output_format, data):
"""
Serve image files for download.
"""
filename = ".".join((query_str, output_format))
filepath = os.path.join(cp.session.storage_path, session_id, filename)
if os.path.exists(filepath):
os.remove(filepath)
data = data.encode('utf8', 'replace')
with open(filepath, 'w') as fout: # write image file (.svg|.png|.pdf)
if output_format == 'svg':
cairosvg.svg2svg(bytestring=data, write_to=fout)
elif output_format == 'png':
cairosvg.svg2png(bytestring=data, write_to=fout)
elif output_format == 'pdf':
cairosvg.svg2pdf(bytestring=data, write_to=fout)
else:
pass
return serve_file(filepath, 'application/x-download', 'attachment')
@cp.expose
def pepmap(self, session_id, grp_id, exp=None):
"""
Show the peptide coverage map in an HTML page.
"""
db_file = self._getDbfile(session_id)
with sqlite3.connect(db_file) as conn:
cur = conn.cursor()
cur.execute(Piqmie.sql_fetch_expnames)
exp_names = [str(r[0]) for r in cur.fetchall()]
if exp is None:
exp = ",".join(exp_names) # experiment names
# generate HTML from template
tmpl = loader.load('pepmap.html')
stream = tmpl.generate(
session_id=session_id, # session ID
sel_grp_id=grp_id, # selected protein group
sel_exp_name=exp, # selected experiment(s)
exp_names=exp_names # array of experiment names
)
return stream.render('html', doctype='html5')
@cp.expose
def scatterplot(self, session_id, exp1=None, exp2=None, ratio1=None, ratio2=None, cfratio=1, cfpvalue=1):
"""
Show the 2D scatterplot of protein quantitations in an HTML page.
"""
db_file = self._getDbfile(session_id)
conn = sqlite3.connect(db_file)
cur = conn.cursor()
cur.execute(Piqmie.sql_fetch_expnames)
exp_names = [str(r[0]) for r in cur.fetchall()]
cur.execute(Piqmie.sql_fetch_nratios_pval)
(n_ratios, has_pvalue) = cur.fetchone()
if n_ratios == 1: # duplex SILAC
exp_states = n_ratios + 1
else: # triplex SILAC
exp_states = n_ratios
if has_pvalue == 0:
cfpvalue_ctrl = 'disabled'
else:
cfpvalue_ctrl = '' # disable/enable HTML slider
# set defaults for URL parameters
if not exp1:
exp1 = exp_names[0]
if not exp2:
exp2 = exp_names[0]
if not ratio1:
ratio1 = 'H/L'
if not ratio2:
ratio2 = 'H/L'
ratio_types = self._fetchRatioTypes(exp_states)
# generate HTML from template
tmpl = loader.load('scatterplot.html')
stream = tmpl.generate(
session_id=session_id,
cfpvalue_ctrl=cfpvalue_ctrl, # enable/disable cfpvalue input element
sel_exp_name_1=exp1, # selected experiment 1
sel_exp_name_2=exp2, # selected experiment 2
sel_ratio_1=ratio1, # selected ratio for experiment 1
sel_ratio_2=ratio2, # selected ratio for experiment 2
sel_cfratio=cfratio, # selected ratio cutoff (applied to both experiments)
sel_cfpvalue=cfpvalue, # selected significance cutoff (applied to both experiments)
exp_names=exp_names, # set of all experiments
ratio_types=ratio_types) # set of all possible ratio types
return stream.render('html', doctype='html5')
@cp.expose
@cp.tools.json_out()
def rest(self, session_id, query, **kw):
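"""
Serve post-processed data as JSON for asynchronous HTTP requests.
"""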
grp_id = None
str_exp_names, exp, exp1, exp2 = None, None, None, None
ratio1, ratio2 = None, None
cfratio, cfpvalue = None, None
# select all experiments by default
db_file = self._getDbfile(session_id)
with sqlite3.connect(db_file) as conn:
cur = conn.cursor()
cur.execute(Piqmie.sql_fetch_expnames)
exp_names = [str(r[0]) for r in cur.fetchall()]
cur.close()
# parse and set URL params
if 'grp_id' in kw:
grp_id = kw['grp_id']
if 'exp1' in kw:
exp1 = kw['exp1']
if 'exp2' in kw:
exp2 = kw['exp2']
if 'ratio1' in kw:
ratio1 = kw['ratio1'].replace("/", "")
if 'ratio2' in kw:
ratio2 = kw['ratio2'].replace("/", "")
if 'cfratio' in kw:
cfratio = float(kw['cfratio'])
if 'cfpvalue' in kw:
cfpvalue = float(kw['cfpvalue'])
if cfpvalue == 1:
cfpvalue += 0.1 # N.B.: increment needed to select all rows
if 'exp' in kw:
exp = kw['exp']
str_exp_names = "','".join(exp_names) if exp is None else "','".join(exp.split(','))
#
# User-defined SQL functions (UDFs) for SQLite
#
def pepmatch(pep_seq, prot_seq):
"""
Find peptide matches ([start]-[end] positions) on the parent protein.
Multiple matches of the same peptide are separated by ','.
"""
if not pep_seq or not prot_seq:
return None
pat = re.sub('I|L', '[IL]', pep_seq)
pos = ["-".join((str(m.start() + 1), str(m.end()))) for m in re.finditer(pat, prot_seq)]
return ",".join([str(p) for p in pos])
def log(value, base):
if not value:
return None
return math.log(value) / math.log(base)
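# Example: LOG(8, 2) returns 3.0 (up to float rounding); NULL or 0 yields NULL.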
def filter(value):
"""
Return a non-redundant set of protein annotations in the
'prot_names' column. Remove duplicate, uninformative annotations
or information about isoform(s) from the set.
"""
if not value:
return None
sep = '","'
uniq = dict()
for e in value.split(sep):
new = re.sub('isoform\s+\S+\s+of\s+|,?\s+isoform.*|\s+\(Fragment\)|uncharacterized\s+protein|unknown\s+protein|"', '', e, flags=re.I)
if new not in uniq and new:
uniq[new] = None
return '"' + sep.join(uniq.keys()) + '"' if uniq.keys() else None
# SQL queries used for HTTP requests
sql_statgrp = "SELECT * FROM V_PGROUP_STAT"
sql_statpep = "SELECT * FROM V_PEPTIDE_STAT"
sql_statprot = "SELECT * FROM V_PROTEIN_STAT"
sql_statregrp = "SELECT * FROM V_REG_PGROUP_STAT"
sql_grp = """
SELECT
grp_id,
grp_size,
exp_name,
prot_accs,
FILTER(prot_names) AS prot_names,
gene_names,
grp_evidence,
org,
pep_score,
norm_ratio_HL,
norm_ratio_LH,
norm_ratio_HM,
norm_ratio_MH,
norm_ratio_ML,
norm_ratio_LM,
n_pep_qts_HL,
n_pep_qts_HM,
n_pep_qts_ML,
sd_ratio_HL,
sd_ratio_HM,
sd_ratio_ML,
sig_ratio_HL,
sig_ratio_HM,
sig_ratio_ML
FROM V_PGROUP_MEMBERS
"""
sql_pepmap = """
SELECT
db,
prot_acc,
CASE WHEN MAX(lead_prot) > 0 THEN 1 ELSE 0 END AS is_lead_prot,
gene,
des,
prot_len,
GROUP_CONCAT(pep_id || ":" || PEPMATCH(pep_seq, prot_seq) || ":" || pep_seq, ";") AS pep_map
FROM
(SELECT DISTINCT * FROM V_PEP2GRP A INNER JOIN (SELECT pep_id FROM PEPTIDE_QUANT WHERE exp_name IN ('{0}') AND SUBSTR(quant_type, 1, 1) = 'R' AND quant_value) B USING (pep_id) WHERE grp_id = :1)
GROUP BY prot_acc ORDER BY db, prot_len DESC
""".format(str_exp_names)
sql_cmpquant = """
SELECT
grp_id,
COUNT(acc) AS grp_size,
GROUP_CONCAT(db || ':' || acc) AS prot_accs,
FILTER(GROUP_CONCAT(REPLACE(des, '"', ''), '","')) AS prot_names,
GROUP_CONCAT(DISTINCT gene) AS gene_names,
CASE
WHEN MIN(evidence) = 1 THEN 'protein'
WHEN MIN(evidence) = 2 THEN 'transcript'
WHEN MIN(evidence) = 3 THEN 'homology'
WHEN MIN(evidence) = 4 THEN 'predicted'
WHEN MIN(evidence) = 5 THEN 'uncertain'
ELSE 'unknown'
END AS grp_evidence,
org,
ratio1,
ROUND(LOG(ratio1, 2), 2) AS log2_ratio1,
fc1,
sd_ratio1,
n_pep_qts1,
ratio2,
ROUND(LOG(ratio2, 2), 2) AS log2_ratio2,
fc2,
sd_ratio2,
n_pep_qts2,
sig_ratio1,
sig_ratio2
FROM (
SELECT
T.grp_id,
ratio1,
fc1,
sd_ratio1,
ratio2,
fc2,
sd_ratio2,
n_pep_qts1,
n_pep_qts2,
sig_ratio1,
sig_ratio2,
db,
acc,
IFNULL(evidence, 6) AS evidence,
gene,
des,
org
FROM (
SELECT
A.grp_id,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :1 THEN norm_ratio_{r1} ELSE NULL END) AS NUMERIC) AS ratio1,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :2 THEN norm_ratio_{r2} ELSE NULL END) AS NUMERIC) AS ratio2,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :1 THEN FC_{r1} ELSE NULL END) AS NUMERIC) AS fc1,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :2 THEN FC_{r2} ELSE NULL END) AS NUMERIC) AS fc2,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :1 THEN sd_ratio_{r1} ELSE NULL END) AS NUMERIC) AS sd_ratio1,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :2 THEN sd_ratio_{r2} ELSE NULL END) AS NUMERIC) AS sd_ratio2,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :1 THEN n_pep_qts_{r1} ELSE NULL END) AS NUMERIC) AS n_pep_qts1,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :2 THEN n_pep_qts_{r2} ELSE NULL END) AS NUMERIC) AS n_pep_qts2,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :1 THEN sig_ratio_{r1} ELSE NULL END) AS NUMERIC) AS sig_ratio1,
CAST(GROUP_CONCAT(CASE WHEN exp_name = :2 THEN sig_ratio_{r2} ELSE NULL END) AS NUMERIC) AS sig_ratio2
FROM
V_PGROUP A, VV_PGROUP_QUANT B
WHERE
A.grp_id = B.grp_id
GROUP BY A.grp_id) T, PROT2GRP C, V_PROTEIN D
WHERE
T.grp_id = C.grp_id
AND C.prot_acc = D.acc
AND fc1 >= :3 AND fc2 >= :3
AND IFNULL(sig_ratio1, 0) < :4 AND IFNULL(sig_ratio2, 0) < :4 -- N.B.: returns true for sig_ratio = 1 and cfpvalue = 1 + 0.1
ORDER BY T.grp_id, db) M
GROUP BY grp_id
""".format(r1=ratio1, r2=ratio2)
# map HTTP requests to SQL queries
q2sql = dict(statgrp=sql_statgrp,
statregrp=sql_statregrp,
statpep=sql_statpep,
statprot=sql_statprot,
grp=sql_grp,
pepmap=sql_pepmap,
cmpquant=sql_cmpquant)
if query not in q2sql:
raise cp.HTTPError(400, 'Unsupported HTTP request!')
# connect to db
with sqlite3.connect(db_file) as conn:
# register UDFs in db
conn.create_function('pepmatch', 2, pepmatch)
conn.create_function('log', 2, log)
conn.create_function('filter', 1, filter)
conn.row_factory = sqlite3.Row # access columns by name
cur = conn.cursor()
if query == 'pepmap':
cur.execute(q2sql[query], (grp_id,))
elif query == 'cmpquant':
cur.execute(q2sql[query], (exp1, exp2, cfratio, cfpvalue))
else:
cur.execute(q2sql[query])
rows = cur.fetchall()
cur.close()
return [collections.OrderedDict(xi) for xi in rows]
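# With CherryPy's default dispatcher this method serves URLs such as
# (session ID and parameter values are examples only):
#   GET /rest/<session_id>/statgrp
#   GET /rest/<session_id>/pepmap?grp_id=1
#   GET /rest/<session_id>/cmpquant?exp1=Exp1&exp2=Exp2&ratio1=H/L&ratio2=H/L&cfratio=1&cfpvalue=1
# Each responds with the result rows as a JSON array (via cp.tools.json_out).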
if __name__ == '__main__':
cp.quickstart(Piqmie())
else:
# open the default web browser
# wb.open_new_tab(URL)
# mount the root application
cp.tree.mount(Piqmie(), '/')
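# A minimal standalone configuration sketch (values are examples only; the
# application needs file-based sessions because it reads
# cp.session.storage_path):
#
#   cp.config.update({
#       'tools.sessions.on': True,
#       'tools.sessions.storage_type': 'file',
#       'tools.sessions.storage_path': '/tmp/piqmie_sessions',
#   })
#   cp.quickstart(Piqmie())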