-
Notifications
You must be signed in to change notification settings - Fork 3
/
Makefile.common
650 lines (533 loc) · 39.7 KB
/
Makefile.common
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
# common makefile holding variables needed by all makefiles.
# First, we import secured variables
include $(PIPELINEROOT)Makefile.Config
# Recipes in the pipeline Makefiles use bash-specific constructs, while make's
# default shell is /bin/sh (which is not bash on Ubuntu): force bash explicitly.
SHELL=/bin/bash
# 'all' and 'clean' are declared phony so that same-named files never mask them.
.PHONY : all clean
# ===================================
# GENERAL PARAMETERS
# ===================================
# Release number, assembled as <major>.<minor>
MAJOR_RELEASE := 16
MINOR_RELEASE := 0
RELEASE       := $(MAJOR_RELEASE).$(MINOR_RELEASE)

# --- MySQL database connection parameters ---
# Credentials are not set here: DBUSER/DBPASS and DBUSERV/DBPASSV
# are defined in Makefile.Config.
# Database names embed the release number with '.' replaced by '_'.
DBNAME          := bgee_v$(subst .,_,$(RELEASE))
DBHOST          := dbbioinfo.unil.ch
DBPORT          := 3306
DBNAME_EASYBGEE := easybgee_v$(subst .,_,$(RELEASE))
DBHOST_EASYBGEE := localhost

# --- Pipeline container ---
# Image file name: <name>-<release>.<container release>.sif
PIPELINE_CONTAINER_NAME    := BgeePipeline
PIPELINE_CONTAINER_RELEASE := 0
PIPELINE_CONTAINER_FILE    := $(PIPELINE_CONTAINER_NAME)-$(RELEASE).$(PIPELINE_CONTAINER_RELEASE).sif
# Prefix to run a command inside the container image
PIPELINE_CONTAINER_CMD     := apptainer exec $(PIPELINE_CONTAINER_FILE)
# --- Remote machines ---
# Logins and passwords are kept out of this file; each one is defined in Makefile.Config.

# Annotator machine (login: ANNOTATORLOGIN)
ANNOTATORHOST := annotbioinfo.unil.ch

# Cluster account (login/password: CLUSTERLOGIN / CLUSTERPASSW)
CLUSTERHOST   := curnagl.dcsr.unil.ch

# Machine used for the pipeline (login/password: PIPELOGIN / PIPEPASSWD)
PIPEHOST      := devbioinfo.unil.ch

# Machine used for RNA-seq/Affymetrix data transfer (login/password: DATALOGIN / DATAPASSWD)
DATAHOST      := jura0.sib.swiss
DATAPATH      := /data
# --- Ensembl ---
# Data releases
ENSRELEASE             := 112
ENSMETAZOARELEASE      := 59
# API releases: both are set from the main Ensembl release.
# NOTE(review): ENSMETAZOA_API_RELEASE follows ENSRELEASE, not ENSMETAZOARELEASE;
# presumably intentional, confirm.
ENS_API_RELEASE        := $(ENSRELEASE)
ENSMETAZOA_API_RELEASE := $(ENSRELEASE)

# Default, remote Ensembl db
# see https://www.ensembl.org/info/data/mysql.html
ENSDBUSER := anonymous
# The value is the two quote characters themselves (make does not strip quotes);
# a shell receiving it reads an empty string.
ENSDBPASS := ''
ENSDBHOST := ensembldb.ensembl.org
ENSDBPORT := 5306

# Default, remote Ensembl genomes db
# see https://metazoa.ensembl.org/info/data/mysql.html
ENSGDBHOST := mysql-eg-publicsql.ebi.ac.uk
ENSGDBPORT := 4157

# local BDGP
BDGPDBNAME := bdgp

# --- Connection arguments for Perl scripts ---
# Each one is a single word of key=value pairs joined by '__'.
# Bgee:
BGEECMD  := user=$(DBUSER)__pass=$(DBPASS)__host=$(DBHOST)__port=$(DBPORT)__name=$(DBNAME)
# Bgee, with the DBUSERV/DBPASSV credentials and PIPEHOST as host:
BGEECMDV := user=$(DBUSERV)__pass=$(DBPASSV)__host=$(PIPEHOST)__port=$(DBPORT)__name=$(DBNAME)
# Ensembl (no database name):
ENSCMD   := user=$(ENSDBUSER)__pass=$(ENSDBPASS)__host=$(ENSDBHOST)__port=$(ENSDBPORT)
# BDGP (same credentials and server as Bgee, database BDGPDBNAME):
BDGPCMD  := user=$(DBUSER)__pass=$(DBPASS)__host=$(DBHOST)__port=$(DBPORT)__name=$(BDGPDBNAME)
# ===================================
# COMMANDS
# ===================================
# Location and name of the pipeline JAR (the path is relative; it is prefixed with PIPELINEROOT below)
JAVALIBDIR := ../java/
BGEEPIPELINEJARNAME := bgee-pipeline-$(RELEASE)-with-dependencies.jar
BGEEPIPELINEJAR := $(JAVALIBDIR)$(BGEEPIPELINEJARNAME)
# Java command connecting to the Bgee database, without the -jar argument.
# The JDBC URL is single-quoted so that a shell does not interpret '&' and '?'.
# --add-opens java.base/java.lang=ALL-UNNAMED: see https://stackoverflow.com/a/77874779/1768736
# NOTE(review): the database password is passed as a -D command-line argument.
JAVA_NO_JAR := java -Xmx150g -Dbgee.dao.jdbc.username=$(DBUSER) -Dbgee.dao.jdbc.password=$(DBPASS) -Dbgee.dao.jdbc.driver.names=com.mysql.cj.jdbc.Driver,net.sf.log4jdbc.sql.jdbcapi.DriverSpy -Dbgee.dao.jdbc.url='jdbc:log4jdbc:mysql://$(DBHOST):$(DBPORT)/$(DBNAME)?useSSL=false&allowPublicKeyRetrieval=True&enableQueryTimeouts=false&sessionVariables=net_write_timeout=260000,net_read_timeout=260000,wait_timeout=260000' --add-opens java.base/java.lang=ALL-UNNAMED
# Java command connecting to easyBgee, without the -jar argument. Never used without the JAR for the moment,
# but created to follow the same pattern as the JAVA_NO_JAR/JAVA variables for Bgee.
# DBUSER_EASYBGEE and DBPASS_EASYBGEE are not defined in this file (presumably in Makefile.Config; confirm).
# Unlike JAVA_NO_JAR, this command uses -Xmx64g, 500000 s timeouts and no --add-opens option.
JAVA_NO_JAR_EASYBGEE := java -Xmx64g -Dbgee.dao.jdbc.username=$(DBUSER_EASYBGEE) -Dbgee.dao.jdbc.password=$(DBPASS_EASYBGEE) -Dbgee.dao.jdbc.driver.names=com.mysql.cj.jdbc.Driver,net.sf.log4jdbc.sql.jdbcapi.DriverSpy -Dbgee.dao.jdbc.url='jdbc:log4jdbc:mysql://$(DBHOST_EASYBGEE):$(DBPORT)/$(DBNAME_EASYBGEE)?useSSL=false&allowPublicKeyRetrieval=True&enableQueryTimeouts=false&sessionVariables=net_write_timeout=500000,net_read_timeout=500000,wait_timeout=500000'
# Same commands, running the pipeline JAR
JAVA := $(JAVA_NO_JAR) -jar $(PIPELINEROOT)$(BGEEPIPELINEJAR)
JAVA_EASYBGEE := $(JAVA_NO_JAR_EASYBGEE) -jar $(PIPELINEROOT)$(BGEEPIPELINEJAR)
# Separators used to pass arguments on the Java command line.
# Each value is exactly the text following ':=' (no surrounding whitespace).
# ENTRY_SEP and LIST_SEP are both ','.
ENTRY_SEP :=,
KEY_VAL_SEP :=//
VAL_SEP :=--
EMPTY_LIST :=-
LIST_SEP :=,
SPACE :=__
# mysql client command for the Bgee server, without and with the database name.
# --init-command sets the session network/wait timeouts to 260000 s.
MYSQLNODBNAME := mysql -u $(DBUSER) -p$(DBPASS) -h $(DBHOST) -P $(DBPORT) --init-command='SET SESSION net_write_timeout=260000; SET SESSION net_read_timeout=260000; SET SESSION wait_timeout=260000'
MYSQL := $(MYSQLNODBNAME) $(DBNAME)
# Same for easyBgee (session timeouts set to 86400 s); it reuses DBPORT.
MYSQLNODBNAME_EASYBGEE := mysql -u $(DBUSER_EASYBGEE) -p$(DBPASS_EASYBGEE) -h $(DBHOST_EASYBGEE) -P $(DBPORT) --init-command='SET SESSION net_write_timeout=86400; SET SESSION net_read_timeout=86400; SET SESSION wait_timeout=86400'
MYSQL_EASYBGEE := $(MYSQLNODBNAME_EASYBGEE) $(DBNAME_EASYBGEE)
# --- Generic tool commands ---
GIT := git
# -N: only fetch files newer than the local copy; -nv: non-verbose output
WGET := wget -N -nv
# 'rsync -a' (== -rlptgoD) is strictly speaking not used because of some permission
# problems: -p and -t are left out. Former definition:
#RSYNC := rsync -a --exclude=.DS_Store
RSYNC := rsync -rlgoD --exclude=.DS_Store --exclude=.svn --exclude=.git
CAT := cat
RM := rm -f
MV := mv -f
CP := cp -f
GUNZIP := gunzip -f
MKDIR := mkdir -p
# Locale name. make stores a value verbatim and performs no quote removal, so the
# former value 'C' (with the quote characters) was not a valid locale name wherever
# it reached a program without going through shell word parsing (e.g. when make
# exports the variable to the environment). The bare name works in every context.
LC_ALL = C
PYTHON3 = python3
PIPENV = pipenv
# cURL command, to download files only if more recently changed
# (it was not working correctly with wget in our case)
# -s: silent, no progress bar displayed;
# -S: if silent, still shows error message if it fails;
# -L: in case of redirection, follow it and redo the request, this is necessary to correctly get modification date;
# -f: in case of error do not display the document returned.
# -R: make the local file have the same timestamp as remote file
# Should be used with option -z filename: download remote file only if more recent than the provided file
# And with -o option to choose output file name.
# See variable APPEND_CURL_COMMAND.
CURL := curl -s -S -L -f -R
# Variable to append to the cURL command, to download a file only if the remote file is more recent
# than the targeted file: download into a temp file and move it to the stable location on success;
# delete the temp file (and the target) on error.
# Variable to be recursively expanded ($@ must be resolved in the recipe using it).
# -z filename: download remote file only if last modification date is more recent than the modification date of filename
# -o filename: store downloaded file into filename
# When the remote file is not more recent, nothing is downloaded and the temp file does not exist:
# in that case the move is skipped and the existing target is kept.
# (The former '... && mv -f $@.tmp $@ 2>/dev/null || rm -f $@.tmp $@' ran the 'rm' branch whenever
# 'mv' failed on the missing temp file, deleting the up-to-date target.)
# The exit status on a cURL failure is unchanged: the 'rm -f' clean-up succeeds.
APPEND_CURL_COMMAND = -z $@ -o $@.tmp && { [ ! -e $@.tmp ] || mv -f $@.tmp $@; } || rm -f $@.tmp $@
# Paths to files used in several Makefiles, relative to the Makefile pipeline root (bgee/pipeline/pipeline)
# Needs to be included after we defined some variables used in the file
include $(PIPELINEROOT)Makefile.taxon_info
# ===================================
# CLUSTER COMMANDS
# ===================================
# --- Cluster directories ---
CLUSTER_DATA_PATH       := /data/FAC/FBM/DEE/mrobinso/bgee_sensitive
# There is currently no project scratch directory: reuse the /data folder
# rather than requiring a user scratch directory.
CLUSTER_SCRATCH_PATH    := $(CLUSTER_DATA_PATH)
CLUSTER_SCRATCH_RESULTS := $(CLUSTER_SCRATCH_PATH)/$(DBNAME)/
CLUSTER_ARCHIVE_PATH    := /archive/FAC/FBM/DEE/mrobinso/bgee_sensitive
# NOTE: unlike the paths above, this one ends with '/', so the three values
# derived from it below contain '//'.
CLUSTER_CURNAGL_PATH    := /work/FAC/FBM/DEE/mrobinso/bgee/
CLUSTER_TOOLS_DIR       := $(CLUSTER_CURNAGL_PATH)/tools/
R_LIBS_PATH_SENSITIVE   := $(CLUSTER_DATA_PATH)/R_LIBS
R_LIBS_PATH_CURNAGL     := $(CLUSTER_CURNAGL_PATH)/R_LIBS
PERL_LIBS_PATH_CURNAGL  := $(CLUSTER_CURNAGL_PATH)/PERL_LIBS/lib/perl5

# --- Environment set-up snippets ---
#NOTE Remember to end each of them with ';' so that they can be inserted
# in front of other command lines
# Base snippets
CLUSTER_SOFT_VITALIT   := module use /software/module/;
CLUSTER_SOFT_UNIL      := module load gcc/10.4.0;
# Snippets built on a base snippet plus one 'module load'
CLUSTER_KALLISTO_CMD   := $(CLUSTER_SOFT_VITALIT) module load UHTS/Analysis/kallisto/0.46.0;
CLUSTER_R_CMD          := $(CLUSTER_SOFT_VITALIT) module load R/3.6.1;
CLUSTER_R_CMD2         := $(CLUSTER_SOFT_UNIL) module load r/4.0.5;
# NOTE(review): module version 97 differs from ENS_API_RELEASE; confirm it is still the intended one.
CLUSTER_ENSEMBL_API    := module load Development/Ensembl_API/97;
# '$$' yields a literal '$': PATH is resolved by the shell running the command, not by make
CLUSTER_SOFTWARE_BIN   := export PATH=/software/bin:$$PATH;
SENSITIVE_PERL_CMD     := $(CLUSTER_SOFT_VITALIT) $(CLUSTER_ENSEMBL_API) $(CLUSTER_SOFTWARE_BIN)
CLUSTER_PERL_CMD       := $(CLUSTER_SOFT_UNIL) module load perl/5.32.1;
CLUSTER_SRATOOLKIT_CMD := $(CLUSTER_SOFT_UNIL) module load sratoolkit/2.10.9;
CLUSTER_TOPHAT_CMD     := $(CLUSTER_SOFT_VITALIT) module load UHTS/Aligner/tophat/2.1.1;
CLUSTER_FASTP_CMD      := $(CLUSTER_SOFT_UNIL) module load fastp/0.23.2;
CLUSTER_BUSTOOLS_CMD   := $(CLUSTER_SOFT_VITALIT) module load UHTS/Analysis/bustools/0.40.0;
# ===================================
# PATHS TO FILES
# ===================================
# Temporary directory (used if there is not enough disk space)
# NOTE(review): hard-coded to bgee_v15_2 whereas DBNAME is derived from RELEASE; confirm it is still the intended location.
TMPDIR := /var/lib/mysql/tmp/bgee_v15_2/

# PIPELINEROOT and DIR_NAME are specified by each individual Makefile
# before importing Makefile.common.

# Source files: root directory, per-Makefile input directory, annotation submodule
SOURCE_FILES_DIR    := $(PIPELINEROOT)../source_files/
INPUT_DIR           := $(SOURCE_FILES_DIR)$(DIR_NAME)
ANNOT_SUBMODULE_DIR := $(SOURCE_FILES_DIR)bgee-expression-annotations.git/

# Generated files: root directory, per-Makefile output directory
GENERATED_FILES_DIR := $(PIPELINEROOT)../generated_files/
OUTPUT_DIR          := $(GENERATED_FILES_DIR)$(DIR_NAME)
# Per-release verification file, stored in the output directory
VERIFICATIONFILE    := $(OUTPUT_DIR)step_verification_$(RELEASE).txt
# =========== FILES FROM ANNOTATORS ===============
# The definitions commented out below are kept for reference only; none of them is active.
# URL to git annotation files
#ANNOTATION_GIT_URL := https://gitlab.sib.swiss/Bgee/expression-annotations/tree/develop
# Directory with curator files, from the root directory of the pipeline
#CURATIONPATH := ../../curation/

## AFFYMETRIX
#AFFY_CURATION_PATH := $(CURATIONPATH)expression_data/affymetrix/
#AFFY_CURATION_FILEPATH := $(AFFY_CURATION_PATH)annotation.xls

## SIMILARITY
#SIMILARITY_CURATION_PATH := $(CURATIONPATH)similarity/
#SIMILARITY_CURATION_FILEPATH := $(SIMILARITY_CURATION_PATH)homology.xlsx

## ESTs
#EST_CURATION_PATH := $(CURATIONPATH)expression_data/est/
#EST_UNIGENE_CURATION_PATH := $(EST_CURATION_PATH)unigene/
#EST_SMIRNADB_CURATION_PATH := $(EST_CURATION_PATH)smirnadb/
# List of the species with UniGene/EST annotation
EST_SPECIES_CURATION_FILEPATH := $(SOURCE_FILES_DIR)ESTs/unigene/species_EST_file.txt
# Mapping file of the smiRNAdb libraries
SMIRNADB_CURATION_MAPFILEPATH := $(SOURCE_FILES_DIR)ESTs/smirnadb/mapping_libs_smiRNAdb.csv

## In Situ
#BDGP_PATH := $(CURATIONPATH)expression_data/in_situ/bdgp/
# File names only (no directory part); BDGP2FBBT_MAPPING_NEW embeds the database name
BDGP2FBBT_MAPPING_FILE := BDGP_terms_to_FBbt_terms.tsv
BDGP2FBBT_MAPPING_NEW  := BDGP_terms_to_FBbt_terms_$(DBNAME).tsv
STAGECORRESP_FILE      := BDGP_stages_correspondence.txt
WORMBASE_EXPR_DUMP     := expr_pattern.ace

## RNA Seq
#RNASEQ_CURATION_PATH := $(CURATIONPATH)expression_data/rna_seq/
#RNASEQ_CURATION_FILEPATH := $(RNASEQ_CURATION_PATH)annotations.xls
# =========== SPECIES FILES ===============
# TSV file listing the IDs of the species used in Bgee (a source file)
SPECIESFILEPATH      := $(SOURCE_FILES_DIR)species/bgeeSpecies.tsv
# The three files below are located under the generated files:
# - IDs of all the taxa used in our annotations
ANNOTTAXIDSFILEPATH  := $(GENERATED_FILES_DIR)species/annotTaxIds.tsv
# - IDs of the species used in Bgee, of all the taxa used in our annotations,
#   and of all the taxa used in Uberon
ALLTAXIDSFILEPATH    := $(GENERATED_FILES_DIR)species/allTaxIds.tsv
# - our custom taxonomy ontology
TAXONOMYFILEPATH     := $(GENERATED_FILES_DIR)species/bgee_ncbitaxon.owl
# Download URL of the taxonomy.dat file
TAX_DAT_DOWNLOAD_URL := ftp://ftp.ebi.ac.uk/pub/databases/taxonomy/taxonomy.dat
# =========== GENE FILES ===============
# NOTE(review): HOMOLOGS_DIR has no trailing '/', unlike PARALOGS_DIR and ORTHOLOGS_DIR; confirm how it is used.
HOMOLOGS_DIR            := $(OUTPUT_DIR)homologs
PARALOGS_DIR            := $(OUTPUT_DIR)paralogs/
ORTHOLOGS_DIR           := $(OUTPUT_DIR)orthologs/
XREFS_FROM_UNIPROT_FILE := $(OUTPUT_DIR)xrefs_from_uniprot.tsv

# The variables below are recursively expanded ('='): they are resolved when used.
# Directory where the config files are saved; these files are also used by the
# orthologs/paralogs app to restart from where a previous execution stopped,
# or to run incrementally.
TEMPORARY_CONFIG_FILES_DIR = $(OUTPUT_DIR)temp
# Directory and main file of the pairwise homology application
GENERATE_HOMOLOGS_APP_DIR  = $(PIPELINEROOT)genes/orthologs_paralogs_app
GENERATE_HOMOLOGS_APP_FILE = $(GENERATE_HOMOLOGS_APP_DIR)/GenerateHomologsParallelApp.py

# Species lists, as CSV files: the first line is a heading and is ignored, then
# one species per line, represented by its NCBI identifier.
# - all the species for which orthologs and paralogs are wanted
ALL_SPECIES_FILE          = $(GENERATE_HOMOLOGS_APP_DIR)/species_lists/all_speciesId.csv
# - exclusively the NCBI species
ONLY_NCBI_SPECIES_FILE    = $(GENERATE_HOMOLOGS_APP_DIR)/species_lists/ncbi_speciesId.csv
# - exclusively the FlyBase species
ONLY_FLYBASE_SPECIES_FILE = $(GENERATE_HOMOLOGS_APP_DIR)/species_lists/flybase_speciesId.csv

# URL or file path of the mapping from NCBI gene IDs to Ensembl gene IDs
NCBI_GENE2ENSEMBL_URL := https://ftp.ncbi.nih.gov/gene/DATA/gene2ensembl.gz
# =========== UBERON FILES ===============
# Uberon directories: source files and generated files
UBERON_SOURCE_PATH := $(SOURCE_FILES_DIR)uberon/
UBERON_OUTPUT_PATH := $(GENERATED_FILES_DIR)uberon/

# Download URLs: ext.owl and composite-metazoan.owl share the same URL start;
# the developmental stage ontology comes from GitHub.
UBERON_DOWNLOAD_URL_START := http://purl.obolibrary.org/obo/uberon/
EXT_DOWNLOAD_URL          := $(UBERON_DOWNLOAD_URL_START)ext.owl
COMPOSITE_DOWNLOAD_URL    := $(UBERON_DOWNLOAD_URL_START)composite-metazoan.owl
DEV_STAGE_DOWNLOAD_URL    := https://raw.githubusercontent.com/obophenotype/developmental-stage-ontologies/master/external/bgee/dev_stage_ontology.obo

# Sex-related info about anatomical terms: file name, and path in the Uberon output directory
UBERON_SEX_INFO_FILE      := uberon_sex_info.tsv
UBERON_SEX_INFO_FILE_PATH := $(UBERON_OUTPUT_PATH)$(UBERON_SEX_INFO_FILE)

# Taxon constraints: file name, and path in the Uberon output directory
TAXONCONSTRAINTSFILE      := taxonConstraints.tsv
TAXONCONSTRAINTSFILEPATH  := $(UBERON_OUTPUT_PATH)$(TAXONCONSTRAINTSFILE)
# OVERRIDE_TAXON_CONSTRAINTS := HsapDv:$(KEY_VAL_SEP)9606$(ENTRY_SEP)MmusDv:$(KEY_VAL_SEP)10090$(ENTRY_SEP)ZFS:$(KEY_VAL_SEP)7955$(ENTRY_SEP)XAO:$(KEY_VAL_SEP)8364$(ENTRY_SEP)FBdv:$(KEY_VAL_SEP)7227$(ENTRY_SEP)GgalDv:$(KEY_VAL_SEP)9031$(ENTRY_SEP)GgorDv:$(KEY_VAL_SEP)9593$(ENTRY_SEP)MmulDv:$(KEY_VAL_SEP)9544$(ENTRY_SEP)MdomDv:$(KEY_VAL_SEP)13616$(ENTRY_SEP)OanaDv:$(KEY_VAL_SEP)9258$(ENTRY_SEP)PtroDv:$(KEY_VAL_SEP)9598$(ENTRY_SEP)PpanDv:$(KEY_VAL_SEP)9597$(ENTRY_SEP)PpygDv:$(KEY_VAL_SEP)9600$(ENTRY_SEP)BtauDv:$(KEY_VAL_SEP)9913$(ENTRY_SEP)RnorDv:$(KEY_VAL_SEP)10116$(ENTRY_SEP)AcarDv:$(KEY_VAL_SEP)28377$(ENTRY_SEP)TnigDv:$(KEY_VAL_SEP)99883$(ENTRY_SEP)SscrDv:$(KEY_VAL_SEP)9823$(ENTRY_SEP)WBls:$(KEY_VAL_SEP)6239$(ENTRY_SEP)UBERON:0007220$(KEY_VAL_SEP)7227$(VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)10116$(ENTRY_SEP)UBERON:0004707$(KEY_VAL_SEP)7955$(ENTRY_SEP)UBERON:0000069$(KEY_VAL_SEP)6239$(VAL_SEP)7227$(VAL_SEP)7955$(VAL_SEP)8364$(ENTRY_SEP)UBERON:0000070$(KEY_VAL_SEP)7227$(ENTRY_SEP)UBERON:0000111$(KEY_VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)8364$(VAL_SEP)9031$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)13616$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)9913$(VAL_SEP)28377$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(VAL_SEP)10116$(ENTRY_SEP)UBERON:0000110$(KEY_VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)8364$(VAL_SEP)9031$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)13616$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)9913$(VAL_SEP)28377$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(VAL_SEP)10116$(ENTRY_SEP)UBERON:0000107$(KEY_VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)7955$(VAL_SEP)8364$(VAL_SEP)7227$(VAL_SEP)9031$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)13616$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)9913$(VAL_SEP)28377$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(ENTRY_SEP)UBERON:0000108$(KEY_VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)7955$(VAL_SEP)8364$(VAL_SEP)7227$(VAL_SEP)9031$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)13616$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)9913$(VAL_SEP)28377$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(ENTRY_SEP)UBERON:0000106$(KEY_VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)7955$(VAL_SEP)8364$(VAL_SEP)7227$(VAL_SEP)9031$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)13616$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)9913$(VAL_SEP)28377$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(ENTRY_SEP)UBERON:0004729$(KEY_VAL_SEP)6239$(ENTRY_SEP)UBERON:0004730$(KEY_VAL_SEP)7227$(ENTRY_SEP)UBERON:0007234$(KEY_VAL_SEP)7955$(VAL_SEP)8364$(ENTRY_SEP)UBERON:0007232$(KEY_VAL_SEP)7955$(VAL_SEP)8364$(ENTRY_SEP)UBERON:0000112$(KEY_VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)7955$(VAL_SEP)8364$(VAL_SEP)9031$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)13616$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)9913$(VAL_SEP)28377$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(VAL_SEP)10116$(ENTRY_SEP)UBERON:0000113$(KEY_VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)7955$(VAL_SEP)8364$(VAL_SEP)9031$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)13616$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)9913$(VAL_SEP)28377$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(VAL_SEP)10116$(ENTRY_SEP)UBERON:0009849$(KEY_VAL_SEP)8364$(ENTRY_SEP)UBERON:0014405$(KEY_VAL_SEP)$(EMPTY_LIST)$(ENTRY_SEP)UBERON:0014862$(KEY_VAL_SEP)$(EMPTY_LIST)$(ENTRY_SEP)UBERON:0014864$(KEY_VAL_SEP)$(EMPTY_LIST)$(ENTRY_SEP)UBERON:0007221$(KEY_VAL_SEP)9606$(VAL_SEP)10090$(VAL_SEP)8364$(VAL_SEP)7227$(VAL_SEP)9031$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)13616$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)9913$(VAL_SEP)28377$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(VAL_SEP)10116$(ENTRY_SEP)UBERON:0007222$(KEY_VAL_SEP)10090$(VAL_SEP)7955$(VAL_SEP)8364$(VAL_SEP)7227$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(VAL_SEP)10116$(ENTRY_SEP)UBERON:0018241$(KEY_VAL_SEP)10090$(VAL_SEP)7955$(VAL_SEP)8364$(VAL_SEP)7227$(VAL_SEP)9593$(VAL_SEP)9544$(VAL_SEP)9258$(VAL_SEP)9598$(VAL_SEP)9597$(VAL_SEP)9600$(VAL_SEP)99883$(VAL_SEP)9823$(VAL_SEP)6239$(VAL_SEP)10116$(ENTRY_SEP)AAO:$(KEY_VAL_SEP)8364$(ENTRY_SEP)ABA:$(KEY_VAL_SEP)10090$(ENTRY_SEP)EHDAA2:$(KEY_VAL_SEP)9606$(ENTRY_SEP)EMAPA:$(KEY_VAL_SEP)10090$(ENTRY_SEP)FBbt:$(KEY_VAL_SEP)7227$(ENTRY_SEP)FMA:$(KEY_VAL_SEP)9606$(ENTRY_SEP)HAO:$(KEY_VAL_SEP)7399$(ENTRY_SEP)MA:$(KEY_VAL_SEP)10090$(ENTRY_SEP)MFO:$(KEY_VAL_SEP)8089$(ENTRY_SEP)SPD:$(KEY_VAL_SEP)6893$(ENTRY_SEP)TADS:$(KEY_VAL_SEP)6939$(ENTRY_SEP)TAO:$(KEY_VAL_SEP)7955$(VAL_SEP)99883$(ENTRY_SEP)TGMA:$(KEY_VAL_SEP)44484$(ENTRY_SEP)WBbt:$(KEY_VAL_SEP)6239$(ENTRY_SEP)XAO:$(KEY_VAL_SEP)8364$(ENTRY_SEP)ZFA:$(KEY_VAL_SEP)7955$(ENTRY_SEP)EHDAA:$(KEY_VAL_SEP)9606$(ENTRY_SEP)EV:$(KEY_VAL_SEP)9606
# taxon constraints with no hacks:
# OVERRIDE_TAXON_CONSTRAINTS := HsapDv:$(KEY_VAL_SEP)9606$(ENTRY_SEP)MmusDv:$(KEY_VAL_SEP)10090$(ENTRY_SEP)ZFS:$(KEY_VAL_SEP)7955$(ENTRY_SEP)XAO:$(KEY_VAL_SEP)8364$(ENTRY_SEP)FBdv:$(KEY_VAL_SEP)7227$(ENTRY_SEP)GgalDv:$(KEY_VAL_SEP)9031$(ENTRY_SEP)GgorDv:$(KEY_VAL_SEP)9593$(ENTRY_SEP)MmulDv:$(KEY_VAL_SEP)9544$(ENTRY_SEP)MdomDv:$(KEY_VAL_SEP)13616$(ENTRY_SEP)OanaDv:$(KEY_VAL_SEP)9258$(ENTRY_SEP)PtroDv:$(KEY_VAL_SEP)9598$(ENTRY_SEP)PpanDv:$(KEY_VAL_SEP)9597$(ENTRY_SEP)PpygDv:$(KEY_VAL_SEP)9600$(ENTRY_SEP)BtauDv:$(KEY_VAL_SEP)9913$(ENTRY_SEP)RnorDv:$(KEY_VAL_SEP)10116$(ENTRY_SEP)AcarDv:$(KEY_VAL_SEP)28377$(ENTRY_SEP)TnigDv:$(KEY_VAL_SEP)99883$(ENTRY_SEP)SscrDv:$(KEY_VAL_SEP)9823$(ENTRY_SEP)WBls:$(KEY_VAL_SEP)6239$(ENTRY_SEP)PdumDv:$(KEY_VAL_SEP)6358$(ENTRY_SEP)UBERON:0000069$(KEY_VAL_SEP)6239$(VAL_SEP)7227$(VAL_SEP)7955$(VAL_SEP)8364$(ENTRY_SEP)AAO:$(KEY_VAL_SEP)8364$(ENTRY_SEP)ABA:$(KEY_VAL_SEP)10090$(ENTRY_SEP)EHDAA2:$(KEY_VAL_SEP)9606$(ENTRY_SEP)EMAPA:$(KEY_VAL_SEP)10090$(ENTRY_SEP)FBbt:$(KEY_VAL_SEP)7227$(ENTRY_SEP)FMA:$(KEY_VAL_SEP)9606$(ENTRY_SEP)HAO:$(KEY_VAL_SEP)7399$(ENTRY_SEP)MA:$(KEY_VAL_SEP)10090$(ENTRY_SEP)MFO:$(KEY_VAL_SEP)8089$(ENTRY_SEP)SPD:$(KEY_VAL_SEP)6893$(ENTRY_SEP)TADS:$(KEY_VAL_SEP)6939$(ENTRY_SEP)TAO:$(KEY_VAL_SEP)7955$(VAL_SEP)99883$(ENTRY_SEP)TGMA:$(KEY_VAL_SEP)44484$(ENTRY_SEP)WBbt:$(KEY_VAL_SEP)6239$(ENTRY_SEP)ZFA:$(KEY_VAL_SEP)7955$(ENTRY_SEP)EHDAA:$(KEY_VAL_SEP)9606$(ENTRY_SEP)EV:$(KEY_VAL_SEP)9606$(ENTRY_SEP)CEPH:$(KEY_VAL_SEP)6605$(ENTRY_SEP)CTENO:$(KEY_VAL_SEP)10197$(ENTRY_SEP)PORO:$(KEY_VAL_SEP)6040
# Stage taxon constraints: file name, and path in the Uberon output directory
STAGETAXONCONSTRAINTSFILE := stageTaxonConstraints.tsv
STAGETAXONCONSTRAINTSFILEPATH := $(UBERON_OUTPUT_PATH)$(STAGETAXONCONSTRAINTSFILE)
# Ontology term IDs (roots of the subgraphs to ignore, root of the cell types)
ROOTS_OF_SUBGRAPHS_TO_IGNORE := NCBITaxon:1
CELL_TYPE_ROOT_ID := GO:0005575
## Uberon
# Paths of the Uberon source ontologies
UBERON_EXT_FILE_PATH := $(UBERON_SOURCE_PATH)ext.owl
UBERON_COMPOSITE_FILE_PATH := $(UBERON_SOURCE_PATH)composite-metazoan.owl
# The Uberon ontology in use is the composite-metazoan one.
# UBERONFILE already holds a full path (it is kept as is for the Makefiles relying on it),
# so UBERONFILEPATH must not be prefixed with UBERON_SOURCE_PATH again: doing so
# produced '$(UBERON_SOURCE_PATH)$(UBERON_SOURCE_PATH)composite-metazoan.owl'.
UBERONFILE := $(UBERON_COMPOSITE_FILE_PATH)
UBERONFILEPATH := $(UBERONFILE)
# Custom version of Uberon: file name built from a prefix, path in the Uberon output directory
CUSTOM_UBERON_PREFIX := custom_composite
CUSTOM_UBERON_FILE := $(CUSTOM_UBERON_PREFIX).owl
CUSTOM_UBERON_FILE_PATH := $(UBERON_OUTPUT_PATH)$(CUSTOM_UBERON_FILE)
# Developmental stage ontology: file name built from a prefix, path in the Uberon source directory
DEV_STAGE_ONT_PREFIX := dev_stage_ontology
DEV_STAGE_ONT_FILE := $(DEV_STAGE_ONT_PREFIX).obo
DEV_STAGE_ONT_FILE_PATH := $(UBERON_SOURCE_PATH)$(DEV_STAGE_ONT_FILE)
## Strains
# Sub-directory of the source files, and the strain mapping file it contains
STRAINS             := Strains/
STRAIN_MAPPING_FILE := $(SOURCE_FILES_DIR)$(STRAINS)StrainMapping.tsv

## UberonSocketTool commands (run through the pipeline JAR)
# Each command is defined next to a port variable; the port is not part of the
# command and has to be appended by the caller.
# - list of the stages between a start and an end stage
#   (arguments left to provide: SPECIES_ID INBETWEENSTAGESPORT)
INBETWEENSTAGESPORT := 15444
INBETWEENSTAGES     := $(JAVA) UberonSocketTool stageRange $(DEV_STAGE_ONT_FILE_PATH) $(TAXONOMYFILEPATH) $(STAGETAXONCONSTRAINTSFILEPATH)
# - mapping of IDs to Uberon (argument left to provide: IDMAPPINGPORT)
#   Note: it uses our custom version of Uberon, with only the terms included in Bgee
IDMAPPINGPORT       := 14555
IDMAPPING           := $(JAVA) UberonSocketTool idMapping $(CUSTOM_UBERON_FILE_PATH)
# - same idMapping action, on the developmental stage ontology
STGMAPPINGPORT      := 13222
STGMAPPING          := $(JAVA) UberonSocketTool idMapping $(DEV_STAGE_ONT_FILE_PATH)
# =========== SIMILARITY ANNOTATION FILES ===============
# Both files are located in the 'annotations' directory of the source files
RAW_SIMILARITY_FILE := $(SOURCE_FILES_DIR)annotations/raw_similarity_annotations.tsv
SUMMARY_SIMILARITY_FILE := $(SOURCE_FILES_DIR)annotations/summary_similarity_annotations.tsv
# =========== EST FILES ===============
# (no variable defined in this section)
# =========== AFFYMETRIX FILES ===============
# Sub-directory name used under both the source and the generated files
AFFYPATH := Affymetrix/
# Naming pattern below: X and X_WORM are source files, X_FULL is a generated file
AFFY_CHIP_FILEPATH := $(SOURCE_FILES_DIR)$(AFFYPATH)affymetrixChip.tsv
AFFY_CHIP_FILEPATH_WORM := $(SOURCE_FILES_DIR)$(AFFYPATH)affymetrixChip_worm.tsv
AFFY_CHIP_FILEPATH_FULL := $(GENERATED_FILES_DIR)$(AFFYPATH)affymetrixChip_full.tsv
AFFY_CHIPINFO_FILEPATH := $(GENERATED_FILES_DIR)$(AFFYPATH)affymetrixChipInformation
MICROARRAY_EXPERIMENT_FILEPATH := $(SOURCE_FILES_DIR)$(AFFYPATH)microarrayExperiment.tsv
MICROARRAY_EXPERIMENT_FILEPATH_WORM := $(SOURCE_FILES_DIR)$(AFFYPATH)microarrayExperiment_worm.tsv
MICROARRAY_EXPERIMENT_FILEPATH_FULL := $(GENERATED_FILES_DIR)$(AFFYPATH)microarrayExperiment_full.tsv
AFFY_CHIPTYPE_FILEPATH := $(SOURCE_FILES_DIR)$(AFFYPATH)chipType.tsv
AFFY_CHIPTYPE_FILEPATH_WORM := $(SOURCE_FILES_DIR)$(AFFYPATH)chipType_worm.tsv
AFFY_CHIPTYPE_FILEPATH_FULL := $(GENERATED_FILES_DIR)$(AFFYPATH)chipType_full.tsv
# Generated files whose names carry no extension
AFFY_CHIPTYPEQUAL_FILEPATH := $(GENERATED_FILES_DIR)$(AFFYPATH)chipTypeCorrespondencesAndQualityThresholds
AFFY_NORMTYPE_FILEPATH := $(GENERATED_FILES_DIR)$(AFFYPATH)normalizationType
AFFY_DETCTYPE_FILEPATH := $(GENERATED_FILES_DIR)$(AFFYPATH)detectionType
# Affymetrix data path (absolute path, versioned by major release only)
#AFFYDATAPATH := $(CLUSTER_DATA_PATH)/Affymetrix_v$(MAJOR_RELEASE)/
AFFYDATAPATH := /var/bgee/extra/pipeline/Affymetrix_v$(MAJOR_RELEASE)/
AFFYNEWDATAPATH := $(AFFYDATAPATH)new_files/
# NOTE(review): absolute path inside one user's home directory; confirm it is valid on the annotator machine in use.
AFFYANNOTATORPATH := /Users/anikneja/Documents/bgee/extra/pipeline/curation/Affymetrix/
# For the data types below: X is under AFFYDATAPATH, XNEW under AFFYNEWDATAPATH, XANN under AFFYANNOTATORPATH
# Cel file path
CELPATH := $(AFFYDATAPATH)cel_data/
CELNEWPATH := $(AFFYNEWDATAPATH)cel_data/
CELANNPATH := $(AFFYANNOTATORPATH)cel_data/
# MAS5 file path
MAS5PATH := $(AFFYDATAPATH)processed_mas5/
MAS5NEWPATH := $(AFFYNEWDATAPATH)processed_mas5/
MAS5ANNPATH := $(AFFYANNOTATORPATH)processed_mas5/
# MAS5 original file path
MAS5ORIPATH := $(AFFYDATAPATH)processed_mas5_original_files/
MAS5ORINEWPATH := $(AFFYNEWDATAPATH)processed_mas5_original_files/
MAS5ORIANNPATH := $(AFFYANNOTATORPATH)processed_mas5_original_files/
# SCHUSTER file path
SCHUSTERPATH := $(AFFYDATAPATH)processed_schuster/
# Differential file path
DIFFEXPRPATH := $(AFFYDATAPATH)processed_differential/
# Bioconductor file path, and its sub-directories
BIOCONDUCTOR := $(AFFYDATAPATH)bioconductor/
BIOCONDUCTOROUT := $(BIOCONDUCTOR)out/
BIOCONDUCTORAFFIN := $(BIOCONDUCTOR)affinities/
BIOCONDUCTORDIFF := $(BIOCONDUCTOR)differential/
BIOCONDUCTORTARG := $(BIOCONDUCTOR)targets/
# Annotation file path
ANNOTATIONPATH := $(AFFYDATAPATH)annotations/
# Directory of the probeset-to-gene mapping files (no trailing '/', unlike the directories above)
PROBSETMAPPINGDIR := $(GENERATED_FILES_DIR)$(AFFYPATH)biomart
# =========== RNA_Seq FILES ===============
# Sub-directory name used under both the source and the generated files
RNASEQPATH := RNA_Seq/
EXTRAMAPPING_FILEPATH := $(SOURCE_FILES_DIR)$(RNASEQPATH)mapToUberon.tsv
# Naming pattern below: X and X_WORM are source files, X_FULL is a generated file
RNASEQ_EXPERIMENT_FILEPATH := $(SOURCE_FILES_DIR)$(RNASEQPATH)RNASeqExperiment.tsv
RNASEQ_EXPERIMENT_FILEPATH_WORM := $(SOURCE_FILES_DIR)$(RNASEQPATH)RNASeqExperiment_worm.tsv
RNASEQ_EXPERIMENT_FILEPATH_FULL := $(GENERATED_FILES_DIR)$(RNASEQPATH)RNASeqExperiment_full.tsv
RNASEQ_LIB_FILEPATH := $(SOURCE_FILES_DIR)$(RNASEQPATH)RNASeqLibrary.tsv
RNASEQ_LIB_FILEPATH_WORM := $(SOURCE_FILES_DIR)$(RNASEQPATH)RNASeqLibrary_worm.tsv
RNASEQ_LIB_FILEPATH_FULL := $(GENERATED_FILES_DIR)$(RNASEQPATH)RNASeqLibrary_full.tsv
RNASEQ_LIB_EXCLUSION_FILEPATH_WORM := $(SOURCE_FILES_DIR)$(RNASEQPATH)RNASeqLibrary_worm_exclusion.tsv
RNASEQ_LIB_CHECKS_FILEPATH := $(SOURCE_FILES_DIR)$(RNASEQPATH)RNASeqLibraryPlatformChecks.tsv
RNASEQ_SAMPINFO_FILEPATH := $(GENERATED_FILES_DIR)$(RNASEQPATH)rna_seq_sample_info.txt
RNASEQ_SAMPEXCLUDED_FILEPATH := $(GENERATED_FILES_DIR)$(RNASEQPATH)rna_seq_sample_excluded.txt
RNASEQ_BIOTYPE_EXCLUDED_FILEPATH := $(SOURCE_FILES_DIR)$(RNASEQPATH)biotypes_excluded_absent_calls.tsv
RNASEQ_LENGTH_INFO_FILEPATH := $(GENERATED_FILES_DIR)$(RNASEQPATH)rna_seq_length_info.txt
RNASEQ_ALREADY_DOWNLOADED := $(GENERATED_FILES_DIR)$(RNASEQPATH)rna_seq_sample_downloaded.txt
RNASEQ_GLOBIN_FILEPATH := $(SOURCE_FILES_DIR)$(RNASEQPATH)globin_info_per_species.tsv
RNASEQ_BGEECALL_FILE := $(GENERATED_FILES_DIR)$(RNASEQPATH)bgeecall_input.tsv
# RNAseq cluster paths
# TODO clean-up and remove unused ones
# NOTE: CLUSTER_CURNAGL_PATH and RNASEQ_DOWNLOAD_LIB_DIR both end with '/', so the two values below contain '//'
RNASEQ_DOWNLOAD_LIB_DIR := $(CLUSTER_CURNAGL_PATH)/downloads/
RNASEQ_DOWNLOAD_LIB_DIR_FASTQ := $(RNASEQ_DOWNLOAD_LIB_DIR)/FASTQ/RNAseq/
RNASEQ_CLUSTER_READONLY := $(CLUSTER_DATA_PATH)/
RNASEQ_CLUSTER_SCRIPTS := $(RNASEQ_CLUSTER_READONLY)GIT/pipeline/
# Directory names below are versioned either by MAJOR_RELEASE or by DBNAME
RNASEQ_DOWNLOAD_GTF := $(RNASEQ_DOWNLOAD_LIB_DIR)GTF_$(MAJOR_RELEASE)/
RNASEQ_CLUSTER_GTF := $(RNASEQ_CLUSTER_READONLY)rna_seq/GTF_$(MAJOR_RELEASE)/
RNASEQ_CLUSTER_ALL_RES_BACKUP := $(RNASEQ_CLUSTER_READONLY)rna_seq/all_results_$(DBNAME)/
RNASEQ_CLUSTER_SCRATCH := $(CLUSTER_SCRATCH_RESULTS)rna_seq/
RNASEQ_CLUSTER_ABUNDANCE_ALL := $(RNASEQ_CLUSTER_SCRATCH)abundance_all_intergenic_$(DBNAME)/
RNASEQ_CLUSTER_SUM_RES := $(RNASEQ_CLUSTER_SCRATCH)sum_by_species_$(MAJOR_RELEASE)/
# NOTE: despite its name, this file is located under OUTPUT_DIR, not under a cluster path
RNASEQ_CLUSTER_GAUSSIAN_CHOICE := $(OUTPUT_DIR)gaussian_choice_by_species.txt
RNASEQ_CLUSTER_PRESENCE_RES := $(RNASEQ_CLUSTER_SCRATCH)presence_absence_$(DBNAME)/
# Logs go to the same directory as the abundance results
RNASEQ_CLUSTER_LOG := $(RNASEQ_CLUSTER_ABUNDANCE_ALL)
RNASEQ_CLUSTER_R_LOG := $(RNASEQ_CLUSTER_SCRATCH)Rout/
RNASEQ_CLUSTER_SBATCH := $(RNASEQ_CLUSTER_SCRATCH)sbatch/
RNASEQ_CLUSTER_REPORT_ALL := $(RNASEQ_CLUSTER_ABUNDANCE_ALL)reports_info_all_samples.txt
RNASEQ_CLUSTER_BGEECALL_OUTPUT := $(RNASEQ_CLUSTER_SCRATCH)calls/
RNASEQ_CLUSTER_BGEECALL_CALLS := $(RNASEQ_CLUSTER_BGEECALL_OUTPUT)all_results/
RNASEQ_CLUSTER_CALLS_STATS := $(RNASEQ_CLUSTER_BGEECALL_CALLS)presence_absence_all_samples.txt
RNASEQ_CLUSTER_KALLISTO_STATS := $(RNASEQ_CLUSTER_BGEECALL_CALLS)reports_info_all_samples.txt
ENCRYPT_PASSWD_FILE := $(RNASEQ_CLUSTER_READONLY).passw
# RNAseq sensitive cluster path
RNASEQ_SENSITIVE_FASTQ := $(RNASEQ_CLUSTER_READONLY)FASTQ/RNAseq/
# Intergenic folders
CLUSTER_REF_INTERGENIC_FOLDER := $(RNASEQ_CLUSTER_SCRATCH)ref_intergenic_$(MAJOR_RELEASE)/
CLUSTER_OTHER_INTERGENIC_FOLDER := $(RNASEQ_CLUSTER_SCRATCH)other_intergenic_$(MAJOR_RELEASE)/
# RNAseq kallisto index kmer size
RNASEQ_KALLISTO_KMER_DEFAULT := 31
RNASEQ_KALLISTO_KMER_SHORT := 15
# Max proportion of Ns allowed in intergenic sequences
INTERGENIC_N_PROPORTION := 0.05
# RNAseq data path
# NOTE: TMPDIR as defined in this file ends with '/', so this value contains '//'
RNASEQDATAPATH := $(TMPDIR)/RNA_Seq/
ABUNDANCEFILE := gene_level_abundance+calls.tsv
RNASEQALLRES := $(RNASEQDATAPATH)rna_seq_all_results_$(DBNAME)/
SC_RNASEQ_ALL_RES_FL := $(RNASEQDATAPATH)full_length_all_results_$(DBNAME)/
# Logs go to the same directory as the results
RNASEQLOGS := $(RNASEQALLRES)
RNASEQSAMPSTATS := $(GENERATED_FILES_DIR)$(RNASEQPATH)presence_absence_all_samples.txt
RNASEQREPORTINFO := $(GENERATED_FILES_DIR)$(RNASEQPATH)reports_info_all_samples.txt
# Differential file path
#RNASEQDIFFEXPRPATH := $(RNASEQDATAPATH)processed_differential_$(DBNAME)/
#RNASEQDIFFEXPRPATH_DEVANDANAT := $(RNASEQDIFFEXPRPATH)devAndAnat/
#RNASEQDIFFEXPRPATH_SEX := $(RNASEQDIFFEXPRPATH)sex/
# Bioconductor file path
RNASEQBIOCONDUCTOR := $(RNASEQDATAPATH)bioconductor_$(DBNAME)/
#XXX is it used anymore???
#RNASEQBIOCONDUCTORDIFF := $(RNASEQBIOCONDUCTOR)differential/
#RNASEQBIOCONDUCTORTARG := $(RNASEQBIOCONDUCTOR)targets/
#RNASEQBIOCONDUCTORTARG_SEX := $(RNASEQBIOCONDUCTORTARG)sex/
#RNASEQBIOCONDUCTORTARG_DEVANDANAT := $(RNASEQBIOCONDUCTORTARG)devAndAnat/
# TMM normalization files
RNASEQTMMTARG := $(RNASEQBIOCONDUCTOR)targets_TMM/
RNASEQTMMPATH := $(RNASEQDATAPATH)processed_TMM_$(DBNAME)/
# =========== SINGLE CELL RNA_Seq FILES ===============
# scRNASeq github file paths in the source directory
SC_RNASEQ_PATH                   := scRNA_Seq/
SC_RNASEQ_PATH_SOURCE            := $(SOURCE_FILES_DIR)$(SC_RNASEQ_PATH)
SC_RNASEQ_ACCEPTED_PROTOCOLS     := $(SC_RNASEQ_PATH_SOURCE)acceptedProtocols.tsv

### Full_length (FL)
# FL and TB annotations are in the same file; they are nevertheless kept in
# separate directories in case only one of the two single-cell pipelines is
# run for a given Bgee release.
SC_RNASEQ_FL_FOLDER_SOURCE       := $(SC_RNASEQ_PATH_SOURCE)Full_length/
SC_RNASEQ_LIB_FL_FILEPATH        := $(SC_RNASEQ_FL_FOLDER_SOURCE)scRNASeqLibrary_merged.tsv
SC_RNASEQ_EXP_FL_FILEPATH        := $(SC_RNASEQ_FL_FOLDER_SOURCE)scRNASeqExperiment.tsv

### Target_based (TB)
# Same remark as for Full_length: annotation files are duplicated per
# directory so that each pipeline can be run on its own.
SC_RNASEQ_TB_FOLDER_SOURCE       := $(SC_RNASEQ_PATH_SOURCE)Target_based/
SC_RNASEQ_LIB_TB_FILEPATH        := $(SC_RNASEQ_TB_FOLDER_SOURCE)scRNASeqLibrary_merged.tsv
SC_RNASEQ_EXP_TB_FILEPATH        := $(SC_RNASEQ_TB_FOLDER_SOURCE)scRNASeqExperiment.tsv
SC_RNASEQ_MANIFEST_FILE_FILEPATH := $(SC_RNASEQ_TB_FOLDER_SOURCE)Manifest_file.tsv
# Zipped barcode whitelists, one per 10X version (v2, v3)
SC_RNASEQ_WHITELIST_10X_v2       := $(SC_RNASEQ_TB_FOLDER_SOURCE)barcode_whitelist_10X_v2.txt.zip
SC_RNASEQ_WHITELIST_10X_v3       := $(SC_RNASEQ_TB_FOLDER_SOURCE)barcode_whitelist_10X_v3.txt.zip
# scRNASeq github file paths in the generated_files directory
SC_RNASEQ_PATH_GENERATED                  := $(GENERATED_FILES_DIR)$(SC_RNASEQ_PATH)
SC_RNASEQ_DOWNLOADED_LIB_FILEPATH         := $(SC_RNASEQ_PATH_GENERATED)librariesDownloaded.tsv

### Full_length
SC_RNASEQ_FL_FOLDER_GENERATED             := $(SC_RNASEQ_PATH_GENERATED)Full_length/
SC_RNASEQ_LIB_PASS_FILEPATH               := $(SC_RNASEQ_FL_FOLDER_GENERATED)passScRNASeqLibrary.tsv
SC_RNASEQ_LIB_NOT_PASS_FILEPATH           := $(SC_RNASEQ_FL_FOLDER_GENERATED)notPassScRNASeqLibrary.tsv
SC_RNASEQ_METADATA_FILEPATH               := $(SC_RNASEQ_FL_FOLDER_GENERATED)metadata_info.tsv
SC_RNASEQ_METADATA_NOT_MATCH_FILEPATH     := $(SC_RNASEQ_FL_FOLDER_GENERATED)metadata_info_not_match.tsv
SC_RNASEQ_SAMPINFO_FILEPATH               := $(SC_RNASEQ_FL_FOLDER_GENERATED)scrna_seq_sample_info.tsv
SC_RNASEQ_SAMPINFO_PASS_FILEPATH          := $(SC_RNASEQ_FL_FOLDER_GENERATED)NEW_scRNASeq_sample_info.tsv
SC_RNASEQ_SAMPINFO_NOT_PASS_FILEPATH      := $(SC_RNASEQ_FL_FOLDER_GENERATED)Discard_scRNASeq_sample_info.tsv
SC_RNASEQ_MODALITY_FILEPATH               := $(SC_RNASEQ_FL_FOLDER_GENERATED)Modality_Cell_type_per_experiment.tsv
SC_RNASEQ_BGEECALL_FILE_FULL_LENGTH       := $(SC_RNASEQ_FL_FOLDER_GENERATED)bgeecall_input.tsv
SC_RNASEQ_SAMP_STATS_FL                   := $(SC_RNASEQ_FL_FOLDER_GENERATED)presence_absence_all_samples.txt
SC_RNASEQ_REPORT_INFO_FL                  := $(SC_RNASEQ_FL_FOLDER_GENERATED)reports_info_all_samples.txt

### Target_based
SC_RNASEQ_TB_FOLDER_GENERATED             := $(SC_RNASEQ_PATH_GENERATED)Target_based/
SC_RNASEQ_TB_DOWNLOADED_LIB_FILEPATH      := $(SC_RNASEQ_TB_FOLDER_GENERATED)librariesDownloaded.tsv
SC_RNASEQ_EXP_TB_FILEPATH_FILTERED        := $(SC_RNASEQ_TB_FOLDER_GENERATED)scRNASeqExperiment.tsv
SC_RNASEQ_LIB_TB_FILEPATH_FILTERED        := $(SC_RNASEQ_TB_FOLDER_GENERATED)scRNASeqLibrary_merged.tsv
SC_RNASEQ_CLEANED_BARCODES                := $(SC_RNASEQ_TB_FOLDER_GENERATED)cleaned_barcodes/
SC_RNASEQ_METADATA_10X_FILEPATH           := $(SC_RNASEQ_TB_FOLDER_GENERATED)metadata_info_10X.txt
SC_RNASEQ_METADATA_10X_NOT_MATCH_FILEPATH := $(SC_RNASEQ_TB_FOLDER_GENERATED)metadata_notMatch_10X.txt
# NOTE(review): this file sits directly in the scRNA_Seq generated folder,
# not in Target_based/ -- left unchanged, confirm this is intended.
SC_RNASEQ_SAMPINFO_10X_FILEPATH           := $(SC_RNASEQ_PATH_GENERATED)scRNA_Seq_info_TargetBased.txt
# The target-based report lives in the Target_based folder. It previously
# pointed at the Full_length folder, which made it the exact same path as
# SC_RNASEQ_REPORT_INFO_FL, so the two reports could not coexist.
SC_RNASEQ_REPORT_INFO_TB                  := $(SC_RNASEQ_TB_FOLDER_GENERATED)reports_info_all_samples.txt
# scRNAseq cluster paths (CURNAGL): download area and its per-protocol folders
SC_RNASEQ_DOWNLOAD_PATH             := $(CLUSTER_CURNAGL_PATH)/downloads/
SC_RNASEQ_DOWNLOAD_PATH_LIB_DIR     := $(SC_RNASEQ_DOWNLOAD_PATH)scRNA_Seq_All/
SC_RNASEQ_DOWNLOAD_PATH_FULL_LENGTH := $(SC_RNASEQ_DOWNLOAD_PATH_LIB_DIR)scRNASeq_libraries_FullLength/
SC_RNASEQ_DOWNLOAD_PATH_DROPLET     := $(SC_RNASEQ_DOWNLOAD_PATH_LIB_DIR)scRNASeq_libraries_Droplet_10X/

# scRNAseq cluster software
# NOTE(review): hard-coded absolute path inside a user directory.
SCRNASEQ_SOFTWARE_BAMTOFASTQ        := /users/jwollbre/tools/bamtofastq_linux
# scRNAseq cluster paths (SENSITIVE)
## FASTQ locations for full-length and 10X data
SC_RNASEQ_FASTQ_FULL_LENGTH           := $(CLUSTER_DATA_PATH)/FASTQ/scRNAseq/Full_length/
SC_RNASEQ_FASTQ_DROPLET               := $(CLUSTER_DATA_PATH)/FASTQ/scRNAseq/10X/

## Root for the results of both single-cell pipelines; the path embeds $(DBNAME)
SC_RNASEQ_CLUSTER_ALL_RES             := $(CLUSTER_DATA_PATH)/$(DBNAME)/scRNA-Seq_all_results/

## Result folders specific to the full-length protocols
SC_RNASEQ_CLUSTER_ALL_RES_FULL_LENGTH := $(SC_RNASEQ_CLUSTER_ALL_RES)FULL_LENGTH/
SC_RNASEQ_CLUSTER_BGEECALL            := $(SC_RNASEQ_CLUSTER_ALL_RES_FULL_LENGTH)BgeeCall/
SC_RNASEQ_FL_CLUSTER_R_LOG            := $(SC_RNASEQ_CLUSTER_ALL_RES_FULL_LENGTH)Rout/
SC_RNASEQ_CLUSTER_KALLISTO            := $(SC_RNASEQ_CLUSTER_ALL_RES_FULL_LENGTH)Kallisto_All_Cells/
SC_RNASEQ_CLUSTER_QC_CELLPOP_RES      := $(SC_RNASEQ_CLUSTER_ALL_RES_FULL_LENGTH)Quality_Control_Cell_Population/
SC_RNASEQ_CLUSTER_CALL_PRESENT        := $(SC_RNASEQ_CLUSTER_ALL_RES_FULL_LENGTH)scRNAseq_Callpresent/

# scRNASeq files located inside the full-length result folders above
SC_RNASEQ_MODALITY_CELL_POP           := $(SC_RNASEQ_CLUSTER_QC_CELLPOP_RES)Modality_Cell_type_per_experiment.tsv
SC_RNASEQ_CLUSTER_CALLS_STATS         := $(SC_RNASEQ_CLUSTER_BGEECALL)presence_absence_all_samples.txt
SC_RNASEQ_CLUSTER_KALLISTO_STATS      := $(SC_RNASEQ_CLUSTER_BGEECALL)reports_info_all_samples.txt
## Specific paths to results of target-based protocols
# SC_RNASEQ_TB_FOLDER_GENERATED already ends with '/', so the file name is
# appended directly. An extra '/' would yield "Target_based//stats_libraries.txt",
# which Make treats as a different target name than the single-slash spelling.
SC_RNASEQ_STATS_LIBRARIES_10X          := $(SC_RNASEQ_TB_FOLDER_GENERATED)stats_libraries.txt

## on sensitive cluster
SC_RNASEQ_CLUSTER_ALL_RES_DROPLET      := $(SC_RNASEQ_CLUSTER_ALL_RES)DROPLET_10X/
SC_RNASEQ_CLUSTER_RES_KALLISTO_DROPLET := $(SC_RNASEQ_CLUSTER_ALL_RES_DROPLET)kallisto_bus_results/
SC_RNASEQ_CLUSTER_QC_CELLTYPE          := $(SC_RNASEQ_CLUSTER_ALL_RES_DROPLET)QC_CellType_identification/
# scRNASeq file paths (inside the droplet result folders above)
SC_RNASEQ_INFO_ALL_LIBRARIES_10X       := $(SC_RNASEQ_CLUSTER_QC_CELLTYPE)InformationAllLibraries.txt
SC_RNASEQ_CLUSTER_CALLS_10X            := $(SC_RNASEQ_CLUSTER_ALL_RES_DROPLET)calls/
SC_RNASEQ_CLUSTER_INFO_CALLS_10X       := $(SC_RNASEQ_CLUSTER_CALLS_10X)All_cellPopulation_stats_10X.tsv

## On Bgee server: same DROPLET_10X sub-folders, rooted at $(TMPDIR)
SC_RNASEQ_SERVER_RES_KALLISTO_DROPLET  := $(TMPDIR)/DROPLET_10X/kallisto_bus_results/
SC_RNASEQ_SERVER_CALLS_10X             := $(TMPDIR)/DROPLET_10X/calls/
SC_RNASEQ_SERVER_INFO_CALLS_10X        := $(SC_RNASEQ_SERVER_CALLS_10X)All_cellPopulation_stats_10X.tsv
# Minimum number of cells in a full-length library
MIN_CELLS_PER_LIBRARY_FULL_LENGTH := 50

# p-value used as cutoff
PVALUE_CUTOFF                     := 0.05
# =========== DOWNLOAD FILES ===============
# Root of the directory used to store generated download files
DOWNLOAD_FILE_ROOT_PATH       := $(TMPDIR)/download_files/
# Other output locations rooted at $(TMPDIR)
XREFS_PATH                    := $(TMPDIR)/xrefs/
EASYBGEE_PATH                 := $(TMPDIR)/$(DBNAME_EASYBGEE)/
R_SPECIES_INFO                := $(TMPDIR)/rPackageSpeciesInfo.tsv

# Paths of the different file types, relative to DOWNLOAD_FILE_ROOT_PATH
# NOTE(review): CALLS_PATH already ends with expr_calls/, so EXPR_CALLS_PATH
# expands to calls/expr_calls/expr_calls/ and the diff / multi-species paths
# end up nested below expr_calls/ as well -- confirm this layout is intended.
CALLS_PATH                    := calls/expr_calls/
H5AD_PATH                     := h5ad/
EXPR_CALLS_PATH               := $(CALLS_PATH)expr_calls/
DIFF_EXPR_CALLS_PATH          := $(CALLS_PATH)diff_expr_calls/
MULTI_SPE_DIFF_EXPR_CALL_PATH := $(CALLS_PATH)multi_species_diff_expr_calls/
ORTHOLOGS_PATH                := orthologs/
# NOTE(review): PROC_EXPR_VALUES_PATH and EXPR_VALUES_PATH hold the same value.
PROC_EXPR_VALUES_PATH         := processed_expr_values/
EXPR_VALUES_PATH              := processed_expr_values/
RNA_SEQ_EXPR_VALUES_PATH      := $(EXPR_VALUES_PATH)rna_seq/
AFFYMETRIX_EXPR_VALUES_PATH   := $(EXPR_VALUES_PATH)affymetrix/
# File suffixes
## expression call files
EXPR_SIMPLE_SUF                          := _expr_simple.tsv
EXPR_SIMPLE_DEV_SUF                      := _expr_simple_development.tsv
EXPR_COMPLETE_SUF                        := _expr_advanced.tsv
EXPR_COMPLETE_DEV_SUF                    := _expr_advanced_development.tsv

## differential expression call files
DIFF_EXPR_ANATOMY_SIMPLE_SUF             := _diffexpr-anatomy-simple.tsv
DIFF_EXPR_ANATOMY_COMPLETE_SUF           := _diffexpr-anatomy-complete.tsv
DIFF_EXPR_DEV_SIMPLE_SUF                 := _diffexpr-development-simple.tsv
DIFF_EXPR_DEV_COMPLETE_SUF               := _diffexpr-development-complete.tsv

## orthologs and multi-species differential expression files
ORTHOLOGS_SUF                            := _orthologs.tsv
MULTI_SPE_DIFF_EXPR_ANATOMY_SIMPLE_SUF   := _multi-diffexpr-anatomy-simple.tsv
MULTI_SPE_DIFF_EXPR_ANATOMY_COMPLETE_SUF := _multi-diffexpr-anatomy-complete.tsv

## zip archives (RNA-Seq and Affymetrix)
RNA_SEQ_EXP_LIB_SUF_ZIP                  := _RNA-Seq_experiments_libraries.zip
RNA_SEQ_COUNT_SUF_ZIP                    := _RNA-Seq_read_counts_TPM_FPKM.zip
AFFYMETRIX_EXP_CHIP_SUF_ZIP              := _Affymetrix_experiments_chips.zip
AFFYMETRIX_PROBESET_SUF_ZIP              := _Affymetrix_probesets.zip
# FTP information: host name, then the directory chain
# root -> per-database folder (named after $(DBNAME)) -> download folder
FTP_URL      := bgeeftp.unil.ch
FTP_ROOT     := /u01/ftp/
FTP_CURRENT  := $(FTP_ROOT)$(DBNAME)/
FTP_DOWNLOAD := $(FTP_CURRENT)download/
# =========== RANK DOWNLOAD FILES ===============
# Rank directories, related (presumably relative) to $(OUTPUT_DIR)
RANKS_PER_ANAT      := ranks/anat_entity/
RANKS_PER_CONDITION := ranks/condition/
# =========== TRIPLE STORE FILES ===============
# Connection template, read from $(INPUT_DIR)
ONTOP_BGEE_PROP_TEMPLATE := $(INPUT_DIR)bgee_connection.template

# Remote resources
GENEX_ONTOLOGY_URL       := https://raw.githubusercontent.com/biosoda/genex/master/genex_reduced_subsumption.ttl
GENEX_MAPPING_URL        := https://raw.githubusercontent.com/biosoda/bioquery/master/Bgee_OBDA_mappings/bgee_v15_1_genex.obda
ONTOP_DOWNLOAD_URL       := https://netcologne.dl.sourceforge.net/project/ontop4obda/ontop-5.1.0/ontop-cli-5.1.0.zip
ONTOP_MYSQL_JDBC_URL     := https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.48/mysql-connector-java-5.1.48.jar

# Bare file names (no directory component)
ONTOP_DOWNLOAD_FILE      := ontop.zip
GENEX_ONTOLOGY_FILE      := genex.owl
GENEX_MAPPING_FILE       := bgee_genex.obda

# Local installation paths and Virtuoso endpoint (host:port)
ONTOP_INSTALL_DIR        := /usr/local/ontop/
ISQL_PATH                := /usr/local/virtuoso-open-source/bin/isql
VIRTUOSO_HOST            := localhost:1111

# TTL output: the directory is $(OUTPUT_DIR) itself; the files below live in it
TTL_DIRECTORY_FULL_PATH  := $(OUTPUT_DIR)
TTL_FILE_LIST            := $(TTL_DIRECTORY_FULL_PATH)ttl_files.txt
BGEE_VIRTUOSO_FILE       := $(TTL_DIRECTORY_FULL_PATH)add_bgee_virtuoso.sql
ZIP_TTL_FILE             := $(TTL_DIRECTORY_FULL_PATH)rdf_easybgee.zip

# Path of the zipped RDF dump under the per-database FTP folder
FTP_SAVE_DUMP_PATH       := $(FTP_CURRENT)rdf_easybgee.zip