Skip to content

Commit

Permalink
Merge pull request #142 from NAL-i5K/add-annotation
Browse files Browse the repository at this point in the history
Add functional annotations to gff files
  • Loading branch information
mpoelchau authored May 26, 2022
2 parents 677d79c + a193d47 commit e3b7029
Show file tree
Hide file tree
Showing 9 changed files with 194 additions and 36 deletions.
31 changes: 31 additions & 0 deletions add-annotation/add_annotation.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env cwl-runner

# Runs add_GO-KEGG_to_RefSeq-gff.pl with four positional file arguments
# (GO file, KEGG file, GFF file, table file, in that order) and captures the
# script's standard output as "<nameroot of in_gff>.annotated.gff".
cwlVersion: v1.2
class: CommandLineTool

baseCommand: add_GO-KEGG_to_RefSeq-gff.pl

inputs:
  # Each input is bound directly to its command-line position.
  in_GO:
    type: File
    inputBinding:
      position: 1
  in_KEGG:
    type: File
    inputBinding:
      position: 2
  in_gff:
    type: File
    inputBinding:
      position: 3
  in_table:
    type: File
    inputBinding:
      position: 4

# Name of the file that receives the tool's stdout.
stdout: $(inputs.in_gff.nameroot).annotated.gff

outputs:
  processed_gff:
    type: stdout


12 changes: 12 additions & 0 deletions final-workflow-short.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ gff_name: [
GCF_001298625.1_SEUB3.0_genomic.gff
]
gff_release_number: 100
#NCBI table file URL. If no functional annotation is available, enter 'NA' in the URL field.
url_table_file: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_feature_table.txt.gz
]
#GO file path. This path will be ignored if the table file URL is 'NA'. The file needs to be unzipped. With singularity, the local file path needs to be in the mounted /work-dir volume
path_GO:
class: File
path: /app/data/GCF_001298625.1_complete.gaf.tsv
#KEGG file path. This path will be ignored if the table file URL is 'NA'. The file needs to be unzipped. With singularity, the local file path needs to be in the mounted /work-dir volume
path_KEGG:
class: File
path: /app/data/GCF_001298625.1_KOBAS_acc_pathways.tsv
#-------------------------------------------------------------------------------
url_md5checksums: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/md5checksums.txt
Expand Down
56 changes: 44 additions & 12 deletions final-workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ inputs:
deepPATH_bigwig: string[]
organization: string
link_to_publication: string
url_table_file: string[]
path_GO: File?
path_KEGG: File?

steps:
#step1
Expand All @@ -48,14 +51,17 @@ steps:
path_transcript_fasta: path_transcript_fasta
url_cds_fasta: url_cds_fasta
path_cds_fasta: path_cds_fasta
url_table_file: url_table_file
out:
[OUT_md5checksums, #'*.txt'
OUT_genomic_fasta, #'*.gz'
OUT_genomic_gff, #'*.gz'
OUT_protein_fasta, #'*.gz'
OUT_transcript_fasta, #'*.gz'
OUT_cds_fasta, #'*.gz'
url_string]
OUT_table, #'*.gz'
url_string
]
#step2
md5checksums:
run: flow_md5checksums/workflow.cwl
Expand All @@ -66,6 +72,7 @@ steps:
in_protein_fasta: download/OUT_protein_fasta
in_transcript_fasta: download/OUT_transcript_fasta
in_cds_fasta: download/OUT_cds_fasta
in_table: download/OUT_table
path_genomic_fasta: path_genomic_fasta
path_genomic_gff: path_genomic_gff
path_protein_fasta: path_protein_fasta
Expand All @@ -77,17 +84,32 @@ steps:
url_protein_fasta: url_protein_fasta
url_transcript_fasta: url_transcript_fasta
url_cds_fasta: url_cds_fasta
url_table_file: url_table_file
out:
[
OUT_extract, #'*.txt2', extracted from *.txt
[OUT_extract, #'*.txt2', extracted from *.txt
OUT_check, #'*.log', log file for execution of md5sum -c
OUT_genomic_fasta, #'*.fa, '*.fna', '*.faa'
OUT_genomic_gff, #'*.gff', '*.gff3'
OUT_protein_fasta,
OUT_transcript_fasta,
OUT_cds_fasta
OUT_cds_fasta,
OUT_table
]
#step3
# Conditional step: runs add-annotation/add_annotation.cwl and exposes its
# processed_gff output to later steps.
add_annotation:
run: add-annotation/add_annotation.cwl
# The step is skipped when this expression evaluates to false.
# NOTE(review): url_table_file is declared as string[] in this workflow's
# inputs, so this compares an array with a string and relies on JavaScript
# loose-equality coercion; presumably only correct for a one-element list - confirm.
when: $(inputs.url_table_file != "NA")
in:
# Not an input of add_annotation.cwl itself; supplied so `when` can read it.
url_table_file: url_table_file
# NOTE(review): path_GO / path_KEGG are optional (File?) at workflow level but
# the tool declares in_GO / in_KEGG as File; presumably they must be provided
# whenever this step actually runs - confirm.
in_GO: path_GO
in_KEGG: path_KEGG
# Two candidate sources; pickValue selects the first one that is not null.
in_gff:
source: [md5checksums/OUT_genomic_gff, path_genomic_gff]
pickValue: first_non_null
in_table: md5checksums/OUT_table
out:
[processed_gff]
#step4
gaps_or_not:
run: gaps_or_not.cwl
in:
Expand All @@ -98,7 +120,7 @@ steps:
[gap_lines]
#verify:
#fasta_diff,gff3_QC......
#step4
#step5
apollo2_data_processing:
run: flow_apollo2_data_processing/processing/workflow.cwl
in:
Expand All @@ -110,8 +132,8 @@ steps:
in_fasta:
source: [md5checksums/OUT_genomic_fasta, path_genomic_fasta]
pickValue: first_non_null
in_gff:
source: [md5checksums/OUT_genomic_gff, path_genomic_gff]
in_gff:
source: [add_annotation/processed_gff, md5checksums/OUT_genomic_gff, path_genomic_gff]
pickValue: first_non_null
out:
[OUT_2bi,
Expand All @@ -124,7 +146,7 @@ steps:
OUT_trackList_json,
OUT_trackList_json_bak,
]
#step5
#step6
create_assembly_readme:
run: flow_create_readme/readme-assembly-workflow.cwl
in:
Expand All @@ -134,7 +156,7 @@ steps:
url_genomic_fasta: url_genomic_fasta
link_to_publication: link_to_publication
out: [readme_file]
#step6
#step7
create_genePrediction_readme:
run: flow_create_readme/readme-genePrediction-workflow.cwl
in:
Expand All @@ -145,8 +167,13 @@ steps:
url_cds_fasta: url_cds_fasta
url_transcript_fasta: url_transcript_fasta
link_to_publication: link_to_publication
original_gff:
source: [md5checksums/OUT_genomic_gff, path_genomic_gff]
pickValue: first_non_null
processed_gff: add_annotation/processed_gff
url_table_file: url_table_file
out: [readme_file]
#step7
#step8
dispatch:
run: flow_dispatch/workflow.cwl
in:
Expand All @@ -159,8 +186,8 @@ steps:
source: [md5checksums/OUT_genomic_fasta, path_genomic_fasta]
pickValue: first_non_null
deepPATH_analyses: deepPATH_analyses
in_genomic_gff:
source: [md5checksums/OUT_genomic_gff, path_genomic_gff]
in_genomic_gff:
source: [add_annotation/processed_gff, md5checksums/OUT_genomic_gff, path_genomic_gff]
pickValue: first_non_null
#
in_protein_fasta:
Expand Down Expand Up @@ -189,6 +216,11 @@ steps:
in_gc_bigwig: apollo2_data_processing/OUT_gc_bigwig
in_trackList_json: apollo2_data_processing/OUT_trackList_json
in_trackList_json_bak: apollo2_data_processing/OUT_trackList_json_bak
processed_gff: add_annotation/processed_gff
original_gff:
source: [md5checksums/OUT_genomic_gff, path_genomic_gff]
pickValue: first_non_null
url_table_file: url_table_file
out:
[out_dummy]

Expand Down
17 changes: 16 additions & 1 deletion flow_create_readme/readme-genePrediction-workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ inputs:
url_cds_fasta: string[]
url_transcript_fasta: string[]
link_to_publication: string
original_gff: File
processed_gff: File?
url_table_file: string[]

steps:
#step1 create_yml_File
Expand Down Expand Up @@ -69,12 +72,24 @@ steps:
out:
[out_readme_file]
#step5 write last line
writeLastLine:
# Two alternative final steps guarded by complementary `when` expressions
# (== "NA" versus != "NA" on the same value).
# NOTE(review): url_table_file is declared as string[] in this workflow, so both
# guards compare an array with a string via JavaScript loose equality;
# presumably intended for a one-element list - confirm.
writeLastLine_v1:
run: writeLastLine-genePred.cwl
# Runs when the table-file URL compares equal to "NA".
when: $(inputs.url_table_file == "NA")
in:
# Supplied so the `when` expression can read it.
url_table_file: url_table_file
readme_file: writeInfo/out_readme_file
out:
[out_readme_file]
writeLastLine_v2:
run: writeLastLine-genePred_annotation.cwl
# Runs when the table-file URL does not compare equal to "NA".
when: $(inputs.url_table_file != "NA")
in:
# Supplied so the `when` expression can read it.
url_table_file: url_table_file
readme_file: writeInfo/out_readme_file
# Both GFF inputs are passed through from this workflow's own inputs.
original_gff: original_gff
processed_gff: processed_gff
out:
[out_readme_file]

outputs:
readme_file:
Expand Down
31 changes: 31 additions & 0 deletions flow_create_readme/writeLastLine-genePred_annotation.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env cwl-runner

# Appends a closing paragraph to the readme naming the original GFF file and
# the post-processed (annotated) GFF file, then returns the updated readme.
#
# Inputs:
#   readme_file   - readme to extend; staged into the working directory.
#   original_gff  - only its basename is used, in the appended text.
#   processed_gff - only its basename is used, in the appended text.
# Output:
#   out_readme_file - the file "readme.txt" from the working directory.
cwlVersion: v1.2
class: CommandLineTool
baseCommand: [bash, -c]
requirements:
  - class: InlineJavascriptRequirement
  - class: InitialWorkDirRequirement
    listing:
      # Staged files are read-only by default, and the command below appends
      # in place, so request a writable copy. The fixed entryname guarantees
      # the staged file matches the "readme.txt" used by the command and the
      # output glob, whatever the input file is called.
      - entry: $(inputs.readme_file)
        entryname: readme.txt
        writable: true

inputs:
  readme_file:
    type: File
  original_gff:
    type: File
  processed_gff:
    type: File

arguments:
  # Single script string handed to `bash -c`; it appends to the staged readme.
  - position: 1
    valueFrom: "echo -e '\nThe file $(inputs.original_gff.basename) was post-processed to add functional annotations from the AgBase functional annotation pipeline (https://github.com/agbase). The resulting file is: $(inputs.processed_gff.basename). This file was used for all operations within the i5k Workspace.' >> readme.txt"

outputs:
  out_readme_file:
    type: File
    outputBinding:
      glob: "readme.txt"
19 changes: 16 additions & 3 deletions flow_dispatch/2other_species/workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ inputs:
deepPATH_genomic_fasta: string[]
in_genomic_fasta: File
deepPATH_analyses: string[]
in_genomic_gff: File
processed_gff: File?
original_gff: File
in_protein_fasta: File
in_transcript_fasta: File
in_cds_fasta: File
Expand All @@ -34,6 +35,7 @@ inputs:
deepPATH_bigwig: string[]
in_gaps_bigwig: File? # this will be null if there are no gaps
in_gc_bigwig: File
url_table_file: string[]

steps:
cp_genomic_fasta:
Expand All @@ -44,14 +46,25 @@ steps:
deepPATH: deepPATH_genomic_fasta
in_data: in_genomic_fasta
out: []
cp_genomic_gff:
cp_original_genomic_gff:
run: cp_file_4_annotation.cwl
in:
PATH: PATH
deepPATH_1: deepPATH_genomic_fasta
deepPATH_2: deepPATH_analyses
tree: tree
in_data: in_genomic_gff
in_data: original_gff
out: []
# Conditional step: runs cp_file_4_annotation.cwl on processed_gff, using the
# same PATH / deepPATH / tree inputs as cp_original_genomic_gff.
cp_annotated_genomic_gff:
run: cp_file_4_annotation.cwl
# Skipped when the expression is false.
# NOTE(review): url_table_file is declared as string[] in this workflow, so this
# compares an array with a string via JavaScript loose equality; presumably
# intended for a one-element list - confirm.
when: $(inputs.url_table_file != "NA")
in:
# Supplied so the `when` expression can read it.
url_table_file: url_table_file
PATH: PATH
deepPATH_1: deepPATH_genomic_fasta
deepPATH_2: deepPATH_analyses
tree: tree
# processed_gff is optional (File?) in this workflow's inputs.
in_data: processed_gff
# No outputs are collected from this step.
out: []
cp_protein_fasta:
run: cp_file_4_annotation.cwl
Expand Down
11 changes: 8 additions & 3 deletions flow_dispatch/workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ inputs:
deepPATH_bigwig: string[]
in_gaps_bigwig: File? # this will be null if there are no gaps
in_gc_bigwig: File
processed_gff: File?
original_gff: File
url_table_file: string[]

steps:
setup_folder:
Expand All @@ -60,7 +63,8 @@ steps:
deepPATH_genomic_fasta: deepPATH_genomic_fasta
in_genomic_fasta: in_genomic_fasta
deepPATH_analyses: deepPATH_analyses
in_genomic_gff: in_genomic_gff
processed_gff: processed_gff
original_gff: original_gff
in_protein_fasta: in_protein_fasta
in_transcript_fasta: in_transcript_fasta
in_cds_fasta: in_cds_fasta
Expand All @@ -78,11 +82,12 @@ steps:
deepPATH_bigwig: deepPATH_bigwig
in_gaps_bigwig: in_gaps_bigwig # this will be null if there are no gaps
in_gc_bigwig: in_gc_bigwig
url_table_file: url_table_file
out: []
#To working_files
2working_files:
run: 2working_files/workflow.cwl
when: $(inputs.url_string != "NA NA NA NA NA\n" )
when: $(inputs.url_string != "NA NA NA NA NA NA\n" )
in:
url_string: url_string
in_dummy: setup_folder/out_dummy
Expand Down Expand Up @@ -118,4 +123,4 @@ steps:
outputs:
out_dummy:
type: File
outputSource: setup_folder/out_dummy
outputSource: setup_folder/out_dummy
Loading

0 comments on commit e3b7029

Please sign in to comment.