diff --git a/main.nf b/main.nf index b465cc3..ea85b7d 100644 --- a/main.nf +++ b/main.nf @@ -23,7 +23,6 @@ * Evan Floden */ - /* * Define the default parameters */ @@ -34,7 +33,13 @@ params.blacklist = "$baseDir/data/blacklist.bed" params.reads = "$baseDir/data/reads/rep1_{1,2}.fq.gz" params.results = "results" params.gatk = '/usr/local/bin/GenomeAnalysisTK.jar' -params.gatk_launch = "java -jar $params.gatk" + + +/* + * Import modules + */ + +require 'modules.nf', params:[gatk: params.gatk, results: params.results] log.info """\ C A L L I N G S - N F v 1.0 @@ -51,7 +56,6 @@ gatk : $params.gatk * Parse the input parameters */ -GATK = params.gatk_launch genome_file = file(params.genome) variants_file = file(params.variants) blacklist_file = file(params.blacklist) @@ -64,160 +68,41 @@ reads_ch = Channel.fromFilePairs(params.reads) * Process 1A: Create a FASTA genome index (.fai) with samtools for GATK */ -process '1A_prepare_genome_samtools' { - tag "$genome.baseName" - - input: - file genome from genome_file - - output: - file "${genome}.fai" into genome_index_ch - - script: - """ - samtools faidx ${genome} - """ -} - +genome_index_ch = '1A_prepare_genome_samtools'(genome_file) /* * Process 1B: Create a FASTA genome sequence dictionary with Picard for GATK */ -process '1B_prepare_genome_picard' { - tag "$genome.baseName" - label 'mem_xlarge' - - input: - file genome from genome_file - output: - file "${genome.baseName}.dict" into genome_dict_ch - - script: - """ - PICARD=`which picard.jar` - java -jar \$PICARD CreateSequenceDictionary R= $genome O= ${genome.baseName}.dict - """ -} +genome_dict_ch = '1B_prepare_genome_picard'(genome_file) /* * Process 1C: Create STAR genome index file. */ -process '1C_prepare_star_genome_index' { - tag "$genome.baseName" - - input: - file genome from genome_file - output: - file "genome_dir" into genome_dir_ch - - script: - """ - mkdir genome_dir - - STAR --runMode genomeGenerate \ - --genomeDir genome_dir \ - --genomeFastaFiles ${genome} \ - --runThreadN ${task.cpus} - """ -} +genome_dir_ch = '1C_prepare_star_genome_index'(genome_file) /* * Process 1D: Create a file containing the filtered and recoded set of variants */ -process '1D_prepare_vcf_file' { - tag "$variantsFile.baseName" +prepared_vcf_ch = '1D_prepare_vcf_file'(variants_file, blacklist_file) - input: - file variantsFile from variants_file - file blacklisted from blacklist_file - - output: - set file("${variantsFile.baseName}.filtered.recode.vcf.gz"), file("${variantsFile.baseName}.filtered.recode.vcf.gz.tbi") into prepared_vcf_ch - - script: - """ - vcftools --gzvcf $variantsFile -c \ - --exclude-bed ${blacklisted} \ - --recode | bgzip -c \ - > ${variantsFile.baseName}.filtered.recode.vcf.gz - - tabix ${variantsFile.baseName}.filtered.recode.vcf.gz - """ -} - -/* - * END OF PART 1 - *********/ /********** - * PART 2: STAR RNA-Seq Mapping * * Process 2: Align RNA-Seq reads to the genome with STAR */ -process '2_rnaseq_mapping_star' { - tag "$replicateId" - - input: - file genome from genome_file - file genomeDir from genome_dir_ch - set replicateId, file(reads) from reads_ch - - output: - set replicateId, file('Aligned.sortedByCoord.out.bam'), file('Aligned.sortedByCoord.out.bam.bai') into aligned_bam_ch - - script: - """ - # ngs-nf-dev Align reads to genome - STAR --genomeDir $genomeDir \ - --readFilesIn $reads \ - --runThreadN ${task.cpus} \ - --readFilesCommand zcat \ - --outFilterType BySJout \ - --alignSJoverhangMin 8 \ - --alignSJDBoverhangMin 1 \ - --outFilterMismatchNmax 999 - - # 2nd pass 
(improve alignmets using table of splice junctions and create a new index) - mkdir genomeDir - STAR --runMode genomeGenerate \ - --genomeDir genomeDir \ - --genomeFastaFiles $genome \ - --sjdbFileChrStartEnd SJ.out.tab \ - --sjdbOverhang 75 \ - --runThreadN ${task.cpus} - - # Final read alignments - STAR --genomeDir genomeDir \ - --readFilesIn $reads \ - --runThreadN ${task.cpus} \ - --readFilesCommand zcat \ - --outFilterType BySJout \ - --alignSJoverhangMin 8 \ - --alignSJDBoverhangMin 1 \ - --outFilterMismatchNmax 999 \ - --outSAMtype BAM SortedByCoordinate \ - --outSAMattrRGline ID:$replicateId LB:library PL:illumina PU:machine SM:GM12878 - - # Index the BAM file - samtools index Aligned.sortedByCoord.out.bam - """ -} +aligned_bam_ch = '2_rnaseq_mapping_star'( genome_file, genome_dir_ch, reads_ch) -/* - * END OF PART 2 - ******/ /********** - * PART 3: GATK Prepare Mapped Reads * * Process 3: Split reads that contain Ns in their CIGAR string. * Creates k+1 new reads (where k is the number of N cigar elements) @@ -225,94 +110,22 @@ process '2_rnaseq_mapping_star' { * the splicing events represented by the Ns in the original CIGAR. */ -process '3_rnaseq_gatk_splitNcigar' { - tag "$replicateId" - label 'mem_large' - - input: - file genome from genome_file - file index from genome_index_ch - file genome_dict from genome_dict_ch - set replicateId, file(bam), file(index) from aligned_bam_ch - - output: - set replicateId, file('split.bam'), file('split.bai') into splitted_bam_ch - - script: - """ - # SplitNCigarReads and reassign mapping qualities - $GATK -T SplitNCigarReads \ - -R $genome -I $bam \ - -o split.bam \ - -rf ReassignOneMappingQuality \ - -RMQF 255 -RMQT 60 \ - -U ALLOW_N_CIGAR_READS \ - --fix_misencoded_quality_scores - """ -} - -/* - * END OF PART 3 - ******/ +splitted_bam_ch = '3_rnaseq_gatk_splitNcigar'(genome_file, genome_index_ch, genome_dict_ch, aligned_bam_ch) /*********** - * PART 4: GATK Base Quality Score Recalibration Workflow * * Process 4: Base recalibrate to detect systematic errors in base quality scores, * select unique alignments and index * */ -process '4_rnaseq_gatk_recalibrate' { - tag "$replicateId" - label 'mem_large' - - input: - file genome from genome_file - file index from genome_index_ch - file dict from genome_dict_ch - set replicateId, file(bam), file(index) from splitted_bam_ch - set file(variants_file), file(variants_file_index) from prepared_vcf_ch - - output: - set sampleId, file("${replicateId}.final.uniq.bam"), file("${replicateId}.final.uniq.bam.bai") into (final_output_ch, bam_for_ASE_ch) - - script: - sampleId = replicateId.replaceAll(/[12]$/,'') - """ - # Indel Realignment and Base Recalibration - $GATK -T BaseRecalibrator \ - --default_platform illumina \ - -cov ReadGroupCovariate \ - -cov QualityScoreCovariate \ - -cov CycleCovariate \ - -knownSites ${variants_file} \ - -cov ContextCovariate \ - -R ${genome} -I ${bam} \ - --downsampling_type NONE \ - -nct ${task.cpus} \ - -o final.rnaseq.grp - - $GATK -T PrintReads \ - -R ${genome} -I ${bam} \ - -BQSR final.rnaseq.grp \ - -nct ${task.cpus} \ - -o final.bam - - # Select only unique alignments, no multimaps - (samtools view -H final.bam; samtools view final.bam| grep -w 'NH:i:1') \ - |samtools view -Sb - > ${replicateId}.final.uniq.bam - - # Index BAM files - samtools index ${replicateId}.final.uniq.bam - """ -} - -/* - * END OF PART 4 - ******/ - +'4_rnaseq_gatk_recalibrate'( + genome_file, genome_index_ch, + genome_dict_ch, + splitted_bam_ch, + prepared_vcf_ch) + . 
into { final_output_ch; bam_for_ASE_ch } /*********** @@ -325,45 +138,12 @@ process '4_rnaseq_gatk_recalibrate' { */ -process '5_rnaseq_call_variants' { - tag "$sampleId" - label 'mem_large' - - input: - file genome from genome_file - file index from genome_index_ch - file dict from genome_dict_ch - set sampleId, file(bam), file(bai) from final_output_ch.groupTuple() - - output: - set sampleId, file('final.vcf') into vcf_files - - script: - """ - # fix absolute path in dict file - sed -i 's@UR:file:.*${genome}@UR:file:${genome}@g' $dict - echo "${bam.join('\n')}" > bam.list - - # Variant calling - $GATK -T HaplotypeCaller \ - -R $genome -I bam.list \ - -dontUseSoftClippedBases \ - -stand_call_conf 20.0 \ - -o output.gatk.vcf.gz - - # Variant filtering - $GATK -T VariantFiltration \ - -R $genome -V output.gatk.vcf.gz \ - -window 35 -cluster 3 \ - -filterName FS -filter "FS > 30.0" \ - -filterName QD -filter "QD < 2.0" \ - -o final.vcf - """ -} +vcf_files = '5_rnaseq_call_variants'( + genome_file, + genome_index_ch, + genome_dict_ch, + final_output_ch.groupTuple()) -/* - * END OF PART 5 - ******/ /*********** @@ -372,83 +152,13 @@ process '5_rnaseq_call_variants' { * Process 6A: Post-process the VCF result */ -process '6A_post_process_vcf' { - tag "$sampleId" - publishDir "$params.results/$sampleId" - - input: - set sampleId, file('final.vcf') from vcf_files - set file('filtered.recode.vcf.gz'), file('filtered.recode.vcf.gz.tbi') from prepared_vcf_ch - output: - set sampleId, file('final.vcf'), file('commonSNPs.diff.sites_in_files') into vcf_and_snps_ch - - script: - ''' - grep -v '#' final.vcf | awk '$7~/PASS/' |perl -ne 'chomp($_); ($dp)=$_=~/DP\\=(\\d+)\\;/; if($dp>=8){print $_."\\n"};' > result.DP8.vcf - - vcftools --vcf result.DP8.vcf --gzdiff filtered.recode.vcf.gz --diff-site --out commonSNPs - ''' -} +vcf_and_snps_ch = '6A_post_process_vcf'( vcf_files, prepared_vcf_ch ) /* * Process 6B: Prepare variants file for allele specific expression (ASE) analysis */ -process '6B_prepare_vcf_for_ase' { - tag "$sampleId" - publishDir "$params.results/$sampleId" - - input: - set sampleId, file('final.vcf'), file('commonSNPs.diff.sites_in_files') from vcf_and_snps_ch - output: - set sampleId, file('known_snps.vcf') into vcf_for_ASE - file('AF.histogram.pdf') into gghist_pdfs - - script: - ''' - awk 'BEGIN{OFS="\t"} $4~/B/{print $1,$2,$3}' commonSNPs.diff.sites_in_files > test.bed - - vcftools --vcf final.vcf --bed test.bed --recode --keep-INFO-all --stdout > known_snps.vcf - - grep -v '#' known_snps.vcf | awk -F '\\t' '{print $10}' \ - |awk -F ':' '{print $2}'|perl -ne 'chomp($_); \ - @v=split(/\\,/,$_); if($v[0]!=0 ||$v[1] !=0)\ - {print $v[1]/($v[1]+$v[0])."\\n"; }' |awk '$1!=1' \ - >AF.4R - - gghist.R -i AF.4R -o AF.histogram.pdf - ''' -} - - -/* - * Group data for allele-specific expression. - * - * The `bam_for_ASE_ch` emites tuples having the following structure, holding the final BAM/BAI files: - * - * ( sample_id, file_bam, file_bai ) - * - * The `vcf_for_ASE` channel emits tuples having the following structure, holding the VCF file: - * - * ( sample_id, output.vcf ) - * - * The BAMs are grouped together and merged with VCFs having the same sample id. 
Finally - * it creates a channel named `grouped_vcf_bam_bai_ch` emitting the following tuples: - * - * ( sample_id, file_vcf, List[file_bam], List[file_bai] ) - */ - -bam_for_ASE_ch - .groupTuple() - .phase(vcf_for_ASE) - .map{ left, right -> - def sampleId = left[0] - def bam = left[1] - def bai = left[2] - def vcf = right[1] - tuple(sampleId, vcf, bam, bai) - } - .set { grouped_vcf_bam_bai_ch } +(vcf_for_ASE, gghist_pdfs) = '6B_prepare_vcf_for_ase'( vcf_and_snps_ch ) /* @@ -458,31 +168,11 @@ bam_for_ASE_ch * (ASE) analysis */ -process '6C_ASE_knownSNPs' { - tag "$sampleId" - publishDir "$params.results/$sampleId" - label 'mem_large' - - input: - file genome from genome_file - file index from genome_index_ch - file dict from genome_dict_ch - set sampleId, file(vcf), file(bam), file(bai) from grouped_vcf_bam_bai_ch - - output: - file "ASE.tsv" - - script: - """ - echo "${bam.join('\n')}" > bam.list - - $GATK -R ${genome} \ - -T ASEReadCounter \ - -o ASE.tsv \ - -I bam.list \ - -sites ${vcf} - """ -} +'6C_ASE_knownSNPs'( + genome_file, + genome_index_ch, + genome_dict_ch, + group_per_sample(bam_for_ASE_ch, vcf_for_ASE) ) /* * END OF PART 6 diff --git a/modules.nf b/modules.nf new file mode 100644 index 0000000..836981e --- /dev/null +++ b/modules.nf @@ -0,0 +1,347 @@ +params.gatk = '/usr/local/bin/GenomeAnalysisTK.jar' +params.gatk_launch = "java -jar $params.gatk" +params.results = "results" + +GATK = params.gatk_launch + + +process '1A_prepare_genome_samtools' { + tag "$genome.baseName" + + input: + file genome + + output: + file "${genome}.fai" + + script: + """ + samtools faidx ${genome} + """ +} + +process '1B_prepare_genome_picard' { + tag "$genome.baseName" + label 'mem_xlarge' + + input: + file genome + output: + file "${genome.baseName}.dict" + + script: + """ + PICARD=`which picard.jar` + java -jar \$PICARD CreateSequenceDictionary R= $genome O= ${genome.baseName}.dict + """ +} + +process '1C_prepare_star_genome_index' { + tag "$genome.baseName" + + input: + file genome + output: + file "genome_dir" + + script: + """ + mkdir genome_dir + + STAR --runMode genomeGenerate \ + --genomeDir genome_dir \ + --genomeFastaFiles ${genome} \ + --runThreadN ${task.cpus} + """ +} + +process '1D_prepare_vcf_file' { + tag "$variantsFile.baseName" + + input: + file variantsFile + file blacklisted + + output: + set file("${variantsFile.baseName}.filtered.recode.vcf.gz"), + file("${variantsFile.baseName}.filtered.recode.vcf.gz.tbi") + + script: + """ + vcftools --gzvcf $variantsFile -c \ + --exclude-bed ${blacklisted} \ + --recode | bgzip -c \ + > ${variantsFile.baseName}.filtered.recode.vcf.gz + + tabix ${variantsFile.baseName}.filtered.recode.vcf.gz + """ +} + + +process '2_rnaseq_mapping_star' { + tag "$replicateId" + + input: + file genome + file genomeDir + set replicateId, file(reads) + + output: + set replicateId, + file('Aligned.sortedByCoord.out.bam'), + file('Aligned.sortedByCoord.out.bam.bai') + + script: + """ + # ngs-nf-dev Align reads to genome + STAR --genomeDir $genomeDir \ + --readFilesIn $reads \ + --runThreadN ${task.cpus} \ + --readFilesCommand zcat \ + --outFilterType BySJout \ + --alignSJoverhangMin 8 \ + --alignSJDBoverhangMin 1 \ + --outFilterMismatchNmax 999 + + # 2nd pass (improve alignmets using table of splice junctions and create a new index) + mkdir genomeDir + STAR --runMode genomeGenerate \ + --genomeDir genomeDir \ + --genomeFastaFiles $genome \ + --sjdbFileChrStartEnd SJ.out.tab \ + --sjdbOverhang 75 \ + --runThreadN ${task.cpus} + + # Final read alignments + STAR 
--genomeDir genomeDir \ + --readFilesIn $reads \ + --runThreadN ${task.cpus} \ + --readFilesCommand zcat \ + --outFilterType BySJout \ + --alignSJoverhangMin 8 \ + --alignSJDBoverhangMin 1 \ + --outFilterMismatchNmax 999 \ + --outSAMtype BAM SortedByCoordinate \ + --outSAMattrRGline ID:$replicateId LB:library PL:illumina PU:machine SM:GM12878 + + # Index the BAM file + samtools index Aligned.sortedByCoord.out.bam + """ +} + + +process '3_rnaseq_gatk_splitNcigar' { + tag "$replicateId" + label 'mem_large' + + input: + file genome + file index + file genome_dict + set replicateId, file(bam), file(index) + + output: + set replicateId, + file('split.bam'), + file('split.bai') + + script: + """ + # SplitNCigarReads and reassign mapping qualities + $GATK -T SplitNCigarReads \ + -R $genome -I $bam \ + -o split.bam \ + -rf ReassignOneMappingQuality \ + -RMQF 255 -RMQT 60 \ + -U ALLOW_N_CIGAR_READS \ + --fix_misencoded_quality_scores + """ +} + +process '4_rnaseq_gatk_recalibrate' { + tag "$replicateId" + label 'mem_large' + + input: + file genome + file index + file dict + set replicateId, file(bam), file(index) + set file(variants_file), file(variants_file_index) + + output: + set sampleId, + file("${replicateId}.final.uniq.bam"), + file("${replicateId}.final.uniq.bam.bai") + + script: + sampleId = replicateId.replaceAll(/[12]$/,'') + """ + # Indel Realignment and Base Recalibration + $GATK -T BaseRecalibrator \ + --default_platform illumina \ + -cov ReadGroupCovariate \ + -cov QualityScoreCovariate \ + -cov CycleCovariate \ + -knownSites ${variants_file} \ + -cov ContextCovariate \ + -R ${genome} -I ${bam} \ + --downsampling_type NONE \ + -nct ${task.cpus} \ + -o final.rnaseq.grp + + $GATK -T PrintReads \ + -R ${genome} -I ${bam} \ + -BQSR final.rnaseq.grp \ + -nct ${task.cpus} \ + -o final.bam + + # Select only unique alignments, no multimaps + (samtools view -H final.bam; samtools view final.bam| grep -w 'NH:i:1') \ + |samtools view -Sb - > ${replicateId}.final.uniq.bam + + # Index BAM files + samtools index ${replicateId}.final.uniq.bam + """ +} + + +process '5_rnaseq_call_variants' { + tag "$sampleId" + label 'mem_large' + + input: + file genome + file index + file dict + set sampleId, file(bam), file(bai) + + output: + set sampleId, file('final.vcf') + + script: + """ + # fix absolute path in dict file + sed -i 's@UR:file:.*${genome}@UR:file:${genome}@g' $dict + echo "${bam.join('\n')}" > bam.list + + # Variant calling + $GATK -T HaplotypeCaller \ + -R $genome -I bam.list \ + -dontUseSoftClippedBases \ + -stand_call_conf 20.0 \ + -o output.gatk.vcf.gz + + # Variant filtering + $GATK -T VariantFiltration \ + -R $genome -V output.gatk.vcf.gz \ + -window 35 -cluster 3 \ + -filterName FS -filter "FS > 30.0" \ + -filterName QD -filter "QD < 2.0" \ + -o final.vcf + """ +} + +process '6A_post_process_vcf' { + tag "$sampleId" + publishDir "$params.results/$sampleId" + + input: + set sampleId, file('final.vcf') + set file('filtered.recode.vcf.gz'), file('filtered.recode.vcf.gz.tbi') + output: + set sampleId, + file('final.vcf'), + file('commonSNPs.diff.sites_in_files') + + script: + ''' + grep -v '#' final.vcf | awk '$7~/PASS/' |perl -ne 'chomp($_); ($dp)=$_=~/DP\\=(\\d+)\\;/; if($dp>=8){print $_."\\n"};' > result.DP8.vcf + + vcftools --vcf result.DP8.vcf --gzdiff filtered.recode.vcf.gz --diff-site --out commonSNPs + ''' +} + +process '6B_prepare_vcf_for_ase' { + tag "$sampleId" + publishDir "$params.results/$sampleId" + + input: + set sampleId, file('final.vcf'), 
file('commonSNPs.diff.sites_in_files')
+    output:
+    set sampleId, file('known_snps.vcf')
+    file('AF.histogram.pdf')
+
+    script:
+    '''
+    awk 'BEGIN{OFS="\t"} $4~/B/{print $1,$2,$3}' commonSNPs.diff.sites_in_files > test.bed
+
+    vcftools --vcf final.vcf --bed test.bed --recode --keep-INFO-all --stdout > known_snps.vcf
+
+    grep -v '#' known_snps.vcf | awk -F '\\t' '{print $10}' \
+        |awk -F ':' '{print $2}'|perl -ne 'chomp($_); \
+        @v=split(/\\,/,$_); if($v[0]!=0 ||$v[1] !=0)\
+        {print $v[1]/($v[1]+$v[0])."\\n"; }' |awk '$1!=1' \
+        >AF.4R
+
+    gghist.R -i AF.4R -o AF.histogram.pdf
+    '''
+}
+
+
+process '6C_ASE_knownSNPs' {
+    tag "$sampleId"
+    publishDir "$params.results/$sampleId"
+    label 'mem_large'
+
+    input:
+    file genome
+    file index
+    file dict
+    set sampleId, file(vcf), file(bam), file(bai)
+
+    output:
+    file "ASE.tsv"
+
+    script:
+    """
+    echo "${bam.join('\n')}" > bam.list
+
+    $GATK -R ${genome} \
+          -T ASEReadCounter \
+          -o ASE.tsv \
+          -I bam.list \
+          -sites ${vcf}
+    """
+}
+
+
+/*
+ * Group data for allele-specific expression.
+ *
+ * The `bam_ch` channel emits tuples having the following structure, holding the final BAM/BAI files:
+ *
+ *   ( sample_id, file_bam, file_bai )
+ *
+ * The `vcf_ch` channel emits tuples having the following structure, holding the VCF file:
+ *
+ *   ( sample_id, output.vcf )
+ *
+ * The BAMs are grouped together and merged with VCFs having the same sample id. Finally,
+ * the function returns a channel emitting the following tuples:
+ *
+ *   ( sample_id, file_vcf, List[file_bam], List[file_bai] )
+ */
+
+def group_per_sample(bam_ch, vcf_ch) {
+  bam_ch
+    .groupTuple()
+    .phase(vcf_ch)
+    .map{ left, right ->
+      def sampleId = left[0]
+      def bam = left[1]
+      def bai = left[2]
+      def vcf = right[1]
+      tuple(sampleId, vcf, bam, bai)
+    }
+}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 3130c87..b369665 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -6,6 +6,10 @@ profiles {
     process.container = 'cbcrg/callings-nf:gatk4'
     params.gatk_launch = '/gatk-4.0.0.0/gatk'
   }
+
+  test {
+    docker.enabled = true
+  }
 
   travis {
     docker.enabled = true
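For reference, the fragment below is a minimal sketch, not part of the patch, showing how the grouped channel returned by `group_per_sample` could be inspected from main.nf. It assumes the channel names used in the patch (`bam_for_ASE_ch`, `vcf_for_ASE`); the closure parameter names are illustrative, and since a channel can only be consumed once here it would temporarily stand in for the '6C_ASE_knownSNPs' invocation while debugging.

    // Illustrative debugging sketch only, not included in the patch
    group_per_sample(bam_for_ASE_ch, vcf_for_ASE)
        .subscribe { sampleId, vcf, bams, bais ->
            // each item pairs one sample's VCF with the lists of its BAM/BAI files
            println "sample: ${sampleId}  vcf: ${vcf}  bams: ${bams.size()}"
        }

With the `test` profile added to nextflow.config, the converted pipeline can be exercised with Docker via `nextflow run main.nf -profile test`, assuming a Nextflow build that supports the experimental `require` module syntax used in main.nf.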