diff --git a/bin/agat_sp_compare_two_annotations.pl b/bin/agat_sp_compare_two_annotations.pl
index 30ee0228..99ca41db 100755
--- a/bin/agat_sp_compare_two_annotations.pl
+++ b/bin/agat_sp_compare_two_annotations.pl
@@ -54,9 +54,22 @@
 # --- Manage config ---
 $config = get_agat_config({config_file_in => $config});
 
+######################
+# Manage output folder #
+
+if (! $opt_output) {
+  print "Default output name: comparison_result\n"; # message must name the default actually assigned below
+  $opt_output="comparison_result";
+}
+
+if (-d $opt_output){
+  print "The output directory chosen already exists. Please give me another Name.\n";exit(1); # refuse to reuse an existing folder; non-zero status signals the failure
+}
+mkdir $opt_output or die "Cannot create output directory $opt_output: $!\n"; # fail here rather than later when the report is opened
+
 ######################
 # Manage output file #
-my $report = prepare_fileout($opt_output);
+my $report = prepare_fileout("$opt_output/report.txt");
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MAIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
@@ -323,7 +336,9 @@
           my $flat_overlap_1 = $locations1;
           my $flat_overlap_2 = $locations2;
           my $overlap_A=1;
+          my @overlap_A_id=($locations1->[0][3]); # IDs counted in $overlap_A, seeded with the first location of A
           my $overlap_B=1;
+          my @overlap_B_id=($locations2->[0][3]); # IDs counted in $overlap_B, seeded with the first location of B
           my $loop="top";
           my $flip=2;
           my $current_locs;
@@ -372,9 +387,11 @@
 
                 if($loop eq "top"){
                   $overlap_A++;
+                  push @overlap_A_id, $locations->[0][3]; # keep the ID list in step with the counter
                 }
                 elsif($loop eq "bot"){
                   $overlap_B++;
+                  push @overlap_B_id, $locations->[0][3]; # keep the ID list in step with the counter
                 }
               }
               else{
@@ -398,14 +415,14 @@
               }
             }
           }
-          $overlap_info{$locations1->[0][2]}{$overlap_A}{$overlap_B}++;
+          push @{$overlap_info{$locations1->[0][2]}{$overlap_A}{$overlap_B}}, [ [@overlap_A_id], [@overlap_B_id] ]; # store copies of both ID lists; the count is now the array size
         }
 
         # ----------------------------------- CASE 1 -----------------------------------
         # location A ----------------
         # location B ---------------
         elsif ($locations2->[scalar(@{$locations2})-1][1] < $locations1->[0][0] ){
-          $overlap_info{$locations1->[0][2]}{0}{1}++;
+          push @{$overlap_info{$locations1->[0][2]}{0}{1}}, [[undef], [$locations2->[0][3]]]; # undef marks "no feature in A"
           print "Case1 notoverlap !\n\n" if ($debug);
           # throw loc2
           remove_loc_by_id($flattened_locations2_clean_sorted, $locusID, $chimere_type, $locations2->[0][3]);
@@ -420,7 +437,7 @@
           my $id1 = $locations1->[0][3];
           print "Case2 notoverlap !\n" if ($debug);
           if(! exists_keys(\%seen1, ( $id1 ) ) ){ # else it has been dealed by overlap case
-            $overlap_info{$locations1->[0][2]}{1}{0}++;
+            push @{$overlap_info{$locations1->[0][2]}{1}{0}}, [ [$locations1->[0][3]], [undef] ]; # undef marks "no feature in B"
             # throw loc1
             remove_loc_by_id($flattened_locations1_clean_sorted, $locusID, $chimere_type, $id1);
             $seen1{$id1}++;
@@ -442,7 +459,7 @@
 foreach my $locusID ( keys %{$flattened_locations1_clean_sorted} ){
   foreach my $chimere_type ( keys %{$flattened_locations1_clean_sorted->{$locusID}}){
     foreach my $locations1 ( @{$flattened_locations1_clean_sorted->{$locusID}{$chimere_type}} ){
-      $overlap_info{$locations1->[0][2]}{1}{0}++;
+      push @{$overlap_info{$locations1->[0][2]}{1}{0}}, [ [$locations1->[0][3]], [undef]]; # undef marks "no feature in B"
       print " Case3 !\n" if ($debug);
     }
   }
@@ -454,7 +471,7 @@
 foreach my $locusID ( keys %{$flattened_locations2_clean_sorted} ){
   foreach my $chimere_type ( keys %{$flattened_locations2_clean_sorted->{$locusID}}){
     foreach my $locations2 ( @{$flattened_locations2_clean_sorted->{$locusID}{$chimere_type}} ){
-      $overlap_info{$locations2->[0][2]}{0}{1}++;
+      push @{$overlap_info{$locations2->[0][2]}{0}{1}}, [ [undef], [$locations2->[0][3]] ]; # undef marks "no feature in A"
       print " Case4 !\n" if ($debug);
     }
   }
@@ -491,12 +508,50 @@
   $total{$type_l1}{'B'}=0;
   foreach my $value1 ( sort {$a <=> $b} keys %{$overlap_info{$type_l1}} ){
     foreach my $value2 ( sort {$a <=> $b} keys %{$overlap_info{$type_l1}{$value1}} ){
-      $string_to_print .= "|".sizedPrint($value1,30)."|".sizedPrint($value2,30)."|".sizedPrint($overlap_info{$type_l1}{$value1}{$value2},30)."|\n";
-      if ($value1 != 0){
-        $total{$type_l1}{'A'} += $value1 * $overlap_info{$type_l1}{$value1}{$value2};
+      $string_to_print .= "|".sizedPrint($value1,30)."|".sizedPrint($value2,30)."|".sizedPrint(scalar(@{$overlap_info{$type_l1}{$value1}{$value2}}),30)."|\n";
+
+      # report ids: one file per feature type and A/B count combination, so types do not share a file
+      my $report_ids = prepare_fileout("$opt_output/$type_l1"."_".$value1."_".$value2."_id_list.txt");
+      # each entry is [ [ids from A], [ids from B] ]; it is written as one "idsA | idsB" line
+      foreach my $array ( @{$overlap_info{$type_l1}{$value1}{$value2}}) {
+        my $cpt=0;
+        my $last = scalar(@{$array->[0]});
+        foreach my $value ( @{$array->[0]} ) { # array0 holds the IDs from annotation A
+          $cpt++;
+          if(! defined $value){ # undef placeholder: nothing on the A side
+            print $report_ids "-";
+          } else {
+            if ($last == $cpt){
+              print $report_ids $value;
+            } else {
+              print $report_ids $value.", ";
+            }
+          }
+        }
+        print $report_ids " | ";
+        my $cpt2=0;
+        my $last2 = scalar(@{$array->[1]});
+        foreach my $value ( @{$array->[1]} ) { # array1 holds the IDs from annotation B
+          $cpt2++;
+          if(! defined $value){ # undef placeholder: nothing on the B side
+            print $report_ids "-"; # no newline here: the record terminator is printed once after the loop
+          } else {
+            if ($last2 == $cpt2){
+              print $report_ids "$value";
+            } else {
+              print $report_ids "$value, ";
+            }
+          }
+        }
+        print $report_ids "\n";
+      }
+
+
+      if ($value1 != 0){
+        $total{$type_l1}{'A'} += $value1 * scalar(@{$overlap_info{$type_l1}{$value1}{$value2}});
       }
       if ($value2 != 0){
-        $total{$type_l1}{'B'} += $value2 * $overlap_info{$type_l1}{$value1}{$value2};
+        $total{$type_l1}{'B'} += $value2 * scalar(@{$overlap_info{$type_l1}{$value1}{$value2}});
       }
     }
   }