#!/usr/bin/perl -w
#
# Count B model
#--------------------------------------- 
use strict;
use warnings;
my $c = 0;

# (0) take at least one yass output file
( $#ARGV  >= 0) or die("\"give at least one filename on commandline\"");

# (1) clean the table
my %code       = ("|", 2 , ".",  1 , ":", 0 );
my %rcode      = ( 2 ,"|",  1 ,"." ,  0 ,":");

# (1) clean the table
my @fullcount = ();
for my $code (0..3-1){
    $fullcount[$code] = 0;
}

# read the data on each file
foreach my $filename (@ARGV){
    my $alignment = "";
   
    open(IN, $filename) or die("Cant open \"$filename\"");
    while (<IN>){
	if ($_ =~ /^[|.: ]*$/ ){ 
            # (1) select only alignment fragments
	    chomp $_;
	    $alignment .= $_;
	}else{ 
	   
            # (2) new alignment found : process $alignment computed before
	    if ($_ =~ /^\*\(.*$/ ){ # new alignment found
		#remove end spaces of alignments
		$alignment =~ s/\s+$//;		
		#(temporary) : remove indels
		$alignment =~ s/\s+//g;

		#clear table of counts
		my @count = ();
		for my $code (0..3-1){
		    $count[$code] = 0;
		}
				
		my @bias = ();
		 for my $code (0..3-1){
		    $bias[$code] = 0;
		}
		my @trtv = ();
		for my $code (0..2){
		    $trtv[$code] = 0;
		}
		
		# code computed here
		for my $j  (0..length($alignment)-1){
		    if (!(substr($alignment,$j,1) eq "|")){
			$bias[$j%3]++;
		    }    
		    #trtv ratio
		    if ((substr($alignment,$j,1) eq ":")){
			$trtv[0]++;
		    }  
		    
		    if ((substr($alignment,$j,1) eq ".")){
			$trtv[1]++;
		    } 

		    my $cd = $code{substr($alignment,$j,1)};
		    $count[$cd]++;
		}
		
		#take the maxbias
		my $maxbias = 0;
		my $sumbias = 0;
		for my $i (0..3-1){
		    if($maxbias < $bias[$i]){
			$maxbias = $bias[$i];
		    }
		    $sumbias += $bias[$i];
		}
		
		if ($maxbias >= 0.4 * $sumbias && $maxbias <= 0.55 * $sumbias && $maxbias >= 10 && $trtv[0] <=  $trtv[1]){
		    #update fullcount
		    for my $code (0..3-1){
			$fullcount[$code] += $count[$code];
		    }
		}
		$alignment = "";
	    }	
	}
    }
    close(IN);
}

# compute probs
my $sumcount = 0;
for my $code (0..3-1){
    $sumcount +=  $fullcount[$code];
}
if($sumcount == 0){
    $sumcount = 1;
}
    
if ($c){
    print("double P[ALPHABETSIZE] = {\n");
}

for my $code (0..3-1){
    if ($c) {
	print("/* ".$rcode{$code}." */".($fullcount[$code])/$sumcount.",\t");
    } else {
	print(($fullcount[$code])/$sumcount.",");
    }
}

if ($c){
    print("}\n");
}

