#!/usr/bin/perl

use strict;

use Getopt::Std;


# Parse the command line. getopt() treats every listed switch as taking an
# argument and stores the values in %opts, keyed by the switch letter.
# NOTE(review): -c is accepted and described in the help text, but its value
# is not read anywhere in the visible part of this script - confirm whether
# it is still supposed to be supported.
my %opts;
getopt('abceijm', \%opts);

my $file1 = $opts{"i"};        # first alignment file
my $file2 = $opts{"j"};        # second alignment file
my $matrixFile = $opts{"m"};   # optional P-value matrix output file
my $startId = $opts{"b"};      # optional id of first alignment pair
my $stopId = $opts{"e"};       # optional id of last alignment pair

# Without -i there is nothing to work on: show the help text and stop.
if (!defined($file1) || length($file1) == 0) {
    print_usage();
    exit(0);
}

# A file name that was given but cannot be found is an error. Report it on
# STDERR with a non-zero exit status instead of silently printing the help
# text, so that calling scripts can detect the failure.
if (!-e $file1) {
    die "Could not open file 1: $file1\n";
}

# NOTE(review): the help text shows -j in brackets (optional), but the second
# file has always been required by this check - confirm which is intended.
if (!defined($file2) || length($file2) == 0) {
    die "Could not open file 2: no file name given (option -j)\n";
}
if (!-e $file2) {
    die "Could not open file 2: $file2\n";
}

# Print the help text (description, options, and meaning of the result
# columns) to standard output.
sub print_usage {
    print(
        "Find bias towards Watson-Crick complementarity in sequence alignments (all-versus all comparison of multi-FASTA formated files, slow version that does compute a bias in number of diagonals). Usage: covstat -i multiFASTAFILE1 [-j multiFASTAFILE2]  [-b STARTID] [-e STOPID] [-m MATRIXFILE] [-c SEQUENCE-IDS] [-a T|F]\n",
        "Input:\n",
        "-i ALIGNMENTFILENAME : name of first file containing concatenated multiple sequence alignments (difference multiple sequence alignments are seperated by empty lines)\n",
        "-j ALIGNMENTFILENAME : name of second file containing concatenated multiple sequence alignments (difference multiple sequence alignments are seperated by empty lines). The given multiple sequence alignments have to be given in the order corresponding to the first file.\n",
        "-m MATRIXFILE : if specified, output of P-value matrix that is used for computing combined P-values (optional)\n",
        "-b STARTID : id of first multiple sequence alignment pair to be compared\n",
        "-e ENDID : id of last multiple sequence alignment pair to be compared (optional)\n",
        "-c SEQUENCE-IDS : ids of multiple sequence alignment pairs to be compared (optional)\n",
        "-a T|F : if T (true, default), then a bias towards regular anti-diagonals is being tested. If F, a bias towards forward diagonals is being tested. This can be used as additional control.\n",
        "Result output:\n",
        "ID: index of alighment pair (1 if only two alignments were given)\n",
        "P: P-value of over-representation of compensatory base changes spanning the two alignments.\n",
        "Prc: P-value of over-representation of compensatory base changes spanning the two alignments, computed with the reverse-complement of both alignments as input.\n",
        "Pd: P-value of bias towards anti-diagonal arrangement of covarying alignment columnn pairs.\n",
        "Pdrc: P-value of bias towards anti-diagonal arrangement of covarying alignment columnn pairs, computed with the reverse-complement of both alignments as input.\n",
        "Pc: combined P-value computed from P and Pd. Use this combined P-value as a measure of nucleotide covariation spanning given the two alignments\n",
        "Pcrc: combined P-value computed from Prc and Pdrc. Use this combined P-value as a measure of nucleotide covariation spanning given the reverse-complement of the two alignments\n",
        "Note that Covstat does not perform further multiple testing-corrections in case of large sets of alignment pairs that are being evaluated.\n",
    );
    return;
}

# Turn the optional matrix file name into an R expression: NULL when no file
# was requested, otherwise a double-quoted R string literal.
if (!defined($matrixFile) || length($matrixFile) == 0) {
    $matrixFile = "NULL";
} else {
    # Escape backslashes and double quotes so that unusual file names cannot
    # terminate the R string literal early.
    (my $escapedName = $matrixFile) =~ s/([\\"])/\\$1/g;
    $matrixFile = "\"" . $escapedName . "\""; # add quotes
}

# Map the -a switch (T, F, or absent) onto the R logical constants; an absent
# switch means TRUE.
my $antidiag = $opts{"a"};
if (!defined($antidiag) || length($antidiag) == 0 || $antidiag eq "T") {
    $antidiag = "TRUE";
} elsif ($antidiag eq "F") {
    $antidiag = "FALSE";
} else {
    die "Antidiagonal has to be true of false - option -a can have values T or F\n";
}

# The ids are pasted verbatim into R source code further down, so anything
# other than a plain integer is rejected here instead of being evaluated by R.
foreach my $idOption (["-b", $startId], ["-e", $stopId]) {
    my ($switch, $value) = @$idOption;
    next if !defined($value) || length($value) == 0;
    if ($value !~ /\A\d+\z/) {
        die "Option $switch expects an integer id, got: $value\n";
    }
}

# Build the R expression for the id range: NULL when neither -b nor -e was
# given. A missing start id defaults to 1; a missing stop id restricts the
# range to the start id alone.
my $idRange = "NULL";
my $haveStart = defined($startId) && length($startId) > 0;
my $haveStop = defined($stopId) && length($stopId) > 0;
if ($haveStart || $haveStop) {
    if (!$haveStart) {
        $startId = 1;
    }
    if ($haveStop) {
        $idRange = "range($startId ,$stopId)";
    } else {
        $idRange = "range($startId ,$startId)";
    }
}
 
# Locate Rscript: use $R_HOME/bin/Rscript when the R_HOME environment variable
# is set, otherwise rely on the PATH.
my $R_HOME = $ENV{"R_HOME"};
my $Rscript = "Rscript";
if (defined($R_HOME) && length($R_HOME) > 0) {
    $Rscript = "$R_HOME/bin/Rscript";
}

# Assemble the argument list of the R call once; idRange is only passed when
# a range was requested. $matrixFile, $antidiag and $idRange already hold R
# expressions at this point, the two file names are quoted here.
my @rArguments = (
    "file1=" . r_string_literal($file1),
    "file2=" . r_string_literal($file2),
);
if ($idRange ne "NULL") {
    push(@rArguments, "idRange=$idRange");
}
push(@rArguments,
    "pMethod=\"binom\"",
    "verboseLevel=0",
    "compute.diag=TRUE",
    "matrixFile=$matrixFile",
    "antidiag=$antidiag");

my $rCode = "suppressPackageStartupMessages(library(covstat));"
    . "pmultirun.complementarity.highestZ.matrixbias("
    . join(", ", @rArguments) . ")";

# Run Rscript through a list-form pipe open so that no shell is involved:
# quotes, spaces or other shell metacharacters in file names reach R intact.
open(my $rOutput, '-|', $Rscript, '-e', $rCode)
    or die "Could not run $Rscript: $!\n";

# Drop every output line containing "Read" (previously done with grep -v).
my $result1 = join("", grep { index($_, "Read") < 0 } <$rOutput>);
chomp($result1);

# close() on a piped handle waits for the child; it returns false when a
# system call failed ($! set) or when the child exited with non-zero status.
my $closedCleanly = close($rOutput);
my $closeErrno = $! + 0;
my $closeMessage = "$!";
my $childStatus = $?;

print("$result1\n");

if (!$closedCleanly) {
    if ($closeErrno != 0) {
        die "Error while reading output of $Rscript: $closeMessage\n";
    }
    die "$Rscript failed (wait status $childStatus)\n";
}

# Return $text as a double-quoted R string literal, escaping backslashes and
# double quotes.
sub r_string_literal {
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;
    return "\"" . $text . "\"";
}
