// --*- C++ -*------x---------------------------------------------------------
// $Id: rescalescores.cc,v 1.2 2006/07/05 13:25:28 bindewae Exp $
//
// Program:         - 
//
// Author:          Eckart Bindewald
//
// Description:     Program is used to rescale raw knetfold scores
//                  to approximate base pair probabilities.
//                  usage: rescalescores -i filename -m matrixfilename -p parameterfilename
// -----------------x-------------------x-------------------x-----------------

#include <iostream>
#include <fstream>
#include <string>
#include <Vec.h>
#include <debug.h>
#include <GetArg.h>
#include <FileName.h>
#include <SequenceAlignment.h>
#include <SimpleSequenceAlignment.h>
#include <sequencestat.h>
#include <vectornumerics.h>
#include <CompensationScorer.h>
#include <compass_help.h>
#include <Random.h>
#include <Stem.h>
#include <stemhelp.h>

/** Writes a one-line usage summary to the given stream.
 *
 * The message names this program (rescalescores) together with the options
 * read in main: the alignment file, the raw score matrix file, and the
 * histogram parameter file.
 *
 * @param os stream that receives the usage line (flushed via endl).
 */
void
helpOutput(std::ostream& os)
{
  os << "usage: rescalescores -i filename -m matrixfilename -p parameterfilename" << std::endl;
}

/** Echoes the argument vector the program was started with.
 *
 * Every one of the first argc entries of argv is written followed by a
 * single blank; the line is then terminated (and flushed) with endl.
 *
 * @param os   destination stream.
 * @param argc number of entries of argv to print.
 * @param argv argument strings; not accessed when argc is not positive.
 */
void
parameterOutput(std::ostream& os, int argc, char** argv)
{
  int index = 0;
  while (index < argc)
    {
      os << argv[index] << " ";
      ++index;
    }
  os << std::endl;
}



/** writes 2d matrix to stream */
/*
void
writeMatrix(ostream& os, const Vec<Vec<double> >& matrix) {
  for (unsigned int i = 0; i < matrix.size(); ++i) {
    for (unsigned int j = 0; j < matrix.size(); ++j) {
      os << matrix[i][j] << " ";
    }
    os << endl;
  }
}
*/

/** Generates a pairwise matrix of mean single-column scores.
 *
 * For every alignment column the value of scorer.singleEntropy3 is computed
 * with a weight of 1.0 for each sequence. Entry (i,j) of the returned matrix
 * is the arithmetic mean of the values of column i and column j, so the
 * matrix is square with ali.getLength() rows and columns.
 *
 * @param ali    alignment whose columns are scored.
 * @param scorer scorer providing singleEntropy3.
 * @return matrix of pairwise averaged column scores.
 */
Vec<Vec<double> >
generateConservationMatrix(const SequenceAlignment& ali,
                           const CompensationScorer& scorer)
{
  typedef SequenceAlignment::sequence_size_type size_type;
  const size_type numColumns = ali.getLength();

  // one score per column, all sequences weighted equally
  Vec<double> uniformWeights(ali.size(), 1.0);
  Vec<double> columnScores(numColumns, 0.0);
  for (size_type col = 0; col < numColumns; ++col) {
    columnScores[col] = scorer.singleEntropy3(ali.getColumn(col), uniformWeights);
  }

  // combine the per-column scores into a pairwise matrix
  Vec<Vec<double> > pairMatrix(numColumns, Vec<double>(numColumns, 0.0));
  for (size_type row = 0; row < numColumns; ++row) {
    Vec<double>& rowValues = pairMatrix[row];
    for (size_type col = 0; col < numColumns; ++col) {
      rowValues[col] = 0.5 * (columnScores[row] + columnScores[col]);
    }
  }
  return pairMatrix;
}

/** writes mean sequence conservation (sum of single column entropies */
void
writeConservationMatrix(ostream& os,
			   const SequenceAlignment& ali,
			   const CompensationScorer& scorer)
{
  Vec<Vec<double> > matrix = generateConservationMatrix(ali, scorer);
  writeMatrix(os, matrix);
}

/** Forces a bin index into the valid range [0, numBins - 1].
 *
 * @param bin     candidate bin index, may lie outside the valid range.
 * @param numBins number of available bins; must be positive.
 * @return 0 for negative indices, numBins - 1 for indices past the end,
 *         otherwise bin unchanged.
 */
int
conformBin(int bin,
           int numBins)
{
  PRECOND(numBins > 0);
  if (bin < 0) {
    return 0;
  }
  return (bin < numBins) ? bin : (numBins - 1);
}

/** evaluate histogram. If value is out of bounds, choose closest in-bound value. */
double
evaluate(double x, double y, double z,
	 const Vec<Vec<Vec<double> > >& hist,
	 double minX,
	 double minY,
	 double minZ,
	 double deltaX, 
	 double deltaY,
	 double deltaZ )
{
  int binX = static_cast<int>((x - minX) / deltaX);
  int binY = static_cast<int>((y - minY) / deltaY);
  int binZ = static_cast<int>((z - minZ) / deltaZ);
  binX = conformBin(binX, static_cast<int>(hist.size()));
  binY = conformBin(binY, static_cast<int>(hist[0].size()));
  binZ = conformBin(binZ, static_cast<int>(hist[0][0].size()));
  return hist[binX][binY][binZ];
}

/** Central function that converts raw scores to probabilities.
 *
 * For every entry (i,j) of scores the histogram is evaluated at
 *   x = log(number of sequences in ali),
 *   y = conservation matrix entry (i,j) (see generateConservationMatrix),
 *   z = scores[i][j].
 * The histogram value is multiplied by an approximate a-priori factor of
 * 0.6 / alignment length and the product r is converted from a likelihood
 * ratio to a probability via r / (1 + r).
 *
 * @param ali        alignment the scores belong to; must contain sequences.
 * @param scores     raw score matrix; must not have more rows or columns
 *                   than the alignment has columns, because the
 *                   conservation matrix is indexed with the same (i,j).
 * @param histogram  three-dimensional lookup table, must be non-empty.
 * @param minX,minY,minZ        lower bounds of the histogram axes.
 * @param deltaX,deltaY,deltaZ  bin widths of the histogram axes.
 * @param scorer     scorer used to build the conservation matrix.
 * @return matrix with the same shape as scores holding the rescaled values.
 */
Vec<Vec<double> >
rescaleScoringMatrix(const SequenceAlignment& ali,
                     const Vec<Vec<double> >& scores,
                     const Vec<Vec<Vec<double> > >& histogram,
                     double minX, double minY, double minZ,
                     double deltaX, double deltaY, double deltaZ,
                     const CompensationScorer& scorer)
{
  PRECOND(ali.size() > 0);
  PRECOND(histogram.size() > 0);
  const double logNumSeq = log(static_cast<double>(ali.size()));
  // approximate a-priori probability that contact i,j is base paired
  const double apriori = 0.6 / ali.getLength();
  const Vec<Vec<double> > conservationMatrix = generateConservationMatrix(ali, scorer);
  // the conservation matrix is read with the indices of the score matrix
  PRECOND(scores.size() <= conservationMatrix.size());
  Vec<Vec<double> > result = scores;
  for (unsigned int i = 0; i < scores.size(); ++i) {
    PRECOND(scores[i].size() <= conservationMatrix[i].size());
    for (unsigned int j = 0; j < scores[i].size(); ++j) {
      const double ratio = apriori * evaluate(logNumSeq, conservationMatrix[i][j], scores[i][j],
                                              histogram,
                                              minX, minY, minZ, deltaX, deltaY, deltaZ);
      // convert likelihood ratio to probability of base pair
      result[i][j] = ratio / (1.0 + ratio);
    }
  }
  return result;
}


int
main(int argc, char ** argv)
{
  bool helpMode;
  int argcFile = 0;
  char ** argvFile = 0;
  char gapChar = '-';
  int bootstrapMode = 0; // if true perform bootstrap resampling
  unsigned int numBinX = 0;
  unsigned int numBinY = 0;
  unsigned int numBinZ = 0;
  int verboseLevel = 1;
  int inputFileFormat = 1;
  int outputFileFormat = 1;
  int uToTMode = 0;
  int tToUMode = 0;
  double deltaX, deltaY, deltaZ;
  double minX, minY, minZ;
  string alphabet = "ACGU";
  string commandFileName;
  string inputFileName;
  string logFileName; //  = "mainprogramtemplate.log";
  string matrixFileName;
  string nameKeepFileName;
  string outputFileName;
  string parameterFileName;
  string rootDir = ".";
  SimpleSequenceAlignment ali;
  CompensationScorer scorer;
  Vec<unsigned int> aliOutPairs;
  Vec<unsigned int> columnSubsetIndices, subsetIndices;
  Vec<int> absoluteIndices;
  Vec<unsigned int> pickSubsetIndices;
  Vec<int> relativeIndices;
  Vec<int> region;
  Vec<string> keepNames;
  Vec<Stem> regionStems;
  Vec<Vec<double> > matrix, rescaledMatrix;
  Vec<Vec<Vec<double> > > histogram;
  getArg("-help", helpMode, argc, argv);

  if ((argc < 2) || helpMode)  {
    helpOutput(cout);
    exit(0);
  }

  getArg("-root", rootDir, argc, argv, rootDir);
  addSlash(rootDir);

  getArg("-algorithm", scorer.algorithm, argc,argv, scorer.algorithm);
  getArg("-bootstrap", bootstrapMode, argc, argv, bootstrapMode);
  getArg("-commands", commandFileName, argc, argv, commandFileName);
  addPathIfRelative(commandFileName, rootDir);

  if (commandFileName.size() > 0) {
    ifstream commandFile(commandFileName.c_str());
    if (!commandFile) {
      if (isPresent("-commands", argc, argv)) {
	ERROR_IF(!commandFile, "Error opening command file.");
      }
      else {
	cerr << "Warning: Could not find command file: " + commandFileName 
	     << endl;
      }
    }
    else {
      argvFile = streamToCommands(commandFile, argcFile, 
				  string("mainprogramtemplate"));
    }
    commandFile.close();
  }


  getArg("i", inputFileName, argcFile, argvFile, inputFileName);
  getArg("i", inputFileName, argc, argv, inputFileName);

  getArg("-if", inputFileFormat, argcFile, argvFile, inputFileFormat);
  getArg("-if", inputFileFormat, argc, argv, inputFileFormat);
  getArg("m", matrixFileName, argcFile, argvFile, matrixFileName);
  getArg("m", matrixFileName, argc, argv, matrixFileName);
  getArg("-of", outputFileFormat, argcFile, argvFile, outputFileFormat);
  getArg("-of", outputFileFormat, argc, argv, outputFileFormat);
  getArg("-log", logFileName, argc, argv, logFileName);
  getArg("-log", logFileName, argcFile, argvFile, logFileName);
  addPathIfRelative(logFileName, rootDir);
  getArg("o", outputFileName, argc, argv, outputFileName);
  // getArg("-optimize", optimizeMode, argc, argv, optimizeMode);
  getArg("p", parameterFileName, argcFile, argvFile, parameterFileName);
  getArg("p", parameterFileName, argc, argv, parameterFileName);
  getArg("-verbose", verboseLevel, argcFile, argvFile, verboseLevel);
  getArg("-verbose", verboseLevel, argc, argv, verboseLevel);

  if (logFileName.size() > 0) {
    ofstream logFile(logFileName.c_str(), ios::app);
    parameterOutput(logFile, argc, argv);
    if (argcFile > 1) {
      logFile << "Parameters from command file: ";
      parameterOutput(logFile, argcFile, argvFile);
    }
    logFile.close();
  }

  /***************** MAIN PROGRAM *****************************/
  
  if (verboseLevel > 1) {
    cout << "Programs called with parameters: " << endl;
    parameterOutput(cout, argc, argv);
  }

  ifstream parameterFile(parameterFileName.c_str());
  ERROR_IF(!parameterFile, "Error opening parameter file!");
  parameterFile >> minX >> deltaX >> numBinX
		>> minY >> deltaY >> numBinY
		>> minZ >> deltaZ >> numBinZ
		>> histogram;

  if (verboseLevel > 0) {
    cerr << "Read parameters: " << endl
	 << minX << " " << deltaX << " " << numBinX << endl
	 << minY << " " << deltaY << " " << numBinY << endl
	 << minZ << " " << deltaZ << " " << numBinZ << endl;
      if (verboseLevel > 1) {
	cerr << histogram << endl;
      }
  }

  ERROR_IF(histogram.size() == 0,
	   "Error reading histogram!");

  ifstream inputFile(inputFileName.c_str());
  ERROR_IF(!inputFile, "Error reading input file!");

  switch (inputFileFormat) {
  case 1: // fasta format:
    ali.readFasta(inputFile);
    break;
  default:
    ERROR("Unknown input file format!");
  }

  inputFile.close();

  if (verboseLevel > 0) {
    cout << "Alignment with " << ali.size() << " sequences and length " 
	 << ali.getLength() << " read." << endl;
  }

  ifstream matrixFile(matrixFileName.c_str());
  ERROR_IF(!matrixFile, "Error opening matrix file!");
  matrix = readPlainMatrix(matrixFile);

  // adjust sequences:
  if (verboseLevel > 0) {
    cout << "Converting to upper case letters." << endl;
  }
  ali.upperCaseSequences();
  if (tToUMode) {
    ali.replace('T', 'U'); // replace DNA alphabet to RNA
  }
  else if (uToTMode) {
    ali.replace('U', 'T'); // replace DNA alphabet to RNA
  }
  ali.replace('.', gapChar); // replace "." with "-"

  rescaledMatrix = rescaleScoringMatrix(ali, matrix, histogram,
					minX, minY, minZ,
					deltaX, deltaY, deltaZ, scorer);

  writeMatrix(cout, rescaledMatrix);

  return 0;
}
