#ifndef __SEQUENCE_ALIGNMENT_TOOLS__
#define __SEQUENCE_ALIGNMENT_TOOLS__

#include <SequenceAlignment.h>
#include <iostream>
#include <algorithm>
#include <Random.h>
#include <SimpleSequenceAlignment.h>
#include <vectornumerics.h>

class SequenceAlignmentTools {

 public:

  typedef SequenceAlignment::size_type size_type;

  /** Return most common character */
  static char computeConsensusCharacter(string s) {
    Vec<char> cVec(s.size());
    for (string::size_type i = 0; i < s.size(); ++i) {
      cVec[i] = s[i];
    }
    Vec<char> uchars = uniqueSet(cVec);
    Vec<int> counts(uchars.size());
    for (Vec<char>::size_type i = 0; i < uchars.size(); ++i) {
      for (string::size_type j = 0; j < s.size(); ++j) {
	if (s[j] == uchars[i]) {
	  counts[i] += 1;
	}
      }
    }
    unsigned int bestId = findMaxIndex(uchars);
    return uchars[bestId];
  }

  static string computeConsensusSequence(const SequenceAlignment& ali) {
    string s;
    for (SequenceAlignment::sequence_size_type i = 0; i < ali.getLength(); ++i) {
      SequenceAlignment::sequence_type col = ali.getColumn(i); 
      char c = computeConsensusCharacter(col);
      s = s + c;
    }
    return s;
  }

  /** returns fraction of G or Cs among non-gap characters for certain region */
  static double computeGCContent(const SequenceAlignment& ali,
			  SequenceAlignment::size_type start,
			  SequenceAlignment::size_type stop);

  /** returns fraction of G or Cs among non-gap characters */
  static double computeGCContent(const SequenceAlignment& ali);

  /** returns fraction of G or Cs among non-gap characters for a set of windows */
  static Vec<double> computeWindowGCContent(const SequenceAlignment& ali, SequenceAlignment::size_type winLength);

  static size_type dinucleotideShuffle(SequenceAlignment& ali,
				       const string& alphabet,
				       double normLimit,
				       size_type iterations,
				       bool shuffleColumnMode);

  /** Maximum norm between two matrices: returns true if maximum difference between two matrix elements 
   * is smaller than normlimit */
  static bool checkFrequenciesOk(const Vec<Vec<double> >& f1,
				 const Vec<Vec<double> >& f2,
				 double normLimit);

  /** Reads simple set of sequences */
  static SimpleSequenceAlignment readSimpleSequences(istream& is) {
    SimpleSequenceAlignment ali;
    vector<string> lines = getLines(is);
    for (vector<string>::size_type i = 0; i < lines.size(); ++i) {
      string name = "x";
      name[0] = static_cast<char>((static_cast<int>('A') + static_cast<int>(i)));
      ASSERT(name.size() == 1);
      ali.addSequence(lines[i], name); 
    }
    return ali;
  }

  /** returns matrix with di-nucleotide frequencies */
  static Vec<Vec<double> > computeDinucleotideFrequencies(const SequenceAlignment& ali,
							  const string& alphabet);

  /** Returns concatenated sequences with ids between min (inclusive) and max (exclusive).
   * If max == 0, max will be set to the total number of sequences of the alignment 
   */
  static SequenceAlignment::sequence_type generateConcatenatedSequences(const SequenceAlignment& ali,
									SequenceAlignment::size_type min=0,
									SequenceAlignment::size_type max=0);
  
  /** Returns zero-based start positions of first residue of each sequence. */
  static Vec<int> toStarts(const SequenceAlignment& ali) {
    Vec<int> result;
    int current = 0;
    for (SequenceAlignment::size_type i = 0; i < ali.size(); ++i) {
      result.push_back(current);
      current += ali.getSequence(i).size();
    }
    ASSERT(result.size() == ali.size()); 
    ASSERT(result[0] == 0);
    return result;
  }

 private:

  /** shuffled alignment by swapping one pair of columns, one of the columns is at position pos. 
   * Returns true if one pair of columns was found. */
  static bool dinucleotideShuffleIterationPosition(SequenceAlignment& ali,
						   const string& alphabet, 
						   size_type pos,
						   const Vec<Vec<double> >& origFrequencies,
						   double normLimit,
						   bool shuffleColumnMode);

  /** one iteration of dinucleotide-preserving shuffling */
  static size_type dinucleotideShuffleIteration(SequenceAlignment& ali,
						const string& alphabet,
						const Vec<Vec<double> >& origFrequencies,
						double normLimit,
						bool shuffleColumnMode);

  /* ATTRIBUTES */
  

};

#endif
