// --*- C++ -*------x---------------------------------------------------------

#ifndef __HASH_CORRELATION_FINDER3__
#define __HASH_CORRELATION_FINDER3__

#include <string>
#include <debug.h>
#include <CorrelationFinder.h>
#include <CorrelationTools.h>
#include <MAFAlignment.h>
#include <MAFSearchTables3.h>
#include <SearchRange.h>
#include <iomanip>
#include <NucleotideTools.h>
#include <RankedSolution7.h>
#include <SingleLinkage2DProgressiveFilter.h>

#ifdef COVARNA_CONCURRENT_VECTOR
#include <tbb/concurrent_vector.h>
#include <tbb/concurrent_unordered_set.h>
#else
#include <ser_blocked_range.h>
#endif

// NOTE(review): this using-directive sits at header scope, so it is in effect in every
// translation unit that includes this header. The declarations below rely on it
// (string, map, set, cout, endl are used unqualified).
using namespace std;

// Starts a diagnostic/progress line on standard output: expands to `cout << "# "`,
// so every remark line is prefixed with "# ".
#define REMARK cout << "# "

/** Correlation finder that searches a MAF alignment for matching (by default complementary)
 * column pairs and stores the hits in a shared array of result bins.
 *
 * The object does NOT own the alignment, the search tables or the result bins; it only keeps
 * pointers to them. Copies of a finder therefore share these three data structures.
 * Search parameters are kept as member attributes (see setDefaultValues() for the defaults);
 * run() performs the actual search over a range of internal column indices.
 */
class HashCorrelationFinder3 : public CorrelationFinder {

 public:

  typedef MAFSearchTables3::set_type set_type;

  typedef MAFSearchTables3::compressed_type compressed_type;

  typedef MAFSearchTables3::table_type table_type;

  typedef map<string, double> double_hash_type;

  // NOTE: class-local size_t; inside this class it refers to string::size_type.
  typedef string::size_type size_t;

  typedef Vec<RankedSolution7<string, size_t, size_t, size_t> > queue_type;

  // typedef blocked_range<length_type> range_type;

  typedef SingleLinkage2DProgressiveFilter::result_type cluster_result_type;

  typedef MAFAlignment::count_hash_type count_hash_type;

  // One result vector per alignment column; a null entry means "no hits stored for this column yet".
#ifdef COVARNA_CONCURRENT_VECTOR
  typedef concurrent_vector<length_type> result_vector_type;
  typedef concurrent_vector<result_vector_type * > result_type;
#else
  typedef Vec<length_type> result_vector_type;
  typedef Vec<result_vector_type * > result_type;
#endif

  /** Settings of the cluster pre-filter:
   *  PREFILTER_CUTOFF      - cutoff handed to the cluster filter on construction,
   *  PREFILTER_DELAY       - delay set on the filter when a run does not start at column 0,
   *  PREFILTER_CLUSTER_MIN - default minimum cluster size (see setDefaultValues()).
   */
  enum { PREFILTER_CUTOFF = 40, PREFILTER_DELAY = 40, PREFILTER_CLUSTER_MIN = 4 };

 private:

  MAFAlignment *maf;       // alignment to be searched (not owned)
  result_type *resultBins; // vector of vector of results. First dimension corresponds to number of columns (not owned)
  MAFSearchTables3 *tables; // search tables (not owned)

  size_type basepairTypeMin;
  Vec<Correlation>::size_type reservefac;
  string residues; // = "ACGT";
  size_type minNonGap; // minimum number of non-gap characters
  size_type nonGapCharacterMin; // this many DIFFERENT non-gap characters in an interesting column
  length_type corrDistMin; // minimum distance between correlations
  length_type outIntervall; // output every so often
  bool allowGu;
  double allowedGuFrac;
  bool allowGap;
  double allowedGapFrac;
  bool complementMode;
  Stem::index_type stemLengthMin; // minimum number of consecutive correlations. 3 or 2
  bool removeIsolated;
  bool reverseMode;
  set_type::size_type searchColumnMax;
  length_type searchColumnSplit; // if greater zero: search only covariations with start position < split and stop position > split
  int verbose;
  set<string> assemblies;
  mutable SingleLinkage2DProgressiveFilter clusterFilter;
  bool adjacentMode; // if false, filter out adjacent matching covarying columns
  mutable size_type localMatchPairCount; // matches found during the current run() of this object
  mutable count_hash_type localMatchPairCountHash; // stores for each alignment assembly hash the number of found matches
  mutable count_hash_type localMatchPairCountHash2; // stores for each common-assembly combination the number of found matches

  bool checkNeighborMode;
  int checkAntiNeighborMode;

  /** total number of found matching column pairs */
  static size_type matchPairCount;
  static count_hash_type matchPairCountHash;
  static count_hash_type matchPairCountHash2;

  // static length_type ignoredCount;

 public:

  /** Standard constructor.
   * @param _maf        alignment to search; must not be null
   * @param _tables     search tables; must not be null and must validate
   * @param _resultBins result bins; must not be null and must contain one entry per alignment column
   * None of the pointers is copied or owned. All search parameters are set to their defaults.
   */
  HashCorrelationFinder3(MAFAlignment *_maf, MAFSearchTables3 *_tables, result_type * _resultBins) :
    clusterFilter(PREFILTER_CUTOFF, 0), adjacentMode(true) {
    // all three pointers are checked before any of them is dereferenced
    PRECOND((_maf != 0) && (_tables != 0) && (_resultBins != 0)
            && (_maf->getTotalLength() == static_cast<length_type>(_resultBins->size())));
    PRECOND(_tables->validate());
    maf = _maf; // set pointer to MAF alignment. Avoid copying!
    tables = _tables;
    resultBins = _resultBins;
    setDefaultValues(); // needs maf to be set (reads the assemblies from it)
    // ignoredCount = 0;
    POSTCOND(validate());
  }

  /** Copy constructor. The copy shares alignment, tables and result bins with the original. */
  HashCorrelationFinder3(const HashCorrelationFinder3& other) : clusterFilter(other.clusterFilter) {
    copy(other);
  }

  /** Destructor. Nothing is released: alignment, tables and result bins are not owned.
   * Consider take-down of resultBins array? */
  virtual ~HashCorrelationFinder3() { }

  /** Assignment operator; self-assignment is a no-op. */
  virtual HashCorrelationFinder3& operator = (const HashCorrelationFinder3& other) {
    if (this != &other) {
      copy(other);
    }
    return *this;
  }

  /** Returns natural logarithm of probability of a particular stem to be found at a particular position. Multiply with number of possible positions
   * to obtain E-value (either (N*(N-1))/2 for one MAF alignment (N == totalLength), or N*M for two MAF alignments */
  virtual double computeLogPValue(const Stem& stem) const;

  /** Returns natural logarithm of probability of a particular stem to be found at a particular position. Multiply with number of possible positions
   * to obtain E-value (either (N*(N-1))/2 for one MAF alignment (N == totalLength), or N*M for two MAF alignments */
  virtual double computeForwardLogPValue(const Stem& stem) const;

  /** Compute search area for each assembly combination. Assumes that MAF is a concatenation of two parts. The index of the first
   * column of the second part is given as the variable searchMax.
   */
  count_hash_type computeSplitSearchAreas(length_type searchMax) const;

  /** Compute search area for each assembly combination. Assumes that MAF is NOT a concatenation of two parts; instead the whole MAF is searched
   * in an all-versus-all manner.
   */
  count_hash_type computeUnsplitSearchAreas() const;

  /** Compute densities of found covarying alignment columns. Assumes that search has been performed and is completed.
   * searchMax indicates where MAF is split in two parts. If searchMax >= maf->getTotalLength(), then it is assumed
   * that the genomic alignment consists not of two regions (only one original MAF was supplied).
   */
  virtual double_hash_type computeDensities(length_type searchMax, bool addEmpty, ostream * os) const;

  /** Augments densities for areas where no covariation was found in the shuffled alignments.
   */
  void augmentDensities(double_hash_type& result, length_type searchMax) const;

  /** Copy method: takes over all attributes of other. Pointers to alignment, tables and
   * result bins are copied as pointers, so both objects refer to the same data afterwards.
   * Start and end of the operation are reported at higher verbosity levels of other.
   */
  virtual void copy(const HashCorrelationFinder3& other) {
    if (other.verbose > 1) {
      REMARK << "Copying correlation finder object..." << endl;
    }
    maf = other.maf; // copies POINTER to alignment
    resultBins = other.resultBins; // vector of vector of results. First dimension corresponds to number of columns
    tables = other.tables;
    basepairTypeMin = other.basepairTypeMin;
    reservefac = other.reservefac;
    residues = other.residues; // = "ACGT";
    minNonGap = other.minNonGap; // minimum number of non-gap characters
    nonGapCharacterMin = other.nonGapCharacterMin; // this many different non-gap characters in an interesting column
    corrDistMin = other.corrDistMin; // minimum distance between correlations
    outIntervall = other.outIntervall; // output every so often
    // refAssembly = other.refAssembly;
    allowGu = other.allowGu;
    allowGap = other.allowGap;
    allowedGuFrac = other.allowedGuFrac;
    allowedGapFrac = other.allowedGapFrac;
    complementMode = other.complementMode;
    removeIsolated = other.removeIsolated;
    reverseMode = other.reverseMode;
    searchColumnMax = other.searchColumnMax;
    searchColumnSplit = other.searchColumnSplit;
    stemLengthMin = other.stemLengthMin;
    verbose = other.verbose;
    assemblies = other.assemblies;
    clusterFilter = other.clusterFilter;
    adjacentMode = other.adjacentMode;
    // assemblyPairFraction = other.assemblyPairFraction;  // what fraction of all possible assembly pairs should be stored as hash tables? Between 0 and 1
    localMatchPairCount = other.localMatchPairCount;
    localMatchPairCountHash = other.localMatchPairCountHash;
    localMatchPairCountHash2 = other.localMatchPairCountHash2;
    checkNeighborMode = other.checkNeighborMode;
    checkAntiNeighborMode = other.checkAntiNeighborMode;
    // reported only once every member has actually been copied
    if (other.verbose > 2) {
      REMARK << "Finished copying correlation finder object." << endl;
    }
  }

  /** Finds highest number of sequences in block alignment. FIXIT: only look in range */
  virtual size_type findHighestSequenceCount(length_type startcol, length_type endcol) const;

  // static length_type getIgnoredCount() { return ignoredCount; }

  virtual size_type getBasepairTypeMin() const { return basepairTypeMin; }

  /** Returns the cluster filter. The filter is a mutable member, hence the non-const reference from a const method. */
  virtual SingleLinkage2DProgressiveFilter& getClusterFilter() const { return clusterFilter; }

  virtual MAFAlignment* getMaf() const { return maf; }

  virtual cluster_result_type getResultClusters() const;

  /** Collects and returns results; */
  virtual result_container getResults() const;

  /** Minimum number of consecutive correlations */
  virtual Stem::index_type getStemLengthMin() const { return stemLengthMin; }

  /** Returns verbosity level. 0: silent, 1: default, > 1: more and more output */
  virtual int getVerbose() const { return verbose; }

  /** Filters resultBins datastructure. The "const" is somewhat misleading but technically correct. */
  virtual void filterIsolatedCorrelation3(length_type colid) const;

  /** Check if i,j co-occurs with i-1,j+1 or i+1,j-1. Makes sense for RNA helix interactions */
  virtual set<length_type> filterNeighborCorrelations(const set<length_type>& leftSet, const set<length_type>& middleSet, const set<length_type>& righSet) const;

  /** Returns true, if covarying columns can be identified between two MAF blocks.
   * Both blocks need at least minNonGap sequences. A block is always searchable against itself;
   * two different blocks are searchable if the assemblies of one block are equal to or a subset
   * of the assemblies of the other block.
   */
  virtual bool isBlockPairSearchable(size_t blockId1, size_t blockId2) const {
    size_t assemblies1Count = (*maf)[blockId1].size();
    size_t assemblies2Count = (*maf)[blockId2].size();
    if ((assemblies1Count < minNonGap) || (assemblies2Count < minNonGap) ) {
      return false;
    }
    if (blockId1 == blockId2) {
      return true;
    }
    size_t commonAssemblyCount = maf->getCommonAssemblies(blockId1,blockId2).size();
    if ((assemblies1Count == commonAssemblyCount) || (assemblies2Count == commonAssemblyCount)) { // one is subset of other
      return true;
    }
    return false;
  }

  /** Returns 1 if the block pair is searchable by the same criteria as isBlockPairSearchable(), 0 otherwise.
   * NOTE(review): despite its name this method has never returned an area; it used to return
   * the bool literals true/false from a length_type function, i.e. 1 or 0. That result is kept
   * here unchanged but spelled out explicitly. Whether a real area (e.g. a product of block
   * lengths) was intended needs to be confirmed against the callers.
   */
  virtual length_type computeBlockPairSearchArea(size_t blockId1, size_t blockId2) const {
    size_t assemblies1Count = (*maf)[blockId1].size();
    size_t assemblies2Count = (*maf)[blockId2].size();
    if ((assemblies1Count < minNonGap) || (assemblies2Count < minNonGap) ) {
      return 0;
    }
    if (blockId1 == blockId2) {
      return 1;
    }
    size_t commonAssemblyCount = maf->getCommonAssemblies(blockId1,blockId2).size();
    if ((assemblies1Count == commonAssemblyCount) || (assemblies2Count == commonAssemblyCount)) { // one is subset of other
      return 1;
    }
    return 0;
  }

  /** Sets adjacent mode. Default: true. If false, no adjacent column matches are allowed */
  virtual void setAdjacentMode(bool mode) { adjacentMode = mode; }

  virtual void setBasepairTypeMin(size_type n) { basepairTypeMin = n; }

  virtual void setCheckAntiNeighborMode(int mode) {  checkAntiNeighborMode = mode; }

  /** Sets minimum distance between correlated columns. */
  virtual void setCorrDistMin(length_type distMin) { corrDistMin = distMin; }

  /** Sets default member attribute values. Requires maf to point to a valid alignment,
   * because the set of assemblies is read from it. */
  virtual void setDefaultValues() {
    basepairTypeMin = 2; // default: at least one compensatory base change; could also be GC and GU
    residues = "ACGT";
    minNonGap = 10; // minimum number of non-gap characters
    nonGapCharacterMin = 2; // this many different characters required in a column
    corrDistMin = 3; // minimum size of loop
    allowGu = true;
    allowGap = false;
    allowedGuFrac = 0.5; // 0.0;
    allowedGapFrac = 0.2; // 0.0;
    complementMode = true; // look for complementary columns, not identical ones
    reverseMode = true;
    reservefac = 10; // correlation expected every this many nucleotides
    searchColumnMax = 1000000;
    searchColumnSplit = 0; // default: no split
    stemLengthMin = 1;
    if (stemLengthMin > 1) {
      removeIsolated = true;
    } else {
      removeIsolated = false; // even stems of length 1 are picked up
    }
    verbose = 1;
    assemblies = maf->getAssemblies();
    outIntervall = 100000;
    clusterFilter.setSizeMin(PREFILTER_CLUSTER_MIN); // at least this many correlations per initial cluster
    adjacentMode = true; // false;
    localMatchPairCount = 0;
    localMatchPairCountHash.clear();
    localMatchPairCountHash2.clear();
    checkNeighborMode = true;
    checkAntiNeighborMode = 0;
    // refAssembly = "";
  }

  /** Require this many non-gap characters in a column */
  virtual void setNonGapMin(size_type n) { minNonGap = n; }

  /** Sets intermediate output intervall (number of columns between two progress messages of run()). */
  virtual void setOutIntervall(length_type intervall) { outIntervall = intervall; }

  /** If true (default) , search reverse *complement* columns, not reverse columns */
  virtual void setComplementMode(bool flag) { complementMode = flag; }

  /** Sets the removeIsolated flag: iff true, remove isolated correlations. */
  virtual void setRemoveIsolated(bool flag) { removeIsolated = flag; }

  /** Returns static variable that keeps track of total number of found matching column pairs. */
  static size_type getMatchPairCount() { return matchPairCount; }

  /** Returns static variable that keeps track of total number of found matching column pairs for different kinds of alignment blocks. */
  static count_hash_type getMatchPairCountHash() { return matchPairCountHash; }

  /** Returns static variable that keeps track of total number of found matching column pairs for different kinds of assembly combinations. */
  static count_hash_type getMatchPairCountHash2() { return matchPairCountHash2; }

  /** Returns static variable that keeps track of total number of found matching column pairs for different kinds of assembly combinations.
   *  searchMax indicates where the alignment is split in two parts.
   */
  virtual count_hash_type getMatchPairCountHash3(length_type searchMax) const;

  /** Sets the "active" status of the cluster filter. If false, then the filter will simply pass through all input values. */
  virtual void setClusterFilterActive(bool active) { clusterFilter.setActive(active); ASSERT(clusterFilter.isActive() == active); }

  /** Sets minimum size of clusters */
  virtual void setClusterFilterSizeMin(size_t sizeMin) { clusterFilter.setSizeMin(sizeMin); }

  /** Sets static variable that keeps track of total number of found matching column pairs. */
  static void setMatchPairCount(size_type count) { matchPairCount = count; }

  /** If true, checks for stretches i,j; i+1,j-1, ... . Otherwise: i,j;i+1,j+1;i-1,j-1 etc.
   * The flag is forwarded to the cluster filter as well. */
  virtual void setReverseMode(bool flag) { reverseMode = flag; clusterFilter.setReverseMode(flag); }

  /** Sets maximum number of columns to be searched with linear search */
  virtual void setSearchColumnMax(set_size_type value) { searchColumnMax = value; }

  /** If set to value greater zero, it means one is searching for covarying alignment column pairs with start < value and stop > value.
   * This is usually used for interchromosomal searches.
   * NOTE(review): the parameter type is set_size_type while the member is a length_type. */
  virtual void setSearchColumnSplit(set_size_type value) { searchColumnSplit = value; }

  /** Minimum number of consecutive correlations; must be 1, 2 or 3.
   * A value of 1 additionally switches off the removal of isolated correlations;
   * other values leave the removeIsolated flag untouched. */
  virtual void setStemLengthMin(Stem::index_type len) {
    ASSERT((len >= 1) && (len<=3));
    stemLengthMin = len;
    if (len == 1) {
      removeIsolated = false; // switch off filtering of isolated correlations
    }
  }

  /** Sets verbosity level. 0: silent, 1: default, > 1: more and more output */
  virtual void setVerbose(int level) { verbose = level; }

  /** Runs algorithm in defined index vector set. Here: indices must be consecutive. They are counted as internal column number ,
   * this is different than the external genome position. */
  virtual void run(const range_type& range) const;

  /** Runs the algorithm on all columns of the alignment. */
  virtual void run() const {
    // blocked_range<length_type> range(0, maf->getTotalLength(), maf-> getTotalLength());
    range_type range(0, maf->getTotalLength());
    run(range);
  }

  /** Function-call form of run(range). */
  // template <typename _Range>
  // void operator () (const _Range& range) const {
  void operator () (const range_type& range) const {
    run(range);
  }

  /** Sets cutoff of cluster filter and re-initializes the cluster filter. Careful: previously set values will be lost. */
  virtual void setClusterCutoffAndInit(length_type cutoff) {
    clusterFilter = SingleLinkage2DProgressiveFilter(cutoff, 0); // generate copy
  }

  /** Returns true, iff run() method can be run on this object.
   * The pointer checks come first so that no null pointer is dereferenced by the later checks.
   */
  virtual bool validate() const {
    return (maf != 0) && (tables != 0) && (resultBins != 0)
      && tables->validate() && (assemblies.size() > 0) && (residues.size() > 0)
      && maf->validate()
      && (maf->getTotalLength() == static_cast<length_type>(resultBins->size()));
  }

  /** Writes contents of results datastructure. Further filtering will be applied, but this helps to estimate the density of hits.
   */
  virtual void writeRawResults(ostream& os, length_type searchMax) const;

  friend class HashCorrelationFinderTest;

 private:

  /** Add found clusters of correlations to main data structure.
   * Each pair (x, y) is stored in the bin of the smaller column index, with the larger index
   * as value; pairs with x == y are dropped. Bins are allocated on first use.
   * Duplicates may be inserted. Allocation of a bin is not thread-safe.
   */
  void addClusters(const cluster_result_type& clusterResult) const {
    for (size_type clusterId = 0; clusterId < clusterResult.size(); ++clusterId) {
      for (size_type clusterElement = 0; clusterElement < clusterResult[clusterId].size(); ++clusterElement) {
        const length_type x = clusterResult[clusterId][clusterElement].first;
        const length_type y = clusterResult[clusterId][clusterElement].second;
        if (x == y) {
          continue; // a column paired with itself is never stored
        }
        const length_type lower = (x < y) ? x : y;
        const length_type upper = (x < y) ? y : x;
        if ((*resultBins)[lower] == 0) {
          (*resultBins)[lower] = (new result_vector_type()); // not thread-safe
        }
        (*resultBins)[lower]->push_back(upper); // it is possible that one duplicate is being inserted
      }
    }
  }

  bool isCorrelationFound(length_type i, length_type j) const;

  bool isCorrelationIsolated2(length_type i, length_type j) const;

  bool isCorrelationIsolated3(length_type i, length_type j) const;

  /** Returns true, if column can be used for search */
  bool isSearchColumnOK(const string& column) const;

  /** Reranking priority queue of hash tables to be searched.
   * Idea is to penalize a hash table (multiply its score == size with a factor greater one)
   * if among the n previous hash table the same assembly was found.
   */
  void rerankQueue(queue_type& queue, size_t firstId, size_t lastId, int previous, double scorePenalty) const;

  /** Resets all start positions to beginning of hash tables */
  void resetPositionHashStarts() const;

  void createSearchHashTable(const set<string>& assemblies);

  /** Returns column ids of MAF columns that are compatible with the given column-assembly search */
  set<length_type> searchMafColumn(const string& column, const Vec<string>& colAssemblies,
                                   length_type posMin, queue_type& queue) const;

  };

/** Finds highest number of sequences in block alignment. FIXIT: only look in range */
inline
HashCorrelationFinder::size_type
HashCorrelationFinder3::findHighestSequenceCount(length_type startcol, length_type endcol) const {
  size_type count = 0;
  for (size_type i = 0; i < maf->size(); ++i) {
    size_type sz = (*maf)[i].size();
    if (sz > count) {
      count = sz;
    }
  }
  return count;
}

/** Returns true, if column can be used for search */
inline
bool
HashCorrelationFinder3::isSearchColumnOK(const string& column) const {
  size_type nonGapCount = NucleotideTools::countNonGapsInChars(column);
  if (nonGapCount < minNonGap) {
    // ++ignoredCount; // THREADISSUE ?
    return false;
  }
  if (NucleotideTools::nongapCharacterCount(column) < nonGapCharacterMin) {
    // ++ignoredCount; // THREADISSUE ?
    return false;
  }
  return true;
}


/** Runs algorithm in defined index vector set. Here: indices must be consecutive. They are counted as internal column number ,
 * this is different than the external genome position. */ 
// template <typename _Range>
inline
void
HashCorrelationFinder3::run(const range_type& range) const {
  // resetPositionHashStarts(); // THREADISSUE
  set<string> assemblies = maf->getAssemblies();
  length_type startcol = range.begin();
  length_type endcol = range.end();
  localMatchPairCount = 0;
  localMatchPairCountHash.clear();
  localMatchPairCountHash2.clear();
  clusterFilter.reset();
  if (startcol > 0) {
    clusterFilter.setDelay(PREFILTER_DELAY); // if non-starting interval: filter is only active after "warmup" phase of PREFILTER_DELAY nucleotides
  }
  ASSERT(clusterFilter.getElementCount() == 0);
  if (verbose > 0) {
    REMARK << "Starting search for complementary alignment columns in region " << (startcol + 1) << " - " << endcol << " ..." << endl;
    if (clusterFilter.isActive()) {
      REMARK << "Warning: cluster filter is active. The use of this option is discouraged." << endl;
    }
  }
  ASSERT((startcol >= 0) && (endcol <= maf->getTotalLength()));
  ASSERT(endcol >= startcol);
  Vec<set<length_type > > currCorrelations(3);
  set<length_type > foundCorrelations;
  // Vec<size_type> perm(3); // this is now replaced by SingleLinkage2DClusterFilter
  // perm[0] = 0;
  // perm[1] = 1;
  // perm[2] = 2;
  // size_type resultSetCount = 0;
  Vec<string> colAssemblies;
  length_type aliOldId = maf->size();
  size_type highestSeqCount = findHighestSequenceCount(startcol, endcol); 
  queue_type queue((highestSeqCount * (highestSeqCount-1))/2);
  for (length_type colid = startcol; colid < endcol; ++colid) {   // main loop over search columns
    if (((colid - startcol) % outIntervall) == 0) {
      if (verbose > 0) {
	REMARK << "Progress: Column " << (colid+1) << " ( " << (startcol+1) << " - " << endcol << " , "
	     << setprecision(3) << (100.0 * (colid-startcol) / static_cast<double>(endcol-startcol)) 
	       << "%)." << endl; //  Found correlations for this task: " << resultSetCount << endl;
      }
    }
    // copy all content from MIDDLE id to main container:
    // rotate(perm.begin(), perm.begin()+1, perm.end());
    // currCorrelations[perm[2]].clear(); // make room for new elements to be found and filled
    ASSERT(colid >= 0);
    ASSERT(colid < maf->getTotalLength());
    length_type aliId = maf->getAlignmentId(colid);
    string aliIdHash = maf->getAlignmentAssembliesHash(aliId); 
    if (verbose > 2) {
      REMARK << "Searching MAF block with assembly hash " << aliIdHash << endl;
    }
    ASSERT(aliId < static_cast<length_type>(maf->size()));
    string slice = maf->getSlice(colid); // getAlignmentSlice((*maf)[[aliId]], colId)
    string column = slice;
    if (NucleotideTools::isConserved(column)) {
      continue; // ignore conserved columns (gaps are ignored in determination)
    }
    if (complementMode) {
      column = NucleotideTools::dnaComplement(slice); // important: one normally looks for complements
    }
    if (!isSearchColumnOK(column) ) {
      continue;
    }
    ASSERT(column.size() == slice.size());
    // get all assembly words. Concerned about speed
    if (aliId != aliOldId) { // alignment has changed in current column compared to previous column
      colAssemblies = maf->getAlignmentAssemblies(aliId); // (*maf)[aliId].propertyValues("assembly"); 
      aliOldId = aliId;
    }
    ASSERT(colAssemblies.size() > 0);
    ASSERT(colAssemblies.size() == (*maf)[aliId].size());
    ASSERT(column.size() == colAssemblies.size());
    ASSERT(column.size() == colAssemblies.size());
    // currCorrelations[perm[2]] = searchMafColumn(column, colAssemblies, colid, queue); // central command; finds complementary columns!
    foundCorrelations = searchMafColumn(column, colAssemblies, colid, queue); // central command; finds complementary columns!
    if (verbose > 4) {
      REMARK << "Found " << foundCorrelations.size() << " covarying columns for query column " << (colid+1) << " " << column;
      if (verbose > 5) {
	for (set<length_type>::const_iterator ii = foundCorrelations.begin(); ii != foundCorrelations.end(); ii++) {
	  cout << " " << ((*ii) + 1);
	}
      }
      cout << endl;
    }
    // increase total count of found matching pairs. Note that this might lead to an overestimation
    // of found matching pairs, because not all matches found by hash tables are true matches.
    // An overestimation is an error on the "good side" , because it decreases the statistical significance (increases the P value)
    // that is based on the Poisson distribution and is performed in InteractionClusterAnalyzer
    // Make sure all threads are increasing this appropriately; local variables are different for each thread 
    localMatchPairCount += foundCorrelations.size(); 
    for (set<length_type>::const_iterator it = foundCorrelations.begin(); it != foundCorrelations.end(); it++) {

      if (*it == colid) { // this can happen if not looking for complementary but matching columns (complementMode == false)
	ASSERT(!complementMode);
	continue; // not interested in matches with self
      }
      cluster_result_type clusterResult = clusterFilter.push(colid, *it);
      // check assembly:
      length_type aliId2 = maf->getAlignmentId(*it);
      string aliIdHash2 = maf->getAlignmentAssembliesHash(aliId2); // string that is similar to "hg18_panTro_mm8" etc.
      // if (aliIdHash2 == aliIdHash) {
      // note that matches are potentially counted twice (searching rectangular area and not triangular area)
      string commonHash = maf->getCommonAssembliesHash(aliId, aliId2);
      localMatchPairCountHash[aliIdHash] += 1; // increase counter for this alignment  
      localMatchPairCountHash[aliIdHash2] += 1; // increase counter for this alignment  
      localMatchPairCountHash2[commonHash] += 1; // increase counter for this alignment  
      // } 
      addClusters(clusterResult);
    }
    // below relates to a deprecated version that uses a 3-stem filter; 
    /*
    if ((colid - startcol) >= 1) {
      // filter MIDDLE of currCorrelations:
      if (removeIsolated) {
	ERROR("Remove isolated is currently not supported!");
	currCorrelations[perm[1]] = filterNeighborCorrelations(currCorrelations[perm[0]], currCorrelations[perm[1]], 
							       currCorrelations[perm[2]]);
      }
      // now add to main container: // THREADISSUE ?
      ASSERT(colid > 0);
      for (set<length_type>::const_iterator it = currCorrelations[perm[1]].begin(); it != currCorrelations[perm[1]].end(); it++) {
	if (*it == (colid - 1)) { // this can happen if not looking for complementary but matching columns (complementMode == false)
	  ASSERT(!complementMode);
	  continue; // not interested in matches with self
	}
        cluster_result_type clusterResult = clusterFilter.push(colid - 1, *it);
	addClusters(clusterResult);
      }
    } else if (colid == startcol) { // first searched column
      // special case: add results of first and last column; solved threadissue
      for (set<length_type>::const_iterator it = currCorrelations[perm[2]].begin(); it != currCorrelations[perm[2]].end(); it++) {
	if (*it == colid) {
	  ASSERT(!complementMode);
	  continue;
	}
        cluster_result_type clusterResult = clusterFilter.push(colid, *it);
	addClusters(clusterResult);
      }
    } else if ((colid + 1) == endcol) {
      // special case: add results of last column; solved threadissue
      for (set<length_type>::const_iterator it = currCorrelations[perm[2]].begin(); it != currCorrelations[perm[2]].end(); it++) {
	if (*it == colid) {
	  ASSERT(!complementMode);
	  continue;
	}
        cluster_result_type clusterResult = clusterFilter.push(colid, *it);
	addClusters(clusterResult);
      }
    }  else {
      cout << "Weird column id case: " << colid << " " << startcol << " " << endcol << endl;
      ERROR("Internal error: This column id was not accounted for!");
    }
 
    if (removeIsolated) { 
      ERROR("Remove isolated is currently not supported!");
      if (((colid - startcol) >= 5) && ((colid + 1) <= endcol) && ((*resultBins)[colid-3] != 0)) {
	filterIsolatedCorrelation3(colid - 3); // used to save memory. Final filtering is performed in getResults NOT THREAD-SAFE!
      }
    }
    */
  } // loop over columns
  cluster_result_type clusterResult = clusterFilter.flushAll(); // clean up: make sure no correlations are left in filter
  ASSERT(clusterFilter.getElementCount() == 0);
  addClusters(clusterResult);
  // FIXIT: if different threads try to issue this command simultaneously, it might lead to a bus error. Find thread-safe workaround.
  matchPairCount += localMatchPairCount; // update static variable that counts all matching pairs 
  // careful: not really clean programming for multi-threading
  for (map<string,size_type>::iterator it = localMatchPairCountHash.begin(); it != localMatchPairCountHash.end(); it++) {
    matchPairCountHash[it->first] = matchPairCountHash[it->first] + it->second;
  }
  /** Alternative for area computation */
  for (map<string,size_type>::iterator it = localMatchPairCountHash2.begin(); it != localMatchPairCountHash2.end(); it++) {
    matchPairCountHash2[it->first] = matchPairCountHash2[it->first] + it->second;
  }
  POSTCOND(clusterFilter.getElementCount() == 0);
}

#endif
