#include<InteractionClusterAnalyzer.h>
#include <MainTools.h>
#include <debug.h>


/** Find default density of found matching columns for a certain position. This can be necessary,
 * if no density for a certain assembly combination has been stored. This in turn can be the case,
 * if no hits were found for a certain assembly combination for shuffled alignments.
* If internalMode is true: assume internal coordinates; otherwise coordinates are assumed to be 0-based assembly coordinates and not internal coordinates.
*/
/*
double
InteractionClusterAnalyzer::findDefaultDensity(length_type pos,
					       const MAFAlignment& maf,
                                               const count_hash_type& assemblyHashLengths,
					       bool internalMode, int verboseLevel) {
  // convert to internal coordinates
  if (!internalMode) {
    if (pos >= maf.getRefAssemblyTotLength()) {
      cout << "Warning: requesting density of non-sensical alignment column position (ext. coordinates)."  
	   << pos << " " << maf.getRefAssemblyTotLength() << endl;
    }
    pos = maf.convertAssemblyPositionToColumnId(pos);
  }
  if (pos >= maf.getTotalLength()) {
    cout << "Warning: requesting density of non-sensical alignment column position (internal. coordinates)."  
	 << pos << " " << maf.getTotalLength() << " " << maf.getRefAssemblyChrom() << endl;
  }
  // ERROR_IF(pos >= maf.getTotalLength(),
  // "Position greater than number of columns in alignment chosen.");
  if (pos >= maf.getTotalLength()) {
     return 0.0; // could not be found. This can legitimately happen, for example,
                 // a position corresponding to a gap in MAF blocks was requested.
  }
  length_type aliId = maf.getAlignmentId(pos);
  ERROR_IF(aliId >= static_cast<length_type>(maf.size()), "Internal error: could not convert coordinate to alignment id.");
  string hash = maf.getAlignmentAssembliesHash(aliId);
  count_hash_type::const_iterator fi = assemblyHashLengths->find(hash)
 // double_hash_type::const_iterator it = densities.find(hash);
  if(it == densities.end()) {
    if (verboseLevel >= 2) {
      cout << "Problem with position " << pos << " Could not find hash: " << hash << endl;
    }
    return -1.0; // not found can happen in certain circumstances: interval might "gloss over" certain small alignments
  }
  // ERROR_IF(it == densities.end(), "Internal error: Could not find hash in densities: " + hash);
  double density = it->second;
  // ERROR_IF(density <= 0, "Internal error: Density should never be less than zero!"); 
  return density;
}
*/

/** Look up the stored density for a single alignment position.
 *
 * @param pos          Position to query. If internalMode is true it is used as an internal
 *                     column id; otherwise it is treated as a 0-based assembly coordinate and
 *                     mapped with maf.convertAssemblyPositionToColumnId().
 * @param densities    Map from assembly-combination hash to density.
 * @param maf          Alignment used for coordinate conversion and hash lookup.
 * @param internalMode Selects the coordinate interpretation of pos (see above).
 * @param verboseLevel At 2 or higher, a remark is written when no density is stored for the hash.
 * @return The stored density; 0.0 if the column id is not below maf.getTotalLength()
 *         (this can legitimately happen, e.g. for a position in a gap between MAF blocks);
 *         -1.0 if no density is stored for the hash of the alignment block.
 */
double
InteractionClusterAnalyzer::findDensity(length_type pos,
                                        const double_hash_type& densities,
                                        const MAFAlignment& maf,
                                        bool internalMode, int verboseLevel) {
  if (!internalMode) {
    // external coordinates: warn about implausible input, then map to a column id
    if (pos >= maf.getRefAssemblyTotLength()) {
      cout << "Warning: requesting density of non-sensical alignment column position (ext. coordinates)."
           << pos << " " << maf.getRefAssemblyTotLength() << endl;
    }
    pos = maf.convertAssemblyPositionToColumnId(pos);
  }

  // Column id outside the alignment: emit diagnostics and report "nothing here".
  if (pos >= maf.getTotalLength()) {
    cout << "Warning: requesting density of non-sensical alignment column position (internal coordinates)."
         << (pos+1) << " " << maf.getTotalLength() << " " << maf.getRefAssemblyChrom() << endl;
    cout << "DEBUG: " << (pos+1) << " " << maf.convertAssemblyPositionToColumnId(pos) << endl;
    cout << "DEBUG-B: " << (pos) << " " << maf.convertAssemblyPositionToColumnId(pos-1) << endl;
    cout << "DEBUG2: " << (pos+1) << " " << maf.convertAssemblyPositionToColumnIdSlow(pos) << endl;
    cout << "DEBUG2-B: " << (pos) << " " << maf.convertAssemblyPositionToColumnIdSlow(pos-1) << endl;
    cout << "DEBUG3: " << (pos+1) << " " << maf.convertAssemblyPositionToColumnIdVerySlow(pos) << endl;
    cout << "DEBUG3-B: " << (pos) << " " << maf.convertAssemblyPositionToColumnIdVerySlow(pos-1) << endl;
    return 0.0;
  }

  const length_type blockId = maf.getAlignmentId(pos);
  ERROR_IF(blockId >= static_cast<length_type>(maf.size()), "Internal error: could not convert coordinate to alignment id.");

  const string assembliesHash = maf.getAlignmentAssembliesHash(blockId);
  const double_hash_type::const_iterator found = densities.find(assembliesHash);
  if (found != densities.end()) {
    return found->second;
  }

  // No density stored for this assembly combination: signalled with a negative value.
  if (verboseLevel >= 2) {
    REMARK << "Problem with position " << pos << " Could not find hash " << assembliesHash << " in stored densities." << endl;
  }
  return -1.0;
}

/** find highest density in cluster area. Coordinates are assumbed to be 0-based assembly coordinates and not internal coordinates! */
double
InteractionClusterAnalyzer::findHighestDensity(const IntervallInt& interval,
					       const double_hash_type& densities,
					       const MAFAlignment& maf,
					       bool internalMode, int verboseLevel) {
  ASSERT(maf.size() > 0);
  double highestDensity = 0.0;
  for (length_type i = interval.getLower(); i <= interval.getUpper(); ++i) {
    double density = findDensity(i, densities, maf, internalMode, verboseLevel);
    if (verboseLevel > 2){
      cout << "Density of position " << i << " : " << density << endl;
    }
    if (density > highestDensity) { // automatically ignores negative cases
      highestDensity = density;
    }
  }
  // ERROR_IF(highestDensity <= 0, "Internal error: highest density is smaller or equal zero.!");
  return highestDensity;
}

/** Return the larger of the densities found at the two ends of a correlation.
 *
 * findDensity() is evaluated for correlation.getStart() and correlation.getStop() on the same
 * alignment; internalMode is forwarded and selects the coordinate interpretation.
 * The result can be negative if findDensity() reports "not found" for both positions.
 */
double
InteractionClusterAnalyzer::findHighestDensity(const Correlation& correlation,
                                               const double_hash_type& densities,
                                               const MAFAlignment& maf,
                                               bool internalMode, int verboseLevel) {
  ASSERT(maf.size() > 0);
  const double startDensity = findDensity(correlation.getStart(), densities, maf, internalMode, verboseLevel);
  const double stopDensity = findDensity(correlation.getStop(), densities, maf, internalMode, verboseLevel);
  double larger = stopDensity;
  if (startDensity > stopDensity) {
    larger = startDensity;
  }
  ASSERT((larger >= startDensity) && (larger >= stopDensity));
  return larger;
}

/** find highest density in cluster area. Coordinates are assumbed to be 0-based assembly coordinates and not internal coordinates! */
double
InteractionClusterAnalyzer::findHighestDensity(const Correlation& correlation, 
					       const double_hash_type& densities,
					       const MAFAlignment& maf,
					       const MAFAlignment& maf2,
					       bool internalMode, int verboseLevel) {
  ASSERT(maf.size() > 0);
  double density1 = findDensity(correlation.getStart(), densities, maf, internalMode, verboseLevel);
  double density2 = -1;
  if (internalMode) { //  maf : contains concatenation of ma and maf2; so even though we are choosing maf2, we use maf with this internal coordinate:
    density2 = findDensity(correlation.getStop(), densities, maf, internalMode, verboseLevel);
  } else {
    density2 = findDensity(correlation.getStop(), densities, maf2, internalMode, verboseLevel);
  }
  double highestDensity = density1 > density2 ? density1 : density2;
  ASSERT(highestDensity >= density1 && highestDensity >= density2);
  // ERROR_IF(highestDensity <= 0, "Internal error: highest density is smaller or equal zero.!");
  return highestDensity;
}

/** find highest density in cluster area */
double
InteractionClusterAnalyzer::findHighestDensity(const dimension_type& clusterDimensions,
					       const double_hash_type& densities,
					       const MAFAlignment& maf,
					       bool internalMode,  int verboseLevel) {
  ASSERT(maf.size() > 0);
  IntervallInt interval1 = clusterDimensions.first;
  IntervallInt interval2 = clusterDimensions.second;
  double dens1 = findHighestDensity(interval1, densities, maf, internalMode, verboseLevel);
  double dens2 = findHighestDensity(interval2, densities, maf, internalMode, verboseLevel); // should be correct
  double result = (dens1 > dens2) ? dens1 : dens2; // return highest found density
  if (verboseLevel > 2){
    cout << "Density of intervals " << interval1 << ", " << interval2 << " : " << dens1 << " " << dens2 << " " 
	 << result << endl;
  }
  // ERROR_IF(result <= 0, "Internal error: highest density is smaller or equal zero.!");
  return result;
}

/** find highest density in cluster area */
double
InteractionClusterAnalyzer::findHighestDensity2(const dimension_type& clusterDimensions,
					       const double_hash_type& densities,
					       const MAFAlignment& maf,
					       bool internalMode,  int verboseLevel) {
  ASSERT(maf.size() > 0);

  IntervallInt interval1 = clusterDimensions.first;
  IntervallInt interval2 = clusterDimensions.second;
  length_type xmin = interval1.getLower()+1; // recent
  length_type xmax = interval1.getUpper()-1;
  length_type ymin = interval2.getLower()+1; // recent
  length_type ymax = interval2.getUpper()-1;
  // REMARK << "Starting findHighestDensity2(1) " << xmin << " " << xmax << " " << ymin << " " << ymax << endl;
  if (!internalMode) {
    xmin = maf.convertAssemblyPositionToColumnId(xmin);
    xmax = maf.convertAssemblyPositionToColumnId(xmax);
    ymin = maf.convertAssemblyPositionToColumnId(ymin);
    ymax = maf.convertAssemblyPositionToColumnId(ymax);
  }
  // REMARK << "Continuing findHighestDensity2(1) " << xmin << " " << xmax << " " << ymin << " " << ymax << endl;
  size_type ali1Min = maf.getAlignmentId(xmin);
  size_type ali1Max = maf.getAlignmentId(xmax);
  size_type ali2Min = maf.getAlignmentId(ymin);
  size_type ali2Max = maf.getAlignmentId(ymax);
  double result = 0.0;
  ERROR_IF(ali1Min >= maf.size(), "Internal error: first alignment id corresponding to minimum position of cluster is out or range");
  ERROR_IF(ali1Max >= maf.size(), "Internal error: first alignment id corresponding to maximum position of cluster is out or range");
  ERROR_IF(ali2Min >= maf.size(), "Internal error: second alignment id corresponding to minimum position of cluster is out or range");
  ERROR_IF(ali2Max >= maf.size(), "Internal error: second alignment id corresponding to maximum position of cluster is out or range");
  for (size_type alix = ali1Min; alix <= ali1Max; alix++) {
    for (size_type aliy = ali2Min; aliy <= ali2Max; aliy++) {
      string assemblies = maf.getCommonAssembliesHash(alix, aliy);
      double_hash_type::const_iterator it = densities.find(assemblies);
      if (it != densities.end()) {
	double d = it->second;
	if (d > result) {
	  result = d;
	}
      } else {
	REMARK << "Could not find density of " << assemblies << " " << (alix +1) << " " << (aliy+1) << endl;
      }
    }
  }
  // ERROR_IF(result <= 0, "Internal error: highest density is smaller or equal zero.!");
  return result;
}


/** find highest density in cluster area */
double
InteractionClusterAnalyzer::findHighestDensity2(const dimension_type& clusterDimensions,
					       const double_hash_type& densities,
					       const MAFAlignment& maf,
					       const MAFAlignment& maf2,
					       bool internalMode,  int verboseLevel) {
  ASSERT(maf.size() > 0);
  ASSERT(maf2.size() > 0);
  IntervallInt interval1 = clusterDimensions.first;
  IntervallInt interval2 = clusterDimensions.second;
  length_type xmin = interval1.getLower();
  length_type xmax = interval1.getUpper();
  length_type ymin = interval2.getLower();
  length_type ymax = interval2.getUpper();
  // REMARK << "Starting findHighestDensity2(2) " << xmin << " " << xmax << " " << ymin << " " << ymax << endl;
  if (!internalMode) {
    xmin = maf.convertAssemblyPositionToColumnId(xmin);
    xmax = maf.convertAssemblyPositionToColumnId(xmax);
    ymin = maf2.convertAssemblyPositionToColumnId(ymin);
    ymax = maf2.convertAssemblyPositionToColumnId(ymax);
  }
  // REMARK << "Continuing findHighestDensity2(2) " << xmin << " " << xmax << " " << ymin << " " << ymax << endl;
  size_type ali1Min = maf.getAlignmentId(xmin);
  size_type ali1Max = maf.getAlignmentId(xmax);
  size_type ali2Min = maf2.getAlignmentId(ymin);
  size_type ali2Max = maf2.getAlignmentId(ymax);
  double result = 0.0;
  ERROR_IF(ali1Min >= maf.size(), "Internal error: first alignment id corresponding to minimum position of cluster is out or range");
  ERROR_IF(ali1Max >= maf.size(), "Internal error: first alignment id corresponding to maximum position of cluster is out or range");
  ERROR_IF(ali2Min >= maf2.size(), "Internal error: second alignment id corresponding to minimum position of cluster is out or range");
  ERROR_IF(ali2Max >= maf2.size(), "Internal error: second alignment id corresponding to maximum position of cluster is out or range");

  for (size_type alix = ali1Min; alix <= ali1Max; alix++) {
    for (size_type aliy = ali2Min; aliy <= ali2Max; aliy++) {
      string assemblies = maf.getCommonAssembliesHash(alix, maf2, aliy);
      double_hash_type::const_iterator it = densities.find(assemblies);
      if (it != densities.end()) {
	double d = it->second;
	if (d > result) {
	  result = d;
	}
      } else {
	REMARK << "Warning: could not find densitie of " << assemblies << " " << (alix +1) << " " << (aliy+1) << endl;
      }
    }
  }
  // ERROR_IF(result <= 0, "Internal error: highest density is smaller or equal zero.!");
  return result;
}


/** find highest density in cluster area */
double
InteractionClusterAnalyzer::findHighestDensity(const dimension_type& clusterDimensions,
					       const double_hash_type& densities,
					       const MAFAlignment& maf,
					       const MAFAlignment& maf2, 
					       bool internalMode, int verboseLevel) {
  ASSERT(maf.size() > 0);
  ASSERT(maf2.size() > 0);
  ASSERT(maf.size() > 0);
  IntervallInt interval1 = clusterDimensions.first;
  IntervallInt interval2 = clusterDimensions.second;
  double dens1 = findHighestDensity(interval1, densities, maf, internalMode, verboseLevel);
  double dens2 = -1;
  if (!internalMode) {
    dens2 = findHighestDensity(interval2, densities, maf2, internalMode, verboseLevel);
  } else {
    dens2 = findHighestDensity(interval2, densities, maf, internalMode, verboseLevel); // maf is concatenation!
  }
  double result = (dens1 > dens2) ? dens1 : dens2; // return highest found density
  if (verboseLevel > 2){
    cout << "Density of intervals " << interval1 << ", " << interval2 << " : " << dens1 << " " << dens2 << " " 
	 << result << endl;
  }
  // ERROR_IF(result <= 0, "Internal error: highest density is smaller or equal zero.!");
  return result;
  
}

/** Compute the bounding intervals of a cluster of stems.
 *
 * For each stem the first strand spans [start, start + length - 1] and the second strand
 * spans [stop - length + 1, stop]. The result pairs the smallest interval covering all first
 * strands with the smallest interval covering all second strands.
 *
 * @param stems Stems of the cluster; must not be empty (checked with ASSERT).
 * @return dimension_type(first-strand interval, second-strand interval).
 */
InteractionClusterAnalyzer::dimension_type
InteractionClusterAnalyzer::computeClusterDimensions(const Vec<Stem>& stems) {
  ASSERT(stems.size() > 0);
  // seed the bounding box with the first stem
  const Stem& seed = stems[0];
  Stem::index_type firstLow = seed.getStart();
  Stem::index_type firstHigh = seed.getStart() + seed.getLength() - 1;
  Stem::index_type secondLow = seed.getStop() - seed.getLength() + 1;
  Stem::index_type secondHigh = seed.getStop();
  // grow it with every remaining stem
  for (Vec<Stem>::size_type k = 1; k < stems.size(); ++k) {
    const Stem& current = stems[k];
    const Stem::index_type strand1Begin = current.getStart();
    const Stem::index_type strand1End = current.getStart() + current.getLength() - 1;
    const Stem::index_type strand2Begin = current.getStop() - current.getLength() + 1;
    const Stem::index_type strand2End = current.getStop();
    if (strand1Begin < firstLow) {
      firstLow = strand1Begin;
    }
    if (strand1End > firstHigh) {
      firstHigh = strand1End;
    }
    if (strand2Begin < secondLow) {
      secondLow = strand2Begin;
    }
    if (strand2End > secondHigh) {
      secondHigh = strand2End;
    }
  }
  return dimension_type(IntervallInt(firstLow, firstHigh), IntervallInt(secondLow, secondHigh));
}


/** Analyzer and cluster found stems */
void
InteractionClusterAnalyzer::analyzeClusters(ostream& os, ostream& bos,
					    Vec<Stem>& stems,
					    double clusterCutoff,
					    double totalLength1,
					    double totalLength2,
					    bool sameStrandMode,
					    double stemDensity,
					    double eMax,
					    double stemPMax, 
					    index_type stemLengthMin,
					    size_type clusterColMin, // number of column pairs with comp base changes
					    int expandClusterMaxAllowed, 
					    bool complementMode,
					    bool reverseMode,
					    const MAFAlignment& maf,
					    const MAFAlignment& maf2,
					    const count_hash_type& matchCounts,
					    const count_hash_type& assemblyCombLength1,
					    const count_hash_type& assemblyCombLength2,
					    bool useStemEnergiesAsDensities, int multiTestMode,
					    double_hash_type densities,
					    bool stemBiasPMode) {

    ASSERT(stemLengthMin > 0);
    ASSERT(totalLength1 > 0.0);
    ASSERT(StemTools::expandStemTest());
    ASSERT(RnaSecondaryStructureTools::isReverseComplementTest());
    ASSERT(RnaSecondaryStructureTools::generateComplementDnaTest());
    ASSERT(RnaSecondaryStructureTools::isWatsonCrickTest());
    ASSERT(stemInvariantsOkTest());
    ASSERT((complementMode && reverseMode) 
	   || ((!complementMode) && (!reverseMode)));
    // ASSERT(isSimilar(computeStemInvariantP(10, 20, 10), 1.0));
    bool allowGu = true;
    int verbose = 1;
    bool computeDensityMode = false;
    bool internalMode = false; // important: coordinates in stems are assumed to be in assembly coordinates not in internal coordinates
    index_type stemInvariantTypeCountMin = 1; // at least 2 different types of stem invariants 
    index_type stemInvariantCountMin = 1; // at last 1 different base pairs for these minimum type. 
    // double_hash_type densities;
    // careful: actually densities that are stored already in stems are used because of useStemEnergiesAsDensities Flag
    // if (assemblyCombLength2.size() > 0) {
    // 	densities = generateDensities(matchCounts, assemblyCombLength1, assemblyCombLength2);
    // } else {
    // 	densities = generateDensities(matchCounts, assemblyCombLength1);
    // }
    if (verbose > 1) {
      REMARK << "Using " << densities.size() << " distinct densities." << endl;
    }
    // if (verbose > 1) {
    //   writeHashs(os, densities, assemblyCombLength1, assemblyCombLength2); // , "-", "\t");
    // }
    // if (densOs != 0) {
    //   REMARK << "Writing densities to file!" << endl;
    //   writeDensities(*densOs, densities);
    // }
    if (multiTestMode == NO_CLUSTERING) {
      return;
    }
    string chrom1 = maf.getRefAssemblyChrom();
    string chrom2 = chrom1;
    if (maf2.size() > 0) {
      chrom2 = maf2.getRefAssemblyChrom();
    }
    // double areaSideMin = 2*clusterCutoff; // 500.0; // minimum area

    bool ignoreSingles = true;
    if (stems.size() == 0) {
      REMARK << "No stems found." << endl;
      return;
    }
    ASSERT(stems.size() > 0);
    // do not assume areas smaller than this, otherwise finding a stem in it is not a "rare event" and the assumptions of Poisson distr. do not hold 
    // double areaMin = areaSideMin * areaSideMin;
    double totalArea = totalLength1 * totalLength2;
    if (totalLength2 <= 0.0) {
      totalArea = totalLength1 * (totalLength1-1) / 2; // stems are counted only once, and the interactions are symmetric
    }
    if (bos) { // write header line that can after deleting the initial '#' character be parsed by R:
      bos << "# \"Id\"" "\"Chrom1\"" "\"Start1\"" "\"End1\"" "\"Strand1\"" "\"Chrom2\"" "\"Start2\"" "\"End2\"" "\"Site\"" "\"Dist\"" "\"Name\"" "\"Covariations\"" "\"Edens\"" "\"Pstem\"" "\"Ecomb\"" "\"StemCount\"" "\"Stems\"" << endl;
    }
    if (stems.size() > 0) {
      Vec<Vec<unsigned int> > clusters = clusterStemsFast(stems, clusterCutoff, ignoreSingles); // singleLinkage(distances, clusterCutoff);
      // Vec<Vec<double> > distances = StemTools::convertStemsToDistanceMatrix(stems);
      size_type subStemCount = countSubStems(stems,stemLengthMin);
      if (stemDensity <= 0.0) {
	stemDensity = subStemCount / totalArea;
	if (verbose > 1) {
	  REMARK << "Estimating stem density because no stem density provided." << endl;
	}
      }
      if (verbose > 1) {
	REMARK << "Found " << stems.size() << " ( " << subStemCount << " ) stems in area " << totalArea << " bases squared. Estimated density of stems: " << stemDensity << " Initial clusters: " << clusters.size() << endl;
	REMARK << "Reporting clusters with a combined E-value of less or equal than " <<  eMax
	       << " and a stem-bias P-value of " << stemPMax << endl;
      }
      Vec<Vec<unsigned int> >::size_type clusterCount = 0;
      // loop over indidual raw clusters:
      for (Vec<Vec<unsigned int> >::size_type i = 0; i < clusters.size(); ++i) {
        ASSERT(clusters[i].size() > 0);
	Vec<Stem> clusterStemVec = getSubset(stems, clusters[i]);
	// at least this many different types with at least stemInvariantMin total length 
	if (!stemInvariantsOk(clusterStemVec, stemInvariantTypeCountMin, stemInvariantCountMin, reverseMode)) {
	  if (verbose > 1) {
	    REMARK << "Ignoring initial cluster " << (i+1) << " : " << clusterStemVec << endl;
	  }
	  continue;
	}
	Vec<Stem> expandedClusterStemVec = expandStems(clusterStemVec, stemLengthMin, reverseMode); // contains stems of lengths stemLengthMin .. (stemLengthMin-1) 
	ERROR_IF(expandedClusterStemVec.size() == 0,
		 "Internal error: this cluster of covarying columns does not contain any data!");
	if (verbose > 1) {
	  REMARK << "Working on cluster " << (i + 1) << endl;
	  REMARK << "Stems of this cluster: " << clusterStemVec << endl;
	  REMARK << "Expanded stems of this cluster: " << expandedClusterStemVec << endl;
	}
	double poisson = 1.0; // default value
	dimension_type clusterDimensions = computeClusterDimensions(expandedClusterStemVec);
	/** Compute P-value for stem bias. Superceded by external methods */
        double stemBiasP = 1.0;
	if (stemBiasPMode) {
	  stemBiasP = computeStemInvariantBiasP(expandedClusterStemVec, clusterCutoff, reverseMode);
	}
	// find highest density in cluster area:
	double highestDensity = 0.0;
	double area = computeClusterArea(clusterDimensions, clusterCutoff); // clusterCutoff is "border"
	double areaSmall = computeClusterArea(clusterDimensions, 0.0); // no "border"
        double expected = -1.0; // how many covarying column pairs are expected in area? Only used if computeDensityMode is set to true
	ASSERT(area >= (4 * clusterCutoff * clusterCutoff));
	double eValue = 1.0; 
	double eValueStems = -1; // combined E-value; if negative: not used
	if (computeDensityMode) { // in this mode, compute Poisson test based on observed and expected density of covarying alignment columns
	if (useStemEnergiesAsDensities) {
	  for (Vec<Vec<unsigned int> >::size_type k = 0; k < clusters[i].size(); ++k) {
	    double dens = stems[clusters[i][k]].getEnergy();
	    if (dens > highestDensity) {
	      highestDensity = dens;
	    }
	  } 
	  ASSERT(false); // no supported anymore
	} else {
	  // ERROR("Density estimation mode not supported anymore!");
	  highestDensity = (maf2.size() > 0) ? findHighestDensity2(clusterDimensions, densities, maf, maf2, internalMode, 0):
	    findHighestDensity2(clusterDimensions, densities, maf, internalMode, 0);
	}
	if (highestDensity <= 0.0) {
	  REMARK << "Internal error: could not identify highest density of cluster:" << clusterStemVec << endl;
	  cout << ((maf2.size() > 2) ? findHighestDensity(clusterDimensions, densities, maf, maf2, internalMode, 3):
		   findHighestDensity(clusterDimensions, densities, maf, internalMode,3)) << endl;
	  cout << clusterStemVec << endl;
	  continue;
	}
	// 	if (area < areaMin) {
	// 	  area = areaMin; 
	// 	}
	ERROR_IF(area <= 0.0 , "Internal error: cluster area cannot be zero or negative!");
	expected =  area * highestDensity; // stemDensity;
	// how likely is it to observe this many or more counts?
        // same as probability as NOT observing less counts
	// double poisson = 1.0-poissonCdf(static_cast<int>(expandedClusterStemVec.size())-1, expected); //  * exp(-expected) / factorial(clusters[i].size());
	double poisson = poissonCdfP(static_cast<int>(expandedClusterStemVec.size()), expected); 
	ERROR_IF(poisson < 0.0, "Internal error computing P value based on Poisson distribution!"); 
	
	switch (multiTestMode) {
	case MTEST_TOTAL_AREA: 
	  eValue = poisson * totalArea; // most conservative
	  break;
	case MTEST_CLUSTER_AREA:
	  eValue = poisson * (totalArea / area);
	  break;
	case MTEST_CLUSTER_AREA_SMALL:
	  eValue = poisson * (totalArea / areaSmall);
	  break;
	default:
	  ERROR("Unknown multiple-testing correction mode!");
	}
	eValueStems = eValue * stemBiasP;
	}

	// double areaFrac = totalArea / area; // areaSmall; //  / area;

	int interactionClassification = INTERACTION_INTERCHROM;
	if (sameStrandMode || (maf2.size() == 0)) {
	  interactionClassification = classifyInteraction(clusterDimensions);
	}

	// notice how we have a more conservative multiple-testing correction: one could argue that
        // multiplying by (totalArea/area) would be sufficient

	length_type clusterInt = static_cast<length_type>(clusterCutoff);
	if (((eMax <= 0) || (eValue <= eMax)) && (stemBiasP <= stemPMax) && (expandedClusterStemVec.size() >= clusterColMin)) {
	  if (verbose > 1) {
	    REMARK << "Cluster " << (++clusterCount) << " ( " << (i+1) << " ) " << " size: " << clusters[i].size() 
		   << " ( " << expandedClusterStemVec.size() << " ) Region: " 
		   << (clusterDimensions.first.getLower() + 1) << "-" << (clusterDimensions.first.getUpper() + 1) << ":"
		   << (clusterDimensions.second.getLower() + 1) << "-" << (clusterDimensions.second.getUpper() + 1);
	  }
	  if (computeDensityMode) {
	    if (verbose > 1) {
	      cout << " Eff. Region: " 
		   << (clusterDimensions.first.getLower() - clusterInt + 1) << "-" << (clusterDimensions.first.getUpper() + clusterInt + 1) << ":"
		   << (clusterDimensions.second.getLower() - clusterInt + 1) << "-" << (clusterDimensions.second.getUpper() + clusterInt + 1)
		   << " Poisson: " << poisson 
		   << " area: " << area << " expected: " << expected << " density: " << highestDensity << " E: " << eValue
		   << " stem bias p: " << stemBiasP << " E2: " << eValueStems;
	    }
	  } 
          string interactionName = "unclassified"; 
	  if (sameStrandMode) {
	    interactionName = convertToInteractionName(interactionClassification);
	  }
	  else {
	    interactionName = "interchrom";
	  }
	  if (verbose > 1) {
	    os << " " << interactionName;
	  }
	  if (computeDensityMode) {
	    if (static_cast<double>(clusters[i].size()) < expected) { // only consider high-value tail of Poisson distribution
	      if (verbose > 1) {
		os << " warning:low_density_cluster";
	      }
	    }
	  }
	  if (verbose > 1) {
	    os << endl;
	  }
	  int strandPlusCount = 0;
	  int strandMinusCount = 0;
	  int erSum = 0;
	  int ecSum = 0;
	  int econsSum = 0;
	  index_type covarCount = 0;
	  int expandClusterMax = 0;
	  for (Vec<unsigned int>::size_type j = 0; j < clusters[i].size(); ++j) {
	    Stem& stem = stems[clusters[i][j]];
            covarCount += stem.getLength(); // stems consist only of sets of covarying columns
	    if (verbose > 1) {
	      MainTools::writeStem(os, stem, 0.0, true, reverseMode);
	    }
	    if (maf2.size() > 0) {
	      MAFAlignmentTools::augmentStemSequence(maf, maf2, stem, reverseMode);
	    } else {
	      MAFAlignmentTools::augmentStemSequence(maf, maf, stem, reverseMode);
	    }
	    if (verbose > 1) {
	      os << " ( ";
	    }
	    int er = 0; // extendability of stem (using regular complementarity) 
	    int ec = 0; // extendability of stem (using complement, corresponding to minus strand) 
            int econs = 0; // extenability of conserved stem
	    if (maf2.size() > 0) {
	      er = MAFAlignmentTools::stemExpandability(maf, maf2, stem, true, false,
					complementMode, reverseMode); // regular
	      ec = MAFAlignmentTools::stemExpandability(maf, maf2, stem, true, true,
					complementMode, reverseMode); // expandability for complement
	      // expandability of conserved stem:
	      econs = MAFAlignmentTools::stemConservedExpandability(maf, maf2, stem, complementMode, reverseMode,
								    allowGu); 

	    } else {
	      er = MAFAlignmentTools::stemExpandability(maf, maf, stem, true, false,
                      complementMode, reverseMode); // regular
	      ec = MAFAlignmentTools::stemExpandability(maf, maf, stem, true, true,
                      complementMode, reverseMode); // expandability for complement
	      // expandability of conserved stem:
	      econs = MAFAlignmentTools::stemConservedExpandability(
				    maf, maf, stem, complementMode, reverseMode, allowGu); 
	      
	    }
	    erSum += er;
	    ecSum += ec;
	    econsSum += econs;
	    if (er > expandClusterMax) {
	      expandClusterMax = er;
	    }
	    if (ec > expandClusterMax) {
	      expandClusterMax = ec;
	    }
	    int dc = er - ec;
	    if (verbose > 1) {
	      os << er << " , " << ec << " , " << dc << " , " << econs << " ) " << stem.getSequence1() << " " << stem.getSequence2() << endl;
	    }
	    if (dc > 0) {
	      ++strandPlusCount;
	    } else if (dc < 0) {
	      ++strandMinusCount;
	    } 
	  }
	  int strandEvidenceCount = strandPlusCount + strandMinusCount;
	  int strandPrediction = 0;
	  if (strandEvidenceCount == 4) {
	    if (strandPlusCount == 4) {
	      strandPrediction = 1;
	    } else if (strandMinusCount == 4) {
	      strandPrediction = -1;
	    }
	  } else if (strandEvidenceCount == 5) {
	    if (strandPlusCount >= 4) {
	      strandPrediction = 1;
	    } else if (strandMinusCount >= 4) {
	      strandPrediction = -1;
	    }
	  } else if (strandEvidenceCount > 5) {
	    if ((strandPlusCount+1)/(static_cast<double>(strandEvidenceCount) + 2) > 0.75) { // use pseudo-counts
	      strandPrediction = 1;
	    } else if ((strandMinusCount+1)/(static_cast<double>(strandEvidenceCount) + 2) > 0.75) { // use pseudo-counts
	      strandPrediction = -1;
	    }
	  } 
          string strandIndicator = "?"; 
	  if (strandPrediction == 1) {
	    strandIndicator = "+";
	  } else if (strandPrediction == -1) {
	    strandIndicator = "-";
	  } 
	  if (verbose > 1) {
	    os << "strand: " << strandIndicator << " pl: " << strandPlusCount << " mi: " << strandMinusCount << " size: " << clusters[i].size() << " rs: " 
	       << erSum << " cs: " << ecSum << " cons: " << econsSum << " v: " 
	       << covarCount << endl;
	  }
	  if (expandClusterMax > expandClusterMaxAllowed) {
	    if (verbose > 1) {
	      os << " Note: The maximum strand extendability in this cluster was suspiciously large, suggesting sequence duplication. Ignoring cluster in BED format output.";
	    }
	  }
	  os << endl;
          if (bos && (expandClusterMax <= expandClusterMaxAllowed)) {
	    // output of BED format in second file: (pay attention to different convention for 0-based start and 1-based stop positions
            string name = chrom2 + "_" + itos(clusterDimensions.first.getLower()+1) + "_" + itos(clusterDimensions.second.getUpper() + 1) + "_" + (itos(clusterDimensions.first.getUpper()-clusterDimensions.first.getLower() + 1));
	    if (chrom1 != chrom2) {
	      name = chrom1 +"_" + name;
	    }
	    bos << chrom1 << " " << clusterDimensions.first.getLower()  << " " << (clusterDimensions.first.getUpper() + 1) << " " 
	        << strandIndicator << " " 
		<< chrom2 << " " << clusterDimensions.second.getLower() << " " << (clusterDimensions.second.getUpper() + 1)
	        << " " << clusterCount << " " << interactionName << " 1 " << name << "_A " << covarCount << " " << expandClusterMax << " " << eValue << " " << stemBiasP << " " << eValueStems << " " << clusters[i].size() << " ";
	    for (Vec<unsigned int>::size_type j = 0; j < clusters[i].size(); ++j) {
	      ERROR_IF(clusters[i][j] >= stems.size(), "Internal error in line 603 while clustering stems.");
	      Stem stem = stems[clusters[i][j]];
	      bos << (stem.getStart() + 1) << "," << (stem.getStop() + 1) << "," << stem.getLength();
	      if ((j + 1) < clusters[i].size()) {
		bos << ";";
	      } else {
		bos << endl;
	      }
	    }
	    // same info reported the other way around: 
	    bos << chrom2 << " " << clusterDimensions.second.getLower() << " " << (clusterDimensions.second.getUpper() + 1) << " "
		<< strandIndicator << " "
		<< chrom1 << " " << clusterDimensions.first.getLower()  << " " << (clusterDimensions.first.getUpper() + 1)
	        << " " << clusterCount << " " << interactionName << " 2 " << name << "_B " << covarCount << " " << expandClusterMax << " " << eValue << " " << stemBiasP << " " << eValueStems << " " << clusters[i].size() << " ";
	    for (Vec<unsigned int>::size_type j = 0; j < clusters[i].size(); ++j) {
	      ERROR_IF(clusters[i][j] >= stems.size(), "Internal error in line 618 while clustering stems.");
	      Stem stem = stems[clusters[i][j]];
	      bos << (stem.getStart() + 1) << "," << (stem.getStop() + 1) << "," << stem.getLength();
	      if ((j + 1) < clusters[i].size()) {
		bos << ";";
	      } else {
		bos << endl;
	      }
	    }
	  }
	} // otherwise skip this cluster
      }
      // if (clusterCount > 0) {
      // 	REMARK << "Found " << clusterCount << " clusters with an E-value less than " << eMax << endl;
      // } else {
      // 	REMARK << "No clusters with an E-value less than " << eMax << " found." << endl;
      // }
    }
}

/** Clusters the given stems and reports every accepted cluster ("fast" variant).
 *
 * Processing steps, in order:
 *  1. Returns immediately if multiTestMode is NO_CLUSTERING or if stems is empty.
 *  2. Writes a header line to bos (if bos is in a good state).
 *  3. Builds raw clusters with clusterStemsFast(stems, clusterCutoff, ignoreSingles=true).
 *  4. For each raw cluster: skips it unless stemInvariantsOk() holds, expands its stems
 *     with expandStems(), computes the cluster dimensions and areas, and classifies
 *     the interaction.
 *  5. Writes two records per accepted cluster to bos: one seen from region 1 ("_A")
 *     and one seen from region 2 ("_B").
 *
 * In contrast to the non-fast BED output earlier in this file, no per-stem expandability
 * and no strand prediction is computed here; the corresponding output fields are written
 * as the constant 0.
 *
 * Because computeDensityMode is hard-wired to false, the Poisson / E-value branch is
 * currently never executed: eValue stays 1.0 and eValueStems stays -1 for every cluster.
 * Likewise stemBiasP is fixed to 1.0, so a cluster can only be accepted if stemPMax >= 1.0.
 *
 * @param os  Stream for verbose human-readable output (only written if verbose > 1).
 * @param bos Stream for the machine-readable, space-separated cluster records.
 * @param stems Stems to cluster; coordinates are treated as assembly coordinates (internalMode == false).
 * @param clusterCutoff Cutoff passed to clusterStemsFast; also used as border width for the cluster area.
 * @param totalLength1 Length of first region; must be > 0.
 * @param totalLength2 Length of second region; if <= 0 the total area is totalLength1*(totalLength1-1)/2.
 * @param sameStrandMode If true, the interaction name comes from convertToInteractionName(); otherwise it is "interchrom".
 * @param stemDensity If <= 0, estimated as (sub-stem count / total area); only used in verbose output.
 * @param eMax Maximum accepted E-value; values <= 0 switch the E-value filter off.
 * @param stemPMax Maximum accepted stem-bias P-value (compared against the constant 1.0, see above).
 * @param stemLengthMin Minimum stem length used for sub-stem counting and stem expansion; must be > 0.
 * @param clusterColMin Minimum number of expanded stems a cluster needs in order to be reported.
 * @param expandClusterMaxAllowed Not used by this variant.
 * @param complementMode Only checked for consistency with reverseMode (both must be equal).
 * @param reverseMode Passed on to stemInvariantsOk() and expandStems().
 * @param maf First alignment; provides the name of chromosome 1.
 * @param maf2 Second alignment; if non-empty it provides the name of chromosome 2.
 * @param matchCounts Not used by this variant.
 * @param assemblyCombLength1 Not used by this variant.
 * @param assemblyCombLength2 Not used by this variant.
 * @param useStemEnergiesAsDensities Only evaluated in density mode (currently disabled).
 * @param multiTestMode NO_CLUSTERING aborts early; MTEST_* values select the multiple-testing
 *        correction in density mode (currently disabled).
 * @param densities Densities per assembly combination; only used in density mode and verbose output.
 */
void
InteractionClusterAnalyzer::analyzeClustersFast(ostream& os, ostream& bos,
						Vec<Stem>& stems,
						double clusterCutoff,
						double totalLength1,
						double totalLength2,
						bool sameStrandMode,
						double stemDensity,
						double eMax,
						double stemPMax, 
						index_type stemLengthMin,
						size_type clusterColMin, // number of column pairs with comp base changes
						int expandClusterMaxAllowed, 
						bool complementMode,
						bool reverseMode,
						const MAFAlignment& maf,
						const MAFAlignment& maf2,
						const count_hash_type& matchCounts,
						const count_hash_type& assemblyCombLength1,
						const count_hash_type& assemblyCombLength2,
						bool useStemEnergiesAsDensities, int multiTestMode,
						double_hash_type densities) {

  ASSERT(stemLengthMin > 0);
  ASSERT(totalLength1 > 0.0);
  // self-tests of the helper routines this method relies on:
  ASSERT(StemTools::expandStemTest());
  ASSERT(RnaSecondaryStructureTools::isReverseComplementTest());
  ASSERT(RnaSecondaryStructureTools::generateComplementDnaTest());
  ASSERT(RnaSecondaryStructureTools::isWatsonCrickTest());
  ASSERT(stemInvariantsOkTest());
  // complementMode and reverseMode must either both be set or both be unset:
  ASSERT((complementMode && reverseMode)
         || ((!complementMode) && (!reverseMode)));

  int verbose = 1;                 // all "verbose > 1" output is therefore switched off by default
  bool computeDensityMode = false; // Poisson-based E-value estimation is switched off
  bool internalMode = false;       // important: coordinates in stems are assumed to be assembly coordinates, not internal coordinates
  index_type stemInvariantTypeCountMin = 1; // minimum number of stem invariant types handed to stemInvariantsOk
  index_type stemInvariantCountMin = 1;     // minimum count per type handed to stemInvariantsOk
  bool ignoreSingles = true;

  if (verbose > 1) {
    REMARK << "Using " << densities.size() << " distinct densities." << endl;
  }
  if (multiTestMode == NO_CLUSTERING) {
    return;
  }
  string chrom1 = maf.getRefAssemblyChrom();
  string chrom2 = chrom1;
  if (maf2.size() > 0) {
    chrom2 = maf2.getRefAssemblyChrom();
  }
  if (stems.size() == 0) {
    REMARK << "No stems found." << endl;
    return;
  }

  double totalArea = totalLength1 * totalLength2;
  if (totalLength2 <= 0.0) {
    totalArea = totalLength1 * (totalLength1-1) / 2; // stems are counted only once, and the interactions are symmetric
  }
  if (bos) {
    // Header line that can, after deleting the initial '#' character, be parsed by R.
    // The column names are separated by single blanks, like the fields of the data records.
    // NOTE(review): the data records below carry 18 fields while this header names 17 columns,
    // and the column order does not obviously match; confirm against the downstream parser.
    bos << "# \"Id\" \"Chrom1\" \"Start1\" \"End1\" \"Strand1\" \"Chrom2\" \"Start2\" \"End2\" \"Site\" \"Dist\" \"Name\" \"Covariations\" \"Edens\" \"Pstem\" \"Ecomb\" \"StemCount\" \"Stems\"" << endl;
  }

  Vec<Vec<unsigned int> > clusters = clusterStemsFast(stems, clusterCutoff, ignoreSingles);
  size_type subStemCount = countSubStems(stems,stemLengthMin);
  if (stemDensity <= 0.0) {
    stemDensity = subStemCount / totalArea;
    if (verbose > 1) {
      REMARK << "Estimating stem density because no stem density provided." << endl;
    }
  }
  if (verbose > 1) {
    REMARK << "Found " << stems.size() << " ( " << subStemCount << " ) stems in area " << totalArea << " bases squared. Estimated density of stems: " << stemDensity << " Initial clusters: " << clusters.size() << endl;
    REMARK << "Reporting clusters with a combined E-value of less or equal than " <<  eMax
           << " and a stem-bias P-value of " << stemPMax << endl;
  }

  Vec<Vec<unsigned int> >::size_type clusterCount = 0; // number of accepted clusters so far
  // loop over individual raw clusters:
  for (Vec<Vec<unsigned int> >::size_type i = 0; i < clusters.size(); ++i) {
    ASSERT(clusters[i].size() > 0);
    Vec<Stem> clusterStemVec = getSubset(stems, clusters[i]);
    if (!stemInvariantsOk(clusterStemVec, stemInvariantTypeCountMin, stemInvariantCountMin, reverseMode)) {
      if (verbose > 1) {
        REMARK << "Ignoring initial cluster " << (i+1) << " : " << clusterStemVec << endl;
      }
      continue;
    }
    // NOTE(review): presumably splits the stems into sub-stems based on stemLengthMin - confirm in expandStems
    Vec<Stem> expandedClusterStemVec = expandStems(clusterStemVec, stemLengthMin, reverseMode);
    ERROR_IF(expandedClusterStemVec.size() == 0,
             "Internal error: this cluster of covarying columns does not contain any data!");
    if (verbose > 1) {
      REMARK << "Working on cluster " << (i + 1) << endl;
      REMARK << "Stems of this cluster: " << clusterStemVec << endl;
      REMARK << "Expanded stems of this cluster: " << expandedClusterStemVec << endl;
    }
    dimension_type clusterDimensions = computeClusterDimensions(expandedClusterStemVec);
    double stemBiasP = 1.0; // stem-bias test is deactivated; constant P-value
    double highestDensity = 0.0; // highest density in cluster area (density mode only)
    double area = computeClusterArea(clusterDimensions, clusterCutoff); // clusterCutoff is "border"
    double areaSmall = computeClusterArea(clusterDimensions, 0.0); // no "border"
    double expected = -1.0; // expected number of covarying column pairs in area; only set in density mode
    ASSERT(area >= (4 * clusterCutoff * clusterCutoff));
    double eValue = 1.0;
    double eValueStems = -1; // combined E-value; if negative: not used

    if (computeDensityMode) { // Poisson test based on observed and expected density of covarying alignment columns
      if (useStemEnergiesAsDensities) {
        for (Vec<Vec<unsigned int> >::size_type k = 0; k < clusters[i].size(); ++k) {
          double dens = stems[clusters[i][k]].getEnergy();
          if (dens > highestDensity) {
            highestDensity = dens;
          }
        }
        ASSERT(false); // not supported anymore
      } else {
        highestDensity = (maf2.size() > 0) ? findHighestDensity2(clusterDimensions, densities, maf, maf2, internalMode, 0):
          findHighestDensity2(clusterDimensions, densities, maf, internalMode, 0);
      }
      if (highestDensity <= 0.0) {
        REMARK << "Internal error: could not identify highest density of cluster:" << clusterStemVec << endl;
        // Diagnostic re-run with high verbosity. The one-/two-alignment choice uses the same
        // condition (maf2 non-empty) as the actual computation above.
        cout << ((maf2.size() > 0) ? findHighestDensity(clusterDimensions, densities, maf, maf2, internalMode, 3):
                 findHighestDensity(clusterDimensions, densities, maf, internalMode,3)) << endl;
        cout << clusterStemVec << endl;
        continue;
      }
      ERROR_IF(area <= 0.0 , "Internal error: cluster area cannot be zero or negative!");
      expected =  area * highestDensity;
      // presumably the probability of observing this many or more counts, given the expectation
      double poisson = poissonCdfP(static_cast<int>(expandedClusterStemVec.size()), expected);
      ERROR_IF(poisson < 0.0, "Internal error computing P value based on Poisson distribution!");

      // multiple-testing correction:
      switch (multiTestMode) {
      case MTEST_TOTAL_AREA:
        eValue = poisson * totalArea; // most conservative
        break;
      case MTEST_CLUSTER_AREA:
        eValue = poisson * (totalArea / area);
        break;
      case MTEST_CLUSTER_AREA_SMALL:
        eValue = poisson * (totalArea / areaSmall);
        break;
      default:
        ERROR("Unknown multiple-testing correction mode!");
      }
      eValueStems = eValue * stemBiasP;
    }

    int interactionClassification = INTERACTION_INTERCHROM;
    if (sameStrandMode || (maf2.size() == 0)) {
      interactionClassification = classifyInteraction(clusterDimensions);
    }
    string interactionName = "unclassified";
    if (sameStrandMode) {
      interactionName = convertToInteractionName(interactionClassification);
    } else {
      interactionName = "interchrom";
    }

    // acceptance filter: E-value (unless switched off by eMax <= 0), stem-bias P-value, minimum cluster size
    if (((eMax <= 0) || (eValue <= eMax)) && (stemBiasP <= stemPMax) && (expandedClusterStemVec.size() >= clusterColMin)) {
      ++clusterCount; // counted independently of the verbosity level
      if (verbose > 1) {
        REMARK << "Cluster " << clusterCount << " ( " << (i+1) << " ) " << " size: " << clusters[i].size()
               << " ( " << expandedClusterStemVec.size() << " ) Region: "
               << (clusterDimensions.first.getLower() + 1) << "-" << (clusterDimensions.first.getUpper() + 1) << ":"
               << (clusterDimensions.second.getLower() + 1) << "-" << (clusterDimensions.second.getUpper() + 1);
        os << " " << interactionName;
      }
      if (computeDensityMode) {
        if (static_cast<double>(clusters[i].size()) < expected) { // only consider high-value tail of Poisson distribution
          if (verbose > 1) {
            os << " warning:low_density_cluster";
          }
        }
      }
      if (verbose > 1) {
        os << endl;
      }

      if (bos) {
        // Machine-readable output; pay attention to the convention of 0-based start and 1-based stop positions.
        // Strand, cluster id, covariation count and expandability are not computed in this
        // fast variant and are written as 0.
        string name = chrom2 + "_" + itos(clusterDimensions.first.getLower()+1) + "_" + itos(clusterDimensions.second.getUpper() + 1) + "_" + (itos(clusterDimensions.first.getUpper()-clusterDimensions.first.getLower() + 1));
        if (chrom1 != chrom2) {
          name = chrom1 +"_" + name;
        }
        // record A: region 1 first, then region 2
        bos << chrom1 << " " << clusterDimensions.first.getLower()  << " " << (clusterDimensions.first.getUpper() + 1) << " "
            << "0" << " "
            << chrom2 << " " << clusterDimensions.second.getLower() << " " << (clusterDimensions.second.getUpper() + 1)
            << " " << 0 << " " << interactionName << " 1 " << name << "_A " << 0 << " " << 0 << " " << eValue << " " << stemBiasP << " " << eValueStems << " " << clusters[i].size() << " ";
        // list of stems as "start,stop,length" triples (1-based), separated by ';'
        for (Vec<unsigned int>::size_type j = 0; j < clusters[i].size(); ++j) {
          ERROR_IF(clusters[i][j] >= stems.size(), "Internal error in line 603 while clustering stems.");
          Stem stem = stems[clusters[i][j]];
          bos << (stem.getStart() + 1) << "," << (stem.getStop() + 1) << "," << stem.getLength();
          if ((j + 1) < clusters[i].size()) {
            bos << ";";
          } else {
            bos << endl;
          }
        }
        // record B: same info reported the other way around
        bos << chrom2 << " " << clusterDimensions.second.getLower() << " " << (clusterDimensions.second.getUpper() + 1) << " "
            << 0 << " "
            << chrom1 << " " << clusterDimensions.first.getLower()  << " " << (clusterDimensions.first.getUpper() + 1)
            << " " << 0 << " " << interactionName << " 2 " << name << "_B " << 0 << " " << 0 << " " << eValue << " " << stemBiasP << " " << eValueStems << " " << clusters[i].size() << " ";
        for (Vec<unsigned int>::size_type j = 0; j < clusters[i].size(); ++j) {
          ERROR_IF(clusters[i][j] >= stems.size(), "Internal error in line 618 while clustering stems.");
          Stem stem = stems[clusters[i][j]];
          bos << (stem.getStart() + 1) << "," << (stem.getStop() + 1) << "," << stem.getLength();
          if ((j + 1) < clusters[i].size()) {
            bos << ";";
          } else {
            bos << endl;
          }
        }
      }
    } // otherwise skip this cluster
  }
}


/** Generates densities from counts of matching alignment columns */
/* double
InteractionClusterAnalyzer::generateDefaultDensity(const string& assemblies,
						   const count_hash_type& assemblyCombLength1) {
  double pseudoCount = 1.0;
  size_t totLen = 0;
  for (count_hash_type::const_iterator it = assemblyCombLength1.begin(); it != assemblyCombLength1.end(); it++) {
    totLen += it->second;
  }
  // string assemblies = it->first;
  size_type count = 0; // pseudocount will be added
  ASSERT(assemblyCombLength1.find(assemblies) != assemblyCombLength1.end());
  size_type length = assemblyCombLength1.find(assemblies)->second;
  ERROR_IF(length == 0, "Internal error: could not find length for assembly combination " + assemblies); 
  double area = (static_cast<double>(length) * static_cast<double>(totLen)); // changed from small square to rectangle
  double density = static_cast<double>(count + pseudoCount) / area;
  return density;
}
*/

/** Generates densities from counts of matching alignment columns (single-alignment variant).
 *
 * Deprecated: the method raises an error via ERROR(...) as its very first statement.
 * The remaining code is kept so that the computation stays documented and correct in
 * case the method is revived.
 *
 * For every assembly combination in matchCounts the density is
 *   (count + pseudoCount) / (length(combination) * total length of all combinations),
 * with pseudoCount = 1.
 *
 * Any changes to this method should also be made to the generateDefaultDensity method
 * (which is currently commented out in this file).
 *
 * @param matchCounts Number of matching columns per assembly combination.
 * @param assemblyCombLength1 Length per assembly combination; must contain every key of matchCounts.
 * @return Density per assembly combination.
 */
InteractionClusterAnalyzer::double_hash_type
InteractionClusterAnalyzer::generateDensities(const count_hash_type& matchCounts,
					      const count_hash_type& assemblyCombLength1) {
  ERROR("Internal error in InteractionClusterAnalyzer::generateDensities: called deprecated method.");
  double_hash_type result;
  double pseudoCount = 1.0;
  size_t totLen = 0;
  for (count_hash_type::const_iterator it = assemblyCombLength1.begin(); it != assemblyCombLength1.end(); ++it) {
    totLen += it->second;
  }
  for (count_hash_type::const_iterator it = matchCounts.begin(); it != matchCounts.end(); ++it) {
    const string& assemblies = it->first;
    size_type count = it->second;
    // It cannot be that a "hit" was detected for a certain assembly combination while this
    // combination is not found in the MAF alignment. The lookup result is checked BEFORE
    // it is dereferenced (the check used to be inverted).
    count_hash_type::const_iterator lengthIt = assemblyCombLength1.find(assemblies);
    ERROR_IF(lengthIt == assemblyCombLength1.end(), "Internal error in generateDensities: Could not find assembly " + assemblies); 
    size_type length = lengthIt->second;
    ERROR_IF(length == 0, "Internal error: could not find length for assembly combination " + assemblies); 
    double area = (static_cast<double>(length) * static_cast<double>(totLen)); // changed from small square to rectangle
    double density = static_cast<double>(count + pseudoCount) / area;
    result[assemblies] = density;
  }
  return result;
}

/** Generates densities from counts of matching alignment columns.
 * Any changes to this method should also be made to generateDefaultDensity method */
/*
InteractionClusterAnalyzer::double_hash_type
InteractionClusterAnalyzer::generateDensities2(const count_hash_type& matchCounts,
					       const count_hash_type& assemblyCombAreas) {
  double_hash_type result;
  double pseudoCount = 1.0;
  for (count_hash_type::const_iterator it = matchCounts.begin(); it != matchCounts.end(); it++) {
    string assemblies = it->first;
    size_type count = it->second;
    // cannot be that "hit" was detected for certain assembly combination and this combination is somehow not found in the MAF alignment:
    ERROR_IF(assemblyCombLength1.find(assemblies) != assemblyCombLength1.end(), "Internal error in generateDensities: Could not find assembly " + assemblies); 
    size_type length = assemblyCombLength1.find(assemblies)->second;
    ERROR_IF(length == 0, "Internal error: could not find length for assembly combination " + assemblies); 
    double area = (static_cast<double>(length) * static_cast<double>(totLen)); // changed from small square to rectangle
    double density = static_cast<double>(count + pseudoCount) / area;
    result[assemblies] = density;
  }
  return result;
}
*/

/** Estimates the density of found "hits" for given assembly combinations (two-alignment variant).
 *
 * For every assembly combination in matchCounts that is present in both length tables:
 *   density = count / (length1 * totLen1 + length2 * totLen2)
 * where totLen1 / totLen2 are the sums of all lengths in the first / second table.
 * Combinations missing from either table are reported via REMARK and skipped, so they
 * have no entry in the result.
 *
 * Important: Any changes to this method should also be made to method generateDefaultDensity
 * (which is currently commented out in this file).
 *
 * @param matchCounts Number of matching columns per assembly combination.
 * @param assemblyCombLength1 Length per assembly combination in the first genomic alignment.
 * @param assemblyCombLength2 Length per assembly combination in the second genomic alignment.
 * @return Density per assembly combination; each value lies in [0, 1], otherwise an error is raised.
 */
InteractionClusterAnalyzer::double_hash_type
InteractionClusterAnalyzer::generateDensities(const count_hash_type& matchCounts,
					      const count_hash_type& assemblyCombLength1,
					      const count_hash_type& assemblyCombLength2) {
  double_hash_type result;
  size_t totLen1 = 0;
  for (count_hash_type::const_iterator it = assemblyCombLength1.begin(); it != assemblyCombLength1.end(); ++it) {
    totLen1 += it->second;
  }
  size_t totLen2 = 0;
  for (count_hash_type::const_iterator it = assemblyCombLength2.begin(); it != assemblyCombLength2.end(); ++it) {
    totLen2 += it->second;
  }
  for (count_hash_type::const_iterator it = matchCounts.begin(); it != matchCounts.end(); ++it) {
    const string& assemblies = it->first;
    size_type count = it->second;
    // one lookup per table; the iterators are reused below
    count_hash_type::const_iterator lengthIt1 = assemblyCombLength1.find(assemblies);
    if (lengthIt1 == assemblyCombLength1.end()) {
      REMARK << "Could not find assembly combination " << assemblies << " in first genomic alignment." << endl;
      continue;
    }
    count_hash_type::const_iterator lengthIt2 = assemblyCombLength2.find(assemblies);
    if (lengthIt2 == assemblyCombLength2.end()) {
      REMARK << "Could not find assembly combination " << assemblies << " in second genomic alignment." << endl;
      continue;
    }
    size_type length1 = lengthIt1->second;
    size_type length2 = lengthIt2->second;
    ERROR_IF(length1 == 0, "Internal error: could not find first length for assembly combination " + assemblies); 
    ERROR_IF(length2 == 0, "Internal error: could not find second length for assembly combination " + assemblies); 
    // Convert to double BEFORE multiplying: the product of two large lengths can exceed the
    // range of the integer types and would silently wrap around.
    double area1 = static_cast<double>(length1) * static_cast<double>(totLen1);
    double area2 = static_cast<double>(length2) * static_cast<double>(totLen2); // count is accumulated over area of two rectangles
    double density = static_cast<double>(count)/(area1 + area2);
    ERROR_IF(density > 1.0, "Internal error: density greater one encountered for assembly combination " + assemblies);
    ERROR_IF(density < 0.0, "Internal error: density less than zero encountered for assembly combination " + assemblies);
    result[assemblies] = density;
  }
  return result;
}

/** In case no density has been stored for an assembly combination, use this method */
/*
double
InteractionClusterAnalyzer::generateDefaultDensity(const string& assemblies,
						   const count_hash_type& assemblyCombLength1,
						   const count_hash_type& assemblyCombLength2) {
  size_t totLen1 = 0;
  for (count_hash_type::const_iterator it = assemblyCombLength1.begin(); it != assemblyCombLength1.end(); it++) {
    totLen1 += it->second;
  }
  size_t totLen2 = 0;
  for (count_hash_type::const_iterator it = assemblyCombLength2.begin(); it != assemblyCombLength2.end(); it++) {
    totLen2 += it->second;
  }
  //  for (count_hash_type::const_iterator it = matchCounts.begin(); it != matchCounts.end(); it++) {
  // string assemblies = it->first;
  size_type count = 1;
  ASSERT(assemblyCombLength1.find(assemblies) != assemblyCombLength1.end());
  ASSERT(assemblyCombLength2.find(assemblies) != assemblyCombLength2.end());
  size_type length1 = assemblyCombLength1.find(assemblies)->second;
  size_type length2 = assemblyCombLength2.find(assemblies)->second;
  ERROR_IF(length1 == 0, "Internal error: could not find first length for assembly combination " + assemblies); 
  ERROR_IF(length2 == 0, "Internal error: could not find second length for assembly combination " + assemblies); 
  double area1 = length1 * totLen1;
  double area2 = length2 * totLen2; // count is accumulated over area of two rectangles
  double density = static_cast<double>(count)/(area1 + area2);
  ERROR_IF(density > 1.0, "Internal error: density greater one encountered for assembly combination " + assemblies);
  ERROR_IF(density < 0.0, "Internal error: density less than zero encountered for assembly combination " + assemblies);
  return density;
}
  
*/


