#include <LocoRnaParMain.h>
#include <iostream>
#include <iomanip>
#include <debug.h>
#include <MAFAlignment.h>
// #include <HashCorrelationFinder.h>
#include <HashCorrelationFinder3.h>
#include <Timer.h>
#include <fstream>
#include <GetArg.h>
#include <CorrelationTools.h>
#include <MainTools.h>
#include <clusterAlgorithms.h>
#include <stemhelp.h>
#include <StemTools.h>
#include <InteractionClusterAnalyzer.h>
#include <AnnotationTools.h>

#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/partitioner.h>
#include <tbb/blocked_range.h>

#ifndef REMARK
#define REMARK cout << "# " 
#endif

// Define _PAIR_MODE_ to select nucleotide-pair mode; when it is undefined (as here), nucleotide-triplet mode is used. The pair mode was shown to be superior.
// #define _PAIR_MODE_

using namespace tbb;

// Resets all parameters to their defaults: first the defaults of the base
// class LocoRnaMain, then the parameters specific to the parallel version.
void
LocoRnaParMain::setDefaultValues() {
  LocoRnaMain::setDefaultValues();
  // "automatic": let the scheduler choose the number of threads based on the
  // hardware (alternative: a fixed count such as 1)
  threadCount = task_scheduler_init::automatic;
  // number of tasks per thread (previously tried values: 16, 32)
  tasksPerThread = 4;
  // default minimum number of columns per task
  blockSizeMin = 20000;
}

// Parses the command line: first all options understood by the base class,
// then the options of the parallel version (task granularity and thread
// count). The current values serve as defaults when an option is absent.
void
LocoRnaParMain::parseCommandLine() {
  LocoRnaMain::parseCommandLine();
  getArg("-granularity", blockSizeMin, argc, argv, blockSizeMin);
  getArg("t", threadCount, argc, argv, threadCount);
  // the special value "automatic" is the only accepted non-positive thread count
  const bool automaticThreads = (threadCount == task_scheduler_init::automatic);
  ERROR_IF(!automaticThreads && !(threadCount >= 1), "Thread count must be larger zero!");
  // upper sanity limit: do not believe in such high compute power
  ERROR_IF(!(threadCount < 1000), "Thread count must be smaller than 1000");
}

// Writes a one-line greeting, including the program version, to the given
// output stream.
void
LocoRnaParMain::writeWelcomeMessage(ostream& os) {
  os << "# Welcome to COVARNAP (v";
  os << getVersion();
  os << "), the alignment correlation finder (multi-threaded version)." << endl;
}

int
LocoRnaParMain::run() {
  Timer timer;
  timer.start();

  REMARK << "Program called with parameters: ";
  MainTools::parameterOutput(*osp, argc, argv); // output of parameters

  InteractionClusterAnalyzer * clusterAnalyzer = new InteractionClusterAnalyzer;
  
  task_scheduler_init init(threadCount);

  length_type ali2Offset = 0;
  bool clusterAgainMode = false; // problems lead to problems with MAF access
  length_type offsetMin = 10000;
  bool useStemEnergiesAsDensities = false; // an internal constant used in final clustering
  BEDRegions bed1, bed2;
  if (bedFileName1.size() > 0) {
    ifstream bedFile(bedFileName1.c_str());
    ERROR_IF(!bedFile, "Error opening BED format file: " + bedFileName1);
    bed1.read(bedFile, BEDRegions::STRAND_IGNORE);
    bedFile.close();
    ERROR_IF(refAssembly.size() == 0, 
	     "If BED filter is specified with -b, the reference assembly (example: hg18) has to be specified with option -a");
    bed1.setAssembly(refAssembly); // bed data must be with respect to reference assembly
    REMARK << "Read BED Format data for filtering from " << bedFileName << endl;
    if (verbose > 1) {
      REMARK << endl << bed1 << endl;
    }
    ERROR_IF(!bed1.validate(), "Internal error: Filter data does not validate!");
  } else {
    ERROR_IF(bed1.validate(), "Internal error: Filter data validates even though it is not defined!");
  }
  if (bedFileName2.size() > 0) {
    if (bedFileName2 == bedFileName) {
      bed2 = bed1;
    } else {
      ifstream bedFile(bedFileName2.c_str());
      ERROR_IF(!bedFile, "Error opening BED format file: " + bedFileName2);
      bed2.read(bedFile, BEDRegions::STRAND_IGNORE);
      bedFile.close();
      ERROR_IF(refAssembly.size() == 0, 
	       "If BED filter is specified with -b, the reference assembly (example: hg18) has to be specified with option -a");
      bed2.setAssembly(refAssembly); // bed data must be with respect to reference assembly
      REMARK << "Read BED Format data for filtering from " << bedFileName << endl;
      if (verbose > 1) {
	REMARK << endl << bed2 << endl;
      }
    }
    ERROR_IF(!bed2.validate(), "Internal error: Filter data does not validate!");
  } else {
    ERROR_IF(bed2.validate(), "Internal error: Filter data validates even though it is not defined!");
  }
  if (padding > 0) {
    if (verbose > 0) {
      REMARK << "Adding flanking regions of up to " << padding << " nt to all filter intervals" << endl;
    }
    bed1.addPadding(padding);
    bed2.addPadding(padding);
    if (verbose > 1) {
      REMARK << "Filter intervals (1) after addition of flanking regions: " << endl;
      REMARK << endl << bed1 << endl;
      REMARK << "Filter intervals (2) after addition of flanking regions: " << endl;
      REMARK << endl << bed2 << endl;
    }
  }
  if (bed1.validate() && (verbose > 0)) {
    REMARK << "Total lengths of filtered regions (1) (per chromosome):" << endl;
    bed1.writeLengthCounts(cout, "# "); // also provide prefix for output
  }
  if (bed2.validate() && (verbose > 0)) {
    REMARK << "Total lengths of filtered regions (2) (per chromosome):" << endl;
    bed2.writeLengthCounts(cout, "# "); // also provide prefix for output
  }
  MAFAlignment maf;
  MAFAlignment maf2; // careful: this was formerly a local variable, but for analysis purposes it is kept at this level. More memory consumption!
  if (refAssembly.size() > 0) {
    maf.setRefAssembly(refAssembly);
  }
  MAFAlignment::count_hash_type assemblyCombLengths1; // stores for each combination of assemblies of first MAF the total number of alignment columns
  MAFAlignment::count_hash_type assemblyCombLengths2; // stores for each combination of assemblies of second MAF the total number of alignment columns
  maf.reserve(MAF_RESERVE);
  maf.setVerbose(verbose);
  maf.setPruneAfter(pruneAfter);
  maf.setRequiredAssemblies(tokenizeToSet(requiredAssemblyNames,",;:%!"));
  maf.setSeqMin(seqMin);
  maf.setStrandMode(strandMode1);
  if (shuffleMode == 2) {
    REMARK << "WARNING: MAF alignments are shuffled during reading!" << endl;
    maf.setShuffleMode(true);
  }
  maf.setTabooAssemblies(tokenizeToSet(tabooAssemblyNames,",;:%!"));
  ASSERT(maf.getStrandMode() == strandMode1);
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Error reading input file!");
  REMARK << "Reading " << argv[1] << endl;
  Timer readTimer;
  readTimer.start();
  if (bed1.validate()) {
    maf.read(ifs, bed1, blockMin, blockMax);
  } else {
    BEDRegions emptyBed; 
    maf.read(ifs, emptyBed, blockMin, blockMax);
  }
  ifs.close();
  bed1 = BEDRegions(); // intervall data not needed anymore, save memory
  REMARK << "Successfully read alignment data with " << maf.size() << " alignment blocks and " << maf.getTotalLength() << " columns." << endl;
  REMARK << "Total number of stored characters: " << maf.computeCharacterCount() << endl;
  // length_type endcol = maf.getTotalLength();
  if (refAssembly.size() == 0) {
    refAssembly = maf.getRefAssembly();
    //     SequenceAlignment::properties_type::const_iterator pit = maf[0].getSequenceProperties(0).find("assembly");
    //     if (pit != maf[0].getSequenceProperties(0).end()) {
    //       refAssembly = pit->second;
    //     }
    REMARK << "No reference assembly specified. Using information from first sequence of first alignment block: " 
	   << refAssembly << endl; 
  }
  if (collapseAssembly.size() > 0) {
    REMARK << "Collapsing alignments with respect to this assembly: " << collapseAssembly << " ... " << endl;
    maf.collapseAssembly(collapseAssembly);
    REMARK << "New total length: " << maf.getTotalLength() << endl;
  }
  if (shuffleMode == 3) {
    REMARK << "Shuffling first set of alignment blocks vertically ...";
    maf.shuffleVertical();
    cout << " Done." << endl;
  }
  assemblyCombLengths1 = maf.countAssembliesHashLengths();
  length_type searchMax = maf.getTotalLength(); // might be changed later
  length_type totalLength1 = maf.getTotalLength();
  length_type totalLength2 = 0; // no second alignment defined yet
  length_type refAssemblyTotLength1 = maf.getRefAssemblyTotLength();
  length_type refAssemblyTotLength2 = maf.getRefAssemblyTotLength(); // override later
  string refAssemblyChrom = maf.getRefAssemblyChrom();
  string refAssemblyChrom2 = maf.getRefAssemblyChrom();
  if (appendFileName.size() > 0) {
    corrDistMin = 0;
    if (appendFileName == filename) {
      REMARK << "WARNING: the names of the two specified alignment files are identical. This can lead to a skewed analysis; better provide this filename only once, not using the -e option" << endl;
    }
    ifstream ifs2(appendFileName.c_str());
    ERROR_IF(!ifs2, "Error reading input file!");
    if (refAssembly.size() > 0) {
      maf2.setRefAssembly(refAssembly);
    }
    maf2.reserve(MAF_RESERVE);
    maf2.setPruneAfter(pruneAfter);
    maf2.setRequiredAssemblies(tokenizeToSet(requiredAssemblyNames,","));
    maf2.setSeqMin(seqMin);
    maf2.setStrandMode(strandMode2);
    if (shuffleMode == 2) {
      maf2.setShuffleMode(true);
    }
    maf2.setTabooAssemblies(tokenizeToSet(tabooAssemblyNames,","));
    maf2.setVerbose(verbose);
    REMARK << "Starting to read second alignment " << appendFileName << " ... " << endl;
    if (bed2.validate()) {
      maf2.read(ifs2, bed2, blockMin2, blockMax2);
    } else {
      BEDRegions emptyBed;
      maf2.read(ifs2, emptyBed, blockMin2, blockMax2);
    }
    ifs2.close();
    REMARK << "Successfully read second alignment with " << maf2.size() << " alignment blocks and " << maf2.getTotalLength() << " columns. Appending to first alignment:" << endl;
    REMARK << "Total number of stored characters of second alignment : " << maf2.computeCharacterCount() << endl;
    refAssemblyTotLength2 = maf2.getRefAssemblyTotLength();
    refAssemblyChrom2 = maf2.getRefAssemblyChrom();
    bed2 = BEDRegions(); // intervall data not needed anymore, save memory
    if (collapseAssembly.size() > 0) {
      REMARK << "Collapsing alignments of second MAF with respect to this assembly: " << collapseAssembly << " ... " << endl;
      maf2.collapseAssembly(collapseAssembly);
      REMARK << "New total length of second MAF: " << maf2.getTotalLength() << endl;
    }
    assemblyCombLengths2 = maf2.countAssembliesHashLengths();
    ali2Offset = maf.getChromStart(maf.size() -1, refAssembly) + maf[maf.size()-1].getLength() + offsetMin;
    if (ali2Offset != 0) {
      maf2.addChromStartOffset(ali2Offset, refAssembly); 
    }
    if (shuffleMode == 3) {
      REMARK << "Shuffling second set of alignment blocks vertically ...";
      maf2.shuffleVertical();
      cout << " Done." << endl;
    }
    totalLength2 = maf2.getTotalLength();
    maf.append(maf2); // do not measure this time
    searchMax = maf.getTotalLength();
    if (ali2Offset != 0) {
      maf2.addChromStartOffset(-ali2Offset, refAssembly);  // subtracting offset again
      if (verbose > 3) {
	REMARK << "Subtracting again offset for alignment 2: " << ali2Offset << endl;
	// REMARK << "Debug info 3:" <<  maf.getChromStart(maf.size()-1, refAssembly) << " " << maf2.getChromStart(0, refAssembly) << endl;
	// REMARK << "Debug info 4:" <<  maf.getRefChromStarts() << endl;
      }
    }
  } else {
    bed2 = BEDRegions(); // intervall data not needed anymore, save memory
  }
  readTimer.stop();
  switch (shuffleMode) {
  case 0: break; // no shuffling;
  case 1: {
    REMARK << "Shuffling alignments!" << endl;
    bool shuffleColumnMode = true; // shuffle rows AND columns
    maf.dinucleotideShuffle(shuffleNormLimit, shuffleColumnMode);
    // maf2.dinucleotideShuffle(shuffleNormLimit, iterations, shuffleColumnMode); // maf2 not needed any more
  }
    break;
  case 2:
    REMARK << "MAF files where shuffled horizontally during reading!" << endl;
    
    break;
  case 3: 
    REMARK << "Alignment blocks where shuffled vertically after reading." << endl;
    break;
  default: ERROR("Unknown shuffle mode!");
  }
  REMARK << "Initializing search between " << refAssembly << " " 
	 << refAssemblyChrom << " ( " << refAssemblyTotLength1 << " ) and " 
	 << refAssemblyChrom2 << " ( " << refAssemblyTotLength2 << " ) " << endl;
  if (refAssemblyChrom == refAssemblyChrom2) {
    REMARK << "Searching within same chromosome." << endl;
    sameChrom = true;
  }
  REMARK << "Starting to initialize finder..." << endl;
  HashCorrelationFinder3::result_type resultBins(maf.getTotalLength());
  for (size_type i = 0; i < resultBins.size(); ++i) {
    resultBins[i] = (new HashCorrelationFinder3::result_vector_type());  
  }
  Timer hashTimer;
  hashTimer.start();
//   MAFSearchTables searchTables(&maf); // creates hash tables
//   searchTables.setAssemblyPairFraction(assemblyPairFraction);
//   if (appendFileName.size() > 0) { // in this mode, search results have to be part of second MAF, queries are part of first MAF:
//     searchTables.setSearchRangeMin(searchMax+1);
//   }
//   searchTables.setVerbose(verbose);
//   searchTables.run();

#ifdef _PAIR_MODE_
  REMARK << "Using nucleotide-pair mode!" << endl;
  ERROR("Nucleotide pair mode not implemented.");
    // HashCorrelationFinder * finder;
    // MAFSearchTables * tables = new MAFSearchTables();
    // tables->setMAF(&maf);
    // tables->setAssemblyPairFraction(assemblyPairFraction);
    // if (appendFileName.size() > 0) { // in this mode, search results have to be part of second MAF, queries are part of first MAF:
    //   // tables->setSearchRangeMin(searchMax+1);
    // }
    // tables->setVerbose(verbose);
    // tables->run();
    // ERROR_IF(!tables->validate(), "Internal error: search tables did not validate!");
    // finder = new HashCorrelationFinder(&maf, tables, &resultBins);
#else
    if (verbose > 1) {
      REMARK << "Using nucleotide-triplet mode." << endl;
    }
    HashCorrelationFinder3 * finder;
    MAFSearchTables3 * tables = new MAFSearchTables3;
    tables->setAmbiguityMode(ambiguityMode);
    tables->setMAF(&maf);
    tables->setAssemblyPairFraction(assemblyPairFraction);
    tables->setVerbose(verbose);
    tables->run(refAssembly);
    ERROR_IF(!tables->validate(), "Internal error: search tables did not validate!");
    finder = new HashCorrelationFinder3(&maf, tables, &resultBins);
    if (appendFileName.size() > 0) { // in this mode, search results have to be part of second MAF, queries are part of first MAF:
      // tables->setSearchRangeMin(searchMax+1);
      finder->setSearchColumnSplit(totalLength1); // split search : only interested in finding covariations that span the two regions.
    }
#endif
    hashTimer.stop();
    finder->setBasepairTypeMin(basepairTypeMin);
    finder->setClusterCutoffAndInit(static_cast<length_type>(clusterCutoff));
    finder->setCheckAntiNeighborMode(checkAntiNeighborMode);
    finder->setClusterFilterActive(clusterFilterActive);
    finder->setClusterFilterSizeMin(clusterColMin);
    finder->setComplementMode(complementMode);
    finder->setCorrDistMin(corrDistMin);
    finder->setNonGapMin(seqMin);
    finder->setOutIntervall(outputIntervall);
    finder->setSearchColumnMax(searchColumnMax);
    finder->setStemLengthMin(stemLengthMin);
    finder->setReverseMode(reverseMode);
    finder->setVerbose(verbose);
    if (verbose > 0) {
      REMARK << "Successfully initialized finder." << endl;
    }
    if (threadCount == 1) {
      tasksPerThread = 1; // there is only one thread, no parallelism
    }

  if (threadCount > 0) {
    blockSizeMin = maf.getTotalLength() / (tasksPerThread * threadCount);
  }  // otherwise: automatic, we do not know how many threads there are
  static affinity_partitioner ap; 
  if (threadCount > 0) {
    REMARK << "Starting search with " << threadCount << " threads, approx. " << (tasksPerThread*threadCount) 
	   << " tasks and at least " << blockSizeMin << " columns per task." << endl;
  } else {
    REMARK << "Starting search with default number threads, at most " << (searchMax/blockSizeMin + 1)
	   << " tasks and at least " << blockSizeMin << " columns per task." << endl;
  }
  ERROR_IF(blockSizeMin < 10, "Thread granularity is too small! Consider decreasing number of threads (option -t) or using non-parallel program version.");
  blocked_range<length_type> range(0, searchMax, blockSizeMin);
  // finder.run(range);
  Timer searchTimer;
  searchTimer.start();
  /******** central method for starting threaded search ******/
  parallel_for(range, *finder, ap); 
  searchTimer.stop();
  REMARK << "Search finished!" << endl;//  Fraction of ignored columns (%): " 
  // << setprecision(3) << (100.0 * static_cast<double>(finder.getIgnoredCount()) / maf.getTotalLength()) << endl;
  ASSERT(finder->getClusterFilter().isActive() == clusterFilterActive);
  InteractionClusterAnalyzer::double_hash_type densities;
  if (densInFileName.size() > 0) {
    ifstream densInFile(densInFileName.c_str());
    ERROR_IF(!densInFile,"Error reading dens in file!");
    REMARK << "Reading densities from file " << densInFileName << endl;
    densities = InteractionClusterAnalyzer::readDensities(densInFile);
    densInFile.close();
  } else {
    bool addEmpty = true; // write densities for regions in which no covaration was found
    if (densOutFileName.size() > 0) {
      ofstream densOutFile(densOutFileName.c_str());
      ERROR_IF(!densOutFile, "Error writing density file " + densOutFileName);
      REMARK << "Writing densities to file " << densOutFileName << endl;
      densities = finder->computeDensities(searchMax, addEmpty, &densOutFile); // HashCorrelationFinder3::computeDensities();
      // InteractionClusterAnalyzer::writeDensities(densOutFile, densities);
      densOutFile.close();
    } else {
      // densities = finder->computeDensities(searchMax, addEmpty, NULL); // HashCorrelationFinder3::computeDensities();
    }
    // if (assemblyCombLengths2.size() > 0) {
      // densities = InteractionClusterAnalyzer::generateDensities(HashCorrelationFinder3::getMatchPairCountHash(), assemblyCombLengths1, assemblyCombLengths2);
    // } else {
      // densities = InteractionClusterAnalyzer::generateDensities(HashCorrelationFinder3::getMatchPairCountHash(), assemblyCombLengths1);
    // }
  }
  result_container results = finder->getResults();
  // after densities have been written to file, augment densities of areas in which no covariation was found:
  // finder->augmentDensities(densities, searchMax);
  REMARK << "Found " << results.size() << " correlations." << endl;
  if (verbose > 4) { // output of unfilteed covarying columns
    REMARK << "Unfiltered results: " << endl;
    finder->writeRawResults(cout, searchMax);
  }

  if (appendFileName.size() > 0) {
    result_container resultsTmp;
    for (result_container::size_type jj = 0; jj < results.size(); ++jj) {
      // cout << "# checking " << results[jj] << "\t" << totalLength1 << "\t" << totalLength2 << "\t" << searchMax << " : ";
      if (((results[jj].getStart() < totalLength1) && (results[jj].getStop() >= totalLength1))
	  || ((results[jj].getStart() >= totalLength1) && (results[jj].getStop() < totalLength1)) ) {
	resultsTmp.push_back(results[jj]);
      }  else {
      }
    }
    if (resultsTmp.size() < results.size()) {
      results = resultsTmp;
    }
    REMARK << "Only kept " << results.size() << " correlations that span the two specified genomic regions." << endl;
  }

  // the found covariation columns will be augmented with densities:
  // InteractionClusterAnalyzer::augmentCorrelationDensitities(results, densities, maf, maf2, true); // true : still working in internal coordinate mode
  if (verbose > 1) {
    writeResults(*osp, maf, results, refAssembly); // write output
  }
  if (refAssembly.size() > 0) {
    if (verbose > 0) {
      REMARK << "Convert to coordinates of assembly (first column has index one): " << refAssembly << endl;
    }
    CorrelationTools::convertInternalToAssemblyCoordinates(results.begin(), results.end(), refAssembly, maf);
  }
  if (ali2Offset != 0) {
    REMARK << "Filtering such that only cross-correlations remain... ";
    results = filterCrossCorrelations(results, ali2Offset, offsetMin);// filter out only correlations that go between two alignments
    REMARK << results.size() << endl;
  }
  if (annotateInFileName.size() > 0) {
    ifstream annotateInFile(annotateInFileName.c_str());
    ERROR_IF(!annotateInFile, "Error opening annotation file: " + annotateInFileName);
    AnnotationTools annotator;    
    if (annotateOutFileName.size() ==  0) {
      annotateOutFileName = annotateInFileName + "_anno.bed";
    }
    ofstream annotateOutFile(annotateOutFileName.c_str());
    ERROR_IF(!annotateOutFile, "Error opening annotation output file: " + annotateOutFileName);      
    if (verbose > 0) {
      REMARK << "Annotated regions read from " << annotateInFileName << " are written to file " << annotateOutFileName << endl;
    }
    annotator.annotateRegions(annotateInFile, annotateOutFile, results, refAssemblyChrom, refAssemblyChrom2, static_cast<length_type>(ceil(clusterCutoff)));
    annotateInFile.close();
    annotateOutFile.close();
  }
  Vec<Stem> stems;
  if (reverseMode) {
    if (complementMode) {
      REMARK << "Converting found correlations into regular reverse-complement stems:" << endl;
    } else {
      REMARK << "Converting found correlations into reverse-matching (not complementary) stems:" << endl;
    }
    if (appendFileName.size() == 0) {
      stems = CorrelationTools::convertCorrelationsToStems(results, corrDistMin, stemLengthMin);
    } else {
      stems = CorrelationTools::convertCorrelationsToStems(results, 0, stemLengthMin); // corrDistMin does not make sense for two-sequence mode
    }
    if (pvalMode) { // currently p-value computation only for reverse matches
      if (verbose > 0) {
	REMARK << "Computing E-values..." << endl;
      }
      addPValue(stems, *finder, false); // write compute p-values in "energy" attribute. "false" means that one has to temporarily convert to internal coordinates
      if (verbose > 0) {
	REMARK << "Computing E-values finished." << endl;
      }
    }
  } else {
    REMARK << "Due to chosen mode (reverse: " << reverseMode << " complement: " << complementMode << ") , cannot convert found correlations into regular reverse-complement stems, using instead \"forward\" regions:"  << endl;
    if (appendFileName.size() == 0) {
      stems = CorrelationTools::convertCorrelationsToForwardStems(results, corrDistMin, stemLengthMin);
    } else {
      stems = CorrelationTools::convertCorrelationsToForwardStems(results, 0, stemLengthMin); // corrDistMin does not make sense for two-sequence mode
    }
    if (pvalMode) { // currently p-value computation only for reverse matches
      if (verbose > 0) {
	REMARK << "Computing E-values..." << endl;
      }
      addForwardPValue(stems, *finder, false); // write compute p-values in "energy" attribute. "false" means that one has to temporarily convert to internal coordinates
      if (verbose > 0) {
	REMARK << "Computing E-values finished." << endl;
      }
    }
  }
  // convert notation to + strand
  if (strandMode1 == MAFAlignment::STRAND_MINUS) {
    // convert start position to plus strand
    REMARK << "Computing + strand coordinates of stem starts ..." << endl;
    reverseStemStarts(stems, refAssemblyTotLength1);
  }
  if (strandMode2 == MAFAlignment::STRAND_MINUS) {
    // convert start position to plus strand
    REMARK << "Computing + strand coordinates of stem stops ..." << endl;
    reverseStemStops(stems, refAssemblyTotLength2);
  }
  if (noSelfMode) {
    stems = filterNoSelfStems(stems); // filter out self stems (from stemhelp)
  }
  // cluster again using SingleLinkage2DProgressiveFilter:
  if (clusterAgainMode && reverseMode && complementMode) {
    ERROR("cluster-again mode currently node supported!");
    SingleLinkage2DProgressiveFilter clusterFilter = finder->getClusterFilter(); // makes a copy
    ERROR_IF(clusterFilter.getElementCount() > 0, "Internal error: cluster filter should be empty after search.");
    clusterFilter.setDelay(0); // actually changing filter behavior, because not used for parallel mode anymore
    clusterFilter.reset(); // prepare for another search
    REMARK << "Clustering again using single-linkage filter with initially " << stems.size() << " stems. Result: " << endl;
    stems = CorrelationTools::singleLinkageFilter(stems, clusterFilter);
    REMARK << stems.size() << " stems." << endl;
  } 
  if (verbose > 1) {
    REMARK << "Initial list of " << stems.size() << " found stems:" << endl;
    writeStems(*osp, stems, totalLength1, totalLength2);
  }
  Timer clusterTimer;
  clusterTimer.start();
  REMARK << "Writing results to file " << bedOutFileName << endl;
  ofstream bedOutFile(bedOutFileName.c_str());
  if (bedOutFile) {
    bedOutFile << "# Result generated by COVARNA (version " << getVersion() << " ) " << endl;
    bedOutFile << "# Parameters: ";
    MainTools::parameterOutput(bedOutFile, argc, argv); // output of parameters
  }
  if ((stemDensity <= 0.0) && (stemLengthMin == 1)) {
    double totalArea = static_cast<double>(totalLength1) * static_cast<double>(totalLength2);
    if (totalArea <= 0.0) {
      totalArea = static_cast<double>(totalLength1) * static_cast<double>(totalLength1);
    }
    stemDensity = HashCorrelationFinder3::getMatchPairCount() / totalArea;
    if (verbose > 1) {
      cout << "Estimating density of column pairs as " << stemDensity << " based on count of " 
	   << HashCorrelationFinder3::getMatchPairCount() << " matches in an area of " << totalArea << " nucleotides squared." << endl;
    }
  }
  clusterAnalyzer->analyzeClustersFast(cout, bedOutFile, stems, clusterCutoff, 
				   static_cast<double>(totalLength1),static_cast<double>(totalLength2),
				   sameChrom, stemDensity, eMax, stemPMax, stemLengthMin,
				   clusterColMin, expandClusterMaxAllowed, 
                                   complementMode, reverseMode, maf, maf2,
				   HashCorrelationFinder3::getMatchPairCountHash(), assemblyCombLengths1,
				   assemblyCombLengths2, useStemEnergiesAsDensities,
				       multiTestMode, densities); // , stemBiasPMode);

  clusterTimer.stop();
  timer.stop();

  if (bedOutFile) {
    bedOutFile.close();
  }

  REMARK << "Compute time : Total: " << timer << " Reading: " << readTimer << " Hash-creation: " << hashTimer 
	 << " Searching: " << searchTimer << " Clustering: " << clusterTimer << endl;
  REMARK << "Good bye!" << endl;
  
  return 0;
}
