// --*- C++ -*------x---------------------------------------------------------
#ifndef _BED_REGIONS_
#define _BED_REGIONS_

#include <iostream>
#include <fstream>
#include <Vec.h>
#include <IntervallInt.h>
#include <string>
#include <StringTools.h>
#include <debug.h>
#include <map>
#include <IntervallIntSet.h>
#include <limits.h>
#include <ctype.h>

using namespace std;

// REMARK: stream prefix for informational messages. Expands to an expression
// that writes "# " to cout and can be continued with further << operands.
// Only defined here if no earlier header has already defined it.
#ifndef REMARK
#define REMARK cout << "# "
#endif

class BEDRegions {

 public:
  typedef int length_type;
  typedef IntervallIntSet interval_set_type;
  typedef map<string, IntervallIntSet> map_type;
  typedef IntervallIntSet::size_type size_type;
  enum {STRAND_PLUS = 1, STRAND_MINUS = -1, STRAND_BOTH = 0, STRAND_IGNORE = 2 }; // what strands to read
  /** column with start position info, column with end position, required number of columns in BED format file. Zero-based counting. */
  enum { CHROM_COL = 0, START_COL = 1, END_COL = 2, STRAND_COL = 5, REQ_COL = 3 }; 

 private:

  string assembly; // name of this assembly. Example: hg18  
  map_type intervalls;

 public:

  BEDRegions() { }

  BEDRegions(const BEDRegions& other) : assembly(other.assembly), intervalls(other.intervalls) { }

  BEDRegions& operator = (const BEDRegions& other) {
    if (&other != this) {
      copy(other);
    }
    return (*this);
  }

  void clear() {
    assembly.clear();
    intervalls.clear();
  }

  void copy(const BEDRegions& other) {
    assembly = other.assembly;
    intervalls = other.intervalls;
  }
  
  /** Returns true if index is contained in set of intervalls */
  bool contains(const string& chrom, length_type id) const {
    map_type::const_iterator it = intervalls.find(chrom);
    if (it == intervalls.end()) {
      return false;
    }
    return it->second.isOverlappingSafe(IntervallInt(id, id)); // FIXIT: check for correct chromosome!
  }

  /** Returns true if index is contained in set of intervalls */
  bool isOverlapping(const string& chrom, const IntervallInt& interval) const {
    map_type::const_iterator it = intervalls.find(chrom);
    if (it == intervalls.end()) {
      // cerr << "Chromosome not found:" << chrom << endl;
      return false;
    }
    return it->second.isOverlappingSafe(interval); // FIXIT: check for correct chromosome!
  }

  const string& getAssembly() const { return assembly; }

  /** Adds padding to each intervall, avoiding collisions. */
  void addPadding(length_type padding) {
    for (map_type::iterator it = intervalls.begin(); it != intervalls.end(); it++) {
      it->second.addPadding(padding, 0, (INT_MAX-2)); // iterator over each chromosome. Unfortunately: max length of chromosome not known here.
    }
  }

  /** Returns total number of intervall sizes */
  length_type getLengthCount() {
    length_type result =  0;
    for (map_type::iterator it = intervalls.begin(); it != intervalls.end(); it++) {
      result += it->second.getLengthCount();
    }
    return result;
  }

  /** Writes total number of intervall sizes */
  length_type writeLengthCounts(ostream& os, const string& remark) {
    length_type result =  0;
    for (map_type::iterator it = intervalls.begin(); it != intervalls.end(); it++) {
      os << remark << (it->first) << " : " << (it->second.getLengthCount()) << endl;
      result += it->second.getLengthCount();
    }
    os << remark << "Total: " << result << endl;
    return result;
  }

  /** Reads UCSC BED format data. */
  bool read(istream& is) {
    return read(is, STRAND_BOTH);
  }

  /** Reads UCSC BED format data. Careful: usually use mode STRAND_BOTH */
  bool read(istream& is, int strandMode) {
    // clear();
    intervalls.clear(); // do not delete assembly info 
    vector<string> lines = getLines(is);
    for (vector<string>::size_type i = 0; i < lines.size(); ++i) {
      vector<string> words = getTokens(lines[i]);
      if ((words.size() >= REQ_COL) && isdigit(words[START_COL][0]) && isdigit(words[END_COL][0])) {
	bool strandOk = true;
	if (words.size() > STRAND_COL) { 
	  switch (strandMode) {
	  case STRAND_PLUS:
	    if ((words[STRAND_COL] == "+") || (words[STRAND_COL] == "1")) {
	      strandOk = true;
	    }
	    break;
	  case STRAND_MINUS:
	    if ((words[STRAND_COL] == "-") || (words[STRAND_COL] == "-1")) {
	      strandOk = true;
	    }
	    break;
	  case STRAND_BOTH:
	    if ((words[STRAND_COL] == "+") || (words[STRAND_COL] == "-") 
		|| (words[STRAND_COL] == "-1") || (words[STRAND_COL] == "1")) {
	      strandOk = true;
	    }
	    break;
	  case STRAND_IGNORE:
	    break;
	  default:
	    ERROR("Internal error in BEDRegions.read: Unknown strand mode!");
	  }
	}
	if (strandOk) {
	  // check if name found:
	  string chrom = words[0];
	  map_type::const_iterator it = intervalls.find(chrom);
	  if (it == intervalls.end()) { // chromsome so far not found, start new set of intervalls
	    intervalls[chrom] = IntervallIntSet();
	  }
	  // REMARK << "Adding intervall for line " << (i+1) << " " << lines[i] << endl; 
	  intervalls[chrom].add(IntervallInt(stoi(words[START_COL]), stoi(words[END_COL])), false); // false: no early consolidating intervalls (speed)
	} 
      } else {
	if ((words[0][0] != '#') && (i > 0) ) {
	  REMARK << "Line " << (i+1) << " Weird line encountered in BED file, ignoring: " << lines[i] << endl; 
	}
      }
    }
    // consolidating all intervalls: // only necessary if user provided strange overlapping intervalls. Not done here for speed/memory reasons!
//     for (map_type::iterator it = intervalls.begin(); it != intervalls.end(); it++) {
//       REMARK << "Consolidating intervalls for chromosome " << (it->first) << endl;
//       it->second.consolidate();
//     }  
    return true;
  }

  void setAssembly(const string& _assembly) { assembly = _assembly; }

  size_type size() const { return intervalls.size(); }
  
  bool validate() const { return (assembly.size() > 0) && (size() >0); }

  friend ostream& operator << (ostream& os, const BEDRegions& regions);

  friend istream& operator >> (istream& is, const BEDRegions& regions);
  
};

/** Writes every stored region as a tab-separated line "chromosome, lower bound, upper bound".
 *  Chromosomes are visited in the order of the underlying map; intervals in index order. */
inline
ostream& operator << (ostream& os, const BEDRegions& regions){
  typedef BEDRegions::map_type::const_iterator chrom_iterator;
  chrom_iterator cur = regions.intervalls.begin();
  const chrom_iterator last = regions.intervalls.end();
  while (cur != last) {
    const string& name = cur->first;
    const IntervallIntSet& entries = cur->second;
    for (IntervallIntSet::size_type k = 0; k < entries.size(); ++k) {
      os << name << '\t' << entries[k].getLower() << '\t' << entries[k].getUpper() << endl;
    }
    ++cur;
  }
  return os;
}

#endif
