
diagValuesList <- function(mtx, antidiag=TRUE) {
  lowest <- 1 + 1
  highest <- ncol(mtx) + nrow(mtx)
  result <- list()
  for (i in 1:nrow(mtx)) {
   for (j in 1:ncol(mtx)) {
     d <- 0
     if (antidiag) {
      d <- i + j
     } else {
      d <- i - j
     }
     dn <- as.character(d)
     v <- result[[dn]] 
     if (is.null(v)) {
      v <- c()
     }
     v[length(v)+1] <- mtx[i,j]
     result[[dn]] <- v
   }
  }
  result
}

# generates matrix that is filled with "helix-invariants" (sum or difference of coordinates)
generateDiagMatrix <- function(nrow, ncol, antidiag=TRUE) {
  mtx <- matrix(0, ncol=ncol, nrow=nrow)
  for (i in 1:nrow) {
    jStart = 1
    jStop = ncol
    for (j in jStart:jStop) {
      if (antidiag) { 
         mtx[i,j] <- i+j  # for regular reverse-complementary stems
      } else {
         mtx[i,j] <- i-j  # for stems between + and - strands
      }
    }
  }
  mtx
}

test.generateDiagMatrix <- function() {
  result <- generateDiagMatrix(5,6)
   result
}


ConvertToBinary <- function(mtx, thresh) {
 mtxn <- matrix(0,ncol=ncol(mtx), nrow=nrow(mtx))
 for (i in 1:nrow(mtx)) {
  for (j in 1:ncol(mtx)) {
   if (mtx[i,j] < thresh) {
        mtxn[i,j] <- 0
   } else {
       mtxn[i,j] <- 1
   }
  }
 }
 mtxn
}

# central statistical (randomization) test, that determines a P-value
# for a bias toward the formation of helices (antidiagonals) or forward-matches (diagonals)
# If convertToBinary is FALSE, the matrix mtx must contain zero and ones only, otherwise it will be converted using the threshold thresh
# careful: currently only makes sense for NON-symmetrical matrices; for example interaction part of two regions
# pLimFast : limit for shortcut of randomization test in case of non-significant cases.
# at pLimFastRounds  iterations, a check is performed. If the current p estimate is greater pLimFast, the calculation is terminated.
diagBias <- function(mtx, rounds=10000, antidiag=TRUE, verbose=FALSE, thresh=0.5, convertToBinary=TRUE,
                      pLimFast=0.2, pLimFastRounds=5000,use.transpose=FALSE, symmetric.triangle.only=FALSE) {
  nc <- ncol(mtx)
  nr <- nrow(mtx)
  if (convertToBinary) {
   mtx <- ConvertToBinary(mtx, thresh)
  }
  if (symmetric.triangle.only) { # set lower triangular matrix to zero
    stopifnot(nc == nr) # must be symmetric square matrix
    for (i in 1:nr) {
     for (j in 1:i) {
        mtx[i,j] <- 0
     }
    }    
  }
  mtxt <- t(mtx) # transpose
  ntc <- ncol(mtxt)
  ntr <- nrow(mtxt)
  diagMtx <- generateDiagMatrix(nr,nc, antidiag=antidiag)
  diagMtxt <- generateDiagMatrix(nc,nr, antidiag=antidiag)
  vMtx <- mtx * diagMtx
  vMtxt <- mtxt * diagMtxt
  uniqDiags <- unique(sort(as.vector(vMtx)))
  stopifnot(uniqDiags[0] != 0) # there must be at least one zero
  ndiag <- length(uniqDiags) - 1 # there are this many diagonals (minus one because zeros where counted originally also but they are not a valid diagonal)
  foundExtremes <- 0
  if (verbose) {
   cat("Original number of diagonals:", ndiag, "\n")
  }
  effRounds <- 0 # iterations that were actually performed
  ndiagTotal <- ndiag
  if (use.transpose) {
   uniqDiagst <- unique(sort(as.vector(vMtx)))
   stopifnot(uniqDiagst[0] != 0) # there must be at least one zero
   ndiagt <- length(uniqDiagst) - 1 # there are this many diagonals (minus one because zeros where counted originally also but they are not a valid diagonal)
   ndiagTotal <- ndiag - ndiagt
  }
  for (r in 1:rounds) {
     effRounds <- effRounds + 1
     stopifnot(effRounds == r)
     mtxr <- mtx[sample(nr), sample(nc)] # shuffle columns and rows
     stopifnot(ncol(mtx) == ncol(mtxr))
     stopifnot(nrow(mtx) == nrow(mtxr))
     ndiagr <- length(unique(sort(as.vector(mtxr * diagMtx)))) - 1
     ndiagRTotal <- ndiagr
     if (use.transpose) {
      mtxtr <- mtxt[sample(ntr), sample(ntc)] # shuffle columns and rows
      ndiagtr <- length(unique(sort(as.vector(mtxtr * diagMtxt)))) - 1
      ndiagRTotal <- ndiagr - ndiagtr
     } 
     if (verbose) {
      cat(ndiagr, " number of diagonals in round", r, ":", ndiagr, "\n")
     }
     if (ndiagRTotal <= ndiagTotal) {
       foundExtremes <- foundExtremes + 1
     } 
     if (r == pLimFastRounds) {
        if ((foundExtremes / r) > pLimFast) {
          break # prematurely quit loop because the result is likely to be non-significant
        }
     }
  }
  pval <- foundExtremes / effRounds 
  if (foundExtremes == 0) {
    pval <- 1/effRounds # upper bound: assume one success (like pseudocount)
  }
  result <- list()
  result[["p.value"]] <- pval
  result[["rounds"]] <- effRounds
  result[["successes"]] <- foundExtremes
  result[["diagonals"]] <- ndiag
  result[["sum"]] <- sum(mtx)
  result[["nrow"]] <- nrow(mtx)
  result[["ncol"]] <- ncol(mtx)
  result[["antidiag"]] <- antidiag
  result[["thresh"]] <- thresh
  result
}


# central statistical (randomization) test, that determines a P-value
# for a bias toward the formation of helices (antidiagonals) or forward-matches (diagonals)
# If convertToBinary is FALSE, the matrix mtx must contain zero and ones only, otherwise it will be converted using the threshold thresh
# careful: currently only makes sense for NON-symmetrical matrices; for example interaction part of two regions
# pLimFast : limit for shortcut of randomization test in case of non-significant cases.
# at pLimFastRounds  iterations, a check is performed. If the current p estimate is greater pLimFast, the calculation is terminated.
diagBiasTile <- function(mtx, sideLength=20, rounds=10000, antidiag=TRUE, thresh=0.5, convertToBinary=TRUE,
                      pLimFast=0.2, pLimFastRounds=1000, pLimStore=0.05, verbose=0) {
  if (verbose > 1) {
   cat("Starting diagBiasTile\n")
  } 
  nc <- ncol(mtx)
  nr <- nrow(mtx)
  if (convertToBinary) {
   mtx <- ConvertToBinary(mtx, thresh)
  }
  nTilesR <- floor(nr/sideLength)
  nTilesC <- floor(nc/sideLength)
  pMtx <- matrix(1.0, nrow=nTilesR, ncol=nTilesC)
  pMtxF <- matrix(1.0, nrow=nr, ncol=nc)
  matrixList <- list()
  matrixBiasList <- list()
  matrixPositionList <- list()
  result <- list()
  for (i in 1:nTilesR) {
   iMin = (i-1)*sideLength + 1
   iMax = iMin + sideLength-1
   if (verbose > 0) {
    cat("Progress of outer loop of diagBiasTile:", i, "out of", nTilesR, "(", round(100.0*i/nTilesR), "%)\n")
   }
   for (j in 1:nTilesC) {
    jMin = (j-1)*sideLength + 1
    jMax = jMin + sideLength-1
    mt <- mtx[iMin:iMax, jMin:jMax]
    bias <- diagBias(mt, rounds=rounds)
    if ((verbose > 2) || ((verbose > 0) > 1) && (pMtx[i,j] <= pLimStore)) {
      cat("Working on tile", i,j, sideLength, iMin,":", iMax, ",", jMin, ":", jMax, "\n") 
      print(bias)
    }
    pMtx[i,j] <- bias[["p.value"]]
    pMtxF[iMin:iMax,jMin:jMax] <- bias[["p.value"]]
    if (is.numeric(pMtx[i,j]) && (pMtx[i,j] <= pLimStore)) {
     matrixList[[length(matrixList) + 1]] <- mt
     matrixBiasList[[length(matrixBiasList) + 1]] <- bias
     matrixPositionList[[length(matrixPositionList) + 1]] <- c(iMin,iMax, sideLength, i,j)
    }
   }
  }
  result[["p.matrix"]] <- pMtxF
  result[["p.matrix.tile"]] <- pMtx
  result[["matrices"]] <- matrixList
  result[["matrices.bias"]] <- matrixBiasList
  result[["matrices.position"]] <- matrixPositionList
  if (verbose > 1) {
   cat("Finished diagBiasTile\n")
  } 
  result
}

# test with random matrix. p-value should be non-significant.
# must contain zero and ones only
test.diagBias.t1 <- function(probLim=0.05, verbose=TRUE, pLimFastRounds=1000) {
  nr <- 20
  nc <- 12
  mtx <- matrix(0, nrow=nr,ncol=nc) 
  rv <- runif(nr*nc)
  pc <- 1
  counter <- 0
  for (i in 1:nr) {
   for (j in 1:nc) {
    if (rv[pc] < probLim) {
      mtx[i,j] <- 1
      counter <- counter + 1
     }
    pc <- pc + 1
   }
  }
  cat("Overall placed", counter, "non-zero elements.\n")
  print(mtx)
  result <- diagBias(mtx,rounds=5000, antidiag=TRUE, verbose=verbose, pLimFast=0.1, pLimFastRounds=pLimFastRounds)
  result
}

# contains a "helix", so should give p value less than zero
test.diagBias.t2 <- function(probLim=0.1, verbose=TRUE, use.transpose=FALSE) {
  nr <- 6
  nc <- 6
  mtx <- matrix(0, nrow=nr,ncol=nc) 
  rv <- runif(nr*nc)
  pc <- 1
  counter <- 0
  start <- 2
  stop <- 5
  helixLen <- 3
  for (i in 1:helixLen) {
     mtx[start+i,stop-i] <- 1
     counter <- counter + 1
  }
  cat("Overall placed", counter, "non-zero elements.\n")
  print(mtx)
  result <- diagBias(mtx,rounds=1000, antidiag=TRUE, verbose=verbose, use.transpose=use.transpose)
  print(result)
#  result2 <- diagBias(mtx,rounds=1000, antidiag=FALSE, verbose=verbose)
#  print(result2)
  checkTrue(result[["p.value"]] < 1.0)
#  checkTrue(result2[["p.value"]] == 1.0)
  result
}

# contains a "opposing-helix", so should give p value less than zero
test.diagBias.t3 <- function(probLim=0.1, verbose=TRUE) {
  nr <- 6
  nc <- 6
  mtx <- matrix(0, nrow=nr,ncol=nc) 
  rv <- runif(nr*nc)
  pc <- 1
  counter <- 0
  start <- 2
  stop <- 2
  for (i in 1:3) {
     mtx[start+i,stop+i] <- 1
     counter <- counter + 1
  }
  cat("Overall placed", counter, "non-zero elements.\n")
  print(mtx)
  result <- diagBias(mtx,rounds=1000, antidiag=TRUE, verbose=verbose)
  print(result)
  checkTrue(result[["p.value"]] == 1.0)
  result2 <- diagBias(mtx,rounds=1000, antidiag=FALSE, verbose=verbose)
  print(result2)
  checkTrue(result2[["p.value"]] < 1.0)
  result2
}


# contains a "opposing-helix", so should give p value less than zero
test.diagBias.t5 <- function(probLim=0.1, verbose=TRUE) {
  mtx <- read.table(system.file("data/s.fasta.17_diagbias.analysis_43.matrix", package="rnafolding"), header=TRUE)
  print(mtx)
  result <- diagBias(mtx,rounds=1000, antidiag=TRUE, verbose=verbose, use.transpose=TRUE)
  print(result)
  checkTrue(result[["p.value"]] > 0.1)
  result
}

filterPromiscous <- function(mtx, fracMax = 0.25, cutoff = 0.5, filter.columns=TRUE, filter.rows=TRUE) {
 result <- mtx
 if (filter.columns) {
 for (i in 1:ncol(mtx)) {
  v <- mtx[,i]
  n <- length(v[v >= cutoff])
  frac = n/length(v)
  if (frac > fracMax) {
   result[,i] <- 0
  }
 }
 }
 if (filter.rows) {
 for (i in 1:nrow(mtx)) {
  v <- mtx[i,]
  n <- length(v[v >= cutoff])
  frac = n/length(v)
  if (frac > fracMax) {
   result[i,] <- 0
  }
 }
 }
 result
}

test.filterPromiscous <- function() {
 mtx <- matrix(0, nrow=4, ncol=4)
 mtx[,2] <- c(1,0,1,1)
 print(mtx)
 result <- filterPromiscous(mtx)
 print(result)
 checkEquals(sum(result), 0.0)
 result2 <- filterPromiscous(mtx, fracMax=0.9)
 print(result2)
 checkEquals(sum(result2), 3.0)
 result
}

# central method for assessing bias towards antidiagonals given two sets of sequences
ComplementaryColumnBias <- function(seqs1, seqs2, minWords=2,seqVec=NULL,verbose=FALSE, singleGapOK=TRUE, 
       doubleGapOK=TRUE,
       rounds=100000, convertGUtoGC=TRUE, addMatrix=TRUE, tile=NULL, fracMax=1.0) {
 stopifnot(length(seqs1) == length(seqs2))
 ary1 <- seqsToCharArray(seqs1)
 ary2 <- seqsToCharArray(seqs2)
 ali1Len <- ncol(ary1)
 ali2Len <- ncol(ary2)
 ary <- cbind(ary1, ary2) # paste columns
 if (!is.null(seqVec)) {
  ary <- ary[seqVec, ] # ignore certain sequences
 }
 if (verbose) {
  print(ary)
 }
 mtx <- findAllComplementaryColumns(ary, minWords=minWords, asMatrix=TRUE, queryMax=ali1Len, targetMin=(ali1Len+1),
          singleGapOK=TRUE, doubleGapOK=doubleGapOK, convertGUtoGC=convertGUtoGC)
 if (fracMax < 1.0) {
  mtx <- filterPromiscous(mtx, fracMax=fracMax)
 }
 if (verbose) {
   print(mtx)
 }
 stopifnot(nrow(mtx) == ali1Len)
 stopifnot(ncol(mtx) == ali2Len)
 bias <- NULL
 if (is.null(tile)) {
  bias <- diagBias(mtx, rounds=rounds)
 } else {
  bias <- diagBiasTile(mtx, rounds=rounds, sideLength=tile)
 }
 if (addMatrix) {
  bias[["matrix"]] <- mtx
 }
 bias
}

test.ComplementaryColumnBias <- function(file1=system.file("data/chr2L_12625849_20443308_18_A.fa", package="rnafolding"),
                                         file2=system.file("data/chr2L_12625849_20443308_18_B.fa", package="rnafolding"), 
                                         minWords=2,seqVec=NULL, verbose=TRUE,
                                         singleGapOK=TRUE, doubleGapOK=TRUE, reverseComplement2=FALSE, dna=TRUE, strict="N",
                                         convertGUtoGC=TRUE, rounds=10000, fracMax=0.25) {
 stopifnot(file.exists(file1))
 stopifnot(file.exists(file2))
 seqs1 <- readFASTA(file1, strip.descs=TRUE)
 seqs2 <- readFASTA(file2, strip.descs=TRUE)
 stopifnot(length(seqs1) > 0)
 stopifnot(length(seqs1) == length(seqs2))
 if (reverseComplement2) {
  seqs2 = alignment.reverseComplement(seqs2, dna=dna, strict=strict)
 }
 result <- ComplementaryColumnBias(seqs1, seqs2, minWords=minWords, seqVec=seqVec, verbose=verbose,
              singleGapOK=singleGapOK, doubleGapOK=doubleGapOK, convertGUtoGC=convertGUtoGC, fracMax=fracMax, rounds=rounds)
 checkTrue(!is.null(result))
 checkTrue(is.numeric(result$p.value))
 result
}


test.ComplementaryColumnBiasTile <- function(file1=system.file("data/chr2L_12625849_20443308_18_A.fa", package="rnafolding"),
                                         file2=system.file("data/chr2L_12625849_20443308_18_B.fa", package="rnafolding"), 
                                         minWords=2,seqVec=NULL, verbose=TRUE,
                                         singleGapOK=TRUE, doubleGapOK=TRUE, reverseComplement2=FALSE, dna=TRUE, strict="N",
                                         convertGUtoGC=TRUE, rounds=100, tile=10) {
 stopifnot(file.exists(file1))
 stopifnot(file.exists(file2))
 seqs1 <- readFASTA(file1, strip.descs=TRUE)
 seqs2 <- readFASTA(file2, strip.descs=TRUE)
 stopifnot(length(seqs1) > 0)
 stopifnot(length(seqs1) == length(seqs2))
 if (reverseComplement2) {
  seqs2 = alignment.reverseComplement(seqs2, dna=dna, strict=strict)
 }
 result <- ComplementaryColumnBias(seqs1, seqs2, minWords=minWords, seqVec=seqVec, verbose=verbose,
              singleGapOK=singleGapOK, doubleGapOK=doubleGapOK, convertGUtoGC=convertGUtoGC, rounds=rounds, tile=tile)
 checkTrue(!is.null(result))
# checkTrue(is.numeric(result$p.value))
 result
}

# only returns complementarity matrix
test.ComplementaryColumnBias.matrix <- function(file1=system.file("data/chr2L_12625849_20443308_18_A.fa", package="rnafolding"),
                                         file2=system.file("data/chr2L_12625849_20443308_18_B.fa", package="rnafolding"), 
                                         minWords=2,seqVec=NULL, verbose=TRUE,
                                         singleGapOK=TRUE, doubleGapOK=TRUE, reverseComplement2=FALSE, dna=TRUE, strict="N",
                                         convertGUtoGC=TRUE) {
 stopifnot(file.exists(file1))
 stopifnot(file.exists(file2))
 seqs1 <- readFASTA(file1, strip.descs=TRUE)
 seqs2 <- readFASTA(file2, strip.descs=TRUE)
 stopifnot(length(seqs1) > 0)
 stopifnot(length(seqs1) == length(seqs2))
 if (reverseComplement2) {
  seqs2 = alignment.reverseComplement(seqs2, dna=dna, strict=strict)
 }
 result <- ComplementaryColumnBias(seqs1, seqs2, minWords=minWords, seqVec=seqVec, verbose=verbose,
              singleGapOK=singleGapOK, doubleGapOK=doubleGapOK, convertGUtoGC=convertGUtoGC, rounds=1)
 checkTrue(!is.null(result))
 checkTrue(is.numeric(result$p.value))
 checkTrue(is.matrix(result[["matrix"]]))
 result[["matrix"]]
}
