# returns all sequeces of an alignment
seqsToCharArray <- function(sequences, transform.upper=FALSE) {
	 nRow <- length(sequences)
	 nCol <- nchar(sequences[[1]]$seq)
         result <- matrix("", nrow=nRow, ncol=nCol)
	 for (i in 1:length(sequences)) {
             if (!transform.upper){
	      result[i,] <- strsplit(sequences[[i]]$seq,"")[[1]]
             } else {
 	      result[i,] <- strsplit(toupper(sequences[[i]]$seq),"")[[1]]
             }
	 }
	 stopifnot(nrow(result) == length(sequences))
	 stopifnot(ncol(result) == nchar(sequences[[1]]$seq))
	 result
}	 

# given a matrix of single-character nucleotide characters, find 
# for a column all other column ids that are ccomple
findComplementaryColumns <- function(ary, col, gu.allowed=TRUE, doubleGapOK=TRUE, minWords=1, verbose=0, queryMin=1,queryMax=ncol(ary),
     convertGUtoGC=FALSE) {
 result <- c()
 query <- paste(ary[,col], collapse="")
 for (i in queryMin:queryMax) {
  if (i != col) {
   target <- paste(ary[,i], collapse="")
   if (is.complementary(query, target, gu.allowed=gu.allowed,doubleGapOK=doubleGapOK)) {
      if ((minWords < 2) || (length(findColumnPairWords(query, target, convertGUtoGC=convertGUtoGC)) >= minWords)) {
        result <- c(result, i)
        if (verbose > 2) {
         cat("Columns",col, i, "are complementary:", target, query, "\n")
        }
      }
   } 
  }
 }
 result
}

# given a matrix of single-character nucleotide characters, find 
# for a column all other column ids that are ccomplementary
findAllComplementaryColumns <- function(ary,gu.allowed=TRUE, singleGapOK=TRUE, doubleGapOK=TRUE, minWords=1, asMatrix=TRUE, queryMin=1, queryMax=ncol(ary),
             targetMin=1,targetMax=ncol(ary), convertGUtoGC=FALSE) {
 result1 <- c()
 result2 <- c()
 for (i in queryMin:queryMax) {
  query <- paste(ary[,i], collapse="")
  tMin = max((i+1), targetMin)
  if (tMin <= targetMax) {
  for (j in tMin:targetMax) {
   stopifnot(j > i)
   target <- paste(ary[,j], collapse="")
    if (is.complementary(query, target, gu.allowed=gu.allowed,singleGapOK=singleGapOK, doubleGapOK=doubleGapOK)) {
         if (length(findColumnPairWords(query, target, convertGUtoGC=convertGUtoGC)) >= minWords) {
          result1[length(result1)+1] <- i
          result2[length(result2)+1] <- j
         }
      }
    } 
  }
  }
  result <- data.frame(cbind(ID1=result1,ID2=result2))
  stopifnot(!is.factor(result$ID1))
  stopifnot(!is.factor(result$ID2))
  finalResult <- result
  if (asMatrix) {
     finalResult <- matrix(0, nrow=ncol(ary), ncol=ncol(ary))
     for (i in 1:length(result1)) {
       finalResult[result1[i],result2[i]] <- 1
       finalResult[result2[i],result1[i]] <- 1
     }
     finalResult <- finalResult[queryMin:queryMax, targetMin:targetMax]
  }
  finalResult
}


# test function for findComplementaryColumns
test.findComplementaryColumns <- function(verbose=3) {
  ary <- matrix("", ncol=3, nrow=3) 
  ary[1,] <- strsplit("ACU", "")[[1]]
  ary[2,] <- strsplit("GUC", "")[[1]]
  ary[3,] <- strsplit("UGA", "")[[1]]
  col <- 1
  result <- findComplementaryColumns(ary, col, verbose=verbose)
  print(result)
  stopifnot(result == 3)
  result
}

# chr2L_12625849_20443308_18_A.fa: first parse has 18 nucleotides , second part has 93-18=65
test.findComplementaryColumns_fromAlignment <- function(col=1,minWords=1,verbose=3) {
 ifile = system.file("data/chr2L_12625849_20443308_18_AB12.fa", package="rnafolding")
 stopifnot(file.exists(ifile))
 seqs <- readFASTA(ifile, strip.descs=TRUE)
 stopifnot(length(seqs) > 0)
 print(seqs)
 ary <- seqsToCharArray(seqs)
 if (verbose > 1) {
	print(ary) 
 }
 result <- findComplementaryColumns(ary, col, minWords=minWords, verbose=verbose)
 print(result)
 stopifnot(length(result) > 0)
 result
}

test.findAllComplementaryColumns_fromAlignment <- function(minWords=1,verbose=FALSE,asMatrix=FALSE, ali1Len=18) {
 ifile = system.file("data/chr2L_12625849_20443308_18_AB12.fa", package="rnafolding")
 stopifnot(file.exists(ifile))
 seqs <- readFASTA(ifile, strip.descs=TRUE)
 stopifnot(length(seqs) > 0)
 print(seqs)
 ary <- seqsToCharArray(seqs)
 if (verbose) {
	print(ary) 
 }
 result <- findAllComplementaryColumns(ary, minWords=minWords, asMatrix=asMatrix, queryMax=ali1Len, targetMin=(ali1Len+1))
 print(result)
 stopifnot(nrow(result) > 0)
 result
}

# read 2 alignments, find all complementary column pairs between them
test.findAllComplementaryColumns_from2Alignments <- function(
   ifile1 = system.file("data/chr2L_12625849_20443308_18_A.fa", package="rnafolding"),
   ifile2 = system.file("data/chr2L_12625849_20443308_18_B.fa", package="rnafolding"),
   gu.allowed=TRUE,
   singleGapOK=TRUE, doubleGapOK=TRUE,
   minWords=1,verbose=FALSE,asMatrix=FALSE) {
 stopifnot(file.exists(ifile1))
 stopifnot(file.exists(ifile2))
 seqs1 <- readFASTA(ifile1, strip.descs=TRUE)
 seqs2 <- readFASTA(ifile2, strip.descs=TRUE)
 stopifnot(length(seqs1) > 0)
 stopifnot(length(seqs2) > 0)
# print(seqs)
 ary1 <- seqsToCharArray(seqs1)
 ali1Len <- ncol(ary1)
 ary2 <- seqsToCharArray(seqs2)
 ali2Len <- ncol(ary2)
 ary <- cbind(ary1, ary2)
 if (verbose) {
	print(ary) 
 }
 result <- findAllComplementaryColumns(ary, gu.allowed=gu.allowed, singleGapOK=singleGapOK, doubleGapOK=doubleGapOK, minWords=minWords, asMatrix=asMatrix, queryMax=ali1Len, targetMin=(ali1Len+1))
 print(result)
# stopifnot(nrow(result) > 0)
 result
}



# test function for findComplementaryColumns
test.findAllComplementaryColumns <- function() {
  ary <- matrix("", ncol=3, nrow=3) 
  ary[1,] <- strsplit("ACU", "")[[1]]
  ary[2,] <- strsplit("GUC", "")[[1]]
  ary[3,] <- strsplit("UGA", "")[[1]]
  col <- 1
  result <- findAllComplementaryColumns(ary)
  print(result)
  result
}

# for two strings representing two columns in a sequence alignment, 
# count number of different two-character words.
# any rows with one or more gap characters will be ignored.
findColumnPairWords <- function(query, target, gu.allowed=TRUE, convertGUtoGC=FALSE) {
 if (length(query) == 1) {
  query = strsplit(query, "")[[1]]
 }
 if (length(target) == 1) {
  target = strsplit(target, "")[[1]]
 }
 result <- c()
 n <- length(query)
 words <- list()
 for (i in 1:n) {
  c1 <- query[i]
  c2 <- target[i] 
  if ((!is.gap(c1)) && (!is.gap(c2))) {
   w <- paste(c1, c2, sep="")
   if (convertGUtoGC) {
    if ((w == "GU") || (w == "GT")) {
     w <- "GC"
    } else if ((w == "UG") || (w == "TG")) {
     w <- "CG"
    }
   }
   result[[w]] <- i
  }
 }
 x <- names(result)
 x
}

# finds set of two-characters words of two columns 
test.findColumnPairWords <- function(s1 = "AAGGG", s2="UUCCC", convertGUtoGC=FALSE) {
 result <- findColumnPairWords(s1, s2, convertGUtoGC=convertGUtoGC)
 cat("Result words of", s1, s2, ":\n")
 print(result)
 stopifnot(length(result) > 0)
 result
}

# finds set of two-characters words of two columns 
test.findColumnPairWords2 <- function(s1="AUGG", s2="UACU") {
 cat("Converting GU to GC: Result words of", s1, s2, ":\n")
 result <- findColumnPairWords(s1, s2, convertGUtoGC=TRUE)
 cat("Result words of", s1, s2, ":\n")
 print(result)
 cat("Not converting GU to GC: Result words of", s1, s2, ":\n")
 result2 <- findColumnPairWords(s1, s2, convertGUtoGC=FALSE)
 print(result2)
 stopifnot(length(result) <= length(result2))
 finalResult <- list()
 finalResult[["noConvertGCtoGC"]] <- result2
 finalResult[["convertGCtoGC"]] <- result
 result
}

ConvertDNAtoRNAString <- function(s) {
 gsub("T", "U", toupper(s))	
}

test.ConvertDNAtoRNAString <- function() {
 checkEquals(ConvertDNAtoRNAString("T"), "U")
 checkEquals(ConvertDNAtoRNAString("t"), "U")
 checkEquals(ConvertDNAtoRNAString("At"), "AU")
 checkEquals(ConvertDNAtoRNAString("Ac"), "AC")
}

ConvertDNAtoRNAAlignment <- function(seqs) {
 for (i in 1:length(seqs)) {
   seqs[[i]]$seq <- ConvertDNAtoRNAString(seqs[[i]]$seq)
 }
 seqs
}

ConvertToUpperAlignment <- function(seqs) {
 for (i in 1:length(seqs)) {
   seqs[[i]]$seq <- toupper(seqs[[i]]$seq)
 }
 seqs
}

# reverse order of sequence. Rarely used, maybe as special case of shuffling
alignment.reverse <- function(sequences) {
 stopifnot(is.list(sequences))
 for (i in 1:length(sequences)) {
  sequences[[i]]$seq <- paste(rev(strsplit(sequences[[i]]$seq, split = "")[[1]]), collapse = "") 
 }
 sequences
}

test.alignment.reverse <- function(file=system.file("data/tRNA_ecoli_anticodon.fa", package="rnafolding")) {
 seqs <- readFASTA(file, strip.descs=TRUE)
 cat("# Sequences:\n")
 print(seqs)
 cat("# Sequences in reversed order:\n")
 print(alignment.reverse(seqs))
}
