/*************************************************************************
 *                                                                       *
 *       (c) Copyright 2003                                              *
 *       All rights reserved                                             *
 *       Programs written by Jianghui Liu (NJIT)                         *
 *                                                                       *
 *       Permission to use, copy, modify, and distribute this software   *
 *       and its documentation for any purpose and without fee is her-   *
 *       eby granted, provided that this copyright notice appears in     *
 *       all copies.   Programmer(s) makes no representations about      *
 *       the suitability of this software for any purpose.  It is pro-   *
 *       vided "as is" without express or implied warranty.              *
 *                                                                       *
 *       08/06/2003                                                      *
 *************************************************************************/
package RNA;
import java.io.*;
import java.util.*;
import java.text.NumberFormat;

//--------------------------------------------------------------------------
// This class implements the database searching functions.
//
// A DBSearcher holds one query RNA structure and compares it against every
// structure of a database file. Each search reads the database several
// times through fresh RNAReader instances (one pass to count the entries,
// one to score them, one to collect the selected entries), so the database
// file must not change while a search is running.
//
// Progress is reported on System.out with a small rotating spinner.
//--------------------------------------------------------------------------
public class DBSearcher
{
    // Characters cycled through to draw the console progress spinner.
    private static final String SPINNER = "-\\|/";

    // File handed to the Matcher as the code translation table.
    private static final String CODE_TABLE_FILE = "codeTable.properties";

    private RNA query;

    // ---------------------------------------------------------------------------------
    //  Build a searcher whose query is read from 'queryFile'.
    //  On an I/O error the stack trace is printed and the JVM exits with status 1.
    // ---------------------------------------------------------------------------------
    public DBSearcher(String queryFile)
    {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(queryFile));
            query = RNAReader.getRNA(in);
        } catch(IOException e) {
            e.printStackTrace();
            System.exit(1);
        } finally {
            // Release the file handle even when parsing fails with an unchecked exception.
            if( in != null) {
                try {
                    in.close();
                } catch(IOException ignored) {
                    // The query has already been read (or reading already failed);
                    // a failure while closing carries no further information.
                }
            }
        }
    }

    // ---------------------------------------------------------------------------------
    //  Build a searcher around an already loaded query structure.
    // ---------------------------------------------------------------------------------
    public DBSearcher( RNA a)
    {
        query = a;
    }

    // ---------------------------------------------------------------------------------
    //  Database searching: return the top 'ntop' structures. The scoring matrices and
    //  atom gap penalty are provided as arguments. The output is formated as the
    //  returned String value.
    //
    //  DBFile          database file, read three times
    //  ntop            number of top structures requested; must be at least 1
    //  Score_matrices  score matrix file handed to the Matcher
    //  Penalty         gap penalty handed to RNA.localMatch2D
    //
    //  Returns one line per selected structure, each preceded by "\n", holding the
    //  tab separated query name, structure name, raw score and the values reported
    //  by HomoUtil.getMeasures. Up to 2*ntop candidate slots are kept; only those
    //  whose raw score reaches the computed cutoff are reported. The result is the
    //  empty string when nothing qualifies (e.g. the database is empty).
    //
    //  Throws IllegalArgumentException when ntop is smaller than 1.
    // ---------------------------------------------------------------------------------
    public String getInfoOfTopN(String DBFile, int ntop, String Score_matrices, double Penalty)
    {
        requirePositive(ntop);

        // -----------------------------------------------------
        // First scan of database to get the size information
        // -----------------------------------------------------
        int nStruct = countStructures(DBFile, "\tDeterming the database volumn. Wait ... ");
        System.out.println( "\n" + nStruct + " RNA structures found in the Database.\n");

        // set up code translation table and score matrix
        Matcher m = new Matcher(CODE_TABLE_FILE, Score_matrices);
        int[] queryMap = new int[query.seqLength()];

        // -------------------------------------------------------
        // Second Scan of the database to get the scores
        // -------------------------------------------------------
        double[] rawScores = new double[nStruct];
        RNAReader in = new RNAReader(DBFile);
        int count = 0;
        RNA data = in.getNextRNA();
        System.out.println("\n");
        while( data != null) {
            count ++;
            System.out.print("Searching against database. Wait ... " + spinnerFrame(count));

            // Only the score is needed here, so no alignment maps are passed.
            rawScores[count - 1] = RNA.localMatch2D(query, data, null, null, m, Penalty);
            data = in.getNextRNA();
        }

        // ----------------------------------------------------
        // Get the top '2*ntop' scores and set up the cutoff
        // ----------------------------------------------------
        int nKeep = 2 * ntop;
        double[] topScores = selectTopScores(rawScores, nKeep);    // ascending order

        double cutoff;
        if( topScores[0] < topScores[ntop-1]) {
            cutoff = topScores[ntop-1];
        } else {
            // The lowest ntop kept scores are all tied: move the cutoff up to the
            // first larger score. The bound is tested BEFORE the array is read, so a
            // fully tied list (or an empty database) no longer runs off the end.
            int i = ntop - 1;
            while( i < nKeep && topScores[0] >= topScores[i])
                i++;
            cutoff = (i < nKeep) ? topScores[i] : topScores[0];
        }

        System.out.println("\n\nFinal score interval: " + cutoff +
                           " -- " + topScores[nKeep-1] + "\n");

        // -------------------------------------------------------
        // Third Scan of the database to get the FINAL result
        // -------------------------------------------------------
        RNA[] topRNAs = new RNA[nKeep];
        in = new RNAReader(DBFile);
        count = 0;
        data  = in.getNextRNA();
        System.out.println("\n");
        while( data != null) {
            count ++;
            System.out.print("\tPreparing the final candidates. Wait ... " + spinnerFrame(count));

            double score = rawScores[count-1];
            if( score >= cutoff ) {
                int index = getIndex(topScores, score);
                // getIndex() reports "not found" as -1, so index 0 is a valid hit.
                if( index >= 0) {
                    // topScores is ascending; mirror the index so that topRNAs
                    // ends up ordered from the best score downwards.
                    topRNAs[nKeep - index - 1] = data;
                    // Mark the slot as used so that a tied score takes the next one.
                    topScores[index] = Double.NEGATIVE_INFINITY;
                }
            }
            data = in.getNextRNA();
        }

        // Re-align every selected structure, this time with alignment maps,
        // to obtain the measures that go into the report.
        StringBuffer result = new StringBuffer();
        for(int i=0; i<topRNAs.length; i++) {
            if( topRNAs[i] == null)
                continue;

            int[] dataMap = unalignedMap(topRNAs[i].seqLength());
            Arrays.fill(queryMap, -1);

            double score = RNA.localMatch2D(query, topRNAs[i], queryMap, dataMap, m, Penalty);
            int[] measure = HomoUtil.getMeasures(query, queryMap, topRNAs[i], dataMap);
            String ret = query.getName() + "\t" + topRNAs[i].getName() + "\t" + score + "\t" +
                         (measure[0]+measure[1]) + "\t" + measure[0] + "\t" + measure[1] + "\t" + measure[6] + "\t" +
                         measure[7] + "\t" + measure[8] + "\t" + measure[9] + "\t" +
                         (measure[2]+measure[3]) + "\t" + measure[2] + "\t" + measure[3] +
                         "\t" + (measure[4]+measure[5]) + "\t" + measure[4] + "\t" + measure[5];
            result.append("\n" + ret);
        }
        return result.toString();
    }

    // -------------------------------------------------------------------------------------
    //  Database searching: return the top 'ntop' structures using two-distribution method.
    //  The scoring matrices and atom gap penalty are provided as arguments.
    //
    //  DBFile          database file, read three times
    //  ntop            number of structures to return; must be at least 1
    //  Score_matrices  score matrix file handed to the Matcher
    //  Penalty         gap penalty handed to RNA.localMatch2D
    //
    //  Returns an array of length ntop holding, in database order, the first
    //  structures whose final score reaches the ntop-th best final score. Trailing
    //  entries are null when fewer than ntop structures qualify.
    //
    //  Throws IllegalArgumentException when ntop is smaller than 1.
    // -------------------------------------------------------------------------------------
    public RNA[] getTopN(String DBFile, int ntop, String Score_matrices, double Penalty)
    {
        requirePositive(ntop);

        // -----------------------------------------------------
        // First scan of database to get the size information
        // -----------------------------------------------------
        int nStruct = countStructures(DBFile, "\tDetermine the database volumn. Wait ... ");
        System.out.println( nStruct + " RNA structures found in the Database.\n");

        // set up code translation table and score matrix
        Matcher m = new Matcher(CODE_TABLE_FILE, Score_matrices);

        // -------------------------------------------------------
        // Second Scan of the database to get the distributions
        // -------------------------------------------------------
        System.out.println("Determine raw-score and aligned-length distributions. Wait ...");
        double[] finalScores = computeFinalScores(DBFile, nStruct, m, Penalty,
                "\tDetermine the row score and aligned length distributions. Wait ... ");

        // --------------------------
        // Get the top 'ntop' scores
        // --------------------------
        double[] topScores = selectTopScores(finalScores, ntop);   // ascending order

        System.out.println("\nFinal score interval: " + topScores[0] +
                           " -- " + topScores[ntop-1]);

        // -------------------------------------------------------
        // Third Scan of the database to get the FINAL result
        // -------------------------------------------------------
        RNA[] topRNAs = new RNA[ntop];
        RNAReader in = new RNAReader(DBFile);
        int count = 0;
        RNA data  = in.getNextRNA();
        int index = 0;
        System.out.println("\n");
        while( data != null && index < ntop)
        {
            count ++;
            System.out.print("\rPreparing the final candidate. Wait ... " + spinnerFrame(count));

            // topScores[0] is the smallest of the retained top scores.
            if( finalScores[count-1] >= topScores[0] ) {
                topRNAs[index] = data;
                index ++;
            }
            data = in.getNextRNA();
        }

        return topRNAs;
    }


    //----------------------------------------------------------------------
    // The same function of searching database by two distribution method,
    // except that this one will format the verbose output to a output file
    //
    //  DBFile          database file, read three times
    //  ntop            maximum number of hits to report; must be at least 1
    //  Score_matrices  score matrix file handed to the Matcher
    //  Penalty         gap penalty handed to RNA.localMatch2D
    //  outputFile      file that receives the report (overwritten)
    //
    //  Structures whose name equals that of an already selected hit are
    //  skipped. Problems while writing the report are printed, not thrown.
    //
    //  Throws IllegalArgumentException when ntop is smaller than 1.
    //
    //  NOTE(review): this method is private and nothing in this class calls
    //  it; confirm whether it is meant to be reachable.
    //-----------------------------------------------------------------------
    private void searchDB(String DBFile, int ntop, String Score_matrices,
                           double Penalty, String outputFile)
    {
        requirePositive(ntop);

        // -----------------------------------------------------
        // First scan of database to get the size information
        // -----------------------------------------------------
        int nStruct = countStructures(DBFile, "\rDetermine the database size. Wait ... ");
        System.out.println( "\n" + nStruct + " RNA structures found in the Database.\n");

        // set up code translation table and score matrix
        Matcher m = new Matcher(CODE_TABLE_FILE, Score_matrices);
        int[] queryMap = new int[query.seqLength()];

        // -------------------------------------------------------
        // Second Scan of the database to get the distributions
        // -------------------------------------------------------
        double[] finalScores = computeFinalScores(DBFile, nStruct, m, Penalty,
                "\rDeterming row score and aligned length distribution. Wait ... ");

        // --------------------------------------------
        // Get the top 'ntop' scores
        // --------------------------------------------
        double[] topScores = selectTopScores(finalScores, ntop);   // ascending order

        System.out.println("\nFinal score interval: " + topScores[0] + " -- " + topScores[ntop-1]);

        // -------------------------------------------------------
        // Third Scan of the database to get the FINAL result
        // -------------------------------------------------------
        StringBuffer[] alignments = new StringBuffer[ntop];  // detailed strucuture alignment;
        RNA[] topRNAs = new RNA[ntop];                       // the final selected candidates;
        double[] candScores = new double[ntop];              // the final set of scores;
        String[] headers = new String[ntop];
        Arrays.fill(candScores, Double.NEGATIVE_INFINITY);

        RNAReader in = new RNAReader(DBFile);
        int count = 0;
        NumberFormat nf = NumberFormat.getInstance();
        nf.setMaximumFractionDigits(2);
        RNA data = in.getNextRNA();
        int index = 0;
        System.out.println("\n");
        while( data != null && index < ntop) {
            count ++;
            System.out.print("\rPreparing the final candidates. Wait ... " + spinnerFrame(count));

            // A structure whose name was already selected is not reported twice.
            boolean found = false;
            for(int k=0; k<index; k++) {
                if( topRNAs[k].getName().equals(data.getName()) ) {
                    found = true;
                    break;
                }
            }

            if( !found && finalScores[count-1] >= topScores[0] ) {
                int[] dataMap = unalignedMap(data.seqLength());
                Arrays.fill(queryMap, -1);

                // Run the alignment again, this time keeping the maps for the report.
                RNA.localMatch2D(query, data, queryMap, dataMap, m, Penalty);
                alignments[index] = new StringBuffer(RNA.alignMatch(query, queryMap, data, dataMap, m));
                candScores[index] = finalScores[count-1];
                topRNAs[index] = data;

                int[] region1 = HomoUtil.getAlignedRegion(query, queryMap);
                int[] region2 = HomoUtil.getAlignedRegion(data, dataMap);
                // Region bounds are printed one larger than the stored values.
                headers[index] = fixedWidth( ((region1[0]+1) + "-" + (region1[1]+1)), 7) + "  " +
                                  fixedWidth( data.getName(), 22) + " "
                                  + fixedWidth( ((region2[0]+1) + "-" + (region2[1]+1)), 7) + " "
                                  + data.getAnnotate();
                index ++;
            }
            data = in.getNextRNA();
        }

        // Emit the candidates from the best score downwards: repeatedly take the
        // current maximum and reset it so that the next best one surfaces.
        StringBuffer briefs = new StringBuffer();
        StringBuffer details= new StringBuffer();
        for(int i=0; i<ntop; i++) {
            int slot = getMax(candScores);
            if( candScores[slot] <= Double.NEGATIVE_INFINITY )
                break;                                     // no candidate left

            String strRank = "" + (i+1);
            briefs.append("" + fixedWidth(strRank, 4) + "  " + fixedWidth(nf.format(candScores[slot]), 8) + "  "
                          + headers[slot] + "\n");

            details.append("===============================================================\n");
            details.append("\nRank: " + (i+1) + "  Score: " + nf.format(candScores[slot]) );
            details.append(alignments[slot]);

            candScores[slot] = Double.NEGATIVE_INFINITY;   // reset the score !
        }

        System.out.println("\nDumping the output to " + outputFile + " ...");

        PrintWriter out = null;
        try {
            out = new PrintWriter(new BufferedWriter(new FileWriter(outputFile)));

            out.println("#=== Query ===#\n ");
            out.println(query.dispRNA());

            out.println("\n#=== Hits ===#\n");
            // brief result
            out.println(briefs);

            out.println("\n\n Detailed Results for the top list: ");
            out.println(details);

            TextHistogram his = new TextHistogram(finalScores);
            out.println("\n The score distribution is: \n");
            out.println(his.display());

            out.println("\n\n The score matrices used are : \n");
            out.println(m.getScoreMatrices());
            out.println("\nGap penalty is set as: " + Penalty);

            // PrintWriter never throws on write errors; ask for them explicitly.
            if( out.checkError())
                System.err.println("Error while writing the output to " + outputFile);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the file even when building the report fails half way.
            if( out != null)
                out.close();
        }
    }

    // ---------------------------------------------------
    //  Reject a requested number of top structures that
    //  is smaller than 1.
    // ---------------------------------------------------
    private static void requirePositive(int ntop)
    {
        if( ntop < 1)
            throw new IllegalArgumentException("ntop must be at least 1, but was " + ntop);
    }

    // ---------------------------------------------------
    //  Return the spinner character to show for the
    //  'count'-th processed structure.
    // ---------------------------------------------------
    private static String spinnerFrame(int count)
    {
        int k = count % SPINNER.length();
        return SPINNER.substring(k, k + 1);
    }

    // ---------------------------------------------------
    //  Create an alignment map of the given length with
    //  every entry set to -1.
    // ---------------------------------------------------
    private static int[] unalignedMap(int length)
    {
        int[] map = new int[length];
        Arrays.fill(map, -1);
        return map;
    }

    // ---------------------------------------------------------------
    //  Read the whole database once and return the number of
    //  structures in it. 'progressLabel' is printed, followed by the
    //  spinner character, for every structure read.
    // ---------------------------------------------------------------
    private int countStructures(String DBFile, String progressLabel)
    {
        RNAReader in = new RNAReader(DBFile);
        int count = 0;
        RNA data = in.getNextRNA();
        System.out.println("\n");
        while( data != null) {
            count ++;
            System.out.print(progressLabel + spinnerFrame(count));
            data = in.getNextRNA();
        }
        return count;
    }

    // ---------------------------------------------------------------------------
    //  Scoring pass of the two-distribution method. Every structure of the
    //  database is aligned with the query, giving per structure
    //    - the raw score returned by RNA.localMatch2D,
    //    - the length of the region returned by HomoUtil.getAlignedRegion,
    //    - the percentage of positions in that region whose queryMap entry
    //      is not -1.
    //  The final score of structure i is then
    //    -ln(p1 * p2) * percentage / (ln 10)^2
    //  where p1 and p2 are the values TextHistogram.getPValue returns for its
    //  raw score and its aligned length.
    //
    //  nStruct        number of structures in the database (from the count scan)
    //  progressLabel  text printed, followed by the spinner, for every structure
    //
    //  Returns the final scores in database order.
    //
    //  NOTE(review): a region of length 0 would make the percentage NaN; whether
    //  HomoUtil.getAlignedRegion can report such a region is not visible here.
    // ---------------------------------------------------------------------------
    private double[] computeFinalScores(String DBFile, int nStruct, Matcher m,
                                        double Penalty, String progressLabel)
    {
        double[] rawScores   = new double[nStruct];  // array of raw scores;
        int[]    alignLens   = new int[nStruct];     // array of alignment length;
        double[] idenPercent = new double[nStruct];  // array of structural identical percentagies;
        int[]    queryMap    = new int[query.seqLength()];

        RNAReader in = new RNAReader(DBFile);
        int count = 0;
        RNA data = in.getNextRNA();
        System.out.println("\n");
        while( data != null) {
            count ++;
            System.out.print(progressLabel + spinnerFrame(count));

            int[] dataMap = unalignedMap(data.seqLength());
            Arrays.fill(queryMap, -1);

            rawScores[count - 1] = RNA.localMatch2D(query, data, queryMap, dataMap, m, Penalty);

            int[] region = HomoUtil.getAlignedRegion(query, queryMap);
            int start = region[0];
            int end   = region[1];
            int alignedLen = end - start + 1;
            alignLens[count - 1] = alignedLen;

            int idLen = 0;
            for(int i=start; i<=end; i++) {
                if( queryMap[i] != -1)
                    idLen ++;
            }
            idenPercent[count - 1] = (100.0 * idLen) / alignedLen;

            data = in.getNextRNA();
        }

        // Two IMPORTANT distributions are set up !!
        TextHistogram scoreDist = new TextHistogram(rawScores);
        TextHistogram lengthDist = new TextHistogram(alignLens);

        // The final scores are calculated !
        double factor = Math.log(10) * Math.log(10);
        double[] finalScores = new double[nStruct];  // array of normalized scores;
        for(int i=0; i<finalScores.length; i++) {
            double p1 = scoreDist.getPValue(rawScores[i]);
            double p2 = lengthDist.getPValue(alignLens[i]);
            finalScores[i] = -1 * Math.log(p1 * p2) * idenPercent[i] / factor;
        }
        return finalScores;
    }

    // ---------------------------------------------------------------
    //  Return the 'n' largest values of 'scores' in ascending order.
    //  When 'scores' holds fewer than n values, the leading entries
    //  of the result are Double.NEGATIVE_INFINITY.
    // ---------------------------------------------------------------
    private static double[] selectTopScores(double[] scores, int n)
    {
        double[] top = new double[n];
        Arrays.fill(top, Double.NEGATIVE_INFINITY);

        for(int i=0; i<scores.length; i++) {
            // Replace the current minimum whenever a larger score shows up.
            int slot = getSlot(top, scores[i]);
            if( slot != -1)
                top[slot] = scores[i];
        }
        Arrays.sort(top);        // Ascending order !!
        return top;
    }

    // ---------------------------------------------------
    // Given a SET, find the minimum item which is smaller
    // than the given the item.
    //
    // Returns the index of the minimum of 'scores' when
    // that minimum is smaller than 'oneScore'; -1 when it
    // is not, or when 'scores' is empty.
    // ---------------------------------------------------
    private static int getSlot(double[] scores, double oneScore)
    {
        if( scores.length == 0)
            return -1;

        int slot = 0;
        for(int i=1; i<scores.length; i++) {
            if( scores[i] < scores[slot])
                slot = i;
        }

        return (scores[slot] < oneScore) ? slot : -1;
    }

    // ----------------------------------------
    //  find the maximum item from a SET
    //
    //  Returns the index of the first largest
    //  value; 'scores' must not be empty.
    // ----------------------------------------
    private static int getMax(double[] scores)
    {
        int slot = 0;
        for(int i=1; i<scores.length; i++)
        {
            if( scores[i] > scores[slot])
                slot = i;
        }
        return slot;
    }

    // ------------------------------------------------
    //  find the value from an array of values
    //
    //  Returns the index of the first entry that is
    //  exactly equal to 'value', or -1 when there is
    //  none.
    // -----------------------------------------------
    private static int getIndex(double[] scores, double value)
    {
        for(int i=0; i<scores.length; i++) {
            if( scores[i] == value)
                return i;
        }

        return -1;
    }

    // ------------------------------------------------
    //  Return 'str' cut or padded with blanks on the
    //  right to exactly 'width' characters.
    // ------------------------------------------------
    private static String fixedWidth(String str, int width)
    {
        StringBuffer ret = new StringBuffer();
        int length = str.length();
        if( length > width)
            length = width;
        ret.append(str.substring(0, length));

        for(int i=0; i<width-length; i++)
            ret.append(" ");
        return ret.toString();
    }
}

