/*
 * HomologHMM 1.04
 * (c) Lukas Käll
 * Distributable under GPL license.
 * See terms of license at gnu.org.
 */
package se.ki.cgb.hmmdecode;

import se.ki.cgb.anhmmfile.*;
import se.ki.cgb.labeledhmm.*;
import java.io.*;
import java.util.*;
import org.biojava.bio.dp.*;
import org.biojava.bio.seq.*;
import org.biojava.bio.seq.io.*;

/**
 * Class to handle a prediction session
 * with homology decode
 * @author Lukas.Kall@cgb.ki.se
 * @version $Revision: 1.9 $
 */

public class Run {

	/** Version string printed in the start-up banner. */
	protected static String HH_VER ="1.04";

	/**
	 * Prints the command line synopsis and the option descriptions
	 * to standard error.
	 */
	private static void printUsage() {
		System.err.println("usage:	 homologhmm [-a] [-v] [-m] [-w (f|c|g|u|i)] modelfile input");
		System.err.println("              cat input | homologhmm [-a] [-v] [-m] [-w (f|c|g|u|i)] modelfile");
		System.err.println("              homologhmm [-a] [-w (f|c|g|u|i)] -pw input");
		System.err.println("              homologhmm -h");
		System.err.println("option	-a    Treat input file as an aligned fasta and do common prediction");
		System.err.println("option	-m    Maximum PLP prediction");
		System.err.println("option	-o    Optimal accuracy prediction (default)");
		System.err.println("option	-v    Viterbi prediction");
		System.err.println("option	-w f  Select flat weighting scheme");
		System.err.println("option	-w c  Select Henikoff-Henikoff weighting scheme only taking (conserved) columns without gaps in account");
		System.err.println("option	-w u  Select Henikoff-Henikoff weighting scheme ignoring gaps and columns where query sequence contain gap");
		System.err.println("option	-w g  Select Henikoff-Henikoff weighting scheme counting gaps as an amino acid");
		System.err.println("option	-w i  Select Henikoff-Henikoff weighting scheme ignoring gaps");
		System.err.println("option	-w e  Select Henikoff-Henikoff weighting scheme ala HMMer ignoring gaps and scaling with true sequence length (default)");
		System.err.println("option	-pw   Print Weights and exit");
		System.err.println("option	-plp  Print posterior label probabilities in first sequence coordinates");
		System.err.println("option	-aplp Print posterior label probabilities in alignment coordinates");
		System.err.println();
	}

	/**
	 * Command line entry point.
	 * Parses the options, selects the weighting scheme and then either
	 * prints the sequence weights (-pw) or reads the model file and the
	 * query sequences and hands them to {@link AlignedDP}.
	 * The process is always terminated through System.exit: status 0 on
	 * success or after -h, status -1 on any usage or read error.
	 *
	 * @param args command line arguments, see {@link #printUsage()}
	 * @throws Exception anything thrown by the weighting or decoding code
	 */
	public static void main( String[] args ) throws Exception {
		printGreater();
		boolean align=false;
		// Optimal accuracy stays the default until another output mode is requested
		boolean optacc_def=true;
		boolean optacc=false;
		boolean maxplp=false;
		boolean viterbi=false;
		boolean printWeights = false;
		// Mode code handed to AlignedDP.go: 0 unless -plp (1) or -aplp (3) is given
		int printPLP = 0;
		int nBest = 0;
		Constraints constraints = null;

		// Number of positional (non-option) arguments seen so far
		int nargs=0;
		// A single blank in weights means "no -w given, keep the default scheme"
		String modelFile="",queryFile="",weights=" ";

		for (int i=0;i<args.length;i++) {
			if (args[i].equals("-h") || args[i].equals("-?")) {
				printUsage();
				System.exit(0);
			} else if (args[i].equals("-v")) {
				viterbi=true;
				optacc_def=false;
			} else if (args[i].equals("-m")) {
				maxplp = true;
				optacc_def=false;
			} else if (args[i].equals("-N")) {
				// The count is optional: when it is missing or not a number
				// nBest falls back to 1 and the next token is not consumed.
				nBest = 1;
				if (i+1<args.length) {
					try {
						nBest=Integer.parseInt(args[i+1]);
						i++;
					} catch (NumberFormatException e) {
						nBest = 1;
					}
				}
				optacc_def=false;
			} else if (args[i].equals("-o")) {
				optacc = true;
			} else if (args[i].equals("-a")) {
				align = true;
			} else if (args[i].equals("-w")) {
				// Guard against -w being the last token on the command line
				if (i+1>=args.length) {
					System.err.println("Option -w requires a weighting scheme argument");
					printUsage();
					System.exit(-1);
				}
				i++;
				weights = args[i];
			} else if (args[i].equals("-plp")) {
				printPLP = 1;
				optacc_def=false;
			} else if (args[i].equals("-aplp")) {
				printPLP = 3;
				align = true;
				optacc_def=false;
			} else if (args[i].equals("-pw")) {
				printWeights = true;
			} else if (args[i].equals("-c")) {
				if (constraints==null) constraints = new Constraints();
				// Consume following tokens for as long as setConstraint accepts them,
				// without running past the end of the argument list
				while (i+1<args.length && constraints.setConstraint(args[i+1])) {i++;}
			} else {
				if (nargs==0) modelFile = args[i];
				if (nargs==1) queryFile = args[i];
				nargs++;
			}
		}
		optacc |= optacc_def;
		// -pw takes at most one positional argument, normal runs take one or two
		if (( !printWeights && (nargs==0 || nargs>2)) ||
		   ( printWeights && nargs>1)) {
			System.err.println("Wrong number of arguments: " + nargs);
			printUsage();
			System.exit(-1);
		}
		// An empty -w value maps to a character that no case accepts
		char scheme = weights.length()>0 ? Character.toUpperCase(weights.charAt(0)) : '\0';
		switch (scheme) {
			case 'F':	WeightScheme.selectScheme(WeightScheme.FLAT); break;
			case 'U':	WeightScheme.selectScheme(WeightScheme.UNGAPPED_HENIKOFF); break;
			case 'G':	WeightScheme.selectScheme(WeightScheme.GAPPED_HENIKOFF); break;
			case 'C':	WeightScheme.selectScheme(WeightScheme.CONSERVED_HENIKOFF); break;
			case 'E':	WeightScheme.selectScheme(WeightScheme.EDDY_HENIKOFF); break;
			case 'I':	WeightScheme.selectScheme(WeightScheme.IGNORE_HENIKOFF); break;
			case ' ':	break; // Use default scheme
			default:
				System.err.println("Wrong weighting scheme type: " + weights);
				printUsage();
				System.exit(-1);
		}
		if (printWeights) {
			// Non-cosher
			// In this mode the first positional argument is the sequence file
			// (standard input when absent), not a model file.
			// Read sequences and Print out assigned weights
			Sequence [] seqArr = readSequences(modelFile,true);
			printWeights(seqArr);
			System.exit(0);
		}
		// Normal processing
		// Read the model
		MarkovModel model =null;
		try {
			BufferedReader in = new BufferedReader(new FileReader(modelFile));
			try {
				System.err.println("Read ANHMM File "+ modelFile);
				model = AnReader.getInstance().parseStream(in);
			} finally {
				// Release the file handle even when parsing fails
				in.close();
			}
		} catch (Exception e) {
			System.err.println("Error ocured while reading the file: " + modelFile);
			System.err.println("Got the following StackTrace:");
			e.printStackTrace();
			System.exit(-1);
		}
		AlignedDP dp = new AlignedDP(model,constraints);
		// Read the sequences
		Sequence [] seqArr = readSequences(queryFile,align);
		OutputHandler.setInstance(new FastaOutputHandler());
		dp.go(seqArr, align, optacc, maxplp, viterbi, printPLP,nBest);
		System.exit(0);
	}

	/**
	 * Reads all sequences in labeled fasta format from a file or,
	 * when the file name is empty, from standard input.
	 * Any failure is reported on standard error and terminates the
	 * process with status -1, so the method only returns on success.
	 *
	 * @param queryFile name of the file to read, or "" for standard input
	 * @param checkLen  when true every sequence must have the same length
	 *                  as the first one, otherwise the process is terminated
	 * @return the sequences in the order they were read
	 */
	static public Sequence [] readSequences(String queryFile, boolean checkLen) {
		SequenceFormat form = new LabeledFastaFormat();
		Sequence [] seqArr = null;
		BufferedReader br = null;
		// Only a reader this method opened on a file is closed; System.in is left open
		boolean fromFile = false;
		try {
			if (queryFile.length()==0) {
				br = new BufferedReader(new InputStreamReader(System.in));
			} else {
				br = new BufferedReader(new FileReader(queryFile));
				fromFile = true;
			}
			SequenceBuilderFactory sbf = new LabeledFastaParser.Factory(
												 SimpleSequenceBuilder.FACTORY);
			SymbolTokenization protParser = ProteinTools.getAlphabet().getTokenization("token");
			StreamReader stream = new StreamReader(br,form,protParser,sbf);
			ArrayList sequences= new ArrayList();
			while (stream.hasNext()) {
				sequences.add(stream.nextSequence());
			}
			seqArr = new Sequence[sequences.size()];
			sequences.toArray(seqArr);
		} catch (Exception e) {
			System.err.println("Error ocured while reading the file: " + queryFile);
			System.err.println("Got the following StackTrace:");
			e.printStackTrace();
			System.exit(-1);
		} finally {
			if (fromFile && br!=null) {
				try {
					br.close();
				} catch (IOException ignored) {
					// All sequences are already in memory; a failing close changes nothing
				}
			}
		}
		if (checkLen) {
			for (int i=1;i<seqArr.length;i++) {
				if (seqArr[i].length() != seqArr[0].length()) {
					System.err.println("Error ocured while reading the file: " + queryFile);
					// i is a 0-based index, report the 1-based position of the sequence
					System.err.println("Difference in length between sequence 1 and sequence " + (i+1) + " in "+ queryFile);
					System.exit(-1);
				}
			}
		}
		return seqArr;
	}

	/**
	 * Prints the program name, version and copyright banner to standard error.
	 */
	private static void printGreater() {
		System.err.println("HomologHMM version " + HH_VER);
		System.err.println("(c) Lukas Kall 2005");
	}

	/**
	 * Prints one line per sequence to standard output containing the
	 * sequence name followed by the weight obtained from the current
	 * {@link WeightScheme} instance.
	 *
	 * @param seqArr the sequences to compute and print weights for
	 */
	static void printWeights(Sequence [] seqArr) {
		double [] weights = WeightScheme.getInstance().getWeights(seqArr);
		for (int i=0;i<seqArr.length;i++) {
			// NOTE(review): the weight is written twice on each line; kept as is
			// because a consumer may expect two columns - confirm whether this is intended
			System.out.println(seqArr[i].getName() + " " + weights[i] + " " + weights[i]);
		}
	}

}
