/*
 * HomologHMM 1.04
 * (c) Lukas Käll
 * Distributable under GPL license.
 * See terms of license at gnu.org.
 */
package se.ki.cgb.labeledhmm;

import java.util.*;
import org.biojava.bio.seq.*;
import org.biojava.bio.symbol.*;

/** 
 * This class implements the Henikoff and Henikoff weighting scheme
 * as it is interpreted by Sean Eddy in the HMMer code.
 * Gaps in other sequences give a zero contribution to the final weights.
 * The weights are then scaled down by the sequence length to compensate
 * for long inserts.
 * @author Lukas.Kall@cgb.ki.se
 * @version $Revision: 1.3 $
 */
public class EddyHenikoffWeights extends WeightScheme {

	/**
	 * Creates a new weighting scheme instance by delegating to the
	 * WeightScheme constructor.
	 */
	protected EddyHenikoffWeights() {
		super();
	}

	/**
	 * Calculates position-based sequence weights for an alignment.
	 * <p>
	 * For every column, each sequence holding a counted symbol (see
	 * {@link #isResidue(Symbol)}) receives
	 * <code>1 / (k * n)</code>, where <code>k</code> is the number of
	 * distinct counted symbols in the column and <code>n</code> is the
	 * number of sequences sharing this sequence's symbol in the column.
	 * Symbols that are not counted contribute nothing. Each sequence's
	 * accumulated score is then divided by the number of counted symbols in
	 * that sequence, and finally all weights are normalized to sum to 1.
	 * <p>
	 * A sequence without any counted symbol gets weight 0. If no symbol is
	 * counted in the whole alignment, all weights are 0 (there is nothing
	 * to normalize); in neither case is NaN returned.
	 * <p>
	 * Columns 1 to <code>align[0].length()</code> are read from every
	 * sequence, so the sequences are presumably expected to be rows of one
	 * alignment with equal lengths -- TODO confirm with callers.
	 *
	 * @param align the aligned sequences, one row per sequence
	 * @return one weight per sequence, in the same order as <code>align</code>;
	 *         an empty array if <code>align</code> is empty
	 */
	public double [] getWeights(Sequence [] align) {
		int numSeqs = align.length;
		double [] weights = new double[numSeqs];
		if (numSeqs==0) return weights;

		int [] lengths = new int[numSeqs];
		// Counted symbols of the current column; null marks a skipped cell.
		Symbol [] column = new Symbol[numSeqs];
		// Symbol -> int[1] occurrence counter for the current column.
		Map freq = new HashMap();
		int numColumns = align[0].length();

		// symbolAt is called with positions starting at 1, as in the original loop.
		for (int i=1; i<=numColumns; i++) {
			freq.clear();
			for (int j=0; j<numSeqs; j++) {
				Symbol sym = align[j].symbolAt(i);
				if (!isResidue(sym)) {
					column[j] = null;
					continue;
				}
				column[j] = sym;
				lengths[j]++;
				int [] count = (int []) freq.get(sym);
				if (count==null) {
					count = new int[1];
					freq.put(sym, count);
				}
				count[0]++;
			}
			double numSym = freq.size();
			for (int j=0; j<numSeqs; j++) {
				if (column[j]==null) continue;
				int times = ((int []) freq.get(column[j]))[0];
				weights[j] += 1.0/(numSym*times);
			}
		}

		double sum = 0;
		for (int j=0; j<numSeqs; j++) {
			// A sequence without counted symbols has accumulated 0.0; dividing
			// by its zero length would give NaN and poison the normalization.
			if (lengths[j]>0) weights[j] /= lengths[j];
			sum += weights[j];
		}
		// With nothing counted at all the sum is 0; keep the zero weights
		// instead of dividing 0.0 by 0.0.
		if (sum>0) {
			for (int j=0; j<numSeqs; j++) weights[j] /= sum;
		}
		return weights;
	}

	/**
	 * Tells whether a symbol takes part in the weighting. The test is that
	 * the symbol's name is exactly three characters long -- presumably
	 * matching three-letter residue names while excluding gaps and
	 * ambiguity symbols; TODO confirm against the alphabet in use.
	 */
	private static boolean isResidue(Symbol sym) {
		return sym.getName().length()==3;
	}

}
