User:Chase-san/StatBufferSet

From Robowiki
< User:Chase-san
Revision as of 15:14, 4 July 2010 by Chase-san (talk | contribs) (StatBufferSet handling)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

This is a set of stat buffer configuration to make all those nightmares of dealing with and updating multiple multidimensional VCS bins, go away.

chase.s2.stat.StatBuffer

package chase.s2.stat;

import org.csdgn.utils.VDA;

/**
 * This is a visit count stat buffer. However, it is such a buffer where it may
 * have any number of segmentation or dimensionality.
 * 
 * For those uninformed, this is part of a statistical data mining system.
 * 
 * @author Chase
 */
public class StatBuffer {
	protected static final int scanWidth = 1;
	
	/** Handing the VDA manually is dangerous, and could wipe out all your data */
	private VDA<double[]> _binSet;
	public int _binTotal;
	public int[] _dataIndex;
	/** This is fine, since java can handle jagged arrays */
	public double[][] _splits;
	
	/**
	 * Constructs a new buffer
	 * @param splits The data defining the data splits, usually a jagged array
	 * @param dataIndex the index of the raw stats each split is on
	 * @param binTotal total number of indexes for this buffer
	 */
	public StatBuffer(double[][] splits, int[] dataIndex, int binTotal) {
		/* TODO: splits and dataIndex lengths don't equal throw exception */
		_binTotal = binTotal;
		_dataIndex = dataIndex;
		_splits = splits;
		
		int[] dimensions = new int[Math.max(1, splits.length)];
		dimensions[0] = 1;
		for(int i=0; i<splits.length; ++i)
			dimensions[i] = splits[i].length + 1;
		
		_binSet = new VDA<double[]>(dimensions);
		
		Object[] obj = _binSet.getBackingArray();
		for(int i=0; i<obj.length; ++i) {
			double[] bins = new double[binTotal];
			for(int j=0; j<binTotal; ++j)
				bins[j] = 0;
			obj[i] = bins;
		}
	}
	
	/**
	 * Constructs a new stat buffer, using the lazy SplitSet
	 * @param ss SplitSet to use
	 * @param binTotal total number of indexes for this buffer
	 */
	public StatBuffer(SplitSet ss, int binTotal) {
		this(ss.getSplitSet(),ss.getIndexSet(),binTotal);
	}
	
	/**
	 * This creates a flat, single dimension stat buffer.
	 * @param binTotal total number of indexes for this buffer
	 */
	public StatBuffer(int binTotal) {
		this(new double[0][0], new int[0], binTotal);
	}
	
	/**
	 * This method splits the raw data as defined by the data index
	 * and split information. It outputs a data array which can be
	 * fed to the VDA to get the desired bin set. 
	 * @param data raw data
	 * @return data indexes
	 */
	public int[] convertData(StatData data) {
		int[] indexes = new int[Math.max(1, _dataIndex.length)];
		if(_splits.length == 0) return indexes;
		for(int i = 0; i < indexes.length; ++i) {
			int index = 0;
			for(;index < _splits[i].length; ++index) {
				if(data._data[_dataIndex[i]] <  _splits[i][index]) break;
			}
			indexes[i] = index;
		}
		return indexes;
	}
	
	/**
	 * Gets the individual stat buffer for the give data.
	 */
	public double[] getStatBin(StatData data) {
		return _binSet.get(convertData(data));
	}
	
	/**
	 * Updates this stat buffer with the given raw data
	 */
	public void update(StatData data, double weight) {
		double[] bins = getStatBin(data);
		double index = GFToIndex(data._gf, _binTotal);
		updateBin(bins, index, weight);
	}
	
	/**
	 * Gets the best bin from the raw data
	 */
	public int getBestIndex(StatData data) {
		double[] bins = getStatBin(data);
		return getBestIndex(bins, scanWidth);
	}
	
	/**
	 * Gets the best guessfactor from the raw data
	 */
	public double getBestGF(StatData data) {
		return indexToGF(getBestIndex(data),_binTotal);
	}
	
	/**
	 * TODO: Replace this with some kind of externalized update method.
	 * That way it is easier to replace with rolling and different distributions
	 */
	public static final void updateBin(double[] bin, double index, double weight) {
		for(int i=0; i<bin.length; ++i) {
			double amount = Math.abs(i - index);
			if(amount == 0) bin[i] += weight;
			if(amount > 0 && amount < 1) {
				bin[i] += amount * weight;
			}
		}
	}
	
	/**
	 * Static method used to find the best index. Width is number
	 * of extra bins to each side to test.
	 */
	public static final int getBestIndex(double[] bin, int width) {
		double[] score = getBinScores(bin,width);
		int bestIndex = 0; /* TODO: This could be problematic */
		for(int i=0; i<bin.length; ++i)
			if(score[i] > score[bestIndex])
				bestIndex = i;
		return bestIndex;
	}
	
	/**
	 * Gets the scores for each bin in the given stat bin.
	 */
	protected static final double[] getBinScores(double[] bin, int width) {
		double[] scores = new double[bin.length];
		for(int i=0; i<bin.length; ++i) {
			double score = 0;
			//It is a little complicated in there, but things will
			//   never go out of bounds
			for(int j=-width; j<width+1; ++j) {
				int abs = Math.abs(j) + 1;
				if(i+j < 0 || i+j >= bin.length) {
					if(i == 0 || i == bin.length - 1) {
						score += bin[i - j] / abs;
					} else {
						int offset = (j < 0) ? -3 : 3;
						score += bin[i - j + offset] / abs;
					}
				} else {
					score += bin[i + j] / abs;
				}
			}
			scores[i] = score;
		}
		return scores;
	}
	
	/**
	 * Converts a bin index to a guessfactor based on total number of bins.
	 */
	public static final double indexToGF(int index, int bins) {
		index = Math.min(Math.max(0,index),bins - 1);
		double half = (bins-1.0)/2.0;
		return (index / half) - 1.0;
	}
	
	/**
	 * Converts a guessfactor to a bin index, based on number of bins. This
	 * returns a double, incase there is any need to know how much it over
	 * hangs each bin. Cast to an int for the raw information.
	 */
	public static final double GFToIndex(double gf, int bins) {
		gf = Math.min(Math.max(-1.0,gf),1.0) + 1.0;
		double half = (bins-1.0)/2.0;
		return gf * half;
	}
}

chase.s2.stat.StatBufferSet

package chase.s2.stat;

import java.util.ArrayList;
import java.util.List;

/**
 * This class handles multiple stat buffers, as if they were one.
 */
public final class StatBufferSet {
	private static final class StatItem {
		StatBuffer buffer;
		double weight;
	}
	
	private List<StatItem> autobin = new ArrayList<StatItem>();
	
	/**
	 * Add a stat buffer to this buffer set.
	 */
	public void add(StatBuffer bin, double weight) {
		StatItem si = new StatItem();
		si.buffer = bin;
		si.weight = weight;
		autobin.add(si);
	}
	
	/**
	 * Gets the single best GF from each StatBuffer.
	 */
	public double[] getBestGFList(StatData data) {
		double[] output = new double[autobin.size()];
		int i = 0;
		for(StatItem s : autobin) {
			output[i++] = s.buffer.getBestGF(data);
		}
		return output;
	}
	
	/**
	 * Gets a list of all the bins for the raw data.
	 */
	private double[][] getBinList(StatData data) {
		double[][] output = new double[autobin.size()][];
		int i = 0;
		for(StatItem s : autobin) {
			output[i++] = s.buffer.getStatBin(data);
		}
		return output;
	}
	
	/**
	 * Gets the list of weights.
	 */
	private double[] getWeightList() {
		double[] output = new double[autobin.size()];
		int i = 0;
		for(StatItem s : autobin) {
			output[i++] = s.weight;
		}
		return output;
	}
	
	public double[] getCombinedScoreData(StatData sd) {
		double[][] data = getBinList(sd);
		double[] weights = getWeightList();
		return getCombinedScoreData(data,weights);
	}
	
	/**
	 * This combines the data over all the bins in such
	 * a way that it can be read easily as if it were a
	 * single stat bin.
	 */
	private double[] getCombinedScoreData(double[][] data, double[] weights) {
		if(weights.length != data.length)
			throw new IllegalArgumentException("Data length and weights length must match.");
		
		/* Determine the longest and shortest bin set */
		int shortest = Integer.MAX_VALUE;
		int longest = Integer.MIN_VALUE;
		for(double[] d : data) {
			if(d.length > longest) longest = d.length;
			if(d.length < shortest) shortest = d.length;
		}
		
		/* Get each of their scores, and record the largest of each. */
		double[][] score = new double[data.length][];
		double[] largest = new double[data.length];
		for(int i=0; i<data.length; ++i) {
			score[i] = StatBuffer.getBinScores(data[i], StatBuffer.scanWidth);
			for(int j=0;j<score[i].length;++j)
				if(score[i][j] > largest[i])
					largest[i] = score[i][j]; 
		}
		
		/* Equalize each of the stat bins */
		for(int i=0; i<data.length; ++i)
			for(int j=0;j<score[i].length;++j)
				score[i][j] /= largest[i];
		
		/* If they are equal, take the easy route */
		if(shortest == longest) {		
			largest = new double[shortest];
			for(int i=0; i<data.length; ++i)
				for(int j=0;j<shortest;++j)
					largest[j] = data[i][j] * weights[i];
			
			return largest;
		}
		
		/* Otherwise we have to do things the 'hard' way! */
		/* We use the longest and combine the sums from the others, based on gf */
		largest = new double[longest];
		for(int i=0; i<data.length; ++i) {
			double[] tmpSum = new double[longest];
			/* Get the gf for the new length and multiply it by the old weight */
			for(int j=0; j<score[i].length; ++j) {
				double index = StatBuffer.GFToIndex(StatBuffer.indexToGF(j, score[i].length), longest);
				StatBuffer.updateBin(tmpSum, index, score[i][j]);
			}
			/* Add to output buffer */
			for(int j=0; j<longest; ++j) {
				largest[j] += tmpSum[j] * weights[i];
			}
		}
		
		return largest;
	}
	
	/**
	 * Gets the best GF based on weights.
	 */
	public double getBestGF(StatData data) {
		double[] score = getCombinedScoreData(data);
		int bestIndex = 0; /* TODO: This could be problematic */
		for(int i=0; i<score.length; ++i)
			if(score[i] > score[bestIndex])
				bestIndex = i;
		return StatBuffer.indexToGF(bestIndex, score.length);
	}
	
	/**
	 * Updates this stat buffer handler with the given raw data
	 */
	public void update(StatData data, double weight) {
		for(StatItem s : autobin) {
			s.buffer.update(data, weight);
		}
	}

chase.s2.stat.SplitSet

package chase.s2.stat;

import java.util.ArrayDeque;
import java.util.Deque;

/**
 * For the really lazy!
 * 
 * @author Chase
 */
public class SplitSet {
	public Deque<double[]> split = new ArrayDeque<double[]>();
	public Deque<Integer> indexes = new ArrayDeque<Integer>();
	
	public void add(double[] d, int index) {
		split.add(d);
		indexes.add(index);
	}
	
	public double[][] getSplitSet() {
		Object[] obj = split.toArray();
		double[][] output = new double[obj.length][];
		for(int i=0; i<obj.length; ++i) {
			output[i] = (double[])obj[i];
		}
		return output;
	}
	
	public int[] getIndexSet() {
		Object[] obj = indexes.toArray();
		int[] output = new int[obj.length];
		for(int i=0; i<obj.length; ++i) {
			output[i] = (Integer)obj[i];
		}
		return output;
	}
}

chase.s2.stat.StatData

package chase.s2.stat;

/**
 * This holds data.
 * 
 * @author Chase
 */
public class StatData {
	public double _data[], _gf;
	public StatData(double[] data, double gf) {
		_data = data;
		_gf = gf;
	}
}