User:Chase-san/StatBufferSet

From Robowiki
< User:Chase-san
Revision as of 00:23, 9 July 2010 by Chase-san (talk | contribs) (→‎chase.s2.stat.StatData: Updated for new range support.)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

This is a set of stat buffer configuration to make all those nightmares of dealing with and updating multiple multidimensional VCS bins, go away.

This makes use of my Variable Dimension Array so be sure to get a copy of that as well.

This and all my other code in which I display on the robowiki falls under the ZLIB License.


How to use, the easy way.

//...

//In your setup.
SplitSet sset = new SplitSet();

//We add a split for the Lateral Velocity
sset.add(new double[]{2.0, 6.0}, 1);

//We add a split for the time change.
sset.add(new double[]{5, 12, 24, 46},2);

//We add a split for the distance
sset.add(new double[]{150.0, 300.0, 450.0}, 0);

//Notice how they do not have to be in order


//Create a stat buffer with 41 bins, based on our buffer set
StatBuffer sb = new StatBuffer(sset, 41);

//Create a segmentless stat buffer with 31 bins. You can have different number of bins!
StatBuffer sb2 = new StatBuffer(31);

StatBufferSet sbs = new StatBufferSet();
sbs.add(sb2, 1.0); //Unsegmented, it has a low weight.
sbs.add(sb, 100.0); //Segemented, it has a high weight.

// ...
//In your aiming

//The data we use
double[] data = new double[]{
	enemyDistance,
	absLateralVelocity,
	timeSinceLastVelChange
};

StatData sd = new StatData(data, 0);
double gf = sbs.getBestGF(sd);

// ...
//In your wave
StatData updateSD = new StatData(data, gf);
if(realWave)
	sbs.update(updateSD, 50);
else
	sbs.update(updateSD, 1);

Keep in mind you can add buffers to it at any time, with very little work you can also remove buffers. This could be very useful for dynamic segmentation!


chase.s2.stat.StatBuffer

package chase.s2.stat;

import chase.s2.misc.VDA;

/**
 * This is a visit count stat buffer. However, it is such a buffer where it may
 * have any number of segmentation or dimensionality.
 * 
 * For those uninformed, this is part of a statistical data mining system.
 * 
 * @author Chase
 */
public class StatBuffer {
	protected static final int scanWidth = 1;

	/** Handing the VDA manually is dangerous, and could wipe out all your data */
	private VDA<double[]> _binSet;
	public int _binTotal;
	public int[] _dataIndex;
	/** This is fine, since java can handle jagged arrays */
	public double[][] _splits;

	public double halfLife = -1;

	/**
	 * Constructs a new buffer
	 * 
	 * @param splits
	 *            The data defining the data splits, usually a jagged array
	 * @param dataIndex
	 *            the index of the raw stats each split is on
	 * @param binTotal
	 *            total number of indexes for this buffer
	 */
	public StatBuffer(double[][] splits, int[] dataIndex, int binTotal) {
		/* TODO: splits and dataIndex lengths don't equal throw exception */
		_binTotal = binTotal;
		_dataIndex = dataIndex;
		_splits = splits;

		int[] dimensions = new int[Math.max(1, splits.length)];
		dimensions[0] = 1;
		for (int i = 0; i < splits.length; ++i)
			dimensions[i] = splits[i].length + 1;

		_binSet = new VDA<double[]>(dimensions);
	}

	/**
	 * Initializes a given location in the stat buffer, used for memory
	 * conservation via lazy initialization.
	 */
	public void initialize(int[] pos) {
		double[] d = _binSet.get(pos);
		if (d == null) {
			_binSet.set(new double[_binTotal], pos);
		}
	}

	/**
	 * Constructs a new stat buffer, using the lazy SplitSet
	 * 
	 * @param ss
	 *            SplitSet to use
	 * @param binTotal
	 *            total number of indexes for this buffer
	 */
	public StatBuffer(SplitSet ss, int binTotal) {
		this(ss.getSplitSet(), ss.getIndexSet(), binTotal);
	}

	/**
	 * This creates a flat, single dimension stat buffer.
	 * 
	 * @param binTotal
	 *            total number of indexes for this buffer
	 */
	public StatBuffer(int binTotal) {
		this(new double[0][0], new int[0], binTotal);
	}

	/**
	 * This method splits the raw data as defined by the data index and split
	 * information. It outputs a data array which can be fed to the VDA to get
	 * the desired bin set.
	 * 
	 * @param data
	 *            raw data
	 * @return data indexes
	 */
	public int[] convertData(StatData data) {
		int[] indexes = new int[Math.max(1, _dataIndex.length)];
		if (_splits.length == 0)
			return indexes;
		for (int i = 0; i < indexes.length; ++i) {
			int index = 0;
			for (; index < _splits[i].length; ++index) {
				if (data._data[_dataIndex[i]] < _splits[i][index])
					break;
			}
			indexes[i] = index;
		}
		return indexes;
	}

	/**
	 * Gets the individual stat buffer for the give data.
	 */
	public double[] getStatBin(StatData data) {
		int[] indexes = convertData(data);
		initialize(indexes);
		return _binSet.get(indexes);
	}

	/**
	 * Sets the decay half-life of this buffer anything < 0 is infinite.
	 */
	public void setHalflife(double hlife) {
		if (hlife < 0) {
			halfLife = -1;
		} else {
			halfLife = hlife;
		}
	}

	/**
	 * Updates this stat buffer with the given raw data
	 */
	public void update(StatData data, double weight) {
		double[] bins = getStatBin(data);
		double index = GFToIndex(data._gf, _binTotal);

		if (halfLife > 0) {
			if (data.range) {
				double end = GFToIndex(data._max, _binTotal);
				updateBinRollingRange(bins, index, end, weight, halfLife);
			} else {
				updateBinRolling(bins, index, weight, halfLife);
			}
		} else {
			if (data.range) {
				double end = GFToIndex(data._max, _binTotal);
				updateBinRange(bins, index, end, weight);
			} else {
				updateBin(bins, index, weight);
			}
		}
	}

	/**
	 * Gets the best bin from the raw data
	 */
	public int getBestIndex(StatData data) {
		double[] bins = getStatBin(data);
		return getBestIndex(bins, scanWidth);
	}

	/**
	 * Gets the best guessfactor from the raw data
	 */
	public double getBestGF(StatData data) {
		return indexToGF(getBestIndex(data), _binTotal);
	}

	/**
	 * return the array that backs the VDA
	 */
	public Object[] getBackBuffer() {
		return _binSet.getBackingArray();
	}

	/**
	 * TODO: Replace this with some kind of externalized update method. That way
	 * it is easier to replace with rolling and different distributions
	 */
	public static final void updateBin(double[] bin, double index, double weight) {
		int iIndex = (int) Math.round(index);
		for (int i = iIndex; i < bin.length && i <= iIndex + 1; ++i) {
			double amount = Math.abs(i - index);
			if (amount < 1e-6)
				bin[i] += weight;
			else if (amount > 0 && amount < 1)
				bin[i] += amount * weight;
		}
	}

	/**
	 * Updates a bin set based on a gf range.
	 */
	public static final void updateBinRange(double[] bin, double start,
			double end, double weight) {
		for (int i = (int) (start); i < bin.length && i <= (int) (end + 1); ++i) {
			if (i >= start && i <= end) {
				bin[i] += weight;
			} else {
				bin[i] += Math.min(Math.abs(i - start), Math.abs(i - end))
						* weight;
			}
		}
	}

	/**
	 * Updates a bin set, with rolling averages.
	 */
	public static final void updateBinRolling(double[] bin, double index,
			double weight, double halflife) {
		double decay = computeDecayRate(halflife);
		for (int i = 0; i < bin.length; ++i)
			bin[i] *= decay;
		updateBin(bin, index, weight * (1 - decay));
	}

	/**
	 * Updates a bin set, with rolling averages and a gf range.
	 */
	public static final void updateBinRollingRange(double[] bin, double start,
			double end, double weight, double halflife) {
		double decay = computeDecayRate(halflife);
		for (int i = 0; i < bin.length; ++i)
			bin[i] *= decay;
		updateBinRange(bin, start, end, weight * (1 - decay));
	}

	/**
	 * Static method used to find the best index. Width is number of extra bins
	 * to each side to test.
	 */
	public static final int getBestIndex(double[] bin, int width) {
		double[] score = getBinScores(bin, width);
		int bestIndex = (bin.length - 1) / 2; /* TODO: This could be problematic */
		for (int i = 0; i < bin.length; ++i)
			if (score[i] > score[bestIndex])
				bestIndex = i;
		return bestIndex;
	}

	/**
	 * Gets the scores for each bin in the given stat bin.
	 */
	protected static final double[] getBinScores(double[] bin, int width) {
		double[] scores = new double[bin.length];
		for (int i = 0; i < bin.length; ++i) {
			double score = 0;
			// It is a little complicated in there, but things will
			// never go out of bounds
			for (int j = -width; j < width + 1; ++j) {
				int abs = Math.abs(j) + 1;
				if (i + j < 0 || i + j >= bin.length) {
					if (i == 0 || i == bin.length - 1) {
						score += bin[i - j] / abs;
					} else {
						int offset = (j < 0) ? -3 : 3;
						score += bin[i - j + offset] / abs;
					}
				} else {
					score += bin[i + j] / abs;
				}
			}
			scores[i] = score;
		}
		return scores;
	}

	/**
	 * Converts a bin index to a guessfactor based on total number of bins.
	 */
	public static final double indexToGF(int index, int bins) {
		index = Math.min(Math.max(0, index), bins - 1);
		double half = (bins - 1.0) / 2.0;
		return (index / half) - 1.0;
	}

	/**
	 * Converts a guessfactor to a bin index, based on number of bins. This
	 * returns a double, incase there is any need to know how much it over hangs
	 * each bin. Cast to an int for the raw information.
	 */
	public static final double GFToIndex(double gf, int bins) {
		gf = Math.min(Math.max(-1.0, gf), 1.0) + 1.0;
		double half = (bins - 1.0) / 2.0;
		return gf * half;
	}

	/**
	 * Computation of decay rate
	 */
	public static double computeDecayRate(double halfLife) {
		return Math.exp(Math.log(0.5) / halfLife);
	}
}

chase.s2.stat.StatBufferSet

package chase.s2.stat;

import java.util.ArrayList;
import java.util.List;

/**
 * This class handles multiple stat buffers, as if they were one.
 */
public final class StatBufferSet {
	private static final class StatItem {
		StatBuffer buffer;
		double weight;
	}

	private List<StatItem> autobin = new ArrayList<StatItem>();

	/**
	 * Add a stat buffer to this buffer set.
	 */
	public void add(StatBuffer bin, double weight) {
		StatItem si = new StatItem();
		si.buffer = bin;
		si.weight = weight;
		autobin.add(si);
	}

	/**
	 * Get a stat buffer from this buffer set.
	 */
	public StatBuffer get(int index) {
		return autobin.get(index).buffer;
	}

	/**
	 * Remove a statbuffer from this buffer set
	 */
	public void remove(int index) {
		autobin.remove(index);
	}

	/**
	 * Returns the number of stat buffers.
	 */
	public int size() {
		return autobin.size();
	}

	/**
	 * Gets the single best GF from each StatBuffer.
	 */
	public double[] getBestGFList(StatData data) {
		double[] output = new double[autobin.size()];
		int i = 0;
		for (StatItem s : autobin) {
			output[i++] = s.buffer.getBestGF(data);
		}
		return output;
	}

	/**
	 * Gets a list of all the bins for the raw data.
	 */
	private double[][] getBinList(StatData data) {
		double[][] output = new double[autobin.size()][];
		int i = 0;
		for (StatItem s : autobin) {
			output[i++] = s.buffer.getStatBin(data);
		}
		return output;
	}

	/**
	 * Gets the list of weights.
	 */
	private double[] getWeightList() {
		double[] output = new double[autobin.size()];
		int i = 0;
		for (StatItem s : autobin) {
			output[i++] = s.weight;
		}
		return output;
	}

	/**
	 * Gets the combined score data for the given stat data.
	 */
	public double[] getCombinedScoreData(StatData sd) {
		double[][] data = getBinList(sd);
		double[] weights = getWeightList();
		return getCombinedScoreData(data, weights);
	}

	/**
	 * This combines the data over all the bins in such a way that it can be
	 * read easily as if it were a single stat bin.
	 */
	private double[] getCombinedScoreData(double[][] data, double[] weights) {
		if (weights.length != data.length)
			throw new IllegalArgumentException(
					"Data length and weights length must match.");

		/* Determine the longest and shortest bin set */
		int shortest = Integer.MAX_VALUE;
		int longest = Integer.MIN_VALUE;
		for (double[] d : data) {
			if (d.length > longest)
				longest = d.length;
			if (d.length < shortest)
				shortest = d.length;
		}

		/* Get each of their scores, and record the largest of each. */
		double[][] score = new double[data.length][];
		double[] largest = new double[data.length];
		for (int i = 0; i < data.length; ++i) {
			score[i] = StatBuffer.getBinScores(data[i], StatBuffer.scanWidth);
			for (int j = 0; j < score[i].length; ++j)
				if (score[i][j] > largest[i])
					largest[i] = score[i][j];
		}

		/* Equalize each of the stat bins */
		for (int i = 0; i < data.length; ++i)
			for (int j = 0; j < score[i].length; ++j)
				score[i][j] /= largest[i];

		/* If they are equal, take the easy route */
		if (shortest == longest) {
			largest = new double[shortest];
			for (int i = 0; i < data.length; ++i)
				for (int j = 0; j < shortest; ++j)
					largest[j] += data[i][j] * weights[i];

			return largest;
		}

		/* Otherwise we have to do things the 'hard' way! */
		/* We use the longest and combine the sums from the others, based on gf */
		largest = new double[longest];
		for (int i = 0; i < data.length; ++i) {
			double[] tmpSum = new double[longest];
			/* Get the gf for the new length and multiply it by the old weight */
			for (int j = 0; j < score[i].length; ++j) {
				double index = StatBuffer.GFToIndex(StatBuffer.indexToGF(j,
						score[i].length), longest);
				StatBuffer.updateBin(tmpSum, index, score[i][j]);
			}
			/* Add to output buffer */
			for (int j = 0; j < longest; ++j) {
				largest[j] += tmpSum[j] * weights[i];
			}
		}

		return largest;
	}

	/**
	 * Gets the best GF based on weights.
	 */
	public double getBestGF(StatData data) {
		double[] score = getCombinedScoreData(data);
		int bestIndex = (score.length - 1) / 2;
		for (int i = 0; i < score.length; ++i)
			if (score[i] > score[bestIndex])
				bestIndex = i;
		return StatBuffer.indexToGF(bestIndex, score.length);
	}

	/**
	 * Updates this stat buffer handler with the given raw data
	 */
	public void update(StatData data, double weight) {
		for (StatItem s : autobin) {
			s.buffer.update(data, weight);
		}
	}
}

chase.s2.stat.SplitSet

package chase.s2.stat;

import java.util.ArrayDeque;
import java.util.Deque;

/**
 * For the really lazy!
 * 
 * @author Chase
 */
public class SplitSet {
	public Deque<double[]> split = new ArrayDeque<double[]>();
	public Deque<Integer> indexes = new ArrayDeque<Integer>();
	
	public void add(double[] d, int index) {
		split.add(d);
		indexes.add(index);
	}
	
	public double[][] getSplitSet() {
		Object[] obj = split.toArray();
		double[][] output = new double[obj.length][];
		for(int i=0; i<obj.length; ++i) {
			output[i] = (double[])obj[i];
		}
		return output;
	}
	
	public int[] getIndexSet() {
		Object[] obj = indexes.toArray();
		int[] output = new int[obj.length];
		for(int i=0; i<obj.length; ++i) {
			output[i] = (Integer)obj[i];
		}
		return output;
	}
}

chase.s2.stat.StatData

package chase.s2.stat;

/**
 * This holds data.
 * 
 * @author Chase
 */
public class StatData {
	public double _data[], _gf, _max;
	public boolean range;
	public StatData(double[] data, double gf) {
		_data = data;
		_gf = gf;
		range = false;
	}
	public StatData(double[] data, double min, double max) {
		_data = data;
		_gf = min;
		_max = max;
		range = true;
	}
}