Difference between revisions of "User:Skilgannon/KDTree"

From Robowiki
Jump to navigation Jump to search
(update!)
(Some tweaks in bounds memory layout, split on mean of largest variance)
Line 35: Line 35:
 
   //one big self-expanding array to keep all the node bounding boxes so that they stay in cache
 
   //one big self-expanding array to keep all the node bounding boxes so that they stay in cache
 
   // node bounds available at:
 
   // node bounds available at:
   //low:  2 * _dimensions * node.index
+
   //low:  2 * _dimensions * node.index + 2 * dim
   //high: 2 * _dimensions * node.index + _dimensions
+
   //high: 2 * _dimensions * node.index + 2 * dim + 1
 
   private ContiguousDoubleArrayList nodeMinMaxBounds;
 
   private ContiguousDoubleArrayList nodeMinMaxBounds;
 
/*
 
/*
Line 125: Line 125:
 
       _dimensions = dimensions;
 
       _dimensions = dimensions;
 
    
 
    
       nodeMinMaxBounds = new ContiguousDoubleArrayList(2 * dimensions);
+
  //initialise this so that it ends up in 'old' memory
 +
       nodeMinMaxBounds = new ContiguousDoubleArrayList(512 * 1024 / 8 + 2*_dimensions);
 
       mem_recycle = new double[_bucketSize*dimensions];
 
       mem_recycle = new double[_bucketSize*dimensions];
 
    
 
    
 
       bounds_template = new double[2*_dimensions];
 
       bounds_template = new double[2*_dimensions];
       Arrays.fill(bounds_template,0,_dimensions,Double.POSITIVE_INFINITY);
+
       Arrays.fill(bounds_template,Double.NEGATIVE_INFINITY);
       Arrays.fill(bounds_template,_dimensions,2*_dimensions,Double.NEGATIVE_INFINITY);
+
       for(int i = 0, max = 2*_dimensions; i < max; i+=2)
 +
        bounds_template[i] = Double.POSITIVE_INFINITY;
 
    
 
    
 
   //and.... start!
 
   //and.... start!
Line 195: Line 197:
 
//If you want lowest priority items kept, negate your values
 
//If you want lowest priority items kept, negate your values
 
   private static class PrioQueue<S>{
 
   private static class PrioQueue<S>{
      //ArrayList<Element> elements;
+
 
 
       Object[] elements;
 
       Object[] elements;
 
       double[] priorities;
 
       double[] priorities;
 
       private double minPrio;
 
       private double minPrio;
 
       private int size;
 
       private int size;
      //private int min;
+
 
 
       PrioQueue(int size, boolean prefill){
 
       PrioQueue(int size, boolean prefill){
 
        
 
        
Line 332: Line 334:
 
       }
 
       }
 
       private final double pointRectDist(double[] location){
 
       private final double pointRectDist(double[] location){
         int minOffset = 2*index*_dimensions;
+
         int offset = 2*(index+1)*_dimensions;
        int maxOffset = minOffset+_dimensions;
 
 
         double distance=0;
 
         double distance=0;
 
         double[] array = nodeMinMaxBounds.array;
 
         double[] array = nodeMinMaxBounds.array;
 
         for(int i = _dimensions; i-- > 0; ){
 
         for(int i = _dimensions; i-- > 0; ){
 +
            offset -= 2;
 
             double diff = 0;
 
             double diff = 0;
             double bv = array[i+minOffset];
+
             double bv = array[offset];
 
             if(bv > location[i])
 
             if(bv > location[i])
 
               diff = bv-location[i];
 
               diff = bv-location[i];
 
             else{
 
             else{
               bv=array[i+maxOffset];
+
               bv=array[offset+1];
 
               if(location[i]>bv)
 
               if(location[i]>bv)
 
                   diff = location[i]-bv;
 
                   diff = location[i]-bv;
Line 381: Line 383:
 
       private void expandBounds(double[] location){
 
       private void expandBounds(double[] location){
 
         entries++;
 
         entries++;
         int minOffset = index*2*_dimensions;
+
         int mio = index*2*_dimensions;
        int maxOffset = index*2*_dimensions+_dimensions;
 
 
         for(int i = 0; i < _dimensions;i++){
 
         for(int i = 0; i < _dimensions;i++){
            int mio = minOffset+i,mao=maxOffset+i;
+
             nodeMinMaxBounds.array[mio] = Math.min(nodeMinMaxBounds.array[mio++],location[i]);
             nodeMinMaxBounds.array[mio] = Math.min(nodeMinMaxBounds.array[mio],location[i]);
+
             nodeMinMaxBounds.array[mio] = Math.max(nodeMinMaxBounds.array[mio++],location[i]);
             nodeMinMaxBounds.array[mao] = Math.max(nodeMinMaxBounds.array[mao],location[i]);
+
            //mio++;
 
         }
 
         }
 
       }
 
       }
Line 396: Line 397:
 
       }
 
       }
 
       private void split(){
 
       private void split(){
         double diff = -1;
+
         // double SA = Double.POSITIVE_INFINITY;
 
         int offset = index*2*_dimensions;
 
         int offset = index*2*_dimensions;
 +
       
 +
        double diff = 0;
 
         for(int i = 0; i < _dimensions; i++){
 
         for(int i = 0; i < _dimensions; i++){
             double min = nodeMinMaxBounds.array[offset+i];
+
             double min = nodeMinMaxBounds.array[offset];
             double max = nodeMinMaxBounds.array[offset+_dimensions+i];
+
             double max = nodeMinMaxBounds.array[offset+1];
             if(max - min > diff){
+
             if(max-min>diff){
               diff = max - min;
+
               double mean = 0;
               splitVal = 0.5*(max + min);
+
               for(int j = 0; j < entries; j++)
               splitDim = i;
+
                  mean += pointLocations.array[i+_dimensions*j];
 +
           
 +
              mean = mean/entries;
 +
              double varianceSum = 0;
 +
           
 +
              for(int j = 0; j < entries; j++)
 +
                  varianceSum += sqr(mean-pointLocations.array[i+_dimensions*j]);
 +
           
 +
               if(varianceSum>diff*entries){
 +
                  diff = varianceSum/entries;
 +
                  splitVal = mean;
 +
                  splitDim = i;
 +
              }
 
             }
 
             }
 +
            offset += 2;
 
         }
 
         }
 
        
 
        
 +
       
 +
        if(splitVal == Double.POSITIVE_INFINITY)
 +
            splitVal = Double.MAX_VALUE;
 +
        else if(splitVal == Double.NEGATIVE_INFINITY)
 +
            splitVal = Double.MIN_VALUE;
 +
        else if(splitVal == nodeMinMaxBounds.array[index*2*_dimensions + 2*splitDim + 1])
 +
            splitVal = nodeMinMaxBounds.array[index*2*_dimensions + 2*splitDim]; 
 
        
 
        
 
         less = new Node(mem_recycle);//recycle that memory!
 
         less = new Node(mem_recycle);//recycle that memory!
Line 414: Line 437:
 
         //reduce garbage by factor of _bucketSize by recycling this array
 
         //reduce garbage by factor of _bucketSize by recycling this array
 
         double[] pointLocation = new double[_dimensions];
 
         double[] pointLocation = new double[_dimensions];
 +
        offset = -_dimensions;
 
         for(int i = 0; i < entries; i++){
 
         for(int i = 0; i < entries; i++){
             System.arraycopy(pointLocations.array,i*_dimensions,pointLocation,0,_dimensions);
+
             //System.arraycopy(pointLocations.array,i*_dimensions,pointLocation,0,_dimensions);
 +
            offset += _dimensions;
 +
            for(int j = _dimensions; j-- > 0;){
 +
              pointLocation[j] = pointLocations.array[j+offset];
 +
            }
 
             T load = pointPayloads.get(i);
 
             T load = pointPayloads.get(i);
 
          
 
          
Line 460: Line 488:
 
       }
 
       }
 
       ContiguousDoubleArrayList add(double[] da){
 
       ContiguousDoubleArrayList add(double[] da){
         if(size + da.length >= array.length){
+
         if(size + da.length > array.length){
 
             array = Arrays.copyOf(array,(array.length+da.length)*2);
 
             array = Arrays.copyOf(array,(array.length+da.length)*2);
 +
            //System.out.println("Doubling!");
 
         }
 
         }
 +
       
 
         System.arraycopy(da,0,array,size,da.length);
 
         System.arraycopy(da,0,array,size,da.length);
 
         size += da.length;
 
         size += da.length;

Revision as of 13:03, 18 July 2013

/*
** KDTree.java by Julian Kent
** Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License
** See full licensing details here: http://creativecommons.org/licenses/by-nc-sa/3.0/
** For additional licensing rights please contact jkflying@gmail.com
**
** Example usage is given in the (commented-out) main method, which also benchmarks this tree against Rednaxela's Gen2 tree
*/


package jk.mega;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
//import ags.utils.*;
//import ags.utils.dataStructures.*;

/**
 * A bucketed k-d tree that maps points in a fixed number of dimensions to payloads
 * and answers k-nearest-neighbour queries.
 *
 * <p>All distances handled and reported by this class are <em>squared</em> Euclidean
 * distances (no square root is ever taken).
 *
 * <p>Leaves hold their points in one flat {@code double[]}; every node's bounding box
 * lives in a single shared array ({@link #nodeMinMaxBounds}). A leaf tries to split
 * each time its size reaches a multiple of {@code _bucketSize}.
 *
 * @param <T> type of the payload stored with each point
 */
public class KDTree<T>{

//use a big bucketSize so that we have less node bounds (for more cache hits) and better splits
   private static final int  _bucketSize = 50;

   private final int _dimensions;
   //number of live nodes; also the index handed to the next node that is created
   private int _nodes;
   private Node root;

   //prevent GC from having to collect _bucketSize*dimensions*8 bytes each time a leaf splits
   private double[] mem_recycle;

   //the starting values for bounding boxes, for easy access
   //(low = +infinity, high = -infinity, interleaved per dimension)
   private final double[] bounds_template;

   //one big self-expanding array to keep all the node bounding boxes so that they stay in cache
   // node bounds available at:
   //low:  2 * _dimensions * node.index + 2 * dim
   //high: 2 * _dimensions * node.index + 2 * dim + 1
   private ContiguousDoubleArrayList nodeMinMaxBounds;
/*
   public static void main(String[] args){
      int dims = 1;
      int size = 2000000;
      int testsize = 1;
      int k = 40;
      int iterations = 1;
      System.out.println(
         "Config:\n"
         + "No JIT Warmup\n"
         + "Tested on random data.\n" 
         + "Training and testing points shared across iterations.\n"
         + "Searches interleaved.");
      System.out.println("Num points:     " + size);
      System.out.println("Num searches:   " + testsize);
      System.out.println("Dimensions:     " + dims);
      System.out.println("Num Neighbours: " + k);
      System.out.println();
      ArrayList<double[]> locs = new ArrayList<double[]>(size);
      for(int i = 0; i < size; i++){
         double[] loc = new double[dims];
         for(int j = 0; j < dims; j++)
            loc[j] = Math.random();
         locs.add(loc);
      }
      ArrayList<double[]> testlocs = new ArrayList<double[]>(testsize);
      for(int i = 0; i < testsize; i++){
         double[] loc = new double[dims];
         for(int j = 0; j < dims; j++)
            loc[j] = Math.random();
         testlocs.add(loc);
      }
      for(int r = 0; r < iterations; r++){
         long t1 = System.nanoTime();
         KDTree<double[]> t = new KDTree<double[]>(dims);// This tree
         for(int i = 0; i < size; i++){
            t.addPoint(locs.get(i),locs.get(i));
         }
         long t2 = System.nanoTime();
         KdTree<double[]> rt = new KdTree.Euclidean<double[]>(dims,null); //Rednaxela Gen2
         for(int i = 0; i < size; i++){
            rt.addPoint(locs.get(i),locs.get(i));
         }
         long t3 = System.nanoTime();
      
         long jtn = 0;
         long rtn = 0;
         long mjtn = 0;
         long mrtn = 0;
      
         double dist1 = 0, dist2 = 0;
         for(int i = 0; i < testsize; i++){
            long t4 = System.nanoTime();
            dist1 += t.nearestNeighbours(testlocs.get(i),k).iterator().next().distance;
            long t5 = System.nanoTime();
            dist2 += rt.nearestNeighbor(testlocs.get(i),k,true).iterator().next().distance;
            long t6 = System.nanoTime();
            long t7 = System.nanoTime();
            jtn += t5 - t4 - (t7 - t6);
            rtn += t6 - t5 - (t7 - t6); 
            mjtn = Math.max(mjtn,t5 - t4 - (t7 - t6));
            mrtn = Math.max(mrtn,t6 - t5 - (t7 - t6));
         }
      
         System.out.println("Accuracy: " + (Math.abs(dist1-dist2) < 1e-10?"100%":"BROKEN!!!"));
         if(Math.abs(dist1-dist2) > 1e-10){
            System.out.println("dist1: " + dist1 + "    dist2: " + dist2);
         }
         long jts = t2 - t1;
         long rts = t3 - t2;
         System.out.println("Iteration:      " + (r+1) + "/" + iterations);
      
         System.out.println("This tree add avg:  " + jts/size + " ns");
         System.out.println("Reds tree add avg:  " + rts/size + " ns");
      
         System.out.println("This tree knn avg:  " + jtn/testsize + " ns");
         System.out.println("Reds tree knn avg:  " + rtn/testsize + " ns");
         System.out.println("This tree knn max:  " + mjtn + " ns");
         System.out.println("Reds tree knn max:  " + mrtn + " ns");
         System.out.println();
      }
   }
   // */

   /**
    * Creates an empty tree.
    *
    * @param dimensions number of coordinates per point; must be at least 1
    * @throws IllegalArgumentException if {@code dimensions} is less than 1
    */
   public KDTree(int dimensions){
      if(dimensions < 1)
         throw new IllegalArgumentException("dimensions must be at least 1, got " + dimensions);
      _dimensions = dimensions;
   
   //initialise this so that it ends up in 'old' memory
   //(author's intent: a large up-front allocation of about 512 KiB of doubles)
      nodeMinMaxBounds = new ContiguousDoubleArrayList(512 * 1024 / 8 + 2*_dimensions);
      mem_recycle = new double[_bucketSize*dimensions];
   
   //template for an empty bounding box: every low is +infinity, every high is -infinity
      bounds_template = new double[2*_dimensions];
      Arrays.fill(bounds_template,Double.NEGATIVE_INFINITY);
      for(int i = 0, max = 2*_dimensions; i < max; i+=2)
         bounds_template[i] = Double.POSITIVE_INFINITY;
   
   //and.... start!
      root = new Node();
   }

   /**
    * @return the number of nodes (stems and leaves) currently in the tree
    */
   public int nodes(){
      return _nodes;
   }

   /**
    * Stores a point and its payload. The coordinates are copied, so the caller may
    * reuse {@code location} afterwards.
    *
    * @param location coordinates of the point; must have exactly as many entries as
    *                 the tree has dimensions
    * @param payload  value returned with this point by searches
    * @return the total number of points now stored in the tree
    * @throws IllegalArgumentException if {@code location} has the wrong length
    */
   public int addPoint(double[] location, T payload){
   //a longer array would be appended whole to the leaf storage and break its stride,
   //a shorter one would fail half-way through updating the bounds, so reject both up front
      if(location.length != _dimensions)
         throw new IllegalArgumentException(
            "location has " + location.length + " coordinates, tree has " + _dimensions + " dimensions");
   
      Node addNode = root;
   //Do a Depth First Search to find the Node where 'location' should be stored
      while(addNode.pointLocations == null){
         addNode.expandBounds(location);
         if(location[addNode.splitDim] < addNode.splitVal)
            addNode = addNode.less;
         else
            addNode = addNode.more;
      }
      addNode.expandBounds(location);
   
      int nodeSize = addNode.add(location,payload);
   
      if(nodeSize % _bucketSize == 0)
      //try splitting again once every time the node passes a _bucketSize multiple
         addNode.split();
   
      return root.entries;
   }


   /**
    * Finds the points closest to {@code searchLocation}.
    *
    * @param searchLocation query coordinates; needs at least as many entries as the
    *                       tree has dimensions (extra entries are ignored)
    * @param K              maximum number of neighbours wanted
    * @return at most {@code K} results ordered furthest first; fewer when the tree
    *         holds fewer than {@code K} points. Each {@code distance} is the squared
    *         Euclidean distance to the query.
    * @throws IllegalArgumentException if {@code K} is negative or
    *         {@code searchLocation} is too short
    */
   public ArrayList<SearchResult<T>> nearestNeighbours(double[] searchLocation, int K){
      if(K < 0)
         throw new IllegalArgumentException("K must not be negative, got " + K);
      if(searchLocation.length < _dimensions)
         throw new IllegalArgumentException(
            "searchLocation has " + searchLocation.length + " coordinates, tree has " + _dimensions + " dimensions");
   
   //never ask for more neighbours than there are points: this avoids placeholder
   //results and the zero-capacity queue that used to blow up on insertion
      final int wanted = Math.min(K, root.entries);
      ArrayList<SearchResult<T>> returnResults = new ArrayList<SearchResult<T>>(wanted);
      if(wanted == 0)
         return returnResults;
   
      Node searchNode = null;
      ArrayDeque<Node> stack = new ArrayDeque<Node>(50);
      PrioQueue<T> results = new PrioQueue<T>(wanted);
   
      stack.push(root);
   
   //needMore stays true until the queue holds 'wanted' real points; until then no
   //subtree may be pruned
      boolean needMore = true;
      double bestDist = Double.POSITIVE_INFINITY;
      while(stack.size() > 0 ){
         searchNode = stack.pop();
      
         if(needMore || bestDist >= searchNode.pointRectDist(searchLocation))
            if(searchNode.search(searchLocation,stack,results)){
               bestDist = -results.peekPrio();
               needMore = bestDist == Double.POSITIVE_INFINITY;
            }
      
      }
   
      for(int i = wanted; i-- > 0;){//Reverse (furthest first, like Rednaxela Gen2)
         //a slot still at -infinity was never filled (offer() only accepts priorities
         //strictly greater than the current minimum), so it carries no payload
         if(results.priorities[i] == Double.NEGATIVE_INFINITY)
            continue;
         @SuppressWarnings("unchecked") //only values of type T are ever offered to 'results'
         T payload = (T) results.elements[i];
         returnResults.add(new SearchResult<T>(-results.priorities[i],payload));
      }
      return returnResults;
   }
   

     //NB! This Priority Queue keeps things with the HIGHEST priority. 
//If you want lowest priority items kept, negate your values
   /**
    * Fixed-capacity queue kept sorted by descending priority in two parallel arrays.
    * It starts out full of placeholder slots (null element, priority -infinity);
    * accepting a new entry pushes the current last slot out.
    */
   private static class PrioQueue<S>{

      final Object[] elements;
      final double[] priorities;
      //priority of the last slot, i.e. the bar a new entry has to beat
      private double minPrio = Double.NEGATIVE_INFINITY;
      private final int size;

      /**
       * @param size capacity of the queue; every slot starts as a placeholder
       */
      PrioQueue(int size){
      
         elements = new Object[size];
         priorities = new double[size];
         Arrays.fill(priorities,Double.NEGATIVE_INFINITY);
         this.size = size;
      }
       //uses O(log(n)) comparisons and one big shift of size O(N)
       //and is MUCH simpler than a heap --> faster on small sets, faster JIT
       //returns true if the value was accepted
      boolean offer(S value, double priority){
         
         //is this point worthy of joining the exulted ranks?
         //(a zero-capacity queue accepts nothing; NaN priorities fail the comparison)
         if(size > 0 && priority > minPrio){
            
            addNoGrow(value,priority);
            
            return true;
         }
         return false;
      }
      
      /**
       * Inserts at the sorted position and drops the last slot. The caller must have
       * checked that {@code priority} beats {@link #minPrio}, which guarantees the
       * insertion index is inside the array.
       */
      void addNoGrow(S value, double priority){
         int index = searchFor(priority);
         int nextIndex = index + 1;
         int length = size - nextIndex;
         System.arraycopy(elements,index,elements,nextIndex,length);
         System.arraycopy(priorities,index,priorities,nextIndex,length);
         elements[index]=value;
         priorities[index]=priority;
            
         minPrio = priorities[size-1];
      }
     
   
      /**
       * Binary search over the descending priorities.
       *
       * @return index of the first slot whose priority is not greater than {@code priority}
       */
      int searchFor(double priority){
         int i = size-1;
         int j = 0;   
         while(i>=j){
            int index = (i+j)>>>1;
         
            if( priorities[index] > priority)
               j = index+1;
            else 
               i = index-1;
         }
         return j;
      }
      /**
       * @return the priority of the last slot (-infinity while placeholders remain)
       */
      double peekPrio(){
         return minPrio;
      }
   }


   /**
    * One neighbour returned by {@link KDTree#nearestNeighbours(double[], int)}.
    *
    * @param <S> payload type
    */
   public static class SearchResult<S>{
      /** Squared Euclidean distance between the query and this point. */
      public double distance;
      /** Payload that was stored with this point. */
      public S payload;
      SearchResult(double dist, S load){
         distance = dist;
         payload = load;
      }
   }

   /**
    * A tree node. While {@code pointLocations} is non-null the node is a leaf that
    * stores points; after a successful {@link #split()} it becomes a stem that only
    * routes to {@code less} / {@code more}.
    */
   private class Node {
   
   //for accessing bounding box data 
   // - if trees weren't so unbalanced might be better to use an implicit heap?
      int index;
      
   //keep track of size of subtree
      int entries;
   
   //leaf
      ContiguousDoubleArrayList pointLocations ;
      ArrayList<T> pointPayloads = new ArrayList<T>(_bucketSize);
      
   //stem
      Node less, more;
      int splitDim;
      double splitVal;
   
      private Node(){
         this(new double[_bucketSize*_dimensions]);
      }
      /**
       * @param pointMemory array used as (initially empty) storage for this leaf's points
       */
      private Node(double[] pointMemory){
         pointLocations = new ContiguousDoubleArrayList(pointMemory);
         index = _nodes++;
         //reserve this node's bounding box slot, initialised to "empty"
         nodeMinMaxBounds.add(bounds_template);
      }
      /**
       * @return squared distance from {@code location} to this node's bounding box
       *         (0 when the location is inside the box)
       */
      private final double pointRectDist(double[] location){
         //start one past this node's last bound pair and walk backwards
         int offset = 2*(index+1)*_dimensions;
         double distance=0;
         double[] array = nodeMinMaxBounds.array;
         for(int i = _dimensions; i-- > 0; ){
            offset -= 2;
            double diff = 0;
            double bv = array[offset];
            if(bv > location[i])
               diff = bv-location[i];
            else{
               bv=array[offset+1];
               if(location[i]>bv)
                  diff = location[i]-bv;
            }
            distance += sqr(diff);
         }
         return distance;
      }
      /**
       * @param index position of the stored point within this leaf
       * @return squared distance from {@code location} to that point
       */
      private final double pointDist(double[] location, int index){
         double distance = 0;
         int offset = index*_dimensions;
         for(int i = _dimensions; i-- > 0;)
            distance += sqr(pointLocations.array[offset+i] - location[i]);
         return distance;
      }
      /**
       * Stem: pushes both children so that the one on the query's side is popped first.
       * Leaf: offers every stored point to {@code results}.
       *
       * @return true if at least one point was accepted into {@code results}
       */
      private boolean search(double[] searchLocation, ArrayDeque<Node> stack, PrioQueue<T> results){
      
         if(pointLocations == null){
            if(searchLocation[splitDim] < splitVal){
               stack.push(more);
               stack.push(less);//less will be popped first
            }
            else{
               stack.push(less);
               stack.push(more);//more will be popped first
            }
            return false;
         }
         else{
            boolean updated = false;
            for(int j = entries; j-- > 0;){
               double distance = pointDist(searchLocation,j);
               //negated: the queue keeps the HIGHEST priorities
               updated |= results.offer(pointPayloads.get(j),-distance);
            }
            return updated;
         }
      }
      /**
       * Counts one more point in this subtree and grows the bounding box to contain it.
       */
      private void expandBounds(double[] location){
         entries++;
         final double[] bounds = nodeMinMaxBounds.array;
         int lo = index*2*_dimensions;
         for(int i = 0; i < _dimensions; i++, lo += 2){
            final double v = location[i];
            bounds[lo]   = Math.min(bounds[lo],v);
            bounds[lo+1] = Math.max(bounds[lo+1],v);
         }
      }
   
      /**
       * Appends a point to this leaf's storage; the caller is expected to have called
       * {@link #expandBounds(double[])} for it already.
       *
       * @return the number of points counted in this node
       */
      private int add(double[] location, T load){
         pointLocations.add(location);
         pointPayloads.add(load);
         return entries;
      }
      /**
       * Tries to turn this leaf into a stem by splitting at the mean of the dimension
       * with the largest variance. If either child would be empty the split is
       * discarded and this node stays a leaf.
       */
      private void split(){
         final double[] bounds = nodeMinMaxBounds.array;
         final double[] points = pointLocations.array;
         final int boundsBase = index*2*_dimensions;
         
         double bestVariance = 0;
         for(int dim = 0; dim < _dimensions; dim++){
            double range = bounds[boundsBase + 2*dim + 1] - bounds[boundsBase + 2*dim];
            //Popoviciu's inequality: variance <= range^2/4, so a dimension can only
            //beat the best variance so far if this bound does
            if(0.25*sqr(range) > bestVariance){
               double mean = 0;
               for(int j = 0; j < entries; j++)
                  mean += points[dim+_dimensions*j];
            
               mean = mean/entries;
               double varianceSum = 0;
            
               for(int j = 0; j < entries; j++)
                  varianceSum += sqr(mean-points[dim+_dimensions*j]);
            
               if(varianceSum > bestVariance*entries){
                  bestVariance = varianceSum/entries;
                  splitVal = mean;
                  splitDim = dim;
               }
            }
         }
      
         //keep the split value finite, and off the low bound: points go to 'less' only
         //when strictly below splitVal, so a value equal to the low bound (a mean that
         //rounded down) would leave 'less' empty. The high bound still separates them.
         final double low = bounds[boundsBase + 2*splitDim];
         final double high = bounds[boundsBase + 2*splitDim + 1];
         if(splitVal == Double.POSITIVE_INFINITY)
            splitVal = Double.MAX_VALUE;
         else if(splitVal == Double.NEGATIVE_INFINITY)
            splitVal = -Double.MAX_VALUE;//Double.MIN_VALUE is the smallest POSITIVE double
         else if(splitVal == low)
            splitVal = high;
      
         less = new Node(mem_recycle);//recycle that memory!
         more = new Node();
         
         //scratch copy of one point; add() copies it again into the child's storage
         double[] pointLocation = new double[_dimensions];
         int offset = 0;
         for(int i = 0; i < entries; i++, offset += _dimensions){
            for(int j = _dimensions; j-- > 0;){
               pointLocation[j] = points[j+offset];
            }
            T load = pointPayloads.get(i);
         
            Node child = pointLocation[splitDim] < splitVal ? less : more;
            child.expandBounds(pointLocation);
            child.add(pointLocation,load);
         }
         if(less.entries == 0 || more.entries == 0){
         //one of them was 0, so the split was worthless. throw it away.
            less = null;
            more = null;
         //the two children were the most recently created nodes: hand back their
         //indices and bounding-box slots so failed splits do not leak them
            _nodes -= 2;
            nodeMinMaxBounds.size -= 2*bounds_template.length;
         }
         else{
         
         //we won't be needing that now, so keep it for the next split to reduce garbage
            mem_recycle = points;
         
            pointLocations = null;
         
            pointPayloads.clear();
            pointPayloads = null;
         }
      }
   
   }


   /**
    * Minimal growable list of doubles backed by one array that callers read directly.
    * Only the first {@code size} entries of {@code array} are in use.
    */
   private static class ContiguousDoubleArrayList{
      double[] array;
      int size;
      ContiguousDoubleArrayList(){
         this(300);
      }
      ContiguousDoubleArrayList(int size){
         this(new double[size]);
      }
      /**
       * Wraps {@code data} as empty storage; its current contents are treated as garbage.
       */
      ContiguousDoubleArrayList(double[] data){
         array = data;
      }
      /**
       * Appends a copy of {@code da}, reallocating the backing array when it does not fit.
       *
       * @return this list
       */
      ContiguousDoubleArrayList add(double[] da){
         if(size + da.length > array.length){
            array = Arrays.copyOf(array,(array.length+da.length)*2);
         }
         
         System.arraycopy(da,0,array,size,da.length);
         size += da.length;
         return this;
      }
   }

   private static final double sqr(double d){
      return d*d;}

}