Difference between revisions of "User:Skilgannon/KDTree"

From Robowiki
Jump to navigation Jump to search
m (pointer stack instead of object stack... prevents loading object contents unless the path is followed)
(Back to 1-step search)
Line 165: Line 165:
 
   
 
   
 
   public ArrayList<SearchResult<T>> nearestNeighbours(double[] searchLocation, int K){
 
   public ArrayList<SearchResult<T>> nearestNeighbours(double[] searchLocation, int K){
  //don't actually store the objects so that they aren't a possible cache miss
 
  //until we've verified that we need to access them
 
 
       IntStack stack = new IntStack();
 
       IntStack stack = new IntStack();
 
       PrioQueue<T> results = new PrioQueue<T>(K,true);
 
       PrioQueue<T> results = new PrioQueue<T>(K,true);
Line 173: Line 171:
 
    
 
    
 
       int added = 0;
 
       int added = 0;
      while(added < K )
 
        added += nodeList.get(stack.pop()).search(searchLocation,stack,results);
 
 
    
 
    
 
       while(stack.size() > 0 ){
 
       while(stack.size() > 0 ){
 
         int nodeIndex = stack.pop();
 
         int nodeIndex = stack.pop();
         if(results.peekPrio() > pointRectDist((2*_dimensions)*nodeIndex,searchLocation))
+
         if(added < K || results.peekPrio() > pointRectDist(nodeIndex,searchLocation))
             nodeList.get(nodeIndex).search(searchLocation,stack,results);
+
             added += nodeList.get(nodeIndex).search(searchLocation,stack,results);
 
       }
 
       }
 
    
 
    
Line 191: Line 187:
 
       return returnResults;
 
       return returnResults;
 
   }
 
   }
   private double pointRectDist(int offset, double[] location){
+
 
 +
 
 +
   private double pointRectDist(int offset, final double[] location){
 +
      offset *= (2*_dimensions);
 
       double distance=0;
 
       double distance=0;
       double[] array = nodeMinMaxBounds.array;
+
       final double[] array = nodeMinMaxBounds.array;
 
       for(int i = 0; i < location.length; i++,offset += 2){
 
       for(int i = 0; i < location.length; i++,offset += 2){
 
          
 
          
Line 291: Line 290:
 
       double splitVal;
 
       double splitVal;
 
    
 
    
       private Node(){
+
       Node(){
 
         this(new double[_bucketSize*_dimensions]);
 
         this(new double[_bucketSize*_dimensions]);
 
       }
 
       }
       private Node(double[] pointMemory){
+
       Node(double[] pointMemory){
 
         pointLocations = new ContiguousDoubleArrayList(pointMemory);
 
         pointLocations = new ContiguousDoubleArrayList(pointMemory);
 
         index = _nodes++;
 
         index = _nodes++;
Line 300: Line 299:
 
         nodeMinMaxBounds.add(bounds_template);
 
         nodeMinMaxBounds.add(bounds_template);
 
       }
 
       }
       private final double pointDist(double[] location, int index){
+
       double pointDist(final double[] location, int index){
         double[] arr = pointLocations.array;
+
         final double[] arr = pointLocations.array;
 
         double distance = 0;
 
         double distance = 0;
 
         int offset = (index+1)*_dimensions;
 
         int offset = (index+1)*_dimensions;
 
          
 
          
 
         for(int i = _dimensions; i-- > 0 ;){
 
         for(int i = _dimensions; i-- > 0 ;){
            double d;
+
             distance += sqr(arr[--offset] - location[i]);
             distance += (d = arr[--offset] - location[i])*d;
 
 
         }
 
         }
 
         return distance;
 
         return distance;
Line 313: Line 311:
 
    
 
    
 
       //returns number of points added to results
 
       //returns number of points added to results
       private int search(double[] searchLocation, IntStack stack, PrioQueue<T> results){
+
       int search(double[] searchLocation, IntStack stack, PrioQueue<T> results){
 
         if(pointLocations == null){
 
         if(pointLocations == null){
 
              
 
              
Line 336: Line 334:
 
       }
 
       }
 
    
 
    
       private void expandBounds(double[] location){
+
       void expandBounds(double[] location){
 
         entries++;
 
         entries++;
 
         int mio = index*2*_dimensions;
 
         int mio = index*2*_dimensions;
Line 345: Line 343:
 
       }
 
       }
 
    
 
    
       private int add(double[] location, T load){
+
       int add(double[] location, T load){
 
         pointLocations.add(location);
 
         pointLocations.add(location);
 
         pointPayloads.add(load);
 
         pointPayloads.add(load);
 
         return entries;
 
         return entries;
 
       }
 
       }
       private void split(){
+
       void split(){
 
         int offset = index*2*_dimensions;
 
         int offset = index*2*_dimensions;
 
        
 
        
Line 465: Line 463:
 
   }
 
   }
 
   
 
   
   private static final double sqr(double d){
+
   static final double sqr(double d){
 
       return d*d;}
 
       return d*d;}
 
   
 
   
 
}
 
}
 
</syntaxhighlight></code>
 
</syntaxhighlight></code>

Revision as of 11:39, 21 July 2013

/*
** KDTree.java by Julian Kent
** Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License
** See full licensing details here: http://creativecommons.org/licenses/by-nc-sa/3.0/
** For additional licensing rights please contact jkflying@gmail.com
**
** Example usage is given in the main method, as well as benchmarking code against Rednaxela's Gen2 Tree
*/
 
 
package jk.mega;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
//import ags.utils.*;
//import ags.utils.dataStructures.*;
 
/**
 * A bucketed k-d tree that maps points in a fixed number of dimensions to payloads of type
 * {@code T} and answers k-nearest-neighbour queries.
 *
 * <p>All distances handled and reported by this class are <em>squared</em> Euclidean
 * distances (no square root is ever taken).
 *
 * <p>Node bounding boxes live in one shared {@code double[]} and nodes are addressed by
 * integer index, so a search can test a node's bounding box without touching the node object.
 *
 * @param <T> type of the payload stored with each point
 */
public class KDTree<T>{

//use a big bucketSize so that we have fewer node bounds (for more cache hits) and better splits
   private static final int  _bucketSize = 50;

   private final int _dimensions;
   //number of live nodes; also the index handed to the next node that is created
   private int _nodes;
   private final Node root;
   //nodes by index, so that children can be referenced by int rather than by object
   private final ArrayList<Node> nodeList = new ArrayList<Node>();

   //prevent GC from having to collect _bucketSize*dimensions*8 bytes each time a leaf splits
   private double[] mem_recycle;

   //the starting values for bounding boxes, for easy access
   //(low = +infinity, high = -infinity for every dimension, i.e. an empty box)
   private final double[] bounds_template;

   //one big self-expanding array to keep all the node bounding boxes so that they stay in cache
   // node bounds available at:
   //low:  2 * _dimensions * node.index + 2 * dim
   //high: 2 * _dimensions * node.index + 2 * dim + 1
   private final ContiguousDoubleArrayList nodeMinMaxBounds;

/*
   public static void main(String[] args){
      int dims = 1;
      int size = 2000000;
      int testsize = 1;
      int k = 40;
      int iterations = 1;
      System.out.println(
         "Config:\n"
         + "No JIT Warmup\n"
         + "Tested on random data.\n" 
         + "Training and testing points shared across iterations.\n"
         + "Searches interleaved.");
      System.out.println("Num points:     " + size);
      System.out.println("Num searches:   " + testsize);
      System.out.println("Dimensions:     " + dims);
      System.out.println("Num Neighbours: " + k);
      System.out.println();
      ArrayList<double[]> locs = new ArrayList<double[]>(size);
      for(int i = 0; i < size; i++){
         double[] loc = new double[dims];
         for(int j = 0; j < dims; j++)
            loc[j] = Math.random();
         locs.add(loc);
      }
      ArrayList<double[]> testlocs = new ArrayList<double[]>(testsize);
      for(int i = 0; i < testsize; i++){
         double[] loc = new double[dims];
         for(int j = 0; j < dims; j++)
            loc[j] = Math.random();
         testlocs.add(loc);
      }
      for(int r = 0; r < iterations; r++){
         long t1 = System.nanoTime();
         KDTree<double[]> t = new KDTree<double[]>(dims);// This tree
         for(int i = 0; i < size; i++){
            t.addPoint(locs.get(i),locs.get(i));
         }
         long t2 = System.nanoTime();
         KdTree<double[]> rt = new KdTree.Euclidean<double[]>(dims,null); //Rednaxela Gen2
         for(int i = 0; i < size; i++){
            rt.addPoint(locs.get(i),locs.get(i));
         }
         long t3 = System.nanoTime();
 
         long jtn = 0;
         long rtn = 0;
         long mjtn = 0;
         long mrtn = 0;
 
         double dist1 = 0, dist2 = 0;
         for(int i = 0; i < testsize; i++){
            long t4 = System.nanoTime();
            dist1 += t.nearestNeighbours(testlocs.get(i),k).iterator().next().distance;
            long t5 = System.nanoTime();
            dist2 += rt.nearestNeighbor(testlocs.get(i),k,true).iterator().next().distance;
            long t6 = System.nanoTime();
            long t7 = System.nanoTime();
            jtn += t5 - t4 - (t7 - t6);
            rtn += t6 - t5 - (t7 - t6); 
            mjtn = Math.max(mjtn,t5 - t4 - (t7 - t6));
            mrtn = Math.max(mrtn,t6 - t5 - (t7 - t6));
         }
 
         System.out.println("Accuracy: " + (Math.abs(dist1-dist2) < 1e-10?"100%":"BROKEN!!!"));
         if(Math.abs(dist1-dist2) > 1e-10){
            System.out.println("dist1: " + dist1 + "    dist2: " + dist2);
         }
         long jts = t2 - t1;
         long rts = t3 - t2;
         System.out.println("Iteration:      " + (r+1) + "/" + iterations);
 
         System.out.println("This tree add avg:  " + jts/size + " ns");
         System.out.println("Reds tree add avg:  " + rts/size + " ns");
 
         System.out.println("This tree knn avg:  " + jtn/testsize + " ns");
         System.out.println("Reds tree knn avg:  " + rtn/testsize + " ns");
         System.out.println("This tree knn max:  " + mjtn + " ns");
         System.out.println("Reds tree knn max:  " + mrtn + " ns");
         System.out.println();
      }
   }
   // */

   /**
    * Creates an empty tree.
    *
    * @param dimensions number of coordinates every stored point and every query must have
    */
   public KDTree(int dimensions){
      _dimensions = dimensions;

   //initialise this big so that it ends up in 'old' memory
      nodeMinMaxBounds = new ContiguousDoubleArrayList(512 * 1024 / 8 + 2*_dimensions);
      mem_recycle = new double[_bucketSize*dimensions];

      //template layout is [low0, high0, low1, high1, ...]: lows start at +inf, highs at -inf
      bounds_template = new double[2*_dimensions];
      Arrays.fill(bounds_template,Double.NEGATIVE_INFINITY);
      for(int i = 0, max = 2*_dimensions; i < max; i+=2)
         bounds_template[i] = Double.POSITIVE_INFINITY;

   //and.... start!
      root = new Node();
   }

   /**
    * Returns the number of nodes (stems and leaves) currently in the tree.
    *
    * @return the live node count
    */
   public int nodes(){
      return _nodes;
   }

   /**
    * Stores a point with its payload. The coordinates are copied, so the caller may reuse
    * {@code location} afterwards.
    *
    * @param location coordinates of the point; must have exactly as many entries as the
    *                 tree has dimensions
    * @param payload  value to associate with the point
    * @return total number of points stored in the tree after this insertion
    * @throws IllegalArgumentException if {@code location} has the wrong number of entries
    */
   public int addPoint(double[] location, T payload){
      //a wrong-sized array would silently corrupt the packed point storage of the leaf
      checkDimensions(location);

      Node addNode = root;
   //Do a Depth First Search to find the Node where 'location' should be stored
      while(addNode.pointLocations == null){
         addNode.expandBounds(location);
         if(location[addNode.splitDim] < addNode.splitVal)
            addNode = nodeList.get(addNode.lessIndex);
         else
            addNode = nodeList.get(addNode.moreIndex);
      }
      addNode.expandBounds(location);

      int nodeSize = addNode.add(location,payload);

      if(nodeSize % _bucketSize == 0)
      //try splitting again once every time the node passes a _bucketSize multiple
         addNode.split();

      return root.entries;
   }

   /**
    * Finds the points closest to {@code searchLocation}.
    *
    * @param searchLocation query coordinates; must have exactly as many entries as the tree
    *                       has dimensions
    * @param K              maximum number of neighbours wanted
    * @return the neighbours found, closest first, each carrying its squared distance to the
    *         query; the list holds fewer than {@code K} entries when the tree cannot supply
    *         {@code K} points, and is empty for {@code K == 0} or an empty tree
    * @throws IllegalArgumentException if {@code K} is negative or {@code searchLocation} has
    *                                  the wrong number of entries
    */
   public ArrayList<SearchResult<T>> nearestNeighbours(double[] searchLocation, int K){
      if(K < 0)
         throw new IllegalArgumentException("K must not be negative: " + K);
      checkDimensions(searchLocation);

      //never ask for more slots than there are points: avoids placeholder results and
      //a needlessly large queue when K is huge
      final int wanted = Math.min(K,root.entries);
      if(wanted == 0)
         return new ArrayList<SearchResult<T>>(0);

      IntStack stack = new IntStack();
      PrioQueue<T> results = new PrioQueue<T>(wanted,true);

      stack.push(root.index);

      int added = 0;

      while(stack.size() > 0 ){
         int nodeIndex = stack.pop();
         //visit a node only if the queue is not yet full of real points or the node's
         //bounding box is closer than the current worst kept result
         if(added < wanted || results.peekPrio() > pointRectDist(nodeIndex,searchLocation))
            added += nodeList.get(nodeIndex).search(searchLocation,stack,results);
      }

      ArrayList<SearchResult<T>> returnResults = new ArrayList<SearchResult<T>>(wanted);
      double[] priorities = results.priorities;
      Object[] elements = results.elements;
      for(int i = 0; i < wanted; i++){//forward (closest first)
         //a slot still at +infinity was never filled: real entries are only inserted
         //when strictly closer than the current worst, so they are always below +infinity
         if(priorities[i] == Double.POSITIVE_INFINITY)
            break;
         @SuppressWarnings("unchecked")//elements only ever receives T values via addNoGrow
         T payload = (T)elements[i];
         returnResults.add(new SearchResult<T>(priorities[i],payload));
      }
      return returnResults;
   }

   //rejects coordinate arrays whose length does not match the tree's dimensionality
   private void checkDimensions(double[] location){
      if(location.length != _dimensions)
         throw new IllegalArgumentException(
            "expected " + _dimensions + " coordinates but got " + location.length);
   }

   //squared distance from 'location' to the bounding box of the node with index 'offset'
   //(0 for every dimension in which the location lies inside the box)
   private double pointRectDist(int offset, final double[] location){
      offset *= (2*_dimensions);
      double distance=0;
      final double[] array = nodeMinMaxBounds.array;
      for(int i = 0; i < location.length; i++,offset += 2){

         double diff = 0;
         double bv = array[offset];//low bound
         double lv = location[i];
         if(bv > lv)
            diff = bv-lv;
         else{
            bv=array[offset+1];//high bound
            if(lv>bv)
               diff = lv-bv;
         }
         distance += sqr(diff);
      }
      return distance;
   }

     //NB! This Priority Queue keeps things with the LOWEST priority. 
//If you want highest priority items kept, negate your values
   //Fixed-capacity, kept sorted ascending by priority. This file only constructs it with
   //prefill == true, where every slot starts at +infinity.
   private static class PrioQueue<S>{

      Object[] elements;
      double[] priorities;
      //priority of the last (worst) slot, i.e. the value a candidate must beat to be kept
      private double minPrio;
      private int size;

      PrioQueue(int size, boolean prefill){
         elements = new Object[size];
         priorities = new double[size];
         Arrays.fill(priorities,Double.POSITIVE_INFINITY);
         if(prefill){
            minPrio = Double.POSITIVE_INFINITY;
            this.size = size;
         }
      }
       //uses O(log(n)) comparisons and one big shift of size O(N)
       //and is MUCH simpler than a heap --> faster on small sets, faster JIT

      //inserts 'value' at its sorted position and drops the current last entry
      void addNoGrow(S value, double priority){
         int index = searchFor(priority);
         if(index >= size)
            return;//not better than anything kept (or zero capacity): nothing to insert
         int nextIndex = index + 1;
         int length = size - index - 1;//number of entries shifted one slot to the right
         System.arraycopy(elements,index,elements,nextIndex,length);
         System.arraycopy(priorities,index,priorities,nextIndex,length);
         elements[index]=value;
         priorities[index]=priority;

         minPrio = priorities[size-1];
      }

      //binary search: first index whose priority is not smaller than 'priority'
      int searchFor(double priority){
         int i = size-1;
         int j = 0;
         while(i>=j){
            int index = (i+j)>>>1;

            if( priorities[index] < priority)
               j = index+1;
            else 
               i = index-1;
         }
         return j;
      }
      double peekPrio(){
         return minPrio;
      }
   }


   /**
    * One neighbour returned by {@link KDTree#nearestNeighbours(double[], int)}.
    *
    * @param <S> payload type
    */
   public static class SearchResult<S>{
      /** Squared Euclidean distance between the query and the stored point. */
      public double distance;
      /** Payload that was stored with the point. */
      public S payload;
      SearchResult(double dist, S load){
         distance = dist;
         payload = load;
      }
   }

   //A tree node. It is a leaf while pointLocations != null and a stem afterwards.
   private class Node {

   //for accessing bounding box data 
   // - if trees weren't so unbalanced might be better to use an implicit heap?
      int index;

   //keep track of size of subtree
      int entries;

   //leaf
      //packed coordinates: point j occupies [j*_dimensions, (j+1)*_dimensions)
      ContiguousDoubleArrayList pointLocations ;
      ArrayList<T> pointPayloads = new ArrayList<T>(_bucketSize);

   //stem
      //Node less, more;
      int lessIndex, moreIndex;
      int splitDim;
      double splitVal;

      Node(){
         this(new double[_bucketSize*_dimensions]);
      }
      //creates a leaf backed by 'pointMemory', registers it and appends an empty bounding box
      Node(double[] pointMemory){
         pointLocations = new ContiguousDoubleArrayList(pointMemory);
         index = _nodes++;
         nodeList.add(this);
         nodeMinMaxBounds.add(bounds_template);
      }
      //squared distance between 'location' and the index-th point stored in this leaf
      double pointDist(final double[] location, int index){
         final double[] arr = pointLocations.array;
         double distance = 0;
         int offset = (index+1)*_dimensions;//one past the point's last coordinate

         for(int i = _dimensions; i-- > 0 ;){
            distance += sqr(arr[--offset] - location[i]);
         }
         return distance;
      }

      //returns number of points added to results
      //stem: pushes both children so that the one on the query's side is popped first
      //leaf: offers every stored point that beats the current worst kept result
      int search(double[] searchLocation, IntStack stack, PrioQueue<T> results){
         if(pointLocations == null){

            if(searchLocation[splitDim] < splitVal)
               stack.push(moreIndex).push(lessIndex);//less will be popped first
            else
               stack.push(lessIndex).push(moreIndex);//more will be popped first

         }
         else{
            int updated = 0;
            for(int j = entries; j-- > 0;){
               double distance = pointDist(searchLocation,j);
               if(results.peekPrio() > distance){
                  updated++;
                  results.addNoGrow(pointPayloads.get(j),distance);
               }
            }
            return updated;
         }
         return 0;
      }

      //counts one more point in this subtree and grows the bounding box to contain it
      void expandBounds(double[] location){
         entries++;
         final double[] bounds = nodeMinMaxBounds.array;
         int mio = index*2*_dimensions;
         for(int i = 0; i < _dimensions; i++, mio += 2){
            double value = location[i];
            bounds[mio] = Math.min(bounds[mio],value);//low
            bounds[mio+1] = Math.max(bounds[mio+1],value);//high
         }
      }

      //appends the point to this leaf; returns the leaf's point count
      //(entries has already been incremented by expandBounds)
      int add(double[] location, T load){
         pointLocations.add(location);
         pointPayloads.add(load);
         return entries;
      }

      //Tries to turn this leaf into a stem with two child leaves. The split dimension is
      //the one with the largest variance and the split value is that dimension's mean.
      //If every point ends up in the same child the split is undone and this stays a leaf.
      void split(){
         final int boundsBase = index*2*_dimensions;
         int offset = boundsBase;

         double diff = 0;//largest variance seen so far
         for(int i = 0; i < _dimensions; i++){
            double min = nodeMinMaxBounds.array[offset];
            double max = nodeMinMaxBounds.array[offset+1];
            if(max-min>diff){
               double mean = 0;
               for(int j = 0; j < entries; j++)
                  mean += pointLocations.array[i+_dimensions*j];

               mean = mean/entries;
               double varianceSum = 0;

               for(int j = 0; j < entries; j++)
                  varianceSum += sqr(mean-pointLocations.array[i+_dimensions*j]);

               if(varianceSum>diff*entries){
                  diff = varianceSum/entries;
                  splitVal = mean;

                  splitDim = i;
               }
            }
            offset += 2;
         }

         //kill all the nasties
         if(splitVal == Double.POSITIVE_INFINITY)
            splitVal = Double.MAX_VALUE;
         else if(splitVal == Double.NEGATIVE_INFINITY)
            //most negative finite double; Double.MIN_VALUE is the smallest POSITIVE double
            splitVal = -Double.MAX_VALUE;
         else if(splitVal == nodeMinMaxBounds.array[boundsBase + 2*splitDim + 1])
            //split value coincides with the upper bound: use the lower bound instead
            splitVal = nodeMinMaxBounds.array[boundsBase + 2*splitDim];

         Node less = new Node(mem_recycle);//recycle that memory!
         Node more = new Node();
         lessIndex = less.index;
         moreIndex = more.index;

         //reduce garbage by factor of _bucketSize by recycling this array
         double[] pointLocation = new double[_dimensions];
         for(int i = 0; i < entries; i++){
            System.arraycopy(pointLocations.array,i*_dimensions,pointLocation,0,_dimensions);
            T load = pointPayloads.get(i);

            if(pointLocation[splitDim] < splitVal){
               less.expandBounds(pointLocation);
               less.add(pointLocation,load);
            }
            else{
               more.expandBounds(pointLocation);
               more.add(pointLocation,load);
            }
         }
         if(less.entries*more.entries == 0){
         //one of them was 0, so the split was worthless. throw it away.
            _nodes -= 2;//recall that bounds memory
            //the two children own the last two boxes of the bounds array; give them back
            //so the next nodes reuse those slots and start from the empty template
            nodeMinMaxBounds.size -= 2*2*_dimensions;
            nodeList.remove(moreIndex);
            nodeList.remove(lessIndex);
         }
         else{

         //we won't be needing that now, so keep it for the next split to reduce garbage
            mem_recycle = pointLocations.array;

            pointLocations = null;

            pointPayloads.clear();
            pointPayloads = null;
         }
      }

   }


   //append-only list of doubles in one backing array that grows on demand
   private static class ContiguousDoubleArrayList{
      double[] array;
      int size;
      ContiguousDoubleArrayList(){this(300);}
      ContiguousDoubleArrayList(int size){this(new double[size]);}
      //wraps 'data' without copying; the list starts empty and overwrites from index 0
      ContiguousDoubleArrayList(double[] data){array = data;}

      //appends every element of 'da'; returns this for chaining
      ContiguousDoubleArrayList add(double[] da){
         if(size + da.length > array.length)
            array = Arrays.copyOf(array,(array.length+da.length)*2);

         System.arraycopy(da,0,array,size,da.length);
         size += da.length;
         return this;
      }
   }

   //minimal growable stack of ints; pop() does no underflow check
   private static class IntStack{
      int[] array;
      int size;
      IntStack(){this(64);}
      IntStack(int size){this(new int[size]);}
      IntStack(int[] data){array = data;}

      //pushes 'i'; returns this for chaining
      IntStack push(int i){
         if(size>= array.length)
            array = Arrays.copyOf(array,(array.length+1)*2);

         array[size++] = i;
         return this;
      }
      int pop(){
         return array[--size];
      }
      int size(){
         return size;
      }
   }

   //d squared
   static final double sqr(double d){
      return d*d;}

}