The Fast Lookup Tree

The jdk_fasttree is a c++ class I wrote that is a container object for variable length strings of other objects (typically char). It uses a LOT of memory but is incredibly fast to look up an arbitrary value. It is faster than any hash table. The amount of time it takes to look up a value for a string is bounded by the length of the string - the number of strings in the tree has no bearing on the speed.

For instance, on a PPC processor, the 'find' loop takes 14 instructions per character. If you are trying to find a string that is 10 characters long, the find method will take a maximum of 140 + (function call overhead) instructions. This speed comes at a price, though - It is very very memory hungry.

e-mail me for more info. Here is the tree and an example c++ program using it.

The Fast Lookup Tree C++ Template

File: jdk_fasttree.h

#ifndef __JDK_FASTTREE_H
#define __JDK_FASTTREE_H


template <typename VALUE, int RANGE, typename KEYTYPE>
struct jdk_fasttree
{
  typedef VALUE value_t;
  enum 
  {
    range_mask = RANGE-1,
    range = RANGE
  };
  typedef KEYTYPE key_t;  
  typedef jdk_fasttree<VALUE,RANGE,KEYTYPE> type_t;
     
  value_t result;
  type_t * parent;
  type_t * table[RANGE];

  struct iterator_callback
  {
    virtual bool operator() ( key_t *key, int len, value_t value ) = 0;
  };
  
    
  inline jdk_fasttree()
    : result(),
    parent(0)
  {
    for( int i=0; i<range; ++i )
    {
      table[i] = 0;  
    }    
  }
  
  inline jdk_fasttree( type_t *parent_ )
    : result(), 
    parent( parent_ )
  {
    for( int i=0; i<range; ++i )
    {
      table[i] = 0;  
    }    
  }
  
  
  inline ~jdk_fasttree()
  {
    for( int i=0; i<range; ++i )
    {
      if( table[i] )
        delete table[i];
    }
  }
    
  
  inline void add( const key_t *buf, int len, value_t v )
  {
    type_t * t = this;
    
    while( len-- )
    {
      key_t c = (*buf)&range_mask;
      
      if( !t->table[c] )
      {
        t->table[c] = new type_t(t);
      }
      t = t->table[c];
      ++buf;
    }
    t->result = v;
  }
  
  inline value_t find( const key_t *buf, int len )
  {
    type_t * t = this;
    
    while( len-- )
    {
      t = t->table[ *(buf++)&(range_mask) ];
      if( !t )
      {
        return 0;
      }    
    }

    return t->result;    
  }

  inline const value_t find( const key_t *buf, int len ) const
  {
    type_t * t = this;
    
    while( len-- )
    {
      t = t->table[ *(buf++)&(range_mask) ];
      if( !t )
      {
        return 0;
      }    
    }

    return t->result;    
  }
  
  inline bool remove( const key_t *buf, int len )
  {
    type_t * last_fork = this;
    int last_fork_pos=0;
    int pos=0;
    type_t * t = this;
    

    // follow the branch, but remember where the last fork in the branch is

    while( len-- )
    {
      int entry_count=0;
      for(int i=0; i<range; ++i )
      {
        entry_count += (int)(t->table[i]!=0);
      }
    
      if( entry_count>1 )
      {
        last_fork_pos = pos;
        last_fork = t;
      }
      
      t = t->table[ buf[pos]&(range_mask) ];
      if( !t )
      {
        return false;
      }
      
         ++pos;  
    }    

    // ok, we found a match. Now delete the branch starting from our last fork position
    
    type_t **kill_point = &last_fork->table[ buf[last_fork_pos]&(range_mask) ];
    delete *kill_point;
    *kill_point = 0;
    
    return true;
  }

  inline bool iterate( key_t *buf, int max_len, iterator_callback &cb, int buf_pos=0 )
  {
    if( buf_pos>=max_len )
    {
      return false;
    }
    
    if( buf_pos>0 )
    {      
      if( !cb( buf, buf_pos, result ) )
      {
        return false;
      }
    }        
    
    for(int i=0; i<range; ++i )
    {
      if( table[i] )
      {        
          buf[buf_pos]=i;
        if( !table[i]->iterate( buf, max_len, cb, buf_pos+1 ) )
        {
          return false;
        }        
      }
    }
    return true;
  }  
};



#endif

Example of usage:

File: fasttree_example.cpp


#include <iostream>
#include <string>

#include "jdk_fasttree.h"


typedef jdk_fasttree<int,128,char> my_tree;

template <class TREE>
struct my_iterator : public TREE::iterator_callback
{
  bool operator () ( typename TREE::key_t *key, int len, typename TREE::value_t value )
  {
    if( value!=0 )
    {      
      for( int i=0; i<len; ++i )
      {
        std::cout << char(key[i]);
      }
      std::cout << std::endl;
    }
    return true;
  }
};

my_tree::value_t find( my_tree &tree, const char *str_to_find )
{
  int len_of_str=strlen(str_to_find);
  return tree.find( str_to_find, len_of_str );
}


int main(int argc, char **argv )
{
  my_tree tree;
  
  tree.add( "testing1", strlen("testing1"), 1 );
  tree.add( "testtree", strlen("testtree"), 2 );
  tree.add( "jeffrey", strlen("jeffrey"), 3 );
  tree.add( "macintosh", strlen("macintosh"), 4 );
  
  tree.remove( "jeffrey", strlen("jeffrey") );
 
  std::cout << "Dumping tree:" << std::endl;

  my_iterator<my_tree> i;
  my_tree::key_t buf[1024];
  tree.iterate( buf, 1024, i );

  std::cout << "Finding 'macintosh' in tree:" << std::endl;  
  std::cout << "Value is: " << find(tree,"macintosh") << std::endl;
  
  return 0;
}

e-mail: jeffk@jdkoftinoff.com

PGP/GNU PG Public Key