1 {------------------------------------------------------------------------------
    2                               TABLES
    3 
    4 A Table is a set of entries, each containing a key and an associated value, the
    5 key being used to look up the value.
    6 
    7 In database-style applications, the value may be a record, and the key may be a
    8 field in it. The normal effect of sharing of subexpressions should avoid
    9 serious space problems. However, `computed' keys may cause a space problem.
   10 
   11 Keys are assumed to be unique. The effect of non-unique keys can be obtained by
   12 associated a list value such as [v1,v2,...] with each key.
   13 
   14 With the `enterList' function, the first entry for a key takes precedence over
   15 any later ones with the same key. This allows a table to be built `lazily', the
   16 entries in the list only being evaluated as needed to satisfy `find' calls.
   17 
   18 REQUIREMENTS:
   19    The results module `result.g' must be loaded before this one.
   20    The key type must be ordered (an instance of class Ord).
   21 
   22 EXPORTS:
   23    Table k v        the type of tables; k and v are the key and value types
   24    newTable         an empty table
   25    enter t k v      add entry to t (no effect if old entry for k exists)
   26    enterList t es   add a list of (key,val) pairs to t
   27    update t k v     change entry in t (or add new entry if necessary)
   28    updateList t es  change a list of (key,val) pairs in t
   29    find t k         lookup k in t giving (success v) or (failure "not found")
   30    delete t k       remove entry in t for key k (if any)
   31    entries t        return list of all (key,val) pairs in t in key order
   32 ------------------------------------------------------------------------------}
   33 
   34 module Table where
   35 import Result
   36 
   37 -- The implementation here uses a binary search tree, giving `log n' time
   38 -- operations, provided that the tree remains well-balanced.  Eventually, there
   39 -- should be a constant-time version with the same semantics.
   40 
   41 data Table k v = Empty | Fork (Table k v) (k,v) (Table k v)
   42 
   43 newTable = Empty
   44 
   45 find Empty key = failure "not found"
   46 find (Fork left (k,v) right) key
   47    | key <  k  =  find left key
   48    | key == k  =  success v
   49    | key >  k  =  find right key
   50 
   51 enter Empty key val = Fork Empty (key,val) Empty
   52 enter (Fork left (k,v) right) key val
   53    | key <  k  =  Fork (enter left key val) (k,v) right
   54    | key == k  =  Fork left (k,v) right
   55    | key >  k  =  Fork left (k,v) (enter right key val)
   56 
   57 update Empty key val  =  Fork Empty (key,val) Empty
   58 update (Fork left (k,v) right) key val
   59    | key <  k  =  Fork (update left key val) (k,v) right
   60    | key == k  =  Fork left (key,val) right
   61    | key >  k  =  Fork left (k,v) (update right key val)
   62 
   63 delete Empty key =  Empty
   64 delete (Fork left (k,v) right) key
   65    | key <  k  =  Fork (delete left key) (k,v) right
   66    | key == k  =  graft left right
   67    | key >  k  =  Fork left (k,v) (delete right key)
   68    where
   69    graft left Empty = left
   70    graft left right = Fork left e right' where (e,right') = leftmost right
   71    leftmost (Fork Empty e r) = (e,r)
   72    leftmost (Fork l e r) = (e2, Fork l' e r)  where (e2,l') = leftmost l
   73 
   74 -- `enterList t es' adds a list of new entries. It is lazy in es (but may build
   75 -- a poorly balanced tree).
   76 
   77 enterList t []  =  t
   78 enterList Empty (e:res)  =  Fork left e right  where
   79    k  =  fst e
   80    left  =  enterList Empty [e1 | e1<-res, fst e1 < k]
   81    right  =  enterList Empty [e1 | e1<-res, fst e1 > k]
   82 enterList (Fork left e right) es  =  Fork left' e right'  where
   83    k  =  fst e
   84    left'  =  enterList left [e1 | e1<-es, fst e1 < k]
   85    right'  =  enterList right [e1 | e1<-es, fst e1 > k]
   86 
   87 -- `updateList t es' makes a list of updates. It is strict in es, and optimised
   88 -- to produce a well balanced tree. it can be used with es==[] purely to
   89 -- rebalance the tree.
   90 
   91 updateList t es = balance (mergeKey (entries t) (unique (sortKey es))) where
   92    balance [] = Empty
   93    balance es = Fork left (es!!m) right where
   94       left  =  balance (take m es)
   95       right  =  balance (drop (m+1) es)
   96       m  =  length es `div` 2
   97    unique [] = []
   98    unique [e] = [e]
   99    unique ((k1,v1):(k2,v2):res) =
  100       if k1==k2 then unique ((k2,v2):res) else (k1,v1) : unique ((k2,v2):res)
  101 
  102 sortKey kvs = foldr insertKey [] kvs where
  103    insertKey kv []          = [kv]
  104    insertKey (k1,v1) ((k2,v2):res)
  105         | k1 <= k2  = (k1,v1):(k2,v2):res
  106         | otherwise = (k2,v2):insertKey (k1,v1) res
  107 
  108 mergeKey [] kvs = kvs
  109 mergeKey kvs [] = kvs
  110 mergeKey ((k1,v1):kvs1) ((k2,v2):kvs2)
  111         | k1 <= k2  = (k1,v1) : mergeKey kvs1 ((k2,v2):kvs2)
  112         | otherwise = (k2,v2) : mergeKey ((k1,v1):kvs1) kvs2
  113 
  114 -- `entries t' returns the list of entries in t, sorted by key. Inefficient
  115 -- unless tree-optimised version of ++ is used.
  116 
  117 entries Empty  =  []
  118 entries (Fork left e right)  =  entries left ++ [e] ++ entries right