1 {-
    2  - Decode.hs
    3  -
    4  - Module containing the code to decode LZW encodings
    5  -
    6  - Paul Sanders, Applications Research Division, BTL 1992
    7  -
    8  - DEC_VERSION 1 uses a list with keys in ascending order as a table, ie.
    9  - entry n is given by table!!n.
   10  -
   11  - DEC_VERSION 2 uses a list with keys in descending order as a table, ie.
   12  - entry n is given by table!!(#table-n). We don't need to calculate the
   13  - length of the table however as this is given by the value of the next
   14  - code to be added.
   15  -
   16  - DEC_VERSION 3 uses a balanced binary tree to store the keys. We can do
   17  - this cheaply by putting the key in the correct place straight away and
   18  - therefore not doing any rebalancing.
   19  -}
   20 
   21 module Decode (decode)
   22 where
   23 
   24 import Prelude hiding( lookup )         -- lookup defined locally
   25 import Defaults
   26 import BinConv
   27 
   28 data Optional a = NONE | SOME a deriving (Eq, Show{-was:Text-})
   29 
   30 {- We ideally want to store the table as an array but these are inefficient
   31  - so we use a list instead. We don't use the tree used by encode since we
   32  - can make use of the fact that all our keys (the codes) come in order and
   33  - will be placed at the end of the table, at position 'code'.
   34  -
   35  - An entry of (SOME n, 'c') indicates that this code has prefix code n
   36  - and final character c.
   37  -}
   38 
   39 
   40 {- Kick off the decoding giving the real function the first code value and
   41  - the initial table.
   42  -}
   43 
   44 decode :: [Int] -> String
   45 decode []
   46       = []
   47 decode cs
   48       = decode' cs first_code init_table
   49 
   50 {- decode` decodes the first character which is special since no new code
   51  - gets added for it. It is also special in so far as we know that the
   52  - code is a singleton character and thus has prefix NONE. The '@' is a
   53  - dummy character and can be anything.
   54  -}
   55 
   56 decode' [] _ _ = []
   57 decode' (c:cs) n t
   58       = ch : do_decode cs n c ch t
   59         where
   60         (NONE, ch) = lookup c t
   61 
   62 {- do_decode decodes all the codes bar the first. 
   63  -
   64  - If the code is in the table (ie the code is less than the next code to be 
   65  - added) then we output the string for that code (using unfold if a prefix 
   66  - type) and add a new code to the table with the final character output as 
   67  - the extension and the previous code as prefix.
   68  -
   69  - If the code is not one we know about then we give it to decode_special for
   70  - special treatment
   71  -}
   72 
   73 do_decode [] _ _ _ _ = []
   74 do_decode (c:cs) n old_n fin_char t
   75       = if c >= n          -- we don't have this code in the table yet
   76         then decode_special (c:cs) n old_n fin_char t
   77         else outchs ++ do_decode cs n' c (head outchs) t'
   78         where
   79         outchs = reverse (unfold c (n-1) t)
   80         (n', t') = if n == max_entries
   81                    then (n, t)
   82                    else (n+1, insert n (SOME old_n, head outchs) t)
   83 
   84 {- decode_special decodes a code that isn't in the table.
   85  -
   86  - The algorithm in Welch describes why this works, suffice it to say that
   87  - the output string is given by the last character output and the string
   88  - given by the previous code. An entry is also made in the table for the
   89  - last character output and the old code.
   90  -}
   91 
   92 decode_special (c:cs) n old_n fin_char t
   93       = outchs ++ do_decode cs n' c (head outchs) t'
   94         where
   95         outchs = reverse (fin_char : unfold old_n (n-1) t)
   96         (n', t') = if n == max_entries
   97                    then (n, t)
   98                    else (n+1, insert n (SOME old_n, fin_char) t)
   99 
  100 {- unfold a prefix code.
  101  -
  102  - chain back through the prefixes outputting the extension characters as we
  103  - go.
  104  -}
  105 
  106 unfold n t_len t
  107       = if prefix == NONE
  108         then [c]
  109         else c : unfold n' t_len t
  110         where
  111         (prefix, c) = lookup n t
  112         SOME n' = prefix
  113 
  114 data DecompTable = Branch DecompTable DecompTable | Leaf (Optional Int, Char) deriving (Show{-was:Text-})
  115 
  116 {- Insert a code pair into the table. The position of the code is given by
  117  - the breakdown of the key into its binary digits
  118  -}
  119 
  120 insert n v t = insert' (dec_to_binx code_bits n) v t
  121 
  122 {- We can place a code exactly where it belongs using the following algorithm.
  123  - Take the code's binary rep expanded to the maximum number of bits. Start
  124  - at the first bit, if a 0 then insert the code to the left, if a 1 then
  125  - insert to the right. Carry on with the other bits until we run out and are
  126  - thus at the right place and can construct the node.
  127  -}
  128 
  129 insert' [] v (Leaf _)
  130       = Leaf v
  131 insert' ('0' : bs) v (Branch l r)
  132       = Branch (insert' bs v l) r
  133 insert' ('1' : bs) v (Branch l r)
  134       = Branch l (insert' bs v r)
  135 insert' ('0' : bs) v t
  136       = Branch (insert' bs v t) t
  137 insert' ('1' : bs) v t
  138       = Branch t (insert' bs v t)
  139 
  140 {- For a lookup we use the same mechanism to locate the position of the item
  141  - in the tree but if we find that the route has not been constructed or the
  142  - node has the dummy value then that code is not yet in the tree. The way
  143  - in which the decode algorithm works this should never happen.
  144  -}
  145 
  146 lookup n t = lookup' (dec_to_binx code_bits n) t
  147 
  148 lookup' [] (Leaf v)
  149       = v
  150 lookup' ('0' : bs) (Branch l _)
  151       = lookup' bs l
  152 lookup' ('1' : bs) (Branch _ r)
  153       = lookup' bs r
  154 lookup' _  _ = error "tree insert error - seek professional help"
  155 
  156 init_table = mk_init_table 0 (Leaf (SOME 99999, '@'))
  157 
  158 mk_init_table 256 t = t
  159 mk_init_table n t = mk_init_table (n+1) (insert n (NONE, toEnum n) t)
  160