1 {- 2 - Decode.hs 3 - 4 - Module containing the code to decode LZW encodings 5 - 6 - Paul Sanders, Applications Research Division, BTL 1992 7 - 8 - DEC_VERSION 1 uses a list with keys in ascending order as a table, ie. 9 - entry n is given by table!!n. 10 - 11 - DEC_VERSION 2 uses a list with keys in descending order as a table, ie. 12 - entry n is given by table!!(#table-n). We don't need to calculate the 13 - length of the table however as this is given by the value of the next 14 - code to be added. 15 - 16 - DEC_VERSION 3 uses a balanced binary tree to store the keys. We can do 17 - this cheaply by putting the key in the correct place straight away and 18 - therefore not doing any rebalancing. 19 -} 20 21 module Decode (decode) 22 where 23 24 import Prelude hiding( lookup ) -- lookup defined locally 25 import Defaults 26 import BinConv 27 28 data Optional a = NONE | SOME a deriving (Eq, Show{-was:Text-}) 29 30 {- We ideally want to store the table as an array but these are inefficient 31 - so we use a list instead. We don't use the tree used by encode since we 32 - can make use of the fact that all our keys (the codes) come in order and 33 - will be placed at the end of the table, at position 'code'. 34 - 35 - An entry of (SOME n, 'c') indicates that this code has prefix code n 36 - and final character c. 37 -} 38 39 40 {- Kick off the decoding giving the real function the first code value and 41 - the initial table. 42 -} 43 44 decode :: [Int] -> String 45 decode [] 46 = [] 47 decode cs 48 = decode' cs first_code init_table 49 50 {- decode` decodes the first character which is special since no new code 51 - gets added for it. It is also special in so far as we know that the 52 - code is a singleton character and thus has prefix NONE. The '@' is a 53 - dummy character and can be anything. 54 -} 55 56 decode' [] _ _ = [] 57 decode' (c:cs) n t 58 = ch : do_decode cs n c ch t 59 where 60 (NONE, ch) = lookup c t 61 62 {- do_decode decodes all the codes bar the first. 63 - 64 - If the code is in the table (ie the code is less than the next code to be 65 - added) then we output the string for that code (using unfold if a prefix 66 - type) and add a new code to the table with the final character output as 67 - the extension and the previous code as prefix. 68 - 69 - If the code is not one we know about then we give it to decode_special for 70 - special treatment 71 -} 72 73 do_decode [] _ _ _ _ = [] 74 do_decode (c:cs) n old_n fin_char t 75 = if c >= n -- we don't have this code in the table yet 76 then decode_special (c:cs) n old_n fin_char t 77 else outchs ++ do_decode cs n' c (head outchs) t' 78 where 79 outchs = reverse (unfold c (n-1) t) 80 (n', t') = if n == max_entries 81 then (n, t) 82 else (n+1, insert n (SOME old_n, head outchs) t) 83 84 {- decode_special decodes a code that isn't in the table. 85 - 86 - The algorithm in Welch describes why this works, suffice it to say that 87 - the output string is given by the last character output and the string 88 - given by the previous code. An entry is also made in the table for the 89 - last character output and the old code. 90 -} 91 92 decode_special (c:cs) n old_n fin_char t 93 = outchs ++ do_decode cs n' c (head outchs) t' 94 where 95 outchs = reverse (fin_char : unfold old_n (n-1) t) 96 (n', t') = if n == max_entries 97 then (n, t) 98 else (n+1, insert n (SOME old_n, fin_char) t) 99 100 {- unfold a prefix code. 101 - 102 - chain back through the prefixes outputting the extension characters as we 103 - go. 104 -} 105 106 unfold n t_len t 107 = if prefix == NONE 108 then [c] 109 else c : unfold n' t_len t 110 where 111 (prefix, c) = lookup n t 112 SOME n' = prefix 113 114 data DecompTable = Branch DecompTable DecompTable | Leaf (Optional Int, Char) deriving (Show{-was:Text-}) 115 116 {- Insert a code pair into the table. The position of the code is given by 117 - the breakdown of the key into its binary digits 118 -} 119 120 insert n v t = insert' (dec_to_binx code_bits n) v t 121 122 {- We can place a code exactly where it belongs using the following algorithm. 123 - Take the code's binary rep expanded to the maximum number of bits. Start 124 - at the first bit, if a 0 then insert the code to the left, if a 1 then 125 - insert to the right. Carry on with the other bits until we run out and are 126 - thus at the right place and can construct the node. 127 -} 128 129 insert' [] v (Leaf _) 130 = Leaf v 131 insert' ('0' : bs) v (Branch l r) 132 = Branch (insert' bs v l) r 133 insert' ('1' : bs) v (Branch l r) 134 = Branch l (insert' bs v r) 135 insert' ('0' : bs) v t 136 = Branch (insert' bs v t) t 137 insert' ('1' : bs) v t 138 = Branch t (insert' bs v t) 139 140 {- For a lookup we use the same mechanism to locate the position of the item 141 - in the tree but if we find that the route has not been constructed or the 142 - node has the dummy value then that code is not yet in the tree. The way 143 - in which the decode algorithm works this should never happen. 144 -} 145 146 lookup n t = lookup' (dec_to_binx code_bits n) t 147 148 lookup' [] (Leaf v) 149 = v 150 lookup' ('0' : bs) (Branch l _) 151 = lookup' bs l 152 lookup' ('1' : bs) (Branch _ r) 153 = lookup' bs r 154 lookup' _ _ = error "tree insert error - seek professional help" 155 156 init_table = mk_init_table 0 (Leaf (SOME 99999, '@')) 157 158 mk_init_table 256 t = t 159 mk_init_table n t = mk_init_table (n+1) (insert n (NONE, toEnum n) t) 160