1 {-
    2  - Encode Mk 2, using a prefix table for the codes
    3  - 
    4  - Paul Sanders, Systems Research, British Telecom Laboratories 1992
    5  -}
    6 
    7 module Encode (encode) where
    8 
    9 import Defaults
   10 import PTTrees
   11 
   12 -- for convenience we make the code table type explicit
   13 
   14 type CodeTable = PrefixTree Char Int
   15 
   16 -- encode sets up the arguments for the real function.
   17 
   18 encode :: String -> [Int]
   19 encode input = encode' input first_code initial_table
   20 
   21 {-
   22  - encode' loops through the input string assembling the codes produced
   23  - by code_string.  The first character is treated specially in that it
   24  - is not added to the table; its code is simply its ascii value.
   25  -}
   26 
   27 encode' [] _ _ 
   28   = []
   29 encode' input v t
   30   = case (code_string input 0 v t) of { (input', n, t') ->
   31       n : encode' input' (v + 1) t'
   32     }
   33 
   34 {-
   35  - code_string parses enough of the input string to produce one code and
   36  - returns the remaining input, the code and a new code table.
   37  -
   38  - The first character is taken and its place found in the code table. The
   39  - extension code table found for this character is then used as the lookup
   40  - table for the next character.
   41  -
   42  - If a character is not found in the current table then output the code
   43  - of the character associated with the current table and add the current
   44  - character to the current table and assign it the next new code value.
   45  -}
   46 
   47 code_string input@(c : input2) old_code next_code (PT p@(PTE k v t) l r)
   48    | c < k = (f1 r1 p r)
   49    | c > k = (f2 r2 p l)
   50    | otherwise = (f3 r3 k v l r)
   51  where
   52    r1 = code_string input old_code next_code l
   53    r2 = code_string input old_code next_code r
   54    r3 = code_string input2 v next_code t
   55 
   56    f1 (input_l,nl,l2) p r   = (input_l,nl,PT p l2 r)
   57    f2 (input_r,nr,r2) p l   = (input_r,nr,PT p l r2)
   58    f3 (input2,n,t2) k v l r = (input2, n, PT (PTE k v t2) l r)
   59 
   60 code_string input@(c : input_file2) old_code next_code PTNil
   61   | next_code >= 4096 = (input, old_code, PTNil)
   62   | otherwise = (input, old_code, PT (PTE c next_code PTNil) PTNil PTNil)
   63 
   64 code_string [] old_code next_code code_table
   65   = ([], old_code, PTNil)
   66 
   67 {-
   68  - We want the inital table to be balanced, but this is expensive to compute
   69  - as a rebalance is needed evert two inserts (yuk!). So we do the ordinary
   70  - infix-order binary tree insert but give the keys in such an order as to
   71  - give a balanced tree.
   72  -
   73  - (I would have defined the tree by hand but the constant was too big
   74  -  for hc-0.41)
   75  -}
   76 
   77 initial_table :: CodeTable
   78 initial_table = foldr tab_insert PTNil balanced_list
   79 
   80 tab_insert n = insert (toEnum n) n
   81 
   82 balanced_list
   83     = [128,64,32,16,8,4,2,1,0,3,6,5,7,12,10,9,11,14,13,15,24,20,18,17,19,22,
   84        21,23,28,26,25,27,30,29,31,48,40,36,34,33,35,38,37,39,44,42,41,43,46,
   85        45,47,56,52,50,49,51,54,53,55,60,58,57,59,62,61,63,96,80,72,68,66,65]
   86       ++ bal_list2 ++ bal_list3 ++ bal_list4 ++ bal_list5
   87 
   88 bal_list2
   89     = [67,70,69,71,76,74,73,75,78,77,79,88,84,82,81,83,86,85,87,92,90,89,91,
   90        94,93,95,112,104,100,98,97,99,102,101,103,108,106,105,107,110,109,111,
   91        120,116,114,113,115,118,117,119,124,122,121,123,126,125,127,192,160]
   92 
   93 bal_list3
   94     = [144,136,132,130,129,131,134,133,135,140,138,137,139,142,141,143,152,
   95        148,146,145,147,150,149,151,156,154,153,155,158,157,159,176,168,164,
   96        162,161,163,166,165,167,172,170,169,171,174,173,175,184,180,178,177]
   97 
   98 bal_list4
   99     = [179,182,181,183,188,186,185,187,190,189,191,224,208,200,196,194,193,
  100        195,198,197,199,204,202,201,203,206,205,207,216,212,210,209,211,214,
  101        213,215,220,218,217,219,222,221,223,240,232,228,226,225,227,230,229,
  102        231,236,234,233,235,238,237,239,248,244,242,241,243,246,245,247,252]
  103 bal_list5
  104     = [250,249,251,254,253,255]