1   module Token(tokenise) where
    2 
    3   import Core_datatype
    4 
    5   import Kernel
    6 
    7   import Type_defs
    8 
    9 
   10 
   11   endofline  = "\n\f\r"
   12 
   13   whitespace = " \t" ++ endofline
   14 
   15   a_symbol = "!#$&*-+=~<>?/\\.:|@"
   16 
   17   singlechrs = ",;\"`\'(){}[]\176\185"
   18 
   19   binder = "\208\211\236\177\178\229\196"
   20 
   21   ifx_binder = "\183\184\167\182\187"
   22 
   23   ifx_op = "\179\180=\172:"
   24 
   25 
   26   reserved = [ "empty", "signature", "extend", "combine", "with", "sharing" ,
   27                "++", "=", "(", ")", "@", "operator", ";", ":", "\189", 
   28                "datatype", "|", "case", "of", "\167", "recurse", "end", 
   29                "typed", "fn", "rec", "if", "then", "else", "let", "in", 
   30                "\168", "\169", "{", "}", "[", "]", "\208", "\211", "\236", 
   31                "\229", "\177", "\178", ".", ",", "\181", "\179", "\180", 
   32                "\182", "\172", "\183", "\184", "\185", "\176", "BinL", 
   33                "BinR", "Pre", "Post", "\196", "\187" ]
   34 
   35 
   36 
   37 
   38 
   39   tokenise :: String -> [Token]
   40 
   41   tokenise string
   42         = denull ( scan' string "" )
   43 
   44 
   45 
   46   denull :: [Token] -> [Token]
   47 
   48   denull ( Clr "" : tkl ) = denull tkl
   49 
   50   denull ( Clr tk : tkl ) 
   51         = tag rev_tk : denull tkl
   52           where
   53           tag | rev_tk `elem` reserved = Rvd
   54               | otherwise            = Clr 
   55           rev_tk = reverse tk
   56 
   57   denull ( Rvd tk : tkl ) = error " optimise to trap some reserved earlier and reduce length of reserved set"
   58 
   59   denull ( Scan_Err mess : _ ) = [ Scan_Err mess ]
   60 
   61   denull ( tk : tkl ) = tk : denull tkl
   62 
   63   denull [] = []
   64 
   65   denull x = error ( show x )
   66 
   67 
   68 
   69 
   70 
   71   scan' ( a : x ) current 
   72         | a `elem` binder     = Clr current : ( Bdr ( mk_bdr a ) : scan' x "" )
   73         | a `elem` ifx_binder = Clr current : ( IfxBdr [a] : scan' x "" )
   74         | a `elem` ifx_op     = Clr current : ( IfxOp [a] : scan' x "" )
   75         | single a          = Clr current : ( Clr [a] : scan' x "" )
   76         | a `elem` a_symbol     = Clr current : symbol_scan x [a]
   77         | a `elem` whitespace = Clr current : scan' x "" 
   78         | alphanum' a       = scan' x ( a : current )
   79         | a == '%'          = Clr current : scan' ( discard x ) ""
   80         | a == '^'          = super_scan x ( '^' : current ) 
   81         | otherwise         = [ Scan_Err "\nInvalid character\n" ]
   82 
   83   scan' [] current = [ Clr current ]
   84 
   85 
   86 
   87 
   88   symbol_scan :: String -> String -> [Token]
   89 
   90   symbol_scan ( a : x ) current
   91         | a `elem` a_symbol = symbol_scan x ( a : current )
   92         | a == '^'      = super_scan x ( a : current ) 
   93         | otherwise     = Clr current : scan' ( a : x ) ""
   94 
   95   symbol_scan [] current = [ Clr current ]
   96 
   97 
   98 
   99   super_scan _ "^" = [ Scan_Err "\nAttempt to superscript invalid string\n"]
  100 
  101   super_scan ( a : x ) current
  102         | alphanum a || a `elem` a_symbol 
  103                         = Clr ( a : current ) : scan' x ""
  104         | otherwise     = [ Scan_Err "\nInvalid superscript character\n" ]
  105 
  106   super_scan [] _ = [ Scan_Err "\nEmpty superscript\n" ]
  107 
  108 
  109 
  110 
  111 
  112 
  113 
  114 
  115   alphanum' ch = alphanum ch || ch == '_'
  116 
  117   alphanum ch
  118         = ( ch >= 'A' && ch <= 'Z' ) ||
  119           ( ch >= 'a' && ch <= 'z' ) ||
  120           ( ch >= '0' && ch <= '9' )
  121 
  122 
  123 
  124   single ch
  125         = ch `elem` singlechrs || ch > '\DEL'
  126 
  127   discard :: String -> String
  128 
  129   discard ( ch : x ) 
  130         | ch `elem` endofline = x
  131         | otherwise         = discard x
  132 
  133   discard [] = ""
  134 
  135 
  136   mk_bdr :: Char -> Binder_conn
  137 
  138  {-
  139   mk_bdr x | fromEnum x == 208 = Pi
  140 
  141   mk_bdr x | fromEnum x == 211 = Sigma
  142 
  143   mk_bdr x | fromEnum x == 236 = Lambda
  144 
  145   mk_bdr x | fromEnum x == 177 = Forall
  146 
  147   mk_bdr x | fromEnum x == 178 = Exists
  148 
  149   mk_bdr x | fromEnum x == 229 = Choose
  150  -}
  151 
  152   mk_bdr '\208' = Pi
  153 
  154   mk_bdr '\211' = Sigma
  155 
  156   mk_bdr '\236' = Lambda
  157 
  158   mk_bdr '\177' = Forall
  159 
  160   mk_bdr '\178' = Exists
  161 
  162   mk_bdr '\229' = Choose
  163 
  164   mk_bdr '\196' = Delta
  165 
  166   mk_bdr oth = error ( "Mk_bdr: " ++ [oth])
  167 
  168 
  169 
  170 
  171 
  172   disp_tk :: Token -> String
  173 
  174   disp_tk ( Rvd str ) = str
  175 
  176   disp_tk ( Clr str ) = str