1 module Token(tokenise) where 2 3 import Core_datatype 4 5 import Kernel 6 7 import Type_defs 8 9 10 11 endofline = "\n\f\r" 12 13 whitespace = " \t" ++ endofline 14 15 a_symbol = "!#$&*-+=~<>?/\\.:|@" 16 17 singlechrs = ",;\"`\'(){}[]\176\185" 18 19 binder = "\208\211\236\177\178\229\196" 20 21 ifx_binder = "\183\184\167\182\187" 22 23 ifx_op = "\179\180=\172:" 24 25 26 reserved = [ "empty", "signature", "extend", "combine", "with", "sharing" , 27 "++", "=", "(", ")", "@", "operator", ";", ":", "\189", 28 "datatype", "|", "case", "of", "\167", "recurse", "end", 29 "typed", "fn", "rec", "if", "then", "else", "let", "in", 30 "\168", "\169", "{", "}", "[", "]", "\208", "\211", "\236", 31 "\229", "\177", "\178", ".", ",", "\181", "\179", "\180", 32 "\182", "\172", "\183", "\184", "\185", "\176", "BinL", 33 "BinR", "Pre", "Post", "\196", "\187" ] 34 35 36 37 38 39 tokenise :: String -> [Token] 40 41 tokenise string 42 = denull ( scan' string "" ) 43 44 45 46 denull :: [Token] -> [Token] 47 48 denull ( Clr "" : tkl ) = denull tkl 49 50 denull ( Clr tk : tkl ) 51 = tag rev_tk : denull tkl 52 where 53 tag | rev_tk `elem` reserved = Rvd 54 | otherwise = Clr 55 rev_tk = reverse tk 56 57 denull ( Rvd tk : tkl ) = error " optimise to trap some reserved earlier and reduce length of reserved set" 58 59 denull ( Scan_Err mess : _ ) = [ Scan_Err mess ] 60 61 denull ( tk : tkl ) = tk : denull tkl 62 63 denull [] = [] 64 65 denull x = error ( show x ) 66 67 68 69 70 71 scan' ( a : x ) current 72 | a `elem` binder = Clr current : ( Bdr ( mk_bdr a ) : scan' x "" ) 73 | a `elem` ifx_binder = Clr current : ( IfxBdr [a] : scan' x "" ) 74 | a `elem` ifx_op = Clr current : ( IfxOp [a] : scan' x "" ) 75 | single a = Clr current : ( Clr [a] : scan' x "" ) 76 | a `elem` a_symbol = Clr current : symbol_scan x [a] 77 | a `elem` whitespace = Clr current : scan' x "" 78 | alphanum' a = scan' x ( a : current ) 79 | a == '%' = Clr current : scan' ( discard x ) "" 80 | a == '^' = super_scan x ( '^' : current ) 81 | otherwise = [ Scan_Err "\nInvalid character\n" ] 82 83 scan' [] current = [ Clr current ] 84 85 86 87 88 symbol_scan :: String -> String -> [Token] 89 90 symbol_scan ( a : x ) current 91 | a `elem` a_symbol = symbol_scan x ( a : current ) 92 | a == '^' = super_scan x ( a : current ) 93 | otherwise = Clr current : scan' ( a : x ) "" 94 95 symbol_scan [] current = [ Clr current ] 96 97 98 99 super_scan _ "^" = [ Scan_Err "\nAttempt to superscript invalid string\n"] 100 101 super_scan ( a : x ) current 102 | alphanum a || a `elem` a_symbol 103 = Clr ( a : current ) : scan' x "" 104 | otherwise = [ Scan_Err "\nInvalid superscript character\n" ] 105 106 super_scan [] _ = [ Scan_Err "\nEmpty superscript\n" ] 107 108 109 110 111 112 113 114 115 alphanum' ch = alphanum ch || ch == '_' 116 117 alphanum ch 118 = ( ch >= 'A' && ch <= 'Z' ) || 119 ( ch >= 'a' && ch <= 'z' ) || 120 ( ch >= '0' && ch <= '9' ) 121 122 123 124 single ch 125 = ch `elem` singlechrs || ch > '\DEL' 126 127 discard :: String -> String 128 129 discard ( ch : x ) 130 | ch `elem` endofline = x 131 | otherwise = discard x 132 133 discard [] = "" 134 135 136 mk_bdr :: Char -> Binder_conn 137 138 {- 139 mk_bdr x | fromEnum x == 208 = Pi 140 141 mk_bdr x | fromEnum x == 211 = Sigma 142 143 mk_bdr x | fromEnum x == 236 = Lambda 144 145 mk_bdr x | fromEnum x == 177 = Forall 146 147 mk_bdr x | fromEnum x == 178 = Exists 148 149 mk_bdr x | fromEnum x == 229 = Choose 150 -} 151 152 mk_bdr '\208' = Pi 153 154 mk_bdr '\211' = Sigma 155 156 mk_bdr '\236' = Lambda 157 158 mk_bdr '\177' = Forall 159 160 mk_bdr '\178' = Exists 161 162 mk_bdr '\229' = Choose 163 164 mk_bdr '\196' = Delta 165 166 mk_bdr oth = error ( "Mk_bdr: " ++ [oth]) 167 168 169 170 171 172 disp_tk :: Token -> String 173 174 disp_tk ( Rvd str ) = str 175 176 disp_tk ( Clr str ) = str