1 module Lexer where 2 -- Copyright 1994 by Peter Thiemann 3 import Char -- 1.3 4 5 ------------------------------------------------------------------------------ 6 --NOW the lexer 7 ------------------------------------------------------------------------------ 8 9 data Token 10 = Ident String | Symbol String | String String 11 12 instance Show Token where 13 showsPrec n (Ident s) = showChar '[' . showString s . showString "] " 14 showsPrec n (Symbol s) = showChar '<' . showString s . showString "> " 15 showsPrec n (String s) = showChar '"' . showString s . showString "\" " 16 showList [] = id 17 showList (x:xs) = shows x . showList xs 18 19 isIdChar c = isAlpha c || isDigit c || c == '_' 20 21 theSymbols = "!@#$%^&*+./<=>?\\|:" 22 isSymbolChar c = c `elem` theSymbols 23 24 lexer :: String -> [Token] 25 lexer "" = [] 26 lexer ('"':cs) = String (stchars): lexer srest 27 where (stchars, srest) = lexString cs 28 lexer ('\'':cs) = String (oneChar): lexer srest 29 where (oneChar, srest) = lexChar cs 30 lexer (c:cs) | isSpace c = lexer cs 31 | isAlpha c = Ident (c:idchars): lexer irest 32 | isSymbolChar c = Symbol(c:sychars): lexer srest 33 | otherwise = Symbol([c]): lexer cs 34 where (idchars, irest) = span isIdChar cs 35 (sychars, srest) = span isSymbolChar cs 36 37 -- preprocessor for EBNF style comments 38 uncomment :: String -> String 39 uncomment "" = "" 40 uncomment ('#':cs) = uncomment (dropWhile (/= '\n') cs) 41 uncomment ('"':cs) = '"':uncommentString cs 42 uncomment ('\'':cs) = '\'':uncommentChar cs 43 uncomment (c:cs) = c:uncomment cs 44 45 uncommentString "" = "" 46 uncommentString ('\\':c:cs) = '\\':c:uncommentString cs 47 uncommentString ('"':cs) = '"':uncomment cs 48 uncommentString (c:cs) = c:uncommentString cs 49 50 uncommentChar "" = "" 51 uncommentChar ('\\':c:cs) = '\\':c:uncommentChar cs 52 uncommentChar ('\'':cs) = '"':uncomment cs 53 uncommentChar (c:cs) = c:uncommentChar cs 54 55 -- generic lexers 56 lexChar ('\\':c:'\'':cs) = ([c], cs) 57 lexChar (c:'\'':cs) = ([c], cs) 58 lexChar cs = ([], cs) 59 60 lexString ('\\':c:cs) = (c:stchars, srest) where (stchars, srest) = lexString cs 61 lexString ('"':cs) = ("", cs) 62 lexString ("") = ("","") 63 lexString (c:cs) = (c:stchars, srest) where (stchars, srest) = lexString cs 64 65 isIdent (Ident _ ) = True 66 isIdent _ = False 67 68 getIdent (Ident s) = s 69 70 isString (String _) = True 71 isString _ = False 72 73 getString (String s) = s 74