1 module Lexer where
    2 -- Copyright 1994 by Peter Thiemann
    3 import Char -- 1.3
    4 
    5 ------------------------------------------------------------------------------
    6 --NOW the lexer
    7 ------------------------------------------------------------------------------
    8 
    9 data Token
   10         = Ident String | Symbol String | String String
   11 
   12 instance Show Token where
   13   showsPrec n (Ident s)  = showChar '[' . showString s . showString "] "
   14   showsPrec n (Symbol s) = showChar '<' . showString s . showString "> "
   15   showsPrec n (String s) = showChar '"' . showString s . showString "\" "
   16   showList [] = id
   17   showList (x:xs) = shows x . showList xs
   18 
   19 isIdChar c = isAlpha c || isDigit c || c == '_'
   20 
   21 theSymbols = "!@#$%^&*+./<=>?\\|:"
   22 isSymbolChar c = c `elem` theSymbols
   23 
   24 lexer :: String -> [Token]
   25 lexer "" = []
   26 lexer ('"':cs) = String (stchars): lexer srest
   27         where (stchars, srest) = lexString cs
   28 lexer ('\'':cs) = String (oneChar): lexer srest
   29         where (oneChar, srest) = lexChar cs
   30 lexer (c:cs) | isSpace c = lexer cs
   31            | isAlpha c = Ident (c:idchars): lexer irest
   32            | isSymbolChar c = Symbol(c:sychars): lexer srest
   33            | otherwise = Symbol([c]): lexer cs
   34         where (idchars, irest) = span isIdChar cs
   35               (sychars, srest) = span isSymbolChar cs
   36 
   37 -- preprocessor for EBNF style comments
   38 uncomment :: String -> String
   39 uncomment ""        = ""
   40 uncomment ('#':cs)  = uncomment (dropWhile (/= '\n') cs)
   41 uncomment ('"':cs)  = '"':uncommentString cs
   42 uncomment ('\'':cs) = '\'':uncommentChar cs
   43 uncomment (c:cs)    = c:uncomment cs
   44 
   45 uncommentString "" = ""
   46 uncommentString ('\\':c:cs) = '\\':c:uncommentString cs
   47 uncommentString ('"':cs) = '"':uncomment cs
   48 uncommentString (c:cs) = c:uncommentString cs
   49 
   50 uncommentChar "" = ""
   51 uncommentChar ('\\':c:cs) = '\\':c:uncommentChar cs
   52 uncommentChar ('\'':cs) = '"':uncomment cs
   53 uncommentChar (c:cs) = c:uncommentChar cs
   54 
   55 -- generic lexers
   56 lexChar ('\\':c:'\'':cs) = ([c], cs)
   57 lexChar (c:'\'':cs) = ([c], cs)
   58 lexChar cs = ([], cs)
   59 
   60 lexString ('\\':c:cs) = (c:stchars, srest) where (stchars, srest) = lexString cs
   61 lexString ('"':cs) = ("", cs)
   62 lexString ("") = ("","")
   63 lexString (c:cs) = (c:stchars, srest) where (stchars, srest) = lexString cs
   64 
   65 isIdent (Ident _ ) = True
   66 isIdent _ = False
   67 
   68 getIdent (Ident s) = s
   69 
   70 isString (String _) = True
   71 isString _ = False
   72 
   73 getString (String s) = s
   74