1 
    2 -- ==========================================================--
    3 -- === Parser of Core programs          File: parse.m (1) ===--
    4 -- ==========================================================--
    5 
    6 module Parser2 where
    7 import BaseDefs
    8 import Utils
    9 import MyUtils
   10 import MakeDomains
   11 
   12 import List(nub) -- 1.3
   13 import Char(isAlpha,isDigit) -- 1.3
   14 
   15 -- ====================================--
   16 -- === Lexical analyser             ===--
   17 -- ====================================--
   18 
   19 -- ==========================================================--
   20 --
   21 paLex :: Int -> 
   22          [Char] -> 
   23          [Token]
   24 
   25 paLex n (':':':':'=':cs) 
   26    = (n,"::="):paLex n cs
   27 
   28 paLex n (c1:c2:cs) 
   29      | [c1,c2] `elem` ["==", ">=", "<=", "->", ";;"] = (n, [c1,c2]):paLex n cs
   30 
   31 paLex n ('{':cs) 
   32    = lexcomment n cs
   33      where
   34         lexcomment n [] = paLex n []
   35         lexcomment n ('}':ds) = paLex n ds
   36         lexcomment n ('\n':ds) = lexcomment (n+1) ds
   37         lexcomment n (e:es) = lexcomment n es
   38 
   39 paLex n ('\n':cs) 
   40    = paLex (n+1) cs
   41 
   42 paLex n (c:cs) 
   43    | c `elem` " \t" = paLex n cs
   44 
   45 paLex n (c:cs) 
   46      | isDigit c = (n, num_token): paLex n rest_cs
   47      where
   48         num_token = c:takeWhile isDigit cs
   49         rest_cs = dropWhile isDigit cs
   50 
   51 paLex n (c:cs) 
   52      | isAlpha c = (n, var_tok):paLex n rest_cs
   53      where
   54         var_tok = c:takeWhile isIdChar cs
   55         rest_cs = dropWhile isIdChar cs
   56         isIdChar c = isAlpha c || isDigit c || (c == '_')
   57 
   58 paLex n (c:cs) 
   59    = (n, [c]):paLex n cs
   60 
   61 paLex n [] = [(999999, "$$$")]
   62 
   63 -- ====================================--
   64 -- === Generic parsing functions    ===--
   65 -- ====================================--
   66 
   67 
   68 -- ==========================================================--
   69 --
   70 paFailed (PFail _) = True
   71 paFailed (POk _ _) = False
   72 
   73 paGetItem :: PResult a -> a
   74 paGetItem (POk item _) = item
   75 
   76 paGetRest :: PResult a -> [Token]
   77 paGetRest (POk _ rest) = rest
   78 paGetRest (PFail rest) = rest
   79 
   80 
   81 -- ==========================================================--
   82 --
   83 paLit :: [Char] -> 
   84          Parser [Char]
   85 
   86 paLit lit []                          = PFail []
   87 paLit lit ((n, t):ts)  | lit == t     = POk lit ts
   88                        | otherwise    = PFail ((n, t):ts)
   89 
   90 
   91 -- ==========================================================--
   92 --
   93 paAlts :: [([Char] -> Bool, Parser a)] -> Parser a
   94 
   95 paAlts pps [] = PFail []
   96 
   97 paAlts [] toks = PFail []
   98 paAlts ((pred, par):pps) toks@((n,t):_)
   99    | pred t  = par toks
  100    | otherwise = paAlts pps toks
  101 
  102 
  103 -- ==========================================================--
  104 --
  105 paThen2 :: (a -> b -> c) ->
  106            Parser a ->
  107            Parser b ->
  108            Parser c
  109 
  110 paThen2 combine p1 p2 toks
  111    = let p1parse = p1 toks
  112          p2parse = p2 (paGetRest p1parse)
  113      in  
  114              if paFailed p1parse then PFail (paGetRest p1parse)
  115         else if paFailed p2parse then PFail (paGetRest p2parse)
  116         else POk (combine (paGetItem p1parse) (paGetItem p2parse)) 
  117                  (paGetRest p2parse)
  118 
  119 
  120 -- ==========================================================--
  121 --
  122 paThen3 :: (a -> b -> c -> d) ->
  123            Parser a ->
  124            Parser b ->
  125            Parser c ->
  126            Parser d
  127 
  128 paThen3 combine p1 p2 p3 toks
  129    = let p1parse = p1 toks
  130          p2parse = p2 (paGetRest p1parse)
  131          p3parse = p3 (paGetRest p2parse)
  132      in  
  133              if paFailed p1parse then PFail (paGetRest p1parse)
  134         else if paFailed p2parse then PFail (paGetRest p2parse)
  135         else if paFailed p3parse then PFail (paGetRest p3parse)
  136         else POk (combine (paGetItem p1parse) (paGetItem p2parse) 
  137                           (paGetItem p3parse))
  138                 (paGetRest p3parse)
  139 
  140 
  141 -- ==========================================================--
  142 --
  143 paThen4 :: (a -> b -> c -> d -> e) ->
  144            Parser a ->
  145            Parser b ->
  146            Parser c ->
  147            Parser d ->
  148            Parser e
  149 
  150 paThen4 combine p1 p2 p3 p4 toks
  151    = let p1parse = p1 toks
  152          p2parse = p2 (paGetRest p1parse)
  153          p3parse = p3 (paGetRest p2parse)
  154          p4parse = p4 (paGetRest p3parse)
  155      in  
  156              if paFailed p1parse then PFail (paGetRest p1parse)
  157         else if paFailed p2parse then PFail (paGetRest p2parse)
  158         else if paFailed p3parse then PFail (paGetRest p3parse)
  159         else if paFailed p4parse then PFail (paGetRest p4parse)
  160         else POk (combine (paGetItem p1parse) (paGetItem p2parse) 
  161                          (paGetItem p3parse) (paGetItem p4parse))
  162                 (paGetRest p4parse)
  163 
  164 
  165 -- ==========================================================--
  166 --
  167 paZeroOrMore :: Parser a -> Parser [a]
  168 
  169 paZeroOrMore p toks
  170    = let pParse    = p toks
  171          pUnused   = paGetRest pParse
  172          zmParse   = paZeroOrMore p pUnused
  173          zmUnused  = paGetRest zmParse
  174      in
  175              if paFailed pParse then POk [] toks
  176         else if paFailed zmParse then POk [paGetItem pParse] pUnused
  177         else POk ((paGetItem pParse):paGetItem zmParse) zmUnused
  178 
  179 
  180 -- ==========================================================--
  181 --
  182 paOneOrMore :: Parser a -> Parser [a]
  183 
  184 paOneOrMore p
  185    = paThen2 (:) p (paZeroOrMore p)
  186 
  187 
  188 -- ==========================================================--
  189 --
  190 paOneOrMoreWithSep :: Parser a -> 
  191                       Parser b -> 
  192                       Parser [a]
  193 
  194 paOneOrMoreWithSep p psep toks
  195    = let pParse  = p toks
  196          pRest   = paGetRest pParse
  197          sParse  = psep pRest
  198          sRest   = paGetRest sParse
  199          mParse  = paOneOrMoreWithSep p psep sRest
  200          mRest   = paGetRest mParse
  201      in
  202              if paFailed pParse then PFail toks
  203         else if paFailed sParse then POk [paGetItem pParse] pRest
  204         else if paFailed mParse then POk [paGetItem pParse] pRest
  205         else POk ((paGetItem pParse):paGetItem mParse) mRest
  206 
  207 
  208 -- ==========================================================--
  209 --
  210 paApply :: Parser a -> 
  211            (a -> b) -> 
  212            Parser b
  213 
  214 paApply p f toks
  215    = let pParse = p toks
  216      in  
  217         if      paFailed pParse 
  218         then    PFail (paGetRest pParse)
  219         else    POk (f (paGetItem pParse)) (paGetRest pParse)
  220 
  221 
  222 -- ==========================================================--
  223 --
  224 paSat :: (String -> Bool) ->
  225          Parser String
  226 
  227 paSat pred [] = PFail []
  228 paSat pred ((n,t):toks)
  229    | pred t     = POk t toks
  230    | otherwise  = PFail toks
  231 
  232 
  233 -- ==========================================================--
  234 --
  235 paEmpty :: a -> Parser a
  236 
  237 paEmpty v toks = POk v toks
  238 
  239 
  240 -- ====================================--
  241 -- === Specific parsing functions   ===--
  242 -- ====================================--
  243 
  244 -- ================================================--
  245 paSyntax 
  246    = get_parse . paProgram
  247      where
  248         get_parse (PFail [])
  249            = myFail "Syntax error: Unexpected end of source text"
  250 
  251         get_parse (PFail ((n,t):_))
  252            = myFail ( "Syntax error: unexpected token \"" ++ t ++
  253                     "\" on line " ++ show ( n :: Int ))
  254 
  255         get_parse (POk _ ((n,t):_:_))
  256            = myFail ( "Syntax error: unexpected token \"" ++ t ++
  257                     "\" on line " ++ show ( n :: Int ))
  258 
  259         get_parse (POk prog [(999999, "$$$")]) = prog
  260 
  261 -- ================================================--
  262 paProgram = paThen3 f paTypeDefList (paLit ";;") paScdefs
  263             where f a b c = (a,c)
  264 
  265 -- ================================================--
  266 paName = paSat paIsName
  267 
  268 -- ================================================--
  269 paIsName s = isAlpha (head s) &&  not (s `elem` paKeywords)
  270 
  271 -- ================================================--
  272 paCname = paSat paIsCname
  273 
  274 -- ================================================--
  275 paIsCname s = ('A'<=(head s)) && 
  276               ((head s)<='Z') && 
  277               not (s `elem` paKeywords)
  278 
  279 -- ================================================--
  280 paKeywords = ["let", "letrec", "case", "in", "of", "end"]
  281 
  282 -- ================================================--
  283 paRelops = ["<=", "<", ">=", ">", "==", "~="]
  284 
  285 -- ================================================--
  286 paIsRelop op = op `elem` paRelops
  287 
  288 -- ================================================--
  289 paRelop = paSat paIsRelop
  290 
  291 -- ================================================--
  292 paNum = paSat paIsNum `paApply` paNumval
  293 
  294 -- ================================================--
  295 paNumval :: [Char] -> Int
  296 paNumval cs 
  297    = sum (powers 1 (map (\d -> fromEnum d - 48) (reverse cs)))
  298      where
  299         powers n [] = []
  300         powers n (h:t) = n*h : powers ((10 :: Int) *n) t
  301 
  302 -- ================================================--
  303 paIsNum = isDigit.head
  304 
  305 -- ================================================--
  306 paWithTrailingSemi p = paThen2 const p (paLit ";")
  307 
  308 -- ==================================--
  309 -- === Parsing type definitions   ===--
  310 -- ==================================--
  311 
  312 -- ================================================--
  313 paTypeDefList = paZeroOrMore (paThen2 f paTypeDef (paLit ";"))
  314                 where f a b = a
  315 
  316 -- ================================================--
  317 paTypeDef 
  318    = paThen4 f paName (paZeroOrMore paName) (paLit "::=") paConstrAlts
  319      where f a b c d = (a,b,d)
  320 
  321 -- ================================================--
  322 paConstrAlts = paOneOrMoreWithSep paConstrAlt (paLit "|")
  323 
  324 -- ================================================--
  325 paConstrAlt = paThen2 f paCname (paZeroOrMore paTDefExpr)
  326               where f a b = (a,b)
  327 
  328 -- ================================================--
  329 paTDefExpr
  330     = paAlts [ (  (== "("),   paTDefExpr2  ),
  331                (  paIsName,   paApply paName TDefVar) ]
  332       where
  333          paTDefExpr2 = paThen3 g (paLit "(") paTDefExpr3 (paLit ")")
  334          g a b c = b
  335          paTDefExpr3 = paThen2 h paName (paZeroOrMore paTDefExpr)
  336          h a b = TDefCons a b
  337 
  338 
  339 -- ===========================================--
  340 -- === Parsing supercombinator definitions ===--
  341 -- ===========================================--
  342 
  343 -- ================================================--
  344 paScdefs = paOneOrMore (paWithTrailingSemi paSc)
  345 
  346 -- ================================================--
  347 paSc = paThen4 mk_sc paName (paZeroOrMore paName) (paLit "=") paExpr
  348        where
  349           mk_sc sc args eq rhs = (sc, (args, rhs))
  350 
  351 -- ================================================--
  352 paExpr
  353    = paAlts [  (  (== "let"),  paLet    ),
  354                (  (== "letrec"), paLetrec ),
  355                (  (== "case"), paCase ),
  356                (  (== "\\"),  paLambda  ),
  357                (  (const True),  paExpr1 ) ]
  358 
  359 
  360 -- ================================================--
  361 paLet = paThen4 mk_let
  362               (paLit "let")
  363               paDefns
  364               (paLit "in") paExpr
  365         where
  366         mk_let lett defns inn expr = ELet False defns expr
  367 
  368 
  369 -- ================================================--
  370 paLetrec = paThen4 mk_letrec
  371               (paLit "letrec")
  372               paDefns
  373               (paLit "in") paExpr
  374            where
  375            mk_letrec letrecc defns inn expr = ELet True defns expr
  376 
  377 
  378 -- ================================================--
  379 paDefns = paOneOrMoreWithSep paDefn (paLit ";")
  380 
  381 -- ================================================--
  382 paDefn = paThen3 mk_defn paName (paLit "=") paExpr
  383          where
  384          mk_defn var equals rhs = (var,rhs)
  385 
  386 -- ================================================--
  387 paCase = paThen4 mk_case (paLit "case") paExpr (paLit "of") paAlters
  388          where
  389          mk_case kase e ov alts = ECase e alts
  390 
  391 -- ================================================--
  392 paAlters = paThen2 const (paOneOrMoreWithSep paAlter (paLit ";")) (paLit "end")
  393 
  394 -- ================================================--
  395 paAlter = paThen4 mk_alt paCname (paZeroOrMore paName) (paLit "->") paExpr
  396           where
  397           mk_alt tag args arrow rhs = (tag, (args, rhs))
  398 
  399 -- ================================================--
  400 paLambda = paThen4 mk_lam
  401              (paLit "\\") (paOneOrMore paName) (paLit "->") paExpr
  402            where
  403            mk_lam lam vars dot expr = ELam vars expr
  404 
  405 -- ================================================--
  406 paExpr1 = paThen2 paAssembleOp paExpr2 paExpr1c
  407 
  408 -- ================================================--
  409 paExpr1c = paAlts [((== "|"),   paThen2 FoundOp (paLit "|") paExpr1),
  410                    ((== "#"),   paThen2 FoundOp (paLit "#") paExpr1),
  411                    (const True, paEmpty NoOp)]
  412 
  413 -- ================================================--
  414 paExpr2 = paThen2 paAssembleOp paExpr3 paExpr2c
  415 
  416 -- ================================================--
  417 paExpr2c = paAlts [((== "&"),   paThen2 FoundOp (paLit "&") paExpr2),
  418                    (const True, paEmpty NoOp)]
  419 
  420 -- ================================================--
  421 paExpr3 = paThen2 paAssembleOp paExpr4 paExpr3c
  422 
  423 -- ================================================--
  424 paExpr3c = paAlts [(paIsRelop,  paThen2 FoundOp paRelop paExpr4),
  425                    (const True, paEmpty NoOp)]
  426 
  427 -- ================================================--
  428 paExpr4 = paThen2 paAssembleOp paExpr5 paExpr4c
  429 
  430 -- ================================================--
  431 paExpr4c = paAlts [((== "+"),   paThen2 FoundOp (paLit "+") paExpr4),
  432                    ((== "-"),   paThen2 FoundOp (paLit "-") paExpr5),
  433                    (const True, paEmpty NoOp)]
  434 
  435 -- ================================================--
  436 paExpr5 = paThen2 paAssembleOp paExpr6 paExpr5c
  437 
  438 -- ================================================--
  439 paExpr5c = paAlts [((== "*"),   paThen2 FoundOp (paLit "*") paExpr5),
  440                    ((== "/"),   paThen2 FoundOp (paLit "/") paExpr6),
  441                    (const True, paEmpty NoOp)]
  442 
  443 -- ================================================--
  444 paExpr6 = (paOneOrMore paAtomic) `paApply` mk_ap_chain
  445             where
  446               mk_ap_chain (fn:args) = foldl EAp fn args
  447 
  448 -- ================================================--
  449 paAtomic = paAlts [(paIsCname, paConstr),
  450                    ((== "("), paBracExpr),
  451                    (paIsName, paName `paApply` EVar),
  452                    (paIsNum,  paNum `paApply` ENum)]
  453 
  454 -- ================================================--
  455 paBracExpr = paThen3 mk_brack (paLit "(") paExpr (paLit ")")
  456              where
  457              mk_brack open expr close = expr
  458 
  459 -- ================================================--
  460 paConstr = paApply paCname EConstr
  461 
  462 
  463 -- ================================================--
  464 paAssembleOp e1 NoOp = e1
  465 paAssembleOp e1 (FoundOp op e2) = EAp (EAp (EVar op) e1) e2
  466 
  467 
  468 
  469 -- ===================================================--
  470 -- === Validation & transformation of parsed trees ===--
  471 -- ===================================================--
  472 
  473 -- ==========================================================--
  474 --
  475 paProgramToAtomic :: CoreProgram -> 
  476                      AtomicProgram
  477 
  478 paProgramToAtomic (tds, scdefs) 
  479    = (tds, ce)
  480      where
  481         ce = ELet True
  482                 [(name, ELam ns b) | (name, (ns, b)) <- scdefs]
  483                 (ENum 42)
  484 
  485 -- ==========================================================--
  486 --
  487 paValidTypeDefs :: [TypeDef] ->       -- all type definitions
  488                    TypeDependancy ->  -- type dependancy info
  489                    [Char]             -- wordy description of any problems
  490 
  491 paValidTypeDefs tds rda
  492    = if  not uniqueTNames    then  "Non-unique type names"  else
  493      if  not uniqueParNames  then "Non-unique parameter names" else
  494      if  not uniqueCNames    then "Non-unique constructor names" else
  495      if  not balanced   then "Declared parameters do not match used parameters" else
  496      if  not allDefined then "Undefined types are present" else
  497      if  not rightArity then "Types are used at wrong arities" else
  498      if  not allSimple  then  "Perverse type definitions are present"
  499                         else ""
  500      where
  501         arityMap = map f tds
  502                    where
  503                        f (tname, tvs, cal) = (tname, length tvs)
  504         allTNames = map f tds
  505                     where
  506                        f (tname, tvs, cal) = tname
  507         allCNames = concat (map f tds)
  508                     where
  509                        f (tname, tvs, cal) = map first cal
  510         uniqueTNames = length allTNames == ((length.nub) allTNames)
  511         uniqueParNames = and (map f tds)
  512                          where
  513                             f (tname, tvs, cal) = length tvs == ((length.nub) tvs)
  514         uniqueCNames = length allCNames == ((length.nub) allCNames)
  515         balanced = and (map isBalanced tds)
  516                    where
  517                       tvsIn (TDefVar n) = [n]
  518                       tvsIn (TDefCons n tel) = concat (map tvsIn tel)
  519                       g tDefExprList = concat (map tvsIn tDefExprList)
  520                       isBalanced (tname, tvs, cal) 
  521                          = (utSetFromList tvs) == 
  522                            (utSetFromList (concat (map (g.second) cal)))
  523         allDefined = utSetSubsetOf
  524                         (utSetFromList (concat (map mdFreeTVarsIn tds)))
  525                         (utSetFromList allTNames)
  526         rightArity = and (map f tds)
  527                      where
  528                         f (tname, tvs, cal) = and (map (g.second) cal)
  529                         g tDefExprList = and (map rArity tDefExprList)
  530                         rArity (TDefVar v) = True
  531                         rArity (TDefCons n tel) 
  532                            = (length tel == utSureLookup arityMap "paVTD`rA`rA" n) && 
  533                              (and (map rArity tel))
  534         allSimple = and (map f tds)
  535                     where
  536                        f (tname, tvs, cal) = 
  537                           utSetSubsetOf (utSetFromList (allVars cal))
  538                                         (utSetFromList (tvs++(groupOf tname rda)))
  539                        allVars cal = concat (map g cal)
  540                        g (n, tel) = concat (map allTVs tel)
  541                        allTVs (TDefVar n) = [n]
  542                        allTVs (TDefCons n tel) = n:concat (map allTVs tel)
  543                        groupOf tname ((rf, group):rest) 
  544                            | tname `elem` group &&  rf    = group
  545                            | tname `elem` group && not rf = []                
  546                            | otherwise                    = groupOf tname rest
  547 
  548             
  549 -- ==========================================================--
  550 --
  551 paParse :: [Char] -> (TypeDependancy, AtomicProgram)
  552 
  553 paParse fileContents
  554    = if typeDefErrors == "" 
  555         then (dependResult, (typeDefs, mainExpr)) 
  556         else myFail typeDefErrors
  557      where
  558         (typeDefs, mainExpr) = paProgramToAtomic parsedProgram
  559         dependResult = mdTypeDependancy typeDefs
  560         typeDefErrors = paValidTypeDefs typeDefs dependResult
  561         tokens = paLex 1 fileContents
  562         parsedProgram = paSyntax tokens
  563 
  564 
  565 -- ==========================================================--
  566 -- === End                                    parse.m (1) ===--
  567 -- ==========================================================--