1 2 -- ==========================================================-- 3 -- === Parser of Core programs File: parse.m (1) ===-- 4 -- ==========================================================-- 5 6 module Parser2 where 7 import BaseDefs 8 import Utils 9 import MyUtils 10 import MakeDomains 11 12 import List(nub) -- 1.3 13 import Char(isAlpha,isDigit) -- 1.3 14 15 -- ====================================-- 16 -- === Lexical analyser ===-- 17 -- ====================================-- 18 19 -- ==========================================================-- 20 -- 21 paLex :: Int -> 22 [Char] -> 23 [Token] 24 25 paLex n (':':':':'=':cs) 26 = (n,"::="):paLex n cs 27 28 paLex n (c1:c2:cs) 29 | [c1,c2] `elem` ["==", ">=", "<=", "->", ";;"] = (n, [c1,c2]):paLex n cs 30 31 paLex n ('{':cs) 32 = lexcomment n cs 33 where 34 lexcomment n [] = paLex n [] 35 lexcomment n ('}':ds) = paLex n ds 36 lexcomment n ('\n':ds) = lexcomment (n+1) ds 37 lexcomment n (e:es) = lexcomment n es 38 39 paLex n ('\n':cs) 40 = paLex (n+1) cs 41 42 paLex n (c:cs) 43 | c `elem` " \t" = paLex n cs 44 45 paLex n (c:cs) 46 | isDigit c = (n, num_token): paLex n rest_cs 47 where 48 num_token = c:takeWhile isDigit cs 49 rest_cs = dropWhile isDigit cs 50 51 paLex n (c:cs) 52 | isAlpha c = (n, var_tok):paLex n rest_cs 53 where 54 var_tok = c:takeWhile isIdChar cs 55 rest_cs = dropWhile isIdChar cs 56 isIdChar c = isAlpha c || isDigit c || (c == '_') 57 58 paLex n (c:cs) 59 = (n, [c]):paLex n cs 60 61 paLex n [] = [(999999, "$$$")] 62 63 -- ====================================-- 64 -- === Generic parsing functions ===-- 65 -- ====================================-- 66 67 68 -- ==========================================================-- 69 -- 70 paFailed (PFail _) = True 71 paFailed (POk _ _) = False 72 73 paGetItem :: PResult a -> a 74 paGetItem (POk item _) = item 75 76 paGetRest :: PResult a -> [Token] 77 paGetRest (POk _ rest) = rest 78 paGetRest (PFail rest) = rest 79 80 81 -- ==========================================================-- 82 -- 83 paLit :: [Char] -> 84 Parser [Char] 85 86 paLit lit [] = PFail [] 87 paLit lit ((n, t):ts) | lit == t = POk lit ts 88 | otherwise = PFail ((n, t):ts) 89 90 91 -- ==========================================================-- 92 -- 93 paAlts :: [([Char] -> Bool, Parser a)] -> Parser a 94 95 paAlts pps [] = PFail [] 96 97 paAlts [] toks = PFail [] 98 paAlts ((pred, par):pps) toks@((n,t):_) 99 | pred t = par toks 100 | otherwise = paAlts pps toks 101 102 103 -- ==========================================================-- 104 -- 105 paThen2 :: (a -> b -> c) -> 106 Parser a -> 107 Parser b -> 108 Parser c 109 110 paThen2 combine p1 p2 toks 111 = let p1parse = p1 toks 112 p2parse = p2 (paGetRest p1parse) 113 in 114 if paFailed p1parse then PFail (paGetRest p1parse) 115 else if paFailed p2parse then PFail (paGetRest p2parse) 116 else POk (combine (paGetItem p1parse) (paGetItem p2parse)) 117 (paGetRest p2parse) 118 119 120 -- ==========================================================-- 121 -- 122 paThen3 :: (a -> b -> c -> d) -> 123 Parser a -> 124 Parser b -> 125 Parser c -> 126 Parser d 127 128 paThen3 combine p1 p2 p3 toks 129 = let p1parse = p1 toks 130 p2parse = p2 (paGetRest p1parse) 131 p3parse = p3 (paGetRest p2parse) 132 in 133 if paFailed p1parse then PFail (paGetRest p1parse) 134 else if paFailed p2parse then PFail (paGetRest p2parse) 135 else if paFailed p3parse then PFail (paGetRest p3parse) 136 else POk (combine (paGetItem p1parse) (paGetItem p2parse) 137 (paGetItem p3parse)) 138 (paGetRest p3parse) 139 140 141 -- ==========================================================-- 142 -- 143 paThen4 :: (a -> b -> c -> d -> e) -> 144 Parser a -> 145 Parser b -> 146 Parser c -> 147 Parser d -> 148 Parser e 149 150 paThen4 combine p1 p2 p3 p4 toks 151 = let p1parse = p1 toks 152 p2parse = p2 (paGetRest p1parse) 153 p3parse = p3 (paGetRest p2parse) 154 p4parse = p4 (paGetRest p3parse) 155 in 156 if paFailed p1parse then PFail (paGetRest p1parse) 157 else if paFailed p2parse then PFail (paGetRest p2parse) 158 else if paFailed p3parse then PFail (paGetRest p3parse) 159 else if paFailed p4parse then PFail (paGetRest p4parse) 160 else POk (combine (paGetItem p1parse) (paGetItem p2parse) 161 (paGetItem p3parse) (paGetItem p4parse)) 162 (paGetRest p4parse) 163 164 165 -- ==========================================================-- 166 -- 167 paZeroOrMore :: Parser a -> Parser [a] 168 169 paZeroOrMore p toks 170 = let pParse = p toks 171 pUnused = paGetRest pParse 172 zmParse = paZeroOrMore p pUnused 173 zmUnused = paGetRest zmParse 174 in 175 if paFailed pParse then POk [] toks 176 else if paFailed zmParse then POk [paGetItem pParse] pUnused 177 else POk ((paGetItem pParse):paGetItem zmParse) zmUnused 178 179 180 -- ==========================================================-- 181 -- 182 paOneOrMore :: Parser a -> Parser [a] 183 184 paOneOrMore p 185 = paThen2 (:) p (paZeroOrMore p) 186 187 188 -- ==========================================================-- 189 -- 190 paOneOrMoreWithSep :: Parser a -> 191 Parser b -> 192 Parser [a] 193 194 paOneOrMoreWithSep p psep toks 195 = let pParse = p toks 196 pRest = paGetRest pParse 197 sParse = psep pRest 198 sRest = paGetRest sParse 199 mParse = paOneOrMoreWithSep p psep sRest 200 mRest = paGetRest mParse 201 in 202 if paFailed pParse then PFail toks 203 else if paFailed sParse then POk [paGetItem pParse] pRest 204 else if paFailed mParse then POk [paGetItem pParse] pRest 205 else POk ((paGetItem pParse):paGetItem mParse) mRest 206 207 208 -- ==========================================================-- 209 -- 210 paApply :: Parser a -> 211 (a -> b) -> 212 Parser b 213 214 paApply p f toks 215 = let pParse = p toks 216 in 217 if paFailed pParse 218 then PFail (paGetRest pParse) 219 else POk (f (paGetItem pParse)) (paGetRest pParse) 220 221 222 -- ==========================================================-- 223 -- 224 paSat :: (String -> Bool) -> 225 Parser String 226 227 paSat pred [] = PFail [] 228 paSat pred ((n,t):toks) 229 | pred t = POk t toks 230 | otherwise = PFail toks 231 232 233 -- ==========================================================-- 234 -- 235 paEmpty :: a -> Parser a 236 237 paEmpty v toks = POk v toks 238 239 240 -- ====================================-- 241 -- === Specific parsing functions ===-- 242 -- ====================================-- 243 244 -- ================================================-- 245 paSyntax 246 = get_parse . paProgram 247 where 248 get_parse (PFail []) 249 = myFail "Syntax error: Unexpected end of source text" 250 251 get_parse (PFail ((n,t):_)) 252 = myFail ( "Syntax error: unexpected token \"" ++ t ++ 253 "\" on line " ++ show ( n :: Int )) 254 255 get_parse (POk _ ((n,t):_:_)) 256 = myFail ( "Syntax error: unexpected token \"" ++ t ++ 257 "\" on line " ++ show ( n :: Int )) 258 259 get_parse (POk prog [(999999, "$$$")]) = prog 260 261 -- ================================================-- 262 paProgram = paThen3 f paTypeDefList (paLit ";;") paScdefs 263 where f a b c = (a,c) 264 265 -- ================================================-- 266 paName = paSat paIsName 267 268 -- ================================================-- 269 paIsName s = isAlpha (head s) && not (s `elem` paKeywords) 270 271 -- ================================================-- 272 paCname = paSat paIsCname 273 274 -- ================================================-- 275 paIsCname s = ('A'<=(head s)) && 276 ((head s)<='Z') && 277 not (s `elem` paKeywords) 278 279 -- ================================================-- 280 paKeywords = ["let", "letrec", "case", "in", "of", "end"] 281 282 -- ================================================-- 283 paRelops = ["<=", "<", ">=", ">", "==", "~="] 284 285 -- ================================================-- 286 paIsRelop op = op `elem` paRelops 287 288 -- ================================================-- 289 paRelop = paSat paIsRelop 290 291 -- ================================================-- 292 paNum = paSat paIsNum `paApply` paNumval 293 294 -- ================================================-- 295 paNumval :: [Char] -> Int 296 paNumval cs 297 = sum (powers 1 (map (\d -> fromEnum d - 48) (reverse cs))) 298 where 299 powers n [] = [] 300 powers n (h:t) = n*h : powers ((10 :: Int) *n) t 301 302 -- ================================================-- 303 paIsNum = isDigit.head 304 305 -- ================================================-- 306 paWithTrailingSemi p = paThen2 const p (paLit ";") 307 308 -- ==================================-- 309 -- === Parsing type definitions ===-- 310 -- ==================================-- 311 312 -- ================================================-- 313 paTypeDefList = paZeroOrMore (paThen2 f paTypeDef (paLit ";")) 314 where f a b = a 315 316 -- ================================================-- 317 paTypeDef 318 = paThen4 f paName (paZeroOrMore paName) (paLit "::=") paConstrAlts 319 where f a b c d = (a,b,d) 320 321 -- ================================================-- 322 paConstrAlts = paOneOrMoreWithSep paConstrAlt (paLit "|") 323 324 -- ================================================-- 325 paConstrAlt = paThen2 f paCname (paZeroOrMore paTDefExpr) 326 where f a b = (a,b) 327 328 -- ================================================-- 329 paTDefExpr 330 = paAlts [ ( (== "("), paTDefExpr2 ), 331 ( paIsName, paApply paName TDefVar) ] 332 where 333 paTDefExpr2 = paThen3 g (paLit "(") paTDefExpr3 (paLit ")") 334 g a b c = b 335 paTDefExpr3 = paThen2 h paName (paZeroOrMore paTDefExpr) 336 h a b = TDefCons a b 337 338 339 -- ===========================================-- 340 -- === Parsing supercombinator definitions ===-- 341 -- ===========================================-- 342 343 -- ================================================-- 344 paScdefs = paOneOrMore (paWithTrailingSemi paSc) 345 346 -- ================================================-- 347 paSc = paThen4 mk_sc paName (paZeroOrMore paName) (paLit "=") paExpr 348 where 349 mk_sc sc args eq rhs = (sc, (args, rhs)) 350 351 -- ================================================-- 352 paExpr 353 = paAlts [ ( (== "let"), paLet ), 354 ( (== "letrec"), paLetrec ), 355 ( (== "case"), paCase ), 356 ( (== "\\"), paLambda ), 357 ( (const True), paExpr1 ) ] 358 359 360 -- ================================================-- 361 paLet = paThen4 mk_let 362 (paLit "let") 363 paDefns 364 (paLit "in") paExpr 365 where 366 mk_let lett defns inn expr = ELet False defns expr 367 368 369 -- ================================================-- 370 paLetrec = paThen4 mk_letrec 371 (paLit "letrec") 372 paDefns 373 (paLit "in") paExpr 374 where 375 mk_letrec letrecc defns inn expr = ELet True defns expr 376 377 378 -- ================================================-- 379 paDefns = paOneOrMoreWithSep paDefn (paLit ";") 380 381 -- ================================================-- 382 paDefn = paThen3 mk_defn paName (paLit "=") paExpr 383 where 384 mk_defn var equals rhs = (var,rhs) 385 386 -- ================================================-- 387 paCase = paThen4 mk_case (paLit "case") paExpr (paLit "of") paAlters 388 where 389 mk_case kase e ov alts = ECase e alts 390 391 -- ================================================-- 392 paAlters = paThen2 const (paOneOrMoreWithSep paAlter (paLit ";")) (paLit "end") 393 394 -- ================================================-- 395 paAlter = paThen4 mk_alt paCname (paZeroOrMore paName) (paLit "->") paExpr 396 where 397 mk_alt tag args arrow rhs = (tag, (args, rhs)) 398 399 -- ================================================-- 400 paLambda = paThen4 mk_lam 401 (paLit "\\") (paOneOrMore paName) (paLit "->") paExpr 402 where 403 mk_lam lam vars dot expr = ELam vars expr 404 405 -- ================================================-- 406 paExpr1 = paThen2 paAssembleOp paExpr2 paExpr1c 407 408 -- ================================================-- 409 paExpr1c = paAlts [((== "|"), paThen2 FoundOp (paLit "|") paExpr1), 410 ((== "#"), paThen2 FoundOp (paLit "#") paExpr1), 411 (const True, paEmpty NoOp)] 412 413 -- ================================================-- 414 paExpr2 = paThen2 paAssembleOp paExpr3 paExpr2c 415 416 -- ================================================-- 417 paExpr2c = paAlts [((== "&"), paThen2 FoundOp (paLit "&") paExpr2), 418 (const True, paEmpty NoOp)] 419 420 -- ================================================-- 421 paExpr3 = paThen2 paAssembleOp paExpr4 paExpr3c 422 423 -- ================================================-- 424 paExpr3c = paAlts [(paIsRelop, paThen2 FoundOp paRelop paExpr4), 425 (const True, paEmpty NoOp)] 426 427 -- ================================================-- 428 paExpr4 = paThen2 paAssembleOp paExpr5 paExpr4c 429 430 -- ================================================-- 431 paExpr4c = paAlts [((== "+"), paThen2 FoundOp (paLit "+") paExpr4), 432 ((== "-"), paThen2 FoundOp (paLit "-") paExpr5), 433 (const True, paEmpty NoOp)] 434 435 -- ================================================-- 436 paExpr5 = paThen2 paAssembleOp paExpr6 paExpr5c 437 438 -- ================================================-- 439 paExpr5c = paAlts [((== "*"), paThen2 FoundOp (paLit "*") paExpr5), 440 ((== "/"), paThen2 FoundOp (paLit "/") paExpr6), 441 (const True, paEmpty NoOp)] 442 443 -- ================================================-- 444 paExpr6 = (paOneOrMore paAtomic) `paApply` mk_ap_chain 445 where 446 mk_ap_chain (fn:args) = foldl EAp fn args 447 448 -- ================================================-- 449 paAtomic = paAlts [(paIsCname, paConstr), 450 ((== "("), paBracExpr), 451 (paIsName, paName `paApply` EVar), 452 (paIsNum, paNum `paApply` ENum)] 453 454 -- ================================================-- 455 paBracExpr = paThen3 mk_brack (paLit "(") paExpr (paLit ")") 456 where 457 mk_brack open expr close = expr 458 459 -- ================================================-- 460 paConstr = paApply paCname EConstr 461 462 463 -- ================================================-- 464 paAssembleOp e1 NoOp = e1 465 paAssembleOp e1 (FoundOp op e2) = EAp (EAp (EVar op) e1) e2 466 467 468 469 -- ===================================================-- 470 -- === Validation & transformation of parsed trees ===-- 471 -- ===================================================-- 472 473 -- ==========================================================-- 474 -- 475 paProgramToAtomic :: CoreProgram -> 476 AtomicProgram 477 478 paProgramToAtomic (tds, scdefs) 479 = (tds, ce) 480 where 481 ce = ELet True 482 [(name, ELam ns b) | (name, (ns, b)) <- scdefs] 483 (ENum 42) 484 485 -- ==========================================================-- 486 -- 487 paValidTypeDefs :: [TypeDef] -> -- all type definitions 488 TypeDependancy -> -- type dependancy info 489 [Char] -- wordy description of any problems 490 491 paValidTypeDefs tds rda 492 = if not uniqueTNames then "Non-unique type names" else 493 if not uniqueParNames then "Non-unique parameter names" else 494 if not uniqueCNames then "Non-unique constructor names" else 495 if not balanced then "Declared parameters do not match used parameters" else 496 if not allDefined then "Undefined types are present" else 497 if not rightArity then "Types are used at wrong arities" else 498 if not allSimple then "Perverse type definitions are present" 499 else "" 500 where 501 arityMap = map f tds 502 where 503 f (tname, tvs, cal) = (tname, length tvs) 504 allTNames = map f tds 505 where 506 f (tname, tvs, cal) = tname 507 allCNames = concat (map f tds) 508 where 509 f (tname, tvs, cal) = map first cal 510 uniqueTNames = length allTNames == ((length.nub) allTNames) 511 uniqueParNames = and (map f tds) 512 where 513 f (tname, tvs, cal) = length tvs == ((length.nub) tvs) 514 uniqueCNames = length allCNames == ((length.nub) allCNames) 515 balanced = and (map isBalanced tds) 516 where 517 tvsIn (TDefVar n) = [n] 518 tvsIn (TDefCons n tel) = concat (map tvsIn tel) 519 g tDefExprList = concat (map tvsIn tDefExprList) 520 isBalanced (tname, tvs, cal) 521 = (utSetFromList tvs) == 522 (utSetFromList (concat (map (g.second) cal))) 523 allDefined = utSetSubsetOf 524 (utSetFromList (concat (map mdFreeTVarsIn tds))) 525 (utSetFromList allTNames) 526 rightArity = and (map f tds) 527 where 528 f (tname, tvs, cal) = and (map (g.second) cal) 529 g tDefExprList = and (map rArity tDefExprList) 530 rArity (TDefVar v) = True 531 rArity (TDefCons n tel) 532 = (length tel == utSureLookup arityMap "paVTD`rA`rA" n) && 533 (and (map rArity tel)) 534 allSimple = and (map f tds) 535 where 536 f (tname, tvs, cal) = 537 utSetSubsetOf (utSetFromList (allVars cal)) 538 (utSetFromList (tvs++(groupOf tname rda))) 539 allVars cal = concat (map g cal) 540 g (n, tel) = concat (map allTVs tel) 541 allTVs (TDefVar n) = [n] 542 allTVs (TDefCons n tel) = n:concat (map allTVs tel) 543 groupOf tname ((rf, group):rest) 544 | tname `elem` group && rf = group 545 | tname `elem` group && not rf = [] 546 | otherwise = groupOf tname rest 547 548 549 -- ==========================================================-- 550 -- 551 paParse :: [Char] -> (TypeDependancy, AtomicProgram) 552 553 paParse fileContents 554 = if typeDefErrors == "" 555 then (dependResult, (typeDefs, mainExpr)) 556 else myFail typeDefErrors 557 where 558 (typeDefs, mainExpr) = paProgramToAtomic parsedProgram 559 dependResult = mdTypeDependancy typeDefs 560 typeDefErrors = paValidTypeDefs typeDefs dependResult 561 tokens = paLex 1 fileContents 562 parsedProgram = paSyntax tokens 563 564 565 -- ==========================================================-- 566 -- === End parse.m (1) ===-- 567 -- ==========================================================--