1 2 -- ==========================================================-- 3 -- === Raw lexical analysis (tokenisation) of source ===-- 4 -- === Lexer.hs ===-- 5 -- ==========================================================-- 6 7 module Main where 8 import Char -- 1.3 9 ---------------------------------------------------------- 10 -- Lexemes -- 11 ---------------------------------------------------------- 12 13 type Token = (Int, Int, Lex, String) -- (line, column, lexeme type, value) 14 15 data Lex = Lcon -- constructor used as prefix: 16 -- normal prefix constructor, 17 -- or bracketed infix constructor 18 19 | Lconop -- constructor used as infix: 20 -- normal prefix constructor in backquotes, 21 -- or infix constructor (starting with ":") 22 23 | Lvar -- variable used as prefix: 24 -- normal prefix variable, 25 -- or bracketed infix var (operator) 26 27 | Lvarop -- variable used as infix: 28 -- normal prefix variable in backquotes, 29 -- or infix variable (operator) 30 31 -- | Ltycon -- constructor starting with A-Z 32 -- subcase of Lcon 33 34 -- | Ltyvar -- variable starting with a-z 35 -- subcase of Lvar 36 37 | Lintlit -- integer literal 38 | Lcharlit -- character literal 39 | Lstringlit -- string literal 40 41 | Llbrace -- { 42 | Lrbrace -- } 43 | Lsemi -- ; 44 | Lequals -- = 45 | Lbar -- | 46 | Larrow -- -> 47 | Llparen -- ( 48 | Lrparen -- ) 49 | Lcomma -- , 50 | Llbrack -- [ 51 | Lrbrack -- ] 52 | Lunder -- _ 53 | Lminus -- - 54 | Lslash -- \ 55 56 | Lmodule 57 | Linfixl 58 | Linfixr 59 | Linfix 60 | Lext 61 | Ldata 62 | Lif 63 | Lthen 64 | Lelse 65 | Llet 66 | Lin 67 | Lcase 68 | Lof 69 | Lwhere 70 71 | Leof deriving (Eq, Show{-was:Text-}) 72 73 {- 74 Lexing rules: 75 76 case ( 77 if next is \, -> Llparen 78 if next is symbol, take symbols and expect closing ) -> Lvar 79 if next is :, take tail-ident-chars, expect closing ) -> Lcon 80 otherwise -> Llparen 81 82 case ` 83 if next A-Z, take tail-ident-chars, expect ` -> Lconop 84 if next a-z, take tail-ident-chars, expect ` -> Lvarop 85 otherwise -> error 86 87 case A-Z 88 take tail-ident-chars -> Lcon 89 90 case a-z 91 take tail-ident-chars -> Lvar 92 93 case 0-9 94 take 0-9s -> Lintlit 95 96 case ' 97 expect a lit-char, then ' -> charlit 98 99 case " 100 expect lit-chars, then " -> stringlit 101 102 case { 103 case - -> run_comment 104 otherwise -> Llbrace 105 106 case } -> Lrbrace 107 108 case ) -> Lrparen 109 110 case [ -> Llbrack 111 case ] -> Lrbrack 112 113 case ; -> Lsemi 114 case , -> Lcomma 115 case _ -> Lunder 116 case - 117 case - -> line_comment 118 case > -> Larrow 119 otherwise -> Lminus 120 121 case # in column 1: this is a preprocessor line 122 123 case :!#$%&*+./<=>?@\^|~ 124 take symbols, then case resulting 125 "=" -> Lequals 126 "|" -> Lbar 127 "\" -> Lslash 128 otherwise 129 if starts with : -> Lconop 130 else -> lvarop 131 -} 132 133 134 135 -- ==========================================================-- 136 -- 137 leLex :: Int -> Int -> String -> [Token] 138 139 leLex l n [] 140 = repeat (99997, 99997, Leof, "") 141 142 leLex l n ('(':[]) 143 = [(l, n, Llparen, ")")] 144 145 leLex l n ('(':c:cs) 146 | c == ':' 147 = case leChunk (n+1) leIsTailChar cs of 148 (restSym, nn, restInput) -> case restInput of 149 [] -> leFail l nn " ) expected" 150 (')':as) -> (l, n, Lvar, c:restSym) : leLex l (nn+1) as 151 (_:_) -> leFail l nn " ) expected" 152 | c == '\\' 153 = (l, n, Llparen, "(") : leLex l (n+1) (c:cs) 154 | leIsSymbol c 155 = case leChunk (n+1) leIsSymbol cs of 156 (restSym, nn, restInput) -> case restInput of 157 [] -> leFail l nn " ) expected" 158 (')':as) -> (l, n, Lvar, c:restSym) : leLex l (nn+1) as 159 (_:_) -> leFail l nn " ) expected" 160 | otherwise 161 = (l, n, Llparen, "(") : leLex l (n+1) (c:cs) 162 163 leLex l n ('`':c:cs) 164 | isAlpha c 165 = case leChunk (n+1) isAlpha cs of 166 (restSym, nn, restInput) -> case restInput of 167 [] -> leFail l nn " ` expected" 168 ('`':as) -> (l, n, if isUpper c then Lconop else Lvarop, c:restSym) 169 : leLex l (nn+1) as 170 (_:_) -> leFail l nn " ` expected" 171 | otherwise 172 = leFail l n "Bad infix operator" 173 174 leLex l n ('"':cs) 175 = case leTakeLitChars True l (n+1) cs of 176 (restSym, nn, restInput) -> case restInput of 177 [] -> leFail l nn " \" expected" 178 ('"':as) -> (l, n, Lstringlit, restSym) : leLex l (nn+1) as 179 (_:_) -> leFail l nn " \" expected" 180 181 leLex l n ('\'':cs) 182 = case leTakeLitChars False l (n+1) cs of 183 (restSym, nn, restInput) -> case restInput of 184 [] -> leFail l nn " ' expected" 185 ('\'':as) -> case restSym of 186 [_] -> (l, n, Lcharlit, restSym) : leLex l (nn+1) as 187 _ -> leFail l (n+1) "Bad character literal" 188 (_:_) -> leFail l nn " ' expected" 189 190 leLex l n ('}':cs) 191 = (l, n, Lrbrace, "}") : leLex l (n+1) cs 192 193 leLex l n (')':cs) 194 = (l, n, Lrparen, ")") : leLex l (n+1) cs 195 196 leLex l n ('[':cs) 197 = (l, n, Llbrack, "[") : leLex l (n+1) cs 198 199 leLex l n (']':cs) 200 = (l, n, Lrbrack, "]") : leLex l (n+1) cs 201 202 leLex l n (';':cs) 203 = (l, n, Lsemi, ";") : leLex l (n+1) cs 204 205 leLex l n (',':cs) 206 = (l, n, Lcomma, ",") : leLex l (n+1) cs 207 208 leLex l n ('_':cs) 209 = (l, n, Lunder, "_") : leLex l (n+1) cs 210 211 leLex l n ('{':cs) 212 = case cs of 213 [] -> [(l, n, Llbrace, "}")] 214 ('-':cs2) -> leLexRComment l (n+2) cs2 215 (_:_) -> (l, n, Llbrace, "}") : leLex l (n+1) cs 216 217 leLex l n ('-':cs) 218 = case cs of 219 [] -> [(l, n, Lminus, "-")] 220 ('-':cs2) -> leLexLComment l (n+2) cs2 221 ('>':cs3) -> (l, n, Larrow, "->") : leLex l (n+2) cs3 222 ('}':cs3) -> leFail l n "Misplaced -}" 223 (_:_) -> (l, n, Lminus, "-") : leLex l (n+1) cs 224 225 leLex l n (' ':cs) 226 = leLex l (n+1) cs 227 228 leLex l n ('\n':cs) 229 = leLex (l+1) 1 cs 230 231 leLex l n ('\t':cs) 232 = leLex l (n - (n `mod` 8) + 9) cs 233 234 leLex l n (c:cs) 235 = if c == '#' 236 then if n == 1 237 then 238 {- This is a CPP line number thingy -} 239 let lineNoText = takeWhile isDigit (tail cs) 240 lineNo = leStringToInt lineNoText 241 nextLine = drop 1 (dropWhile ((/=) '\n') cs) 242 in 243 leLex lineNo 1 nextLine 244 else 245 {- it's a symbol starting with # -} 246 case leChunk (n+1) leIsSymbol cs of 247 (restSym, nn, restText) -> (l, n, Lvarop, c:restSym) : 248 leLex l nn restText 249 else 250 if isAlpha c 251 then case leChunk (n+1) leIsTailChar cs of 252 (restSym, nn, restText) -> (l, n, if isUpper c 253 then Lcon 254 else Lvar, c:restSym) : 255 leLex l nn restText 256 else 257 if isDigit c 258 then case leChunk (n+1) isDigit cs of 259 (restSym, nn, restText) -> (l, n, Lintlit, c:restSym) : 260 leLex l nn restText 261 else 262 if leIsSymbol c 263 then case leChunk (n+1) leIsSymbol cs of 264 (restSym, nn, restText) -> (l, n, if c == ':' 265 then Lconop 266 else Lvarop, c:restSym) : 267 leLex l nn restText 268 else 269 leFail l n ("Illegal character " ++ [c]) 270 271 272 -- ==========================================================-- 273 -- 274 leChunk :: Int -> (Char -> Bool) -> String -> (String, Int, String) 275 276 leChunk n proper [] 277 = ([], n, []) 278 279 leChunk n proper (c:cs) 280 | proper c 281 = case leChunk (n+1) proper cs of 282 (restId, col, restInput) -> (c:restId, col, restInput) 283 | otherwise 284 = ([], n, c:cs) 285 286 287 -- ==========================================================-- 288 -- 289 leTakeLitChars :: Bool -> Int -> Int -> String -> (String, Int, String) 290 291 leTakeLitChars d l n [] 292 = leFail l n "End of file inside literal" 293 294 leTakeLitChars d l n ('\\':'\\':cs) 295 = case leTakeLitChars d l (n+2) cs of 296 (rest, col, left) -> ('\\':rest, col, left) 297 298 leTakeLitChars d l n ('\\':'n':cs) 299 = case leTakeLitChars d l (n+2) cs of 300 (rest, col, left) -> ('\n':rest, col, left) 301 302 leTakeLitChars d l n ('\\':'t':cs) 303 = case leTakeLitChars d l (n+2) cs of 304 (rest, col, left) -> ('\t':rest, col, left) 305 306 leTakeLitChars d l n ('\\':'"':cs) 307 = case leTakeLitChars d l (n+2) cs of 308 (rest, col, left) -> ('"':rest, col, left) 309 310 leTakeLitChars d l n ('\\':'\'':cs) 311 = case leTakeLitChars d l (n+2) cs of 312 (rest, col, left) -> ('\'':rest, col, left) 313 314 leTakeLitChars d l n ('"':cs) 315 | d = ([], n, ('"':cs)) 316 | not d = case leTakeLitChars d l (n+1) cs of 317 (rest, col, left) -> ('"':rest, col, left) 318 319 leTakeLitChars d l n ('\'':cs) 320 | not d = ([], n, ('\'':cs)) 321 | d = case leTakeLitChars d l (n+1) cs of 322 (rest, col, left) -> ('\'':rest, col, left) 323 324 leTakeLitChars d l n ('\n':cs) 325 = leFail l n "Literal exceeds line" 326 327 leTakeLitChars d l n ('\t':cs) 328 = leFail l n "Literal contains tab" 329 330 leTakeLitChars d l n (c:cs) 331 = case leTakeLitChars d l (n+1) cs of 332 (rest, col, left) -> (c:rest, col, left) 333 334 335 -- ==========================================================-- 336 -- 337 leLexLComment :: Int -> Int -> String -> [Token] 338 339 leLexLComment l n cs 340 = leLex (l+1) 1 (drop 1 (dropWhile ((/=) '\n') cs)) 341 342 343 -- ==========================================================-- 344 -- 345 leLexRComment :: Int -> Int -> String -> [Token] 346 347 leLexRComment l n [] 348 = leFail l n "End of file inside {- ... -} comment" 349 350 leLexRComment l n ('-':'}':cs) 351 = leLex l (n+2) cs 352 353 leLexRComment l n ('\n':cs) 354 = leLexRComment (l+1) 1 cs 355 356 leLexRComment l n ('\t':cs) 357 = leLexRComment l (n - (n `mod` 8) + 9) cs 358 359 leLexRComment l n (c:cs) 360 = leLexRComment l (n+1) cs 361 362 363 -- ==========================================================-- 364 -- 365 leIsSymbol :: Char -> Bool 366 367 leIsSymbol c = c `elem` leSymbols 368 369 leSymbols = ":!#$%&*+./<=>?\\@^|~" 370 371 372 -- ==========================================================-- 373 -- 374 leIsTailChar :: Char -> Bool 375 376 leIsTailChar c 377 = isLower c || 378 isUpper c || 379 isDigit c || 380 c == '\'' || 381 c == '_' || 382 c == '\'' 383 384 385 -- ==========================================================-- 386 -- 387 leIsLitChar :: Char -> Bool 388 389 leIsLitChar c 390 = c /= '\n' && 391 c /= '\t' && 392 c /= '\'' && 393 c /= '"' 394 395 396 -- ==========================================================-- 397 -- 398 leStringToInt :: String -> Int 399 400 leStringToInt 401 = let s2i [] = 0 402 s2i (d:ds) = (fromEnum d - fromEnum '0') + 10 *s2i ds 403 in s2i . reverse 404 405 406 -- ==========================================================-- 407 -- 408 leFail l n m 409 = faiL ("Lexical error, line " ++ show l ++ ", col " ++ show n ++ 410 ":\n " ++ m ) 411 412 faiL m = error ( "\n\n" ++ m ++ "\n" ) 413 414 -- ==========================================================-- 415 -- === end Lexer.hs ===-- 416 -- ==========================================================-- 417 418 -- ==========================================================-- 419 -- === Keyword spotting, and offside rule implementation ===-- 420 -- === Layout.hs ===-- 421 -- ==========================================================-- 422 423 --module Layout 424 425 -- ==========================================================-- 426 -- 427 laKeyword :: Token -> Token 428 429 laKeyword (l, n, what, text) 430 = let 431 f Lvarop "=" = Lequals 432 f Lvarop "|" = Lbar 433 f Lvarop "\\" = Lslash 434 435 f Lvar "module" = Lmodule 436 f Lvar "infix" = Linfix 437 f Lvar "infixl" = Linfixl 438 f Lvar "infixr" = Linfixr 439 f Lvar "ext" = Lext 440 f Lvar "data" = Ldata 441 f Lvar "if" = Lif 442 f Lvar "then" = Lthen 443 f Lvar "else" = Lelse 444 f Lvar "let" = Llet 445 f Lvar "in" = Lin 446 f Lvar "case" = Lcase 447 f Lvar "of" = Lof 448 f Lvar "where" = Lwhere 449 450 f item words = item 451 452 in 453 (l, n, f what text, text) 454 455 456 -- ==========================================================-- 457 -- 458 laLayout :: Int -> [Int] -> [Token] -> [Token] 459 460 laLayout l s [] 461 = laRbrace (length s - 1) 99999 99999 462 463 laLayout l s (t1:[]) 464 = t1 : laRbrace (length s - 1) 99998 99998 465 466 laLayout l (s:ss) (t1@(l1, n1, w1, c1) : 467 t2@(l2, n2, w2, c2) : ts) 468 469 | w1 `elem` [Lof, Llet, Lwhere] && w2 /= Llbrace 470 = t1 : 471 (l1, n1, Llbrace, "{") : 472 t2 : 473 laLayout l2 (n2:s:ss) ts 474 475 | l1 == l 476 = t1 : 477 laLayout l (s:ss) (t2:ts) 478 479 | n1 > s 480 = t1 : 481 laLayout l1 (s:ss) (t2:ts) 482 483 | n1 == s 484 = (l1, n1, Lsemi, ";") : 485 t1 : 486 laLayout l1 (s:ss) (t2:ts) 487 488 | n1 < s 489 = (l1, n1, Lrbrace, "}") : 490 laLayout l ss (t1:t2:ts) 491 492 493 -- ==========================================================-- 494 -- 495 laRbrace c l n 496 = take c (repeat (l, n, Lrbrace, "}")) 497 498 -- ==========================================================-- 499 -- 500 laMain :: String -> [Token] 501 502 laMain 503 = laLayout 1 [0] . map laKeyword . leLex 1 1 504 505 506 -- ==========================================================-- 507 -- === end Layout.hs ===-- 508 -- ==========================================================-- 509 510 -- ==========================================================-- 511 -- === Abstract syntax for modules ===-- 512 -- === AbsSyntax.hs ===-- 513 -- ==========================================================-- 514 515 --module AbsSyntax where 516 517 --1.3:data Maybe a = Nothing 518 -- | Just a 519 520 type AList a b = [(a, b)] 521 522 type Id = String 523 524 data Module 525 = MkModule Id [TopDecl] 526 deriving (Show{-was:Text-}) 527 528 data FixityDecl 529 = MkFixDecl Id (Fixity, Int) 530 deriving (Show{-was:Text-}) 531 532 data DataDecl 533 = MkDataDecl Id ([Id], [ConstrAltDecl]) 534 deriving (Show{-was:Text-}) 535 536 data TopDecl 537 = MkTopF FixityDecl 538 | MkTopD DataDecl 539 | MkTopV ValBind 540 deriving (Show{-was:Text-}) 541 542 data Fixity 543 = InfixL 544 | InfixR 545 | InfixN 546 deriving (Eq,Show{-was:Text-}) 547 548 type ConstrAltDecl 549 = (Id, [TypeExpr]) 550 551 data TypeExpr = TypeVar Id 552 | TypeArr TypeExpr TypeExpr 553 | TypeCon Id [TypeExpr] 554 | TypeList TypeExpr 555 | TypeTuple [TypeExpr] 556 deriving (Show{-was:Text-}) 557 558 data ValBind 559 = MkValBind Int Lhs Expr 560 deriving (Show{-was:Text-}) 561 562 data Lhs 563 = LhsPat Pat 564 | LhsVar Id [Pat] 565 deriving (Show{-was:Text-}) 566 567 data Pat 568 = PatVar Id 569 | PatCon Id [Pat] 570 | PatWild 571 | PatList [Pat] 572 | PatTuple [Pat] 573 deriving (Show{-was:Text-}) 574 575 data Expr 576 = ExprVar Id 577 | ExprCon Id 578 | ExprApp Expr Expr 579 | ExprLam [Pat] Expr 580 | ExprCase Expr [ExprCaseAlt] 581 | ExprLetrec [ValBind] Expr 582 | ExprWhere Expr [ValBind] 583 | ExprGuards [(Expr, Expr)] 584 | ExprLiteral Literal 585 | ExprList [Expr] 586 | ExprTuple [Expr] 587 | ExprIf Expr Expr Expr 588 | ExprBar 589 | ExprFail 590 deriving (Show{-was:Text-}) 591 592 data ExprCaseAlt 593 = MkExprCaseAlt Pat Expr 594 deriving (Show{-was:Text-}) 595 596 data Literal 597 = LiteralInt Int 598 | LiteralChar Char 599 | LiteralString String 600 deriving (Show{-was:Text-}) 601 602 -- ==========================================================-- 603 -- === end AbsSyntax.hs ===-- 604 -- ==========================================================-- 605 606 -- ==========================================================-- 607 -- === Parser generics ===-- 608 -- === ParserGeneric.hs ===-- 609 -- ==========================================================-- 610 611 --module ParserGeneric 612 613 type PEnv = AList String (Fixity, Int) 614 615 data PResult a = POk PEnv [Token] a 616 | PFail Token 617 618 type Parser a = PEnv -> [Token] -> PResult a 619 620 type PEntry = (Bool, Expr, Id) 621 622 -- ==========================================================-- 623 -- 624 pgItem :: Lex -> Parser String 625 626 pgItem x env [] = PFail pgEOF 627 628 pgItem x env ((l, n, w, t):toks) 629 | x == w = POk env toks t 630 | otherwise = PFail (l, n, w, t) 631 632 633 -- ==========================================================-- 634 -- 635 pgAlts :: [Parser a] -> Parser a 636 637 pgAlts ps env toks 638 = let 639 useAlts [] bestErrTok 640 = PFail bestErrTok 641 useAlts (p:ps) bestErrTok 642 = case p env toks of 643 PFail someErrTok -> useAlts ps (further someErrTok bestErrTok) 644 successful_parse -> successful_parse 645 further x1@(l1, n1, w1, t1) x2@(l2, n2, w2, t2) 646 = if l2 > l1 then x2 647 else if l1 > l2 then x1 648 else if n1 > n2 then x1 649 else x2 650 in 651 useAlts ps (head (toks ++ [pgEOF])) 652 653 654 -- ==========================================================-- 655 -- 656 pgThen2 :: (a -> b -> c) -> 657 Parser a -> 658 Parser b -> 659 Parser c 660 661 pgThen2 combine p1 p2 env toks 662 = case p1 env toks of 663 { 664 PFail tok1 665 -> PFail tok1 ; 666 POk env1 toks1 item1 667 -> case p2 env1 toks1 of 668 { 669 PFail tok2 670 -> PFail tok2 ; 671 POk env2 toks2 item2 672 -> POk env2 toks2 (combine item1 item2) 673 } 674 } 675 676 677 -- ==========================================================-- 678 -- 679 pgThen3 :: (a -> b -> c -> d) -> 680 Parser a -> 681 Parser b -> 682 Parser c -> 683 Parser d 684 685 pgThen3 combine p1 p2 p3 env toks 686 = case p1 env toks of 687 { 688 PFail tok1 689 -> PFail tok1 ; 690 POk env1 toks1 item1 691 -> case p2 env1 toks1 of 692 { 693 PFail tok2 694 -> PFail tok2 ; 695 POk env2 toks2 item2 696 -> case p3 env2 toks2 of 697 { 698 PFail tok3 699 -> PFail tok3 ; 700 POk env3 toks3 item3 701 -> POk env3 toks3 (combine item1 item2 item3) 702 } 703 } 704 } 705 706 707 -- ==========================================================-- 708 -- 709 pgThen4 :: (a -> b -> c -> d -> e) -> 710 Parser a -> 711 Parser b -> 712 Parser c -> 713 Parser d -> 714 Parser e 715 716 pgThen4 combine p1 p2 p3 p4 env toks 717 = case p1 env toks of 718 { 719 PFail tok1 720 -> PFail tok1 ; 721 POk env1 toks1 item1 722 -> case p2 env1 toks1 of 723 { 724 PFail tok2 725 -> PFail tok2 ; 726 POk env2 toks2 item2 727 -> case p3 env2 toks2 of 728 { 729 PFail tok3 730 -> PFail tok3 ; 731 POk env3 toks3 item3 732 -> case p4 env3 toks3 of 733 { 734 PFail tok4 735 -> PFail tok4 ; 736 POk env4 toks4 item4 737 -> POk env4 toks4 (combine item1 item2 item3 item4) 738 } 739 } 740 } 741 } 742 743 744 -- ==========================================================-- 745 -- 746 pgZeroOrMore :: Parser a -> Parser [a] 747 748 pgZeroOrMore p env toks 749 = case p env toks of 750 { 751 PFail tok1 752 -> POk env toks [] ; 753 POk env1 toks1 item1 754 -> case pgZeroOrMore p env1 toks1 of 755 { 756 PFail tok2 757 -> POk env1 toks1 [item1] ; 758 POk env2 toks2 item2_list 759 -> POk env2 toks2 (item1 : item2_list) 760 } 761 } 762 763 764 -- ==========================================================-- 765 -- 766 pgOneOrMore :: Parser a -> Parser [a] 767 768 pgOneOrMore p 769 = pgThen2 (:) p (pgZeroOrMore p) 770 771 772 -- ==========================================================-- 773 -- 774 pgApply :: (a -> b) -> Parser a -> Parser b 775 776 pgApply f p env toks 777 = case p env toks of 778 { 779 PFail tok1 780 -> PFail tok1 ; 781 POk env1 toks1 item1 782 -> POk env1 toks1 (f item1) 783 } 784 785 786 -- ==========================================================-- 787 -- 788 pgTwoOrMoreWithSep :: Parser a -> Parser b -> Parser [a] 789 790 pgTwoOrMoreWithSep p psep 791 = pgThen4 792 (\i1 s1 i2 rest -> i1:i2:rest) 793 p 794 psep 795 p 796 (pgZeroOrMore (pgThen2 (\sep x -> x) psep p)) 797 798 799 -- ==========================================================-- 800 -- 801 pgOneOrMoreWithSep :: Parser a -> Parser b -> Parser [a] 802 803 pgOneOrMoreWithSep p psep 804 = pgThen2 (:) p (pgZeroOrMore (pgThen2 (\sep x -> x) psep p)) 805 806 807 -- ==========================================================-- 808 -- 809 pgZeroOrMoreWithSep :: Parser a -> Parser b -> Parser [a] 810 811 pgZeroOrMoreWithSep p psep 812 = pgAlts 813 [ 814 pgOneOrMoreWithSep p psep, 815 pgApply (\x -> x:[]) p, 816 pgEmpty [] 817 ] 818 819 820 -- ==========================================================-- 821 -- 822 pgOptional :: Parser a -> Parser (Maybe a) 823 824 pgOptional p env toks 825 = case p env toks of 826 { 827 PFail tok1 828 -> POk env toks Nothing ; 829 POk env2 toks2 item2 830 -> POk env2 toks2 (Just item2) 831 } 832 833 834 -- ==========================================================-- 835 -- 836 pgGetLineNumber :: Parser a -> Parser (Int, a) 837 838 pgGetLineNumber p env toks 839 = let 840 lineNo = case (head (toks ++ [pgEOF])) of (l, n, w, t) -> l 841 in 842 case p env toks of 843 { 844 PFail tok1 845 -> PFail tok1 ; 846 POk env2 toks2 item2 847 -> POk env2 toks2 (lineNo, item2) 848 } 849 850 851 -- ==========================================================-- 852 -- 853 pgEmpty :: a -> Parser a 854 855 pgEmpty item env toks 856 = POk env toks item 857 858 859 -- ==========================================================-- 860 -- 861 pgEOF :: Token 862 863 pgEOF = (88888, 88888, Lvar, "*** Unexpected end of source! ***") 864 865 866 -- ============================================================-- 867 -- === Some kludgey stuff for implementing the offside rule ===-- 868 -- ============================================================-- 869 870 -- ==========================================================-- 871 -- 872 pgEatEnd :: Parser () 873 874 pgEatEnd env [] 875 = POk env [] () 876 877 pgEatEnd env (tok@(l, n, w, t):toks) 878 | w == Lsemi || w == Lrbrace = POk env toks () 879 | otherwise = POk env (tok:toks) () 880 881 882 -- ==========================================================-- 883 -- 884 pgDeclList :: Parser a -> Parser [a] 885 886 pgDeclList p 887 = pgThen3 (\a b c -> b) (pgItem Llbrace) 888 (pgOneOrMoreWithSep p (pgItem Lsemi)) 889 pgEatEnd 890 891 892 -- ==========================================================-- 893 -- === end ParserGeneric.hs ===-- 894 -- ==========================================================-- 895 896 -- ==========================================================-- 897 -- === The parser. ===-- 898 -- === Parser.hs ===-- 899 -- ==========================================================-- 900 901 --module Parser where 902 903 {- FIX THESE UP -} 904 utLookupDef env k def 905 = head ( [ vv | (kk,vv) <- env, kk == k] ++ [def] ) 906 panic = error 907 {- END FIXUPS -} 908 909 paLiteral :: Parser Literal 910 paLiteral 911 = pgAlts 912 [ 913 pgApply (LiteralInt . leStringToInt) (pgItem Lintlit), 914 pgApply (LiteralChar . head) (pgItem Lcharlit), 915 pgApply LiteralString (pgItem Lstringlit) 916 ] 917 918 paExpr 919 = pgAlts 920 [ 921 paCaseExpr, 922 paLetExpr, 923 paLamExpr, 924 paIfExpr, 925 paUnaryMinusExpr, 926 hsDoExpr [] 927 ] 928 929 paUnaryMinusExpr 930 = pgThen2 931 (\minus (_, aexpr, _) -> 932 ExprApp (ExprApp (ExprVar "-") (ExprLiteral (LiteralInt 0))) aexpr) 933 paMinus 934 paAExpr 935 936 paCaseExpr 937 = pgThen4 938 (\casee expr off alts -> ExprCase expr alts) 939 (pgItem Lcase) 940 paExpr 941 (pgItem Lof) 942 (pgDeclList paAlt) 943 944 paAlt 945 = pgAlts 946 [ 947 pgThen4 948 (\pat arrow expr wheres 949 -> MkExprCaseAlt pat (pa_MakeWhereExpr expr wheres)) 950 paPat 951 (pgItem Larrow) 952 paExpr 953 (pgOptional paWhereClause), 954 pgThen3 955 (\pat agrdrhss wheres 956 -> MkExprCaseAlt pat 957 (pa_MakeWhereExpr (ExprGuards agrdrhss) wheres)) 958 paPat 959 (pgOneOrMore paGalt) 960 (pgOptional paWhereClause) 961 ] 962 963 paGalt 964 = pgThen4 965 (\bar guard arrow expr -> (guard, expr)) 966 (pgItem Lbar) 967 paExpr 968 (pgItem Larrow) 969 paExpr 970 971 paLamExpr 972 = pgThen4 973 (\lam patterns arrow rhs -> ExprLam patterns rhs) 974 (pgItem Lslash) 975 (pgZeroOrMore paAPat) 976 (pgItem Larrow) 977 paExpr 978 979 paLetExpr 980 = pgThen4 981 (\lett decls inn rhs -> ExprLetrec decls rhs) 982 (pgItem Llet) 983 paValdefs 984 (pgItem Lin) 985 paExpr 986 987 paValdefs 988 = pgApply pa_MergeValdefs (pgDeclList paValdef) 989 990 pa_MergeValdefs 991 = id 992 993 paLhs 994 = pgAlts 995 [ 996 pgThen2 (\v ps -> LhsVar v ps) paVar (pgOneOrMore paPat), 997 pgApply LhsPat paPat 998 ] 999 1000 paValdef 1001 = pgAlts 1002 [ 1003 pgThen4 1004 (\(line, lhs) eq rhs wheres 1005 -> MkValBind line lhs (pa_MakeWhereExpr rhs wheres)) 1006 (pgGetLineNumber paLhs) 1007 (pgItem Lequals) 1008 paExpr 1009 (pgOptional paWhereClause), 1010 pgThen3 1011 (\(line, lhs) grdrhss wheres 1012 -> MkValBind line lhs 1013 (pa_MakeWhereExpr (ExprGuards grdrhss) wheres)) 1014 (pgGetLineNumber paLhs) 1015 (pgOneOrMore paGrhs) 1016 (pgOptional paWhereClause) 1017 ] 1018 1019 pa_MakeWhereExpr expr Nothing 1020 = expr 1021 pa_MakeWhereExpr expr (Just whereClauses) 1022 = ExprWhere expr whereClauses 1023 1024 paWhereClause 1025 = pgThen2 (\x y -> y) (pgItem Lwhere) paValdefs 1026 paGrhs 1027 = pgThen4 1028 (\bar guard equals expr -> (guard, expr)) 1029 (pgItem Lbar) 1030 paExpr 1031 (pgItem Lequals) 1032 paExpr 1033 1034 1035 paAPat 1036 = pgAlts 1037 [ 1038 pgApply PatVar paVar, 1039 pgApply (\id -> PatCon id []) paCon, 1040 pgApply (const PatWild) (pgItem Lunder), 1041 pgApply PatTuple 1042 (pgThen3 (\l es r -> es) 1043 (pgItem Llparen) 1044 (pgTwoOrMoreWithSep paPat (pgItem Lcomma)) 1045 (pgItem Lrparen)), 1046 pgApply PatList 1047 (pgThen3 (\l es r -> es) 1048 (pgItem Llbrack) 1049 (pgZeroOrMoreWithSep paPat (pgItem Lcomma)) 1050 (pgItem Lrbrack)), 1051 pgThen3 (\l p r -> p) 1052 (pgItem Llparen) 1053 paPat 1054 (pgItem Lrparen) 1055 ] 1056 1057 paPat 1058 = pgAlts 1059 [ 1060 pgThen2 (\c ps -> PatCon c ps) 1061 paCon 1062 (pgOneOrMore paAPat), 1063 pgThen3 (\ap c pa -> PatCon c [ap,pa]) 1064 paAPat 1065 paConop 1066 paPat, 1067 paAPat 1068 ] 1069 1070 1071 paIfExpr 1072 = pgThen4 1073 (\iff c thenn (t,f) -> ExprIf c t f) 1074 (pgItem Lif) 1075 paExpr 1076 (pgItem Lthen) 1077 (pgThen3 1078 (\t elsee f -> (t,f)) 1079 paExpr 1080 (pgItem Lelse) 1081 paExpr 1082 ) 1083 1084 paAExpr 1085 = pgApply (\x -> (False, x, [])) 1086 (pgAlts 1087 [ 1088 pgApply ExprVar paVar, 1089 pgApply ExprCon paCon, 1090 pgApply ExprLiteral paLiteral, 1091 pgApply ExprList paListExpr, 1092 pgApply ExprTuple paTupleExpr, 1093 pgThen3 (\l e r -> e) (pgItem Llparen) paExpr (pgItem Lrparen) 1094 ] 1095 ) 1096 1097 paListExpr 1098 = pgThen3 (\l es r -> es) 1099 (pgItem Llbrack) 1100 (pgZeroOrMoreWithSep paExpr (pgItem Lcomma)) 1101 (pgItem Lrbrack) 1102 1103 paTupleExpr 1104 = pgThen3 (\l es r -> es) 1105 (pgItem Llparen) 1106 (pgTwoOrMoreWithSep paExpr (pgItem Lcomma)) 1107 (pgItem Lrparen) 1108 1109 paVar = pgItem Lvar 1110 paCon = pgItem Lcon 1111 paVarop = pgItem Lvarop 1112 paConop = pgItem Lconop 1113 paMinus = pgItem Lminus 1114 1115 paOp 1116 = pgAlts [ 1117 pgApply (\x -> (True, ExprVar x, x)) paVarop, 1118 pgApply (\x -> (True, ExprCon x, x)) paConop, 1119 pgApply (\x -> (True, ExprVar x, x)) paMinus 1120 ] 1121 1122 paDataDecl 1123 = pgThen2 1124 (\dataa useful -> useful) 1125 (pgItem Ldata) 1126 paDataDecl_main 1127 1128 paDataDecl_main 1129 = pgThen4 1130 (\name params eq drhs -> MkDataDecl name (params, drhs)) 1131 paCon 1132 (pgZeroOrMore paVar) 1133 (pgItem Lequals) 1134 (pgOneOrMoreWithSep paConstrs (pgItem Lbar)) 1135 1136 paConstrs 1137 = pgThen2 1138 (\con texprs -> (con, texprs)) 1139 paCon 1140 (pgZeroOrMore paAType) 1141 1142 paType 1143 = pgAlts 1144 [ 1145 pgThen3 1146 (\atype arrow typee -> TypeArr atype typee) 1147 paAType 1148 (pgItem Larrow) 1149 paType, 1150 pgThen2 1151 TypeCon 1152 paCon 1153 (pgOneOrMore paAType), 1154 paAType 1155 ] 1156 1157 paAType 1158 = pgAlts 1159 [ 1160 pgApply TypeVar paVar, 1161 pgApply (\tycon -> TypeCon tycon []) paCon, 1162 pgThen3 1163 (\l t r -> t) 1164 (pgItem Llparen) 1165 paType 1166 (pgItem Lrparen), 1167 pgThen3 1168 (\l t r -> TypeList t) 1169 (pgItem Llbrack) 1170 paType 1171 (pgItem Lrbrack), 1172 pgThen3 1173 (\l t r -> TypeTuple t) 1174 (pgItem Llparen) 1175 (pgTwoOrMoreWithSep paType (pgItem Lcomma)) 1176 (pgItem Lrparen) 1177 ] 1178 1179 paInfixDecl env toks 1180 = let dump (ExprVar v) = v 1181 dump (ExprCon c) = c 1182 in 1183 pa_UpdateFixityEnv 1184 (pgThen3 1185 (\assoc prio name -> MkFixDecl name (assoc, prio)) 1186 paInfixWord 1187 (pgApply leStringToInt (pgItem Lintlit)) 1188 (pgApply (\(_, op, _) -> dump op) paOp) 1189 env 1190 toks 1191 ) 1192 1193 paInfixWord 1194 = pgAlts 1195 [ 1196 pgApply (const InfixL) (pgItem Linfixl), 1197 pgApply (const InfixR) (pgItem Linfixr), 1198 pgApply (const InfixN) (pgItem Linfix) 1199 ] 1200 1201 pa_UpdateFixityEnv (PFail tok) 1202 = PFail tok 1203 1204 pa_UpdateFixityEnv (POk env toks (MkFixDecl name assoc_prio)) 1205 = let 1206 new_env = (name, assoc_prio) : env 1207 in 1208 POk new_env toks (MkFixDecl name assoc_prio) 1209 1210 paTopDecl 1211 = pgAlts 1212 [ 1213 pgApply MkTopF paInfixDecl, 1214 pgApply MkTopD paDataDecl, 1215 pgApply MkTopV paValdef 1216 ] 1217 1218 paModule 1219 = pgThen4 1220 (\modyule name wheree topdecls -> MkModule name topdecls) 1221 (pgItem Lmodule) 1222 paCon 1223 (pgItem Lwhere) 1224 (pgDeclList paTopDecl) 1225 1226 parser_test toks 1227 = let parser_to_test 1228 = --paPat 1229 --paExpr 1230 --paValdef 1231 --pgZeroOrMore paInfixDecl 1232 --paDataDecl 1233 --paType 1234 paModule 1235 --pgTwoOrMoreWithSep (pgItem Lsemi) (pgItem Lcomma) 1236 1237 in 1238 parser_to_test hsPrecTable toks 1239 1240 -- ==============================================-- 1241 -- === The Operator-Precedence parser (yuck!) ===-- 1242 -- ==============================================-- 1243 1244 -- 1245 -- ==========================================================-- 1246 -- 1247 hsAExprOrOp 1248 = pgAlts [paAExpr, paOp] 1249 1250 hsDoExpr :: [PEntry] -> Parser Expr 1251 -- [PaEntry] is a stack of operators and atomic expressions 1252 -- hsDoExpr uses a parser (hsAexpOrOp :: Parsr PaEntry) for atomic 1253 -- expressions or operators 1254 1255 hsDoExpr stack env toks = 1256 let 1257 (validIn, restIn, parseIn, err) 1258 = case hsAExprOrOp env toks of 1259 POk env1 toks1 item1 1260 -> (True, toks1, item1, panic "hsDoExpr(1)") 1261 PFail err 1262 -> (False, panic "hsDoExpr(2)", panic "hsDoExpr(3)", err) 1263 (opIn, valueIn, nameIn) 1264 = parseIn 1265 (assocIn, priorIn) 1266 = utLookupDef env nameIn (InfixL, 9) 1267 shift 1268 = hsDoExpr (parseIn:stack) env restIn 1269 in 1270 case stack of 1271 s1:s2:s3:ss 1272 | validIn && opS2 && opIn && priorS2 > priorIn 1273 -> reduce 1274 | validIn && opS2 && opIn && priorS2 == priorIn 1275 -> if assocS2 == InfixL && 1276 assocIn == InfixL 1277 then reduce 1278 else 1279 if assocS2 == InfixR && 1280 assocIn == InfixR 1281 then shift 1282 else PFail (head toks) -- Because of ambiguousness 1283 | not validIn && opS2 1284 -> reduce 1285 where 1286 (opS1, valueS1, nameS1) = s1 1287 (opS2, valueS2, nameS2) = s2 1288 (opS3, valueS3, nameS3) = s3 1289 (assocS2, priorS2) = utLookupDef env nameS2 (InfixL, 9) 1290 reduce = hsDoExpr ((False, ExprApp (ExprApp valueS2 valueS3) 1291 valueS1, []) 1292 : ss) env toks 1293 s1:s2:ss 1294 | validIn && (opS1 || opS2) -> shift 1295 | otherwise -> reduce 1296 where 1297 (opS1, valueS1, nameS1) = s1 1298 (opS2, valueS2, nameS2) = s2 1299 reduce = hsDoExpr ((False, ExprApp valueS2 valueS1, []) : ss) 1300 env toks 1301 (s1:[]) 1302 | validIn -> shift 1303 | otherwise -> POk env toks valueS1 1304 where 1305 (opS1, valueS1, nameS1) = s1 1306 [] 1307 | validIn -> shift 1308 | otherwise -> PFail err 1309 1310 -- ==========================================================-- 1311 -- === end Parser.hs ===-- 1312 -- ==========================================================-- 1313 1314 hsPrecTable :: PEnv 1315 hsPrecTable = [ 1316 ("-", (InfixL, 6)), 1317 ("+", (InfixL, 6)), 1318 ("*", (InfixL, 7)), 1319 ("div", (InfixN, 7)), 1320 ("mod", (InfixN, 7)), 1321 1322 ("<", (InfixN, 4)), 1323 ("<=", (InfixN, 4)), 1324 ("==", (InfixN, 4)), 1325 ("/=", (InfixN, 4)), 1326 (">=", (InfixN, 4)), 1327 (">", (InfixN, 4)), 1328 1329 ("C:", (InfixR, 5)), 1330 ("++", (InfixR, 5)), 1331 ("\\", (InfixN, 5)), 1332 ("!!", (InfixL, 9)), 1333 (".", (InfixR, 9)), 1334 ("^", (InfixR, 8)), 1335 ("elem", (InfixN, 4)), 1336 ("notElem", (InfixN, 4)), 1337 1338 ("||", (InfixR, 2)), 1339 ("&&", (InfixR, 3))] 1340 1341 1342 main = do 1343 cs <- getContents 1344 let tokens = laMain cs 1345 let parser_res = parser_test tokens 1346 putStr (showx parser_res) 1347 1348 showx (PFail t) 1349 = "\n\nFailed on token: " ++ show t ++ "\n\n" 1350 1351 showx (POk env toks result) 1352 = "\n\nSucceeded, with:\n Size env = " ++ show (length env) ++ 1353 "\n Next token = " ++ show (head toks) ++ 1354 "\n\n Result = " ++ show result ++ "\n\n" 1355 1356 -- ==========================================================-- 1357 -- 1358 layn :: [[Char]] -> [Char] 1359 1360 layn x = f 1 x 1361 where 1362 f :: Int -> [[Char]] -> [Char] 1363 f n [] = [] 1364 f n (a:x) = rjustify 4 (show n) ++") "++a++"\n"++f (n+1) x 1365 1366 1367 1368 -- ==========================================================-- 1369 -- 1370 rjustify :: Int -> [Char] -> [Char] 1371 rjustify n s = spaces (n - length s)++s 1372 where 1373 spaces :: Int -> [Char] 1374 spaces m = copy m ' ' 1375 1376 copy :: Int -> a -> [a] 1377 1378 copy n x = take (max 0 n) xs where xs = x:xs 1379