1 --                            -*- Mode: Haskell -*- 
    2 -- Copyright 1994 by Peter Thiemann
    3 -- StringMatch.hs --- translate regular expression into a string match function
    4 -- Author          : Peter Thiemann
    5 -- Created On      : Thu Dec 23 11:16:26 1993
    6 -- Last Modified By: Peter Thiemann
    7 -- Last Modified On: Thu Dec 23 12:32:39 1993
    8 -- Update Count    : 18
    9 -- Status          : Unknown, Use with caution!
   10 -- 
   11 -- $Locker:  $
   12 -- $Log: StringMatch.hs,v $
   13 -- Revision 1.1  1996/01/08 20:02:33  partain
   14 -- Initial revision
   15 --
   16 -- Revision 1.1  1994/03/15  15:34:53  thiemann
   17 -- Initial revision
   18 --
   19 -- 
   20 
   21 module StringMatch {-(stringMatch)-} where
   22 
   23 import Parsers
   24 
   25 infixr 8 +.+ , +.. , ..+
   26 infixl 7 <<< , <<*
   27 infixr 6 |||
   28 
   29 (+.+) = thn
   30 (..+) = xthn
   31 (+..) = thnx
   32 (|||) = alt
   33 (<<<) = using
   34 (<<*) = using2
   35 lit   :: Eq a => a -> Parser a a
   36 lit   = literal
   37 star  = rpt
   38 anyC  = satisfy (const True)
   39 butC cs = satisfy (not.(`elem` cs))
   40 noC   "" = [("","")]
   41 noC   _  = []
   42 unitL = \x -> [x]
   43 
   44 --
   45 -- grammar for regular expressions:
   46 --
   47 {-
   48   Atom   = character | "\\" character | "." | "\\(" Regexp "\\) .
   49   ExtAtom       = Atom ["*" | "+" | "?"] .
   50   Factor        = ExtAtom + .
   51   Regexp        = Factor / "\\|" ["$"].
   52 -}
   53 
   54 type ParseRegexp = Parser Char String
   55 
   56 rrAtom :: Parser Char ParseRegexp
   57 rrAtom =
   58      lit '\\' ..+ lit '(' ..+ rrRegexp +.. lit '\\' +.. lit ')'
   59  ||| 
   60    ( lit '\\' ..+ butC "|()"    <<< lit
   61  ||| lit '.'         <<< const anyC
   62  ||| butC "\\.$"               <<< lit
   63  ||| lit '$' `followedBy` anyC  <<< lit
   64    ) <<< (<<< unitL)
   65 
   66 rrExtAtom :: Parser Char ParseRegexp
   67 rrExtAtom =
   68      rrAtom +.+ opt (lit '*' <<< const star
   69                 |||  lit '+' <<< const plus
   70                 |||  lit '?' <<< const opt)
   71         <<< helper
   72      where
   73        helper (ea, []) = ea
   74        helper (ea, [f]) = f ea <<< concat
   75 
   76 rrFactor :: Parser Char ParseRegexp
   77 rrFactor =
   78      plus rrExtAtom     <<< foldr (\ p1 p2 -> p1 +.+ p2 <<* (++)) (succeed "")
   79 
   80 rrRegexp =
   81      rrFactor +.+ star (lit '\\' ..+ lit '|' ..+ rrFactor) +.+ opt (lit '$')
   82         <<< helper
   83      where
   84        helper (ef, (efs, [])) = foldl (|||) ef efs +.. star anyC
   85        helper (ef, (efs, _ )) = foldl (|||) ef efs +.. noC
   86 
   87 regexp0 :: Parser Char (Parser Char String)
   88 regexp0 =
   89      lit '^' ..+ rrRegexp
   90  ||| rrRegexp
   91          <<< (\p -> let p' = p ||| anyC ..+ p' in p')
   92 
   93 stringMatch :: String -> String -> Bool
   94 stringMatch re subject = wellformed && not (null (filter (null . snd) (match subject)))
   95     where matches      = regexp0 re
   96           wellformed   = not (null matches) && null rest
   97           (match,rest) = head matches