097afc9831
--HG-- extra : convert_revision : e114cb9154838d7f74c72671db22dbddb97e3a58
178 lines
6.7 KiB
Haskell
178 lines
6.7 KiB
Haskell
-- |
|
|
-- Module: System.FilePath.GlobPattern
|
|
-- Copyright: Bryan O'Sullivan
|
|
-- License: LGPL
|
|
-- Maintainer: Bryan O'Sullivan <bos@serpentine.com>
|
|
-- Stability: unstable
|
|
-- Portability: everywhere
|
|
module System.FilePath.GlobPattern (
|
|
-- * Glob patterns
|
|
-- $syntax
|
|
GlobPattern
|
|
-- * Matching functions
|
|
, (~~)
|
|
, (/~)
|
|
) where
|
|
|
|
import Control.Arrow (second)
|
|
import Data.Ix (Ix, inRange)
|
|
import Data.List (nub)
|
|
import Data.Maybe (isJust)
|
|
import System.FilePath (pathSeparator)
|
|
|
|
-- $syntax
|
|
--
|
|
-- Basic glob pattern syntax is the same as for the Unix shell
|
|
-- environment.
|
|
--
|
|
-- * @*@ matches everything up to a directory separator or end of
|
|
-- string.
|
|
--
|
|
-- * @[/range/]@ matches any character in /range/.
|
|
--
|
|
-- * @[!/range/]@ matches any character /not/ in /range/.
|
|
--
|
|
-- There are three extensions to the traditional glob syntax, taken
|
|
-- from modern Unix shells.
|
|
--
|
|
-- * @\\@ escapes a character that might otherwise have special
|
|
-- meaning. For a literal @\"\\\"@ character, use @\"\\\\\"@.
|
|
--
|
|
-- * @**@ matches everything, including a directory separator.
|
|
--
|
|
-- * @(/s1/|/s2/|/.../)@ matches any of the strings /s1/, /s2/, etc.
|
|
|
|
-- | Glob pattern type.
|
|
type GlobPattern = String
|
|
|
|
spanClass :: Char -> String -> (String, String)
|
|
|
|
spanClass c = gs []
|
|
where gs _ [] = error "unterminated character class"
|
|
gs acc (d:ds) | d == c = (reverse acc, ds)
|
|
| d == '\\' = case ds of
|
|
(e:es) -> gs (e:'\\':acc) es
|
|
_ -> error "unterminated escape"
|
|
| otherwise = gs (d:acc) ds
|
|
|
|
data Ix a => SRange a = SRange [a] [(a, a)]
|
|
deriving (Show)
|
|
|
|
inSRange :: Ix a => a -> SRange a -> Bool
|
|
|
|
inSRange c (SRange d s) = c `elem` d || any (flip inRange c) s
|
|
|
|
type CharClass = SRange Char
|
|
|
|
makeClass :: String -> CharClass
|
|
|
|
makeClass = makeClass' [] []
|
|
where makeClass' :: [(Char, Char)] -> [Char] -> String -> CharClass
|
|
makeClass' dense sparse [] = SRange sparse dense
|
|
makeClass' dense sparse (a:'-':b:cs) =
|
|
makeClass' ((a,b):dense) sparse cs
|
|
makeClass' dense sparse (c:cs) = makeClass' dense (c:sparse) cs
|
|
|
|
data MatchTerm = MatchLiteral String
|
|
| MatchAny
|
|
| MatchDir
|
|
| MatchChar
|
|
| MatchClass Bool CharClass
|
|
| MatchGroup [String]
|
|
deriving (Show)
|
|
|
|
parseGlob :: GlobPattern -> [MatchTerm]
|
|
|
|
parseGlob [] = []
|
|
parseGlob ('*':'*':cs) = MatchAny : parseGlob cs
|
|
parseGlob ('*':cs) = MatchDir : parseGlob cs
|
|
parseGlob ('?':cs) = MatchChar : parseGlob cs
|
|
parseGlob ('[':cs) = let (cc, ccs) = spanClass ']' cs
|
|
cls = case cc of
|
|
('!':ccs') -> MatchClass False $ makeClass ccs'
|
|
_ -> MatchClass True $ makeClass cc
|
|
in cls : parseGlob ccs
|
|
parseGlob ('(':cs) = let (gg, ggs) = spanClass ')' cs
|
|
in MatchGroup (breakGroup [] gg) : parseGlob ggs
|
|
where breakGroup :: String -> String -> [String]
|
|
breakGroup acc [] = [reverse acc]
|
|
breakGroup _ ['\\'] = error "group: unterminated escape"
|
|
breakGroup acc ('\\':c:cs') = breakGroup (c:acc) cs'
|
|
breakGroup acc ('|':cs') = reverse acc : breakGroup [] cs'
|
|
breakGroup acc (c:cs') = breakGroup (c:acc) cs'
|
|
parseGlob ['\\'] = error "glob: unterminated escape"
|
|
parseGlob ('\\':c:cs) = MatchLiteral [c] : parseGlob cs
|
|
parseGlob (c:cs) = MatchLiteral [c] : parseGlob cs
|
|
|
|
simplifyTerms :: [MatchTerm] -> [MatchTerm]
|
|
simplifyTerms [] = []
|
|
simplifyTerms (MatchLiteral []:as) = simplifyTerms as
|
|
simplifyTerms (m@(MatchLiteral a):as) =
|
|
case simplifyTerms as of
|
|
(MatchLiteral b:bs) -> MatchLiteral (a ++ b) : bs
|
|
bs -> m : bs
|
|
simplifyTerms (MatchClass True (SRange [] []):as) = simplifyTerms as
|
|
simplifyTerms (MatchClass True (SRange a@[_] []):as) =
|
|
simplifyTerms $ MatchLiteral a : as
|
|
simplifyTerms (MatchGroup []:as) = simplifyTerms as
|
|
simplifyTerms (MatchGroup gs:as) =
|
|
case commonPrefix gs of
|
|
(p,[]) -> simplifyTerms (MatchLiteral p : as)
|
|
(p,ss) -> simplifyTerms (MatchLiteral p : MatchGroup ss : as)
|
|
simplifyTerms (a:as) = a:simplifyTerms as
|
|
|
|
commonPrefix :: [String] -> (String, [String])
|
|
commonPrefix = second nub . pfx ""
|
|
where pfx _ [] = ("", [])
|
|
pfx acc ss | any null ss = (reverse acc, ss)
|
|
| otherwise = let hs = map head ss
|
|
h = head hs
|
|
in if all (h==) $ tail hs
|
|
then pfx (h:acc) $ map tail ss
|
|
else (reverse acc, ss)
|
|
|
|
matchTerms :: [MatchTerm] -> String -> Maybe ()
|
|
|
|
matchTerms [] [] = return ()
|
|
matchTerms [] _ = fail "residual string"
|
|
matchTerms (MatchLiteral m:ts) cs = matchLiteral m cs >>= matchTerms ts
|
|
where matchLiteral (a:as) (b:bs) | a == b = matchLiteral as bs
|
|
matchLiteral [] as = return as
|
|
matchLiteral _ _ = fail "not a prefix"
|
|
matchTerms (MatchClass k c:ts) cs = matchClass cs >>= matchTerms ts
|
|
where matchClass (b:bs) | (inClass && k) || not (inClass || k) = return bs
|
|
where inClass = b `inSRange` c
|
|
matchClass _ = fail "no match"
|
|
matchTerms (MatchGroup g:ts) cs = matchGroup g cs >>= matchTerms ts
|
|
where matchGroup g' as | any null g' = return as
|
|
matchGroup g' (a:as) | a `elem` map head g' =
|
|
matchGroup (map tail g') as
|
|
matchGroup _ _ = fail "not in group"
|
|
matchTerms [MatchAny] _ = return ()
|
|
matchTerms (MatchAny:ts) cs = matchAny cs >>= matchTerms ts
|
|
where matchAny [] = fail "no match"
|
|
matchAny cs' = case matchTerms ts cs' of
|
|
Nothing -> matchAny (tail cs')
|
|
_ -> return cs
|
|
matchTerms [MatchDir] cs | pathSeparator `elem` cs = fail "path separator"
|
|
| otherwise = return ()
|
|
matchTerms (MatchDir:ts) cs = matchDir cs >>= matchTerms ts
|
|
where matchDir [] = fail "no match"
|
|
matchDir (c:_) | c == pathSeparator = fail "path separator"
|
|
matchDir cs' = case matchTerms ts cs' of
|
|
Nothing -> matchDir $ tail cs'
|
|
_ -> return cs'
|
|
matchTerms (MatchChar:_) [] = fail "end of input"
|
|
matchTerms (MatchChar:ts) (_:cs) = matchTerms ts cs
|
|
|
|
-- | Match a file name against a glob pattern.
|
|
(~~) :: FilePath -> GlobPattern -> Bool
|
|
|
|
name ~~ pat = let terms = simplifyTerms (parseGlob pat)
|
|
in (isJust . matchTerms terms) name
|
|
|
|
-- | Match a file name against a glob pattern, but return 'True' if
|
|
-- the match /fail/s.
|
|
(/~) :: FilePath -> GlobPattern -> Bool
|
|
|
|
(/~) = (not . ) . (~~)
|