filemanip/System/FilePath/Find.hs

608 lines
18 KiB
Haskell

{-# LANGUAGE CPP, GeneralizedNewtypeDeriving, ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- |
-- Module: System.FilePath.Find
-- Copyright: Bryan O'Sullivan
-- License: BSD3
-- Maintainer: Bryan O'Sullivan <bos@serpentine.com>
-- Stability: unstable
-- Portability: Unix-like systems (requires newtype deriving)
--
-- This module provides functions for traversing a filesystem
-- hierarchy. The 'find' function generates a lazy list of matching
-- files, while 'fold' performs a left fold.
--
-- Both 'find' and 'fold' allow fine control over recursion, using the
-- 'FindClause' type. This type is also used to pre-filter the results
-- returned by 'find'.
--
-- The 'FindClause' type lets you write filtering and recursion
-- control expressions clearly and easily.
--
-- For example, this clause matches C source files.
--
-- @
-- 'extension' '==?' \".c\" '||?' 'extension' '==?' \".h\"
-- @
--
-- Because 'FindClause' is a monad, you can use the usual monad
-- machinery to, for example, lift pure functions into it.
--
-- Here's a clause that will return 'True' for any file whose
-- directory name contains the word @\"temp\"@.
--
-- @
-- (isInfixOf \"temp\") \`liftM\` 'directory'
-- @
module System.FilePath.Find (
FileInfo(..)
, FileType(..)
, FindClause
, FilterPredicate
, RecursionPredicate
-- * Simple entry points
, find
, findL
, fold
, foldL
-- * More expressive entry points
, findWithHandler
, findWithHandlerL
, foldWithHandler
, foldWithHandlerL
-- * Helper functions
, evalClause
, statusType
, liftOp
-- * Combinators for controlling recursion and filtering behaviour
, filePath
, fileStatus
, depth
, fileInfo
, always
, extension
, directory
, fileName
, fileType
, contains
-- ** Combinator versions of 'F.FileStatus' functions from "System.Posix.Files"
-- $statusFunctions
, deviceID
, fileID
, fileOwner
, fileGroup
, fileSize
, linkCount
, specialDeviceID
, fileMode
, accessTime
, modificationTime
, statusChangeTime
-- *** Convenience combinators for file status
, filePerms
, anyPerms
-- ** Combinators for canonical path and name
, canonicalPath
, canonicalName
-- ** Combinators that operate on symbolic links
, readLink
, followStatus
-- ** Common binary operators, lifted as combinators
-- $binaryOperators
, (~~?)
, (/~?)
, (==?)
, (/=?)
, (>?)
, (<?)
, (>=?)
, (<=?)
, (.&.?)
-- ** Combinators for gluing clauses together
, (&&?)
, (||?)
) where
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative (Applicative)
#endif
import qualified Control.Exception as E
import Control.Exception (IOException, handle)
import Control.Monad (foldM, forM, liftM, liftM2)
import Control.Monad.State (State, evalState, get)
import Data.Bits (Bits, (.&.))
import Data.List (sort)
import System.Directory (getDirectoryContents, canonicalizePath)
import System.FilePath ((</>), takeDirectory, takeExtension, takeFileName)
import System.FilePath.GlobPattern (GlobPattern, (~~), (/~))
import System.IO (hPutStrLn, stderr)
import System.IO.Unsafe (unsafeInterleaveIO, unsafePerformIO)
import qualified System.PosixCompat.Files as F
import qualified System.PosixCompat.Types as T
-- | Information collected during the traversal of a directory.
data FileInfo = FileInfo
{
infoPath :: FilePath -- ^ file path
, infoDepth :: Int -- ^ current recursion depth
, infoStatus :: F.FileStatus -- ^ status of file
} deriving (Eq)
instance Eq F.FileStatus where
a == b = F.deviceID a == F.deviceID b &&
F.fileID a == F.fileID b
-- | Construct a 'FileInfo' value.
mkFI :: FilePath -> Int -> F.FileStatus -> FileInfo
mkFI = FileInfo
-- | Monadic container for file information, allowing for clean
-- construction of combinators. Wraps the 'State' monad, but doesn't
-- allow 'get' or 'put'.
newtype FindClause a = FC { runFC :: State FileInfo a }
deriving (Functor, Applicative, Monad)
-- | Run the given 'FindClause' on the given 'FileInfo' and return its
-- result. This can be useful if you are writing a function to pass
-- to 'fold'.
--
-- Example:
--
-- @
-- myFoldFunc :: a -> 'FileInfo' -> a
-- myFoldFunc a i = let useThisFile = 'evalClause' ('fileName' '==?' \"foo\") i
-- in if useThisFile
-- then fiddleWith a
-- else a
-- @
evalClause :: FindClause a -> FileInfo -> a
evalClause = evalState . runFC
evalFI :: FindClause a
-> FilePath
-> Int
-> F.FileStatus
-> a
evalFI m p d s = evalClause m (mkFI p d s)
-- | Return the current 'FileInfo'.
fileInfo :: FindClause FileInfo
fileInfo = FC $ get
-- | Return the name of the file being visited.
filePath :: FindClause FilePath
filePath = infoPath `liftM` fileInfo
-- | Return the current recursion depth.
depth :: FindClause Int
depth = infoDepth `liftM` fileInfo
-- | Return the 'F.FileStatus' for the current file.
fileStatus :: FindClause F.FileStatus
fileStatus = infoStatus `liftM` fileInfo
type FilterPredicate = FindClause Bool
type RecursionPredicate = FindClause Bool
-- | List the files in the given directory, sorted, and without \".\"
-- or \"..\".
getDirContents :: FilePath -> IO [FilePath]
getDirContents dir = (sort . filter goodName) `liftM` getDirectoryContents dir
where goodName "." = False
goodName ".." = False
goodName _ = True
-- | Search a directory recursively, with recursion controlled by a
-- 'RecursionPredicate'. Lazily return a sorted list of all files
-- matching the given 'FilterPredicate'. Any errors that occur are
-- dealt with by the given handler.
findWithHandlerL ::
Bool -- ^ follow symlinks
-> (FilePath -> IOException -> IO [FilePath]) -- ^ error handler
-> RecursionPredicate -- ^ control recursion into subdirectories
-> FilterPredicate -- ^ decide whether a file appears in the result
-> FilePath -- ^ directory to start searching
-> IO [FilePath] -- ^ files that matched the 'FilterPredicate'
findWithHandlerL links errHandler recurse filt path0 =
handle (errHandler path0) $ getStatus links path0 >>= visit path0 0
where visit path depth st =
if F.isDirectory st && evalFI recurse path depth st
then unsafeInterleaveIO (traverse path (succ depth) st)
else filterPath path depth st []
traverse dir depth dirSt = do
names <- E.catch (getDirContents dir) (errHandler dir)
filteredPaths <- forM names $ \name -> do
let path = dir </> name
unsafeInterleaveIO $ handle (errHandler path)
(F.getSymbolicLinkStatus path >>= visit path depth)
filterPath dir depth dirSt (concat filteredPaths)
filterPath path depth st result =
return $ if evalFI filt path depth st
then path:result
else result
getStatus :: Bool -> FilePath -> IO F.FileStatus
getStatus links = if links then F.getFileStatus else F.getSymbolicLinkStatus
-- | Same as 'findWithHandlerL' without following links
findWithHandler ::
(FilePath -> IOException -> IO [FilePath]) -- ^ error handler
-> RecursionPredicate -- ^ control recursion into subdirectories
-> FilterPredicate -- ^ decide whether a file appears in the result
-> FilePath -- ^ directory to start searching
-> IO [FilePath] -- ^ files that matched the 'FilterPredicate'
findWithHandler = findWithHandlerL False
-- | Search a directory recursively, with recursion controlled by a
-- 'RecursionPredicate'. Lazily return a sorted list of all files
-- matching the given 'FilterPredicate'. Any errors that occur are
-- ignored, with warnings printed to 'stderr'.
findL :: Bool -- ^ follow symlinks
-> RecursionPredicate -- ^ control recursion into subdirectories
-> FilterPredicate -- ^ decide whether a file appears in the result
-> FilePath -- ^ directory to start searching
-> IO [FilePath] -- ^ files that matched the 'FilterPredicate'
findL links = findWithHandlerL links warnOnError
where warnOnError path err =
hPutStrLn stderr (path ++ ": " ++ show err) >> return []
-- | Same as 'findL' without following links
find :: RecursionPredicate -- ^ control recursion into subdirectories
-> FilterPredicate -- ^ decide whether a file appears in the result
-> FilePath -- ^ directory to start searching
-> IO [FilePath] -- ^ files that matched the 'FilterPredicate'
find = findL False
-- | Search a directory recursively, with recursion controlled by a
-- 'RecursionPredicate'. Fold over all files found. Any errors that
-- occur are dealt with by the given handler. The fold is strict, and
-- run from \"left\" to \"right\", so the folded function should be
-- strict in its left argument to avoid space leaks. If you need a
-- right-to-left fold, use 'foldr' on the result of 'findWithHandler'
-- instead.
foldWithHandlerL
:: Bool -- ^ follow symlinks
-> (FilePath -> a -> IOException -> IO a) -- ^ error handler
-> RecursionPredicate -- ^ control recursion into subdirectories
-> (a -> FileInfo -> a) -- ^ function to fold with
-> a -- ^ seed value for fold
-> FilePath -- ^ directory to start searching
-> IO a -- ^ final value after folding
foldWithHandlerL links errHandler recurse f state path =
handle (errHandler path state) $ getStatus links path >>= visit state path 0
where visit state path depth st =
if F.isDirectory st && evalFI recurse path depth st
then traverse state path (succ depth) st
else let state' = f state (mkFI path depth st)
in state' `seq` return state'
traverse state dir depth dirSt = handle (errHandler dir state) $
getDirContents dir >>=
let state' = f state (mkFI dir depth dirSt)
in state' `seq` flip foldM state' (\state name ->
handle (errHandler dir state) $
let path = dir </> name
in F.getSymbolicLinkStatus path >>= visit state path depth)
-- | Same as 'foldWithHandlerL' without following symlinks
foldWithHandler
:: (FilePath -> a -> IOException -> IO a) -- ^ error handler
-> RecursionPredicate -- ^ control recursion into subdirectories
-> (a -> FileInfo -> a) -- ^ function to fold with
-> a -- ^ seed value for fold
-> FilePath -- ^ directory to start searching
-> IO a -- ^ final value after folding
foldWithHandler = foldWithHandlerL False
-- | Search a directory recursively, with recursion controlled by a
-- 'RecursionPredicate'. Fold over all files found. Any errors that
-- occur are ignored, with warnings printed to 'stderr'. The fold
-- function is run from \"left\" to \"right\", so it should be strict
-- in its left argument to avoid space leaks. If you need a
-- right-to-left fold, use 'foldr' on the result of 'findWithHandler'
-- instead.
foldL :: Bool -- ^ follow symlinks
-> RecursionPredicate
-> (a -> FileInfo -> a)
-> a
-> FilePath
-> IO a
foldL links = foldWithHandlerL links warnOnError
where warnOnError path a err =
hPutStrLn stderr (path ++ ": " ++ show err) >> return a
-- | 'foldL' without following symlinks
fold :: RecursionPredicate
-> (a -> FileInfo -> a)
-> a
-> FilePath
-> IO a
fold = foldL False
-- | Unconditionally return 'True'.
always :: FindClause Bool
always = return True
-- | Return the file name extension.
--
-- Example:
--
-- @
-- 'extension' \"foo\/bar.txt\" => \".txt\"
-- @
extension :: FindClause FilePath
extension = takeExtension `liftM` filePath
-- | Return the file name, without the directory name.
--
-- What this means in practice:
--
-- @
-- 'fileName' \"foo\/bar.txt\" => \"bar.txt\"
-- @
--
-- Example:
--
-- @
-- 'fileName' '==?' \"init.c\"
-- @
fileName :: FindClause FilePath
fileName = takeFileName `liftM` filePath
-- | Return the directory name, without the file name.
--
-- What this means in practice:
--
-- @
-- 'directory' \"foo\/bar.txt\" => \"foo\"
-- @
--
-- Example in a clause:
--
-- @
-- let hasSuffix = 'liftOp' 'isSuffixOf'
-- in directory \`hasSuffix\` \"tests\"
-- @
directory :: FindClause FilePath
directory = takeDirectory `liftM` filePath
-- | Return the canonical path of the file being visited.
--
-- See `canonicalizePath` for details of what canonical path means.
canonicalPath :: FindClause FilePath
canonicalPath = (unsafePerformIO . canonicalizePath) `liftM` filePath
-- | Return the canonical name of the file (canonical path with the
-- directory part removed).
canonicalName :: FindClause FilePath
canonicalName = takeFileName `liftM` canonicalPath
-- | Run the given action in the 'IO' monad (using 'unsafePerformIO')
-- if the current file is a symlink. Hide errors by wrapping results
-- in the 'Maybe' monad.
withLink :: (FilePath -> IO a) -> FindClause (Maybe a)
withLink f = do
path <- filePath
st <- fileStatus
return $ if F.isSymbolicLink st
then unsafePerformIO $ handle (\(_::IOException) -> return Nothing) $
Just `liftM` f path
else Nothing
-- | If the current file is a symbolic link, return 'Just' the target
-- of the link, otherwise 'Nothing'.
readLink :: FindClause (Maybe FilePath)
readLink = withLink F.readSymbolicLink
-- | If the current file is a symbolic link, return 'Just' the status
-- of the ultimate endpoint of the link. Otherwise (including in the
-- case of an error), return 'Nothing'.
--
-- Example:
--
-- @
-- 'statusType' \`liftM\` 'followStatus' '==?' 'RegularFile'
-- @
followStatus :: FindClause (Maybe F.FileStatus)
followStatus = withLink F.getFileStatus
data FileType = BlockDevice
| CharacterDevice
| NamedPipe
| RegularFile
| Directory
| SymbolicLink
| Socket
| Unknown
deriving (Eq, Ord, Show)
-- | Return the type of file currently being visited.
--
-- Example:
--
-- @
-- 'fileType' '==?' 'RegularFile'
-- @
fileType :: FindClause FileType
fileType = statusType `liftM` fileStatus
-- | Return the type of a file. This is much more useful for case
-- analysis than the usual functions on 'F.FileStatus' values.
statusType :: F.FileStatus -> FileType
statusType st | F.isBlockDevice st = BlockDevice
statusType st | F.isCharacterDevice st = CharacterDevice
statusType st | F.isNamedPipe st = NamedPipe
statusType st | F.isRegularFile st = RegularFile
statusType st | F.isDirectory st = Directory
statusType st | F.isSymbolicLink st = SymbolicLink
statusType st | F.isSocket st = Socket
statusType _ = Unknown
-- $statusFunctions
--
-- These are simply lifted versions of the 'F.FileStatus' accessor
-- functions in the "System.Posix.Files" module. The definitions all
-- have the following form:
--
-- @
-- 'deviceID' :: 'FindClause' "System.Posix.Types".DeviceID
-- 'deviceID' = "System.Posix.Files".deviceID \`liftM\` 'fileStatus'
-- @
deviceID :: FindClause T.DeviceID
deviceID = F.deviceID `liftM` fileStatus
fileID :: FindClause T.FileID
fileID = F.fileID `liftM` fileStatus
fileOwner :: FindClause T.UserID
fileOwner = F.fileOwner `liftM` fileStatus
fileGroup :: FindClause T.GroupID
fileGroup = F.fileGroup `liftM` fileStatus
fileSize :: FindClause T.FileOffset
fileSize = F.fileSize `liftM` fileStatus
linkCount :: FindClause T.LinkCount
linkCount = F.linkCount `liftM` fileStatus
specialDeviceID :: FindClause T.DeviceID
specialDeviceID = F.specialDeviceID `liftM` fileStatus
fileMode :: FindClause T.FileMode
fileMode = F.fileMode `liftM` fileStatus
-- | Return the permission bits of the 'T.FileMode'.
filePerms :: FindClause T.FileMode
filePerms = (.&. 0777) `liftM` fileMode
-- | Return 'True' if any of the given permission bits is set.
--
-- Example:
--
-- @
-- 'anyPerms' 0444
-- @
anyPerms :: T.FileMode
-> FindClause Bool
anyPerms m = filePerms >>= \p -> return (p .&. m /= 0)
accessTime :: FindClause T.EpochTime
accessTime = F.accessTime `liftM` fileStatus
modificationTime :: FindClause T.EpochTime
modificationTime = F.modificationTime `liftM` fileStatus
statusChangeTime :: FindClause T.EpochTime
statusChangeTime = F.statusChangeTime `liftM` fileStatus
-- | Return 'True' if the given path exists, relative to the current
-- file. For example, if @\"foo\"@ is being visited, and you call
-- contains @\"bar\"@, this combinator will return 'True' if
-- @\"foo\/bar\"@ exists.
contains :: FilePath -> FindClause Bool
contains p = do
d <- filePath
return $ unsafePerformIO $
handle (\(_::IOException) -> return False) $
F.getFileStatus (d </> p) >> return True
-- | Lift a binary operator into the 'FindClause' monad, so that it
-- becomes a combinator. The left hand side of the combinator should
-- be a @'FindClause' a@, while the right remains a normal value of
-- type @a@.
liftOp :: Monad m => (a -> b -> c) -> m a -> b -> m c
liftOp f a b = a >>= \a' -> return (f a' b)
-- $binaryOperators
--
-- These are lifted versions of the most commonly used binary
-- operators. They have the same fixities and associativities as
-- their unlifted counterparts. They are lifted using 'liftOp', like
-- so:
--
-- @('==?') = 'liftOp' (==)@
-- | Return 'True' if the current file's name matches the given
-- 'GlobPattern'.
(~~?) :: FindClause FilePath -> GlobPattern -> FindClause Bool
(~~?) = liftOp (~~)
infix 4 ~~?
-- | Return 'True' if the current file's name does not match the given
-- 'GlobPattern'.
(/~?) :: FindClause FilePath -> GlobPattern -> FindClause Bool
(/~?) = liftOp (/~)
infix 4 /~?
(==?) :: Eq a => FindClause a -> a -> FindClause Bool
(==?) = liftOp (==)
infix 4 ==?
(/=?) :: Eq a => FindClause a -> a -> FindClause Bool
(/=?) = liftOp (/=)
infix 4 /=?
(>?) :: Ord a => FindClause a -> a -> FindClause Bool
(>?) = liftOp (>)
infix 4 >?
(<?) :: Ord a => FindClause a -> a -> FindClause Bool
(<?) = liftOp (<)
infix 4 <?
(>=?) :: Ord a => FindClause a -> a -> FindClause Bool
(>=?) = liftOp (>=)
infix 4 >=?
(<=?) :: Ord a => FindClause a -> a -> FindClause Bool
(<=?) = liftOp (<=)
infix 4 <=?
-- | This operator is useful to check if bits are set in a
-- 'T.FileMode'.
(.&.?) :: Bits a => FindClause a -> a -> FindClause a
(.&.?) = liftOp (.&.)
infixl 7 .&.?
(&&?) :: FindClause Bool -> FindClause Bool -> FindClause Bool
(&&?) = liftM2 (&&)
infixr 3 &&?
(||?) :: FindClause Bool -> FindClause Bool -> FindClause Bool
(||?) = liftM2 (||)
infixr 2 ||?