From 8b28c0feeeae0aaea69e4f5185c42239c87732da Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan
Date: Sun, 24 Jun 2007 16:30:41 +0000
Subject: [PATCH] Add Glob module, update docs.

---
Review notes.  Text between the "---" line and the first "diff --git"
line is commentary only; it is not part of the change.

System/FilePath/Glob.hs (new module, exports only namesMatching):

* namesMatching pat
    - A name containing none of '[', '*', '?' is treated as a literal:
      the result is [pat] when doesNameExist pat holds, otherwise [].
    - Otherwise the pattern is split with splitFileName.  An empty
      directory part means "match baseName against the current
      directory".  A non-empty directory part is expanded recursively
      when it is itself a pattern; each resulting directory is then
      searched with listMatches (pattern base name) or listPlain
      (literal base name), and every hit is prefixed with its
      directory using (</>).
    - NOTE(review): isPattern only looks for '[', '*' and '?'.  The
      GlobPattern documentation touched by this same patch also lists
      "\" escapes and "(s1|s2|...)" alternatives; a name using only
      those is handled as a literal here.  Confirm this is intended.

* listMatches dirName pat
    - An empty dirName is replaced by the current directory.
    - The directory read is wrapped in unsafeInterleaveIO, so it is
      deferred until the name list is demanded.  Any exception that
      `handle` catches from getDirectoryContents is discarded and
      yields an empty list.
    - Names starting with '.' are considered only when the pattern
      itself starts with '.'; otherwise they are filtered out before
      matching with (~~).

* listPlain dirName baseName
    - Empty baseName: succeeds (with [baseName]) when dirName is a
      directory.  Non-empty baseName: succeeds when
      dirName </> baseName names a file or directory.

* doesNameExist name
    - True when name is an existing file or an existing directory.

examples/Simple.hs:

* decomp feeds every name matching "*/*.gz" through L.readFile and
  decompress, and concatenates the results into one lazy ByteString.

 FileManip.cabal                |  1 +
 README                         | 22 +++++++++++
 System/FilePath/Glob.hs        | 72 ++++++++++++++++++++++++++++++++++
 System/FilePath/GlobPattern.hs |  6 +--
 examples/Simple.hs             | 12 ++++++
 5 files changed, 110 insertions(+), 3 deletions(-)
 create mode 100644 System/FilePath/Glob.hs

diff --git a/FileManip.cabal b/FileManip.cabal
index 2160e8c..165b727 100644
--- a/FileManip.cabal
+++ b/FileManip.cabal
@@ -13,6 +13,7 @@ Build-Depends: base, filepath, mtl, unix
 GHC-Options: -Wall -O2
 Exposed-Modules:
   System.FilePath.Find,
+  System.FilePath.Glob,
   System.FilePath.GlobPattern,
   System.FilePath.Manip
 Extra-Source-Files: README
diff --git a/README b/README
index 77b93bf..8f2735c 100644
--- a/README
+++ b/README
@@ -4,6 +4,28 @@ FileManip: expressive file manipulation
 This package provides functions and combinators for searching,
 matching, and manipulating files.
 
+It provides four modules.
+
+System.FilePath.Find lets you search a filesystem hierarchy efficiently:
+
+  find always (extension ==? ".pl") >>= mapM_ remove
+
+System.FilePath.GlobPattern lets you perform glob-style pattern
+matching, without going through a regexp engine:
+
+  "foo.c" ~~ "*.c" ==> True
+
+System.FilePath.Glob lets you do simple glob-style file name searches:
+
+  namesMatching "*/*.c" ==> ["foo/bar.c"]
+
+System.FilePath.Manip lets you rename files procedurally, edit files
+in place, or save old copies as backups:
+
+  modifyWithBackup (<.> "bak")
+                   (unlines . map (takeWhile (/= ',')) . lines)
+                   "myPoorFile.csv"
+
 
 To build and install:
 
diff --git a/System/FilePath/Glob.hs b/System/FilePath/Glob.hs
new file mode 100644
index 0000000..0a2ceaa
--- /dev/null
+++ b/System/FilePath/Glob.hs
@@ -0,0 +1,72 @@
+-- |
+-- Module: System.FilePath.Glob
+-- Copyright: Bryan O'Sullivan
+-- License: LGPL
+-- Maintainer: Bryan O'Sullivan
+-- Stability: unstable
+-- Portability: everywhere
+
+module System.FilePath.Glob (
+      namesMatching
+    ) where
+
+import Control.Exception (handle)
+import Control.Monad (forM)
+import System.FilePath.GlobPattern ((~~))
+import System.Directory (doesDirectoryExist, doesFileExist,
+                         getCurrentDirectory, getDirectoryContents)
+import System.FilePath (dropTrailingPathSeparator, splitFileName, (</>))
+import System.IO.Unsafe (unsafeInterleaveIO)
+
+-- | Return a list of names matching a glob pattern. The list is
+-- generated lazily.
+namesMatching :: String -> IO [FilePath]
+namesMatching pat
+  | not (isPattern pat) = do
+    exists <- doesNameExist pat
+    return (if exists then [pat] else [])
+  | otherwise = do
+    case splitFileName pat of
+      ("", baseName) -> do
+          curDir <- getCurrentDirectory
+          listMatches curDir baseName
+      (dirName, baseName) -> do
+          dirs <- if isPattern dirName
+                  then namesMatching (dropTrailingPathSeparator dirName)
+                  else return [dirName]
+          let listDir = if isPattern baseName
+                        then listMatches
+                        else listPlain
+          pathNames <- forM dirs $ \dir -> do
+                           baseNames <- listDir dir baseName
+                           return (map (dir </>) baseNames)
+          return (concat pathNames)
+  where isPattern = any (`elem` "[*?")
+
+listMatches :: FilePath -> String -> IO [String]
+listMatches dirName pat = do
+    dirName' <- if null dirName
+                then getCurrentDirectory
+                else return dirName
+    names <- unsafeInterleaveIO (handle (const (return [])) $
+             getDirectoryContents dirName')
+    let names' = if isHidden pat
+                 then filter isHidden names
+                 else filter (not . isHidden) names
+    return (filter (~~ pat) names')
+  where isHidden ('.':_) = True
+        isHidden _ = False
+
+listPlain :: FilePath -> String -> IO [String]
+listPlain dirName baseName = do
+    exists <- if null baseName
+              then doesDirectoryExist dirName
+              else doesNameExist (dirName </> baseName)
+    return (if exists then [baseName] else [])
+
+doesNameExist :: FilePath -> IO Bool
+doesNameExist name = do
+    fileExists <- doesFileExist name
+    if fileExists
+      then return True
+      else doesDirectoryExist name
diff --git a/System/FilePath/GlobPattern.hs b/System/FilePath/GlobPattern.hs
index dbc8ad5..fa80d20 100644
--- a/System/FilePath/GlobPattern.hs
+++ b/System/FilePath/GlobPattern.hs
@@ -32,12 +32,12 @@ import System.FilePath (pathSeparator)
 --
 -- * @[!/range/]@ matches any character /not/ in /range/.
 --
+-- There are three extensions to the traditional glob syntax, taken
+-- from modern Unix shells.
+--
 -- * @\\@ escapes a character that might otherwise have special
 --   meaning. For a literal @\"\\\"@ character, use @\"\\\\\"@.
 --
--- There are two extensions to the traditional glob syntax, taken from
--- modern Unix shells.
---
 -- * @**@ matches everything, including a directory separator.
 --
 -- * @(/s1/|/s2/|/.../)@ matches any of the strings /s1/, /s2/, etc.
diff --git a/examples/Simple.hs b/examples/Simple.hs
index 0370ef7..b49fc9c 100644
--- a/examples/Simple.hs
+++ b/examples/Simple.hs
@@ -1,7 +1,10 @@
 import Control.Monad
+import Codec.Compression.GZip
 import qualified Data.ByteString.Char8 as S
+import qualified Data.ByteString.Lazy as L
 import System.FilePath
 import System.FilePath.Find
+import System.FilePath.Glob
 import System.FilePath.Manip
 import Text.Regex.Posix ((=~))
 
@@ -68,3 +71,12 @@ recGrep :: String -> FilePath -> IO [(FilePath, Int, S.ByteString)]
 recGrep pat top = find always (fileType ==? RegularFile) top >>=
                   mapM ((,,) >>= flip grepFile pat) >>=
                   return . concat
+
+
+-- Decompress all gzip files matching a fixed glob pattern, and return
+-- the results as a single huge lazy ByteString.
+
+decomp :: IO L.ByteString
+
+decomp = namesMatching "*/*.gz" >>=
+         fmap L.concat . mapM (fmap decompress . L.readFile)