// File: walk.go

  package godirwalk

  import (
  	"errors"
  	"fmt"
  	"os"
  	"path/filepath"
  )
  8. // Options provide parameters for how the Walk function operates.
  9. type Options struct {
  10. // ErrorCallback specifies a function to be invoked in the case of an error
  11. // that could potentially be ignored while walking a file system
  12. // hierarchy. When set to nil or left as its zero-value, any error condition
  13. // causes Walk to immediately return the error describing what took
  14. // place. When non-nil, this user supplied function is invoked with the OS
  15. // pathname of the file system object that caused the error along with the
  16. // error that took place. The return value of the supplied ErrorCallback
  17. // function determines whether the error will cause Walk to halt immediately
  18. // as it would were no ErrorCallback value provided, or skip this file
  19. // system node yet continue on with the remaining nodes in the file system
  20. // hierarchy.
  21. //
  22. // ErrorCallback is invoked both for errors that are returned by the
  23. // runtime, and for errors returned by other user supplied callback
  24. // functions.
  25. ErrorCallback func(string, error) ErrorAction
  26. // FollowSymbolicLinks specifies whether Walk will follow symbolic links
  27. // that refer to directories. When set to false or left as its zero-value,
  28. // Walk will still invoke the callback function with symbolic link nodes,
  29. // but if the symbolic link refers to a directory, it will not recurse on
  30. // that directory. When set to true, Walk will recurse on symbolic links
  31. // that refer to a directory.
  32. FollowSymbolicLinks bool
  33. // Unsorted controls whether or not Walk will sort the immediate descendants
  34. // of a directory by their relative names prior to visiting each of those
  35. // entries.
  36. //
  37. // When set to false or left at its zero-value, Walk will get the list of
  38. // immediate descendants of a particular directory, sort that list by
  39. // lexical order of their names, and then visit each node in the list in
  40. // sorted order. This will cause Walk to always traverse the same directory
  41. // tree in the same order, however may be inefficient for directories with
  42. // many immediate descendants.
  43. //
  44. // When set to true, Walk skips sorting the list of immediate descendants
  45. // for a directory, and simply visits each node in the order the operating
  46. // system enumerated them. This will be more fast, but with the side effect
  47. // that the traversal order may be different from one invocation to the
  48. // next.
  49. Unsorted bool
  50. // Callback is a required function that Walk will invoke for every file
  51. // system node it encounters.
  52. Callback WalkFunc
  53. // PostChildrenCallback is an option function that Walk will invoke for
  54. // every file system directory it encounters after its children have been
  55. // processed.
  56. PostChildrenCallback WalkFunc
  57. // ScratchBuffer is an optional byte slice to use as a scratch buffer for
  58. // Walk to use when reading directory entries, to reduce amount of garbage
  59. // generation. Not all architectures take advantage of the scratch
  60. // buffer. If omitted or the provided buffer has fewer bytes than
  61. // MinimumScratchBufferSize, then a buffer with MinimumScratchBufferSize
  62. // bytes will be created and used once per Walk invocation.
  63. ScratchBuffer []byte
  64. // AllowNonDirectory causes Walk to bypass the check that ensures it is
  65. // being called on a directory node, or when FollowSymbolicLinks is true, a
  66. // symbolic link that points to a directory. Leave this value false to have
  67. // Walk return an error when called on a non-directory. Set this true to
  68. // have Walk run even when called on a non-directory node.
  69. AllowNonDirectory bool
  70. }
  // ErrorAction defines a set of actions the Walk function could take based on
  // the occurrence of an error while walking the file system. See the
  // documentation for the ErrorCallback field of the Options structure for more
  // information.
  type ErrorAction int

  const (
  	// Halt is the ErrorAction return value when the upstream code wants to halt
  	// the walk process when a runtime error takes place. It matches the default
  	// action the Walk function would take were no ErrorCallback provided. Halt
  	// is the zero value of ErrorAction.
  	Halt ErrorAction = iota

  	// SkipNode is the ErrorAction return value when the upstream code wants to
  	// ignore the runtime error for the current file system node, skip
  	// processing of the node that caused the error, and continue walking the
  	// file system hierarchy with the remaining nodes.
  	SkipNode
  )
  // SkipThis is used as a return value from WalkFuncs to indicate that the file
  // system entry named in the call is to be skipped. It is not returned as an
  // error by any function. The walk code recognizes it by identity comparison,
  // so callbacks must return this exact value rather than a wrapped copy.
  var SkipThis = errors.New("skip this directory entry")
  91. // WalkFunc is the type of the function called for each file system node visited
  92. // by Walk. The pathname argument will contain the argument to Walk as a prefix;
  93. // that is, if Walk is called with "dir", which is a directory containing the
  94. // file "a", the provided WalkFunc will be invoked with the argument "dir/a",
  95. // using the correct os.PathSeparator for the Go Operating System architecture,
  96. // GOOS. The directory entry argument is a pointer to a Dirent for the node,
  97. // providing access to both the basename and the mode type of the file system
  98. // node.
  99. //
  100. // If an error is returned by the Callback or PostChildrenCallback functions,
  101. // and no ErrorCallback function is provided, processing stops. If an
  102. // ErrorCallback function is provided, then it is invoked with the OS pathname
  103. // of the node that caused the error along along with the error. The return
  104. // value of the ErrorCallback function determines whether to halt processing, or
  105. // skip this node and continue processing remaining file system nodes.
  106. //
  107. // The exception is when the function returns the special value
  108. // filepath.SkipDir. If the function returns filepath.SkipDir when invoked on a
  109. // directory, Walk skips the directory's contents entirely. If the function
  110. // returns filepath.SkipDir when invoked on a non-directory file system node,
  111. // Walk skips the remaining files in the containing directory. Note that any
  112. // supplied ErrorCallback function is not invoked with filepath.SkipDir when the
  113. // Callback or PostChildrenCallback functions return that special value.
  114. //
  115. // One arguably confusing aspect of the filepath.WalkFunc API that this library
  116. // must emulate is how a caller tells Walk to skip file system entries or
  117. // directories. With both filepath.Walk and this Walk, when a callback function
  118. // wants to skip a directory and not descend into its children, it returns
  119. // filepath.SkipDir. If the callback function returns filepath.SkipDir for a
  120. // non-directory, filepath.Walk and this library will stop processing any more
  121. // entries in the current directory, which is what many people do not want. If
  122. // you want to simply skip a particular non-directory entry but continue
  123. // processing entries in the directory, a callback function must return nil. The
  124. // implications of this API is when you want to walk a file system hierarchy and
  125. // skip an entry, when the entry is a directory, you must return one value,
  126. // namely filepath.SkipDir, but when the entry is a non-directory, you must
  127. // return a different value, namely nil. In other words, to get identical
  128. // behavior for two file system entry types you need to send different token
  129. // values.
  130. //
  131. // Here is an example callback function that adheres to filepath.Walk API to
  132. // have it skip any file system entry whose full pathname includes a particular
  133. // substring, optSkip:
  134. //
  135. // func callback1(osPathname string, de *godirwalk.Dirent) error {
  136. // if optSkip != "" && strings.Contains(osPathname, optSkip) {
  137. // if b, err := de.IsDirOrSymlinkToDir(); b == true && err == nil {
  138. // return filepath.SkipDir
  139. // }
  140. // return nil
  141. // }
  142. // // Process file like normal...
  143. // return nil
  144. // }
  145. //
  146. // This library attempts to eliminate some of that logic boilerplate by
  147. // providing a new token error value, SkipThis, which a callback function may
  148. // return to skip the current file system entry regardless of what type of entry
  149. // it is. If the current entry is a directory, its children will not be
  150. // enumerated, exactly as if the callback returned filepath.SkipDir. If the
  151. // current entry is a non-directory, the next file system entry in the current
  152. // directory will be enumerated, exactly as if the callback returned nil. The
  153. // following example callback function has identical behavior as the previous,
  154. // but has less boilerplate, and admittedly more simple logic.
  155. //
  156. // func callback2(osPathname string, de *godirwalk.Dirent) error {
  157. // if optSkip != "" && strings.Contains(osPathname, optSkip) {
  158. // return godirwalk.SkipThis
  159. // }
  160. // // Process file like normal...
  161. // return nil
  162. // }
  163. type WalkFunc func(osPathname string, directoryEntry *Dirent) error
  164. // Walk walks the file tree rooted at the specified directory, calling the
  165. // specified callback function for each file system node in the tree, including
  166. // root, symbolic links, and other node types.
  167. //
  168. // This function is often much faster than filepath.Walk because it does not
  169. // invoke os.Stat for every node it encounters, but rather obtains the file
  170. // system node type when it reads the parent directory.
  171. //
  172. // If a runtime error occurs, either from the operating system or from the
  173. // upstream Callback or PostChildrenCallback functions, processing typically
  174. // halts. However, when an ErrorCallback function is provided in the provided
  175. // Options structure, that function is invoked with the error along with the OS
  176. // pathname of the file system node that caused the error. The ErrorCallback
  177. // function's return value determines the action that Walk will then take.
  178. //
  179. // func main() {
  180. // dirname := "."
  181. // if len(os.Args) > 1 {
  182. // dirname = os.Args[1]
  183. // }
  184. // err := godirwalk.Walk(dirname, &godirwalk.Options{
  185. // Callback: func(osPathname string, de *godirwalk.Dirent) error {
  186. // fmt.Printf("%s %s\n", de.ModeType(), osPathname)
  187. // return nil
  188. // },
  189. // ErrorCallback: func(osPathname string, err error) godirwalk.ErrorAction {
  190. // // Your program may want to log the error somehow.
  191. // fmt.Fprintf(os.Stderr, "ERROR: %s\n", err)
  192. //
  193. // // For the purposes of this example, a simple SkipNode will suffice,
  194. // // although in reality perhaps additional logic might be called for.
  195. // return godirwalk.SkipNode
  196. // },
  197. // })
  198. // if err != nil {
  199. // fmt.Fprintf(os.Stderr, "%s\n", err)
  200. // os.Exit(1)
  201. // }
  202. // }
  203. func Walk(pathname string, options *Options) error {
  204. if options == nil || options.Callback == nil {
  205. return errors.New("cannot walk without non-nil options and Callback function")
  206. }
  207. pathname = filepath.Clean(pathname)
  208. var fi os.FileInfo
  209. var err error
  210. if options.FollowSymbolicLinks {
  211. fi, err = os.Stat(pathname)
  212. } else {
  213. fi, err = os.Lstat(pathname)
  214. }
  215. if err != nil {
  216. return err
  217. }
  218. mode := fi.Mode()
  219. if !options.AllowNonDirectory && mode&os.ModeDir == 0 {
  220. return fmt.Errorf("cannot Walk non-directory: %s", pathname)
  221. }
  222. dirent := &Dirent{
  223. name: filepath.Base(pathname),
  224. path: filepath.Dir(pathname),
  225. modeType: mode & os.ModeType,
  226. }
  227. if len(options.ScratchBuffer) < MinimumScratchBufferSize {
  228. options.ScratchBuffer = newScratchBuffer()
  229. }
  230. // If ErrorCallback is nil, set to a default value that halts the walk
  231. // process on all operating system errors. This is done to allow error
  232. // handling to be more succinct in the walk code.
  233. if options.ErrorCallback == nil {
  234. options.ErrorCallback = defaultErrorCallback
  235. }
  236. err = walk(pathname, dirent, options)
  237. switch err {
  238. case nil, SkipThis, filepath.SkipDir:
  239. // silence SkipThis and filepath.SkipDir for top level
  240. debug("no error of significance: %v\n", err)
  241. return nil
  242. default:
  243. return err
  244. }
  245. }
  246. // defaultErrorCallback always returns Halt because if the upstream code did not
  247. // provide an ErrorCallback function, walking the file system hierarchy ought to
  248. // halt upon any operating system error.
  249. func defaultErrorCallback(_ string, _ error) ErrorAction { return Halt }
  250. // walk recursively traverses the file system node specified by pathname and the
  251. // Dirent.
  252. func walk(osPathname string, dirent *Dirent, options *Options) error {
  253. err := options.Callback(osPathname, dirent)
  254. if err != nil {
  255. if err == SkipThis || err == filepath.SkipDir {
  256. return err
  257. }
  258. if action := options.ErrorCallback(osPathname, err); action == SkipNode {
  259. return nil
  260. }
  261. return err
  262. }
  263. if dirent.IsSymlink() {
  264. if !options.FollowSymbolicLinks {
  265. return nil
  266. }
  267. // Does this symlink point to a directory?
  268. info, err := os.Stat(osPathname)
  269. if err != nil {
  270. if action := options.ErrorCallback(osPathname, err); action == SkipNode {
  271. return nil
  272. }
  273. return err
  274. }
  275. if !info.IsDir() {
  276. return nil
  277. }
  278. } else if !dirent.IsDir() {
  279. return nil
  280. }
  281. // If get here, then specified pathname refers to a directory or a
  282. // symbolic link to a directory.
  283. var ds scanner
  284. if options.Unsorted {
  285. // When upstream does not request a sorted iteration, it's more memory
  286. // efficient to read a single child at a time from the file system.
  287. ds, err = NewScanner(osPathname)
  288. } else {
  289. // When upstream wants a sorted iteration, we must read the entire
  290. // directory and sort through the child names, and then iterate on each
  291. // child.
  292. ds, err = newSortedScanner(osPathname, options.ScratchBuffer)
  293. }
  294. if err != nil {
  295. if action := options.ErrorCallback(osPathname, err); action == SkipNode {
  296. return nil
  297. }
  298. return err
  299. }
  300. for ds.Scan() {
  301. deChild, err := ds.Dirent()
  302. osChildname := filepath.Join(osPathname, deChild.name)
  303. if err != nil {
  304. if action := options.ErrorCallback(osChildname, err); action == SkipNode {
  305. return nil
  306. }
  307. return err
  308. }
  309. err = walk(osChildname, deChild, options)
  310. debug("osChildname: %q; error: %v\n", osChildname, err)
  311. if err == nil || err == SkipThis {
  312. continue
  313. }
  314. if err != filepath.SkipDir {
  315. return err
  316. }
  317. // When received SkipDir on a directory or a symbolic link to a
  318. // directory, stop processing that directory but continue processing
  319. // siblings. When received on a non-directory, stop processing
  320. // remaining siblings.
  321. isDir, err := deChild.IsDirOrSymlinkToDir()
  322. if err != nil {
  323. if action := options.ErrorCallback(osChildname, err); action == SkipNode {
  324. continue // ignore and continue with next sibling
  325. }
  326. return err // caller does not approve of this error
  327. }
  328. if !isDir {
  329. break // stop processing remaining siblings, but allow post children callback
  330. }
  331. // continue processing remaining siblings
  332. }
  333. if err = ds.Err(); err != nil {
  334. return err
  335. }
  336. if options.PostChildrenCallback == nil {
  337. return nil
  338. }
  339. err = options.PostChildrenCallback(osPathname, dirent)
  340. if err == nil || err == filepath.SkipDir {
  341. return err
  342. }
  343. if action := options.ErrorCallback(osPathname, err); action == SkipNode {
  344. return nil
  345. }
  346. return err
  347. }