Package CedarBackup3 :: Module filesystem
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup3.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008,2010,2015 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python 3 (>= 3.4) 
  29  # Project  : Cedar Backup, release 3 
  30  # Purpose  : Provides filesystem-related objects. 
  31  # 
  32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  33   
  34  ######################################################################## 
  35  # Module documentation 
  36  ######################################################################## 
  37   
  38  """ 
  39  Provides filesystem-related objects. 
  40  @sort: FilesystemList, BackupFileList, PurgeItemList 
  41  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  42  """ 
  43   
  44   
  45  ######################################################################## 
  46  # Imported modules 
  47  ######################################################################## 
  48   
  49  # System modules 
  50  import os 
  51  import re 
  52  import math 
  53  import logging 
  54  import tarfile 
  55  import hashlib 
  56   
  57  # Cedar Backup modules 
  58  from CedarBackup3.knapsack import firstFit, bestFit, worstFit, alternateFit 
  59  from CedarBackup3.util import AbsolutePathList, UnorderedList, RegexList 
  60  from CedarBackup3.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink 
  61   
  62   
  63  ######################################################################## 
  64  # Module-wide variables 
  65  ######################################################################## 
  66   
  67  logger = logging.getLogger("CedarBackup3.log.filesystem") 
68 69 70 ######################################################################## 71 # FilesystemList class definition 72 ######################################################################## 73 74 -class FilesystemList(list):
75 76 ###################### 77 # Class documentation 78 ###################### 79 80 """ 81 Represents a list of filesystem items. 82 83 This is a generic class that represents a list of filesystem items. Callers 84 can add individual files or directories to the list, or can recursively add 85 the contents of a directory. The class also allows for up-front exclusions 86 in several forms (all files, all directories, all items matching a pattern, 87 all items whose basename matches a pattern, or all directories containing a 88 specific "ignore file"). Symbolic links are typically backed up 89 non-recursively, i.e. the link to a directory is backed up, but not the 90 contents of that link (we don't want to deal with recursive loops, etc.). 91 92 The custom methods such as L{addFile} will only add items if they exist on 93 the filesystem and do not match any exclusions that are already in place. 94 However, since a FilesystemList is a subclass of Python's standard list 95 class, callers can also add items to the list in the usual way, using 96 methods like C{append()} or C{insert()}. No validations apply to items 97 added to the list in this way; however, many list-manipulation methods deal 98 "gracefully" with items that don't exist in the filesystem, often by 99 ignoring them. 100 101 Once a list has been created, callers can remove individual items from the 102 list using standard methods like C{pop()} or C{remove()} or they can use 103 custom methods to remove specific types of entries or entries which match a 104 particular pattern. 105 106 @note: Regular expression patterns that apply to paths are assumed to be 107 bounded at front and back by the beginning and end of the string, i.e. they 108 are treated as if they begin with C{^} and end with C{$}. This is true 109 whether we are matching a complete path or a basename. 110 111 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 112 removeLinks, removeMatch, removeInvalid, normalize, 113 excludeFiles, excludeDirs, excludeLinks, excludePaths, 114 excludePatterns, excludeBasenamePatterns, ignoreFile 115 """ 116 117 118 ############## 119 # Constructor 120 ############## 121
122 - def __init__(self):
123 """Initializes a list with no configured exclusions.""" 124 list.__init__(self) 125 self._excludeFiles = False 126 self._excludeDirs = False 127 self._excludeLinks = False 128 self._excludePaths = None 129 self._excludePatterns = None 130 self._excludeBasenamePatterns = None 131 self._ignoreFile = None 132 self.excludeFiles = False 133 self.excludeLinks = False 134 self.excludeDirs = False 135 self.excludePaths = [] 136 self.excludePatterns = RegexList() 137 self.excludeBasenamePatterns = RegexList() 138 self.ignoreFile = None
139 140 141 ############# 142 # Properties 143 ############# 144
145 - def _setExcludeFiles(self, value):
146 """ 147 Property target used to set the exclude files flag. 148 No validations, but we normalize the value to C{True} or C{False}. 149 """ 150 if value: 151 self._excludeFiles = True 152 else: 153 self._excludeFiles = False
154
155 - def _getExcludeFiles(self):
156 """ 157 Property target used to get the exclude files flag. 158 """ 159 return self._excludeFiles
160
161 - def _setExcludeDirs(self, value):
162 """ 163 Property target used to set the exclude directories flag. 164 No validations, but we normalize the value to C{True} or C{False}. 165 """ 166 if value: 167 self._excludeDirs = True 168 else: 169 self._excludeDirs = False
170
171 - def _getExcludeDirs(self):
172 """ 173 Property target used to get the exclude directories flag. 174 """ 175 return self._excludeDirs
176 186 192
193 - def _setExcludePaths(self, value):
194 """ 195 Property target used to set the exclude paths list. 196 A C{None} value is converted to an empty list. 197 Elements do not have to exist on disk at the time of assignment. 198 @raise ValueError: If any list element is not an absolute path. 199 """ 200 self._excludePaths = AbsolutePathList() 201 if value is not None: 202 self._excludePaths.extend(value)
203
204 - def _getExcludePaths(self):
205 """ 206 Property target used to get the absolute exclude paths list. 207 """ 208 return self._excludePaths
209
210 - def _setExcludePatterns(self, value):
211 """ 212 Property target used to set the exclude patterns list. 213 A C{None} value is converted to an empty list. 214 """ 215 self._excludePatterns = RegexList() 216 if value is not None: 217 self._excludePatterns.extend(value)
218
219 - def _getExcludePatterns(self):
220 """ 221 Property target used to get the exclude patterns list. 222 """ 223 return self._excludePatterns
224
225 - def _setExcludeBasenamePatterns(self, value):
226 """ 227 Property target used to set the exclude basename patterns list. 228 A C{None} value is converted to an empty list. 229 """ 230 self._excludeBasenamePatterns = RegexList() 231 if value is not None: 232 self._excludeBasenamePatterns.extend(value)
233
235 """ 236 Property target used to get the exclude basename patterns list. 237 """ 238 return self._excludeBasenamePatterns
239
240 - def _setIgnoreFile(self, value):
241 """ 242 Property target used to set the ignore file. 243 The value must be a non-empty string if it is not C{None}. 244 @raise ValueError: If the value is an empty string. 245 """ 246 if value is not None: 247 if len(value) < 1: 248 raise ValueError("The ignore file must be a non-empty string.") 249 self._ignoreFile = value
250
251 - def _getIgnoreFile(self):
252 """ 253 Property target used to get the ignore file. 254 """ 255 return self._ignoreFile
256 257 excludeFiles = property(_getExcludeFiles, _setExcludeFiles, None, "Boolean indicating whether files should be excluded.") 258 excludeDirs = property(_getExcludeDirs, _setExcludeDirs, None, "Boolean indicating whether directories should be excluded.") 259 excludeLinks = property(_getExcludeLinks, _setExcludeLinks, None, "Boolean indicating whether soft links should be excluded.") 260 excludePaths = property(_getExcludePaths, _setExcludePaths, None, "List of absolute paths to be excluded.") 261 excludePatterns = property(_getExcludePatterns, _setExcludePatterns, None, 262 "List of regular expression patterns (matching complete path) to be excluded.") 263 excludeBasenamePatterns = property(_getExcludeBasenamePatterns, _setExcludeBasenamePatterns, 264 None, "List of regular expression patterns (matching basename) to be excluded.") 265 ignoreFile = property(_getIgnoreFile, _setIgnoreFile, None, "Name of file which will cause directory contents to be ignored.") 266 267 268 ############## 269 # Add methods 270 ############## 271
272 - def addFile(self, path):
273 """ 274 Adds a file to the list. 275 276 The path must exist and must be a file or a link to an existing file. It 277 will be added to the list subject to any exclusions that are in place. 278 279 @param path: File path to be added to the list 280 @type path: String representing a path on disk 281 282 @return: Number of items added to the list. 283 284 @raise ValueError: If path is not a file or does not exist. 285 @raise ValueError: If the path could not be encoded properly. 286 """ 287 path = encodePath(path) 288 if not os.path.exists(path) or not os.path.isfile(path): 289 logger.debug("Path [%s] is not a file or does not exist on disk.", path) 290 raise ValueError("Path is not a file or does not exist on disk.") 291 if self.excludeLinks and os.path.islink(path): 292 logger.debug("Path [%s] is excluded based on excludeLinks.", path) 293 return 0 294 if self.excludeFiles: 295 logger.debug("Path [%s] is excluded based on excludeFiles.", path) 296 return 0 297 if path in self.excludePaths: 298 logger.debug("Path [%s] is excluded based on excludePaths.", path) 299 return 0 300 for pattern in self.excludePatterns: 301 pattern = encodePath(pattern) # use same encoding as filenames 302 if re.compile(r"^%s$" % pattern).match(path): # safe to assume all are valid due to RegexList 303 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 304 return 0 305 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 306 pattern = encodePath(pattern) # use same encoding as filenames 307 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 308 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 309 return 0 310 self.append(path) 311 logger.debug("Added file to list: [%s]", path) 312 return 1
313
314 - def addDir(self, path):
315 """ 316 Adds a directory to the list. 317 318 The path must exist and must be a directory or a link to an existing 319 directory. It will be added to the list subject to any exclusions that 320 are in place. The L{ignoreFile} does not apply to this method, only to 321 L{addDirContents}. 322 323 @param path: Directory path to be added to the list 324 @type path: String representing a path on disk 325 326 @return: Number of items added to the list. 327 328 @raise ValueError: If path is not a directory or does not exist. 329 @raise ValueError: If the path could not be encoded properly. 330 """ 331 path = encodePath(path) 332 path = normalizeDir(path) 333 if not os.path.exists(path) or not os.path.isdir(path): 334 logger.debug("Path [%s] is not a directory or does not exist on disk.", path) 335 raise ValueError("Path is not a directory or does not exist on disk.") 336 if self.excludeLinks and os.path.islink(path): 337 logger.debug("Path [%s] is excluded based on excludeLinks.", path) 338 return 0 339 if self.excludeDirs: 340 logger.debug("Path [%s] is excluded based on excludeDirs.", path) 341 return 0 342 if path in self.excludePaths: 343 logger.debug("Path [%s] is excluded based on excludePaths.", path) 344 return 0 345 for pattern in self.excludePatterns: # safe to assume all are valid due to RegexList 346 pattern = encodePath(pattern) # use same encoding as filenames 347 if re.compile(r"^%s$" % pattern).match(path): 348 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 349 return 0 350 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 351 pattern = encodePath(pattern) # use same encoding as filenames 352 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 353 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 354 return 0 355 self.append(path) 356 logger.debug("Added directory to list: [%s]", path) 357 return 1
358
359 - def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
360 """ 361 Adds the contents of a directory to the list. 362 363 The path must exist and must be a directory or a link to a directory. 364 The contents of the directory (as well as the directory path itself) will 365 be recursively added to the list, subject to any exclusions that are in 366 place. If you only want the directory and its immediate contents to be 367 added, then pass in C{recursive=False}. 368 369 @note: If a directory's absolute path matches an exclude pattern or path, 370 or if the directory contains the configured ignore file, then the 371 directory and all of its contents will be recursively excluded from the 372 list. 373 374 @note: If the passed-in directory happens to be a soft link, it will be 375 recursed. However, the linkDepth parameter controls whether any soft 376 links I{within} the directory will be recursed. The link depth is 377 maximum depth of the tree at which soft links should be followed. So, a 378 depth of 0 does not follow any soft links, a depth of 1 follows only 379 links within the passed-in directory, a depth of 2 follows the links at 380 the next level down, etc. 381 382 @note: Any invalid soft links (i.e. soft links that point to 383 non-existent items) will be silently ignored. 384 385 @note: The L{excludeDirs} flag only controls whether any given directory 386 path itself is added to the list once it has been discovered. It does 387 I{not} modify any behavior related to directory recursion. 388 389 @note: If you call this method I{on a link to a directory} that link will 390 never be dereferenced (it may, however, be followed). 391 392 @param path: Directory path whose contents should be added to the list 393 @type path: String representing a path on disk 394 395 @param recursive: Indicates whether directory contents should be added recursively. 396 @type recursive: Boolean value 397 398 @param addSelf: Indicates whether the directory itself should be added to the list. 399 @type addSelf: Boolean value 400 401 @param linkDepth: Maximum depth of the tree at which soft links should be followed 402 @type linkDepth: Integer value, where zero means not to follow any soft links 403 404 @param dereference: Indicates whether soft links, if followed, should be dereferenced 405 @type dereference: Boolean value 406 407 @return: Number of items recursively added to the list 408 409 @raise ValueError: If path is not a directory or does not exist. 410 @raise ValueError: If the path could not be encoded properly. 411 """ 412 path = encodePath(path) 413 path = normalizeDir(path) 414 return self._addDirContentsInternal(path, addSelf, recursive, linkDepth, dereference)
415
416 - def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
417 """ 418 Internal implementation of C{addDirContents}. 419 420 This internal implementation exists due to some refactoring. Basically, 421 some subclasses have a need to add the contents of a directory, but not 422 the directory itself. This is different than the standard C{FilesystemList} 423 behavior and actually ends up making a special case out of the first 424 call in the recursive chain. Since I don't want to expose the modified 425 interface, C{addDirContents} ends up being wholly implemented in terms 426 of this method. 427 428 The linkDepth parameter controls whether soft links are followed when we 429 are adding the contents recursively. Any recursive calls reduce the 430 value by one. If the value zero or less, then soft links will just be 431 added as directories, but will not be followed. This means that links 432 are followed to a I{constant depth} starting from the top-most directory. 433 434 There is one difference between soft links and directories: soft links 435 that are added recursively are not placed into the list explicitly. This 436 is because if we do add the links recursively, the resulting tar file 437 gets a little confused (it has a link and a directory with the same 438 name). 439 440 @note: If you call this method I{on a link to a directory} that link will 441 never be dereferenced (it may, however, be followed). 442 443 @param path: Directory path whose contents should be added to the list. 444 @param includePath: Indicates whether to include the path as well as contents. 445 @param recursive: Indicates whether directory contents should be added recursively. 446 @param linkDepth: Depth of soft links that should be followed 447 @param dereference: Indicates whether soft links, if followed, should be dereferenced 448 449 @return: Number of items recursively added to the list 450 451 @raise ValueError: If path is not a directory or does not exist. 452 """ 453 added = 0 454 if not os.path.exists(path) or not os.path.isdir(path): 455 logger.debug("Path [%s] is not a directory or does not exist on disk.", path) 456 raise ValueError("Path is not a directory or does not exist on disk.") 457 if path in self.excludePaths: 458 logger.debug("Path [%s] is excluded based on excludePaths.", path) 459 return added 460 for pattern in self.excludePatterns: # safe to assume all are valid due to RegexList 461 pattern = encodePath(pattern) # use same encoding as filenames 462 if re.compile(r"^%s$" % pattern).match(path): 463 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 464 return added 465 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 466 pattern = encodePath(pattern) # use same encoding as filenames 467 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 468 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 469 return added 470 if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)): 471 logger.debug("Path [%s] is excluded based on ignore file.", path) 472 return added 473 if includePath: 474 added += self.addDir(path) # could actually be excluded by addDir, yet 475 for entry in os.listdir(path): 476 entrypath = os.path.join(path, entry) 477 if os.path.isfile(entrypath): 478 if linkDepth > 0 and dereference: 479 derefpath = dereferenceLink(entrypath) 480 if derefpath != entrypath: 481 added += self.addFile(derefpath) 482 added += self.addFile(entrypath) 483 elif os.path.isdir(entrypath): 484 if os.path.islink(entrypath): 485 if recursive: 486 if linkDepth > 0: 487 newDepth = linkDepth - 1 488 if dereference: 489 derefpath = dereferenceLink(entrypath) 490 if derefpath != entrypath: 491 added += self._addDirContentsInternal(derefpath, True, recursive, newDepth, dereference) 492 added += self.addDir(entrypath) 493 else: 494 added += self._addDirContentsInternal(entrypath, False, recursive, newDepth, dereference) 495 else: 496 added += self.addDir(entrypath) 497 else: 498 added += self.addDir(entrypath) 499 else: 500 if recursive: 501 newDepth = linkDepth - 1 502 added += self._addDirContentsInternal(entrypath, True, recursive, newDepth, dereference) 503 else: 504 added += self.addDir(entrypath) 505 return added
506 507 508 ################# 509 # Remove methods 510 ################# 511
512 - def removeFiles(self, pattern=None):
513 """ 514 Removes file entries from the list. 515 516 If C{pattern} is not passed in or is C{None}, then all file entries will 517 be removed from the list. Otherwise, only those file entries matching 518 the pattern will be removed. Any entry which does not exist on disk 519 will be ignored (use L{removeInvalid} to purge those entries). 520 521 This method might be fairly slow for large lists, since it must check the 522 type of each item in the list. If you know ahead of time that you want 523 to exclude all files, then you will be better off setting L{excludeFiles} 524 to C{True} before adding items to the list. 525 526 @param pattern: Regular expression pattern representing entries to remove 527 528 @return: Number of entries removed 529 @raise ValueError: If the passed-in pattern is not a valid regular expression. 530 """ 531 removed = 0 532 if pattern is None: 533 for entry in self[:]: 534 if os.path.exists(entry) and os.path.isfile(entry): 535 self.remove(entry) 536 logger.debug("Removed path [%s] from list.", entry) 537 removed += 1 538 else: 539 try: 540 pattern = encodePath(pattern) # use same encoding as filenames 541 compiled = re.compile(pattern) 542 except re.error: 543 raise ValueError("Pattern is not a valid regular expression.") 544 for entry in self[:]: 545 if os.path.exists(entry) and os.path.isfile(entry): 546 if compiled.match(entry): 547 self.remove(entry) 548 logger.debug("Removed path [%s] from list.", entry) 549 removed += 1 550 logger.debug("Removed a total of %d entries.", removed) 551 return removed
552
553 - def removeDirs(self, pattern=None):
554 """ 555 Removes directory entries from the list. 556 557 If C{pattern} is not passed in or is C{None}, then all directory entries 558 will be removed from the list. Otherwise, only those directory entries 559 matching the pattern will be removed. Any entry which does not exist on 560 disk will be ignored (use L{removeInvalid} to purge those entries). 561 562 This method might be fairly slow for large lists, since it must check the 563 type of each item in the list. If you know ahead of time that you want 564 to exclude all directories, then you will be better off setting 565 L{excludeDirs} to C{True} before adding items to the list (note that this 566 will not prevent you from recursively adding the I{contents} of 567 directories). 568 569 @param pattern: Regular expression pattern representing entries to remove 570 571 @return: Number of entries removed 572 @raise ValueError: If the passed-in pattern is not a valid regular expression. 573 """ 574 removed = 0 575 if pattern is None: 576 for entry in self[:]: 577 if os.path.exists(entry) and os.path.isdir(entry): 578 self.remove(entry) 579 logger.debug("Removed path [%s] from list.", entry) 580 removed += 1 581 else: 582 try: 583 pattern = encodePath(pattern) # use same encoding as filenames 584 compiled = re.compile(pattern) 585 except re.error: 586 raise ValueError("Pattern is not a valid regular expression.") 587 for entry in self[:]: 588 if os.path.exists(entry) and os.path.isdir(entry): 589 if compiled.match(entry): 590 self.remove(entry) 591 logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 592 removed += 1 593 logger.debug("Removed a total of %d entries.", removed) 594 return removed
595 636
637 - def removeMatch(self, pattern):
638 """ 639 Removes from the list all entries matching a pattern. 640 641 This method removes from the list all entries which match the passed in 642 C{pattern}. Since there is no need to check the type of each entry, it 643 is faster to call this method than to call the L{removeFiles}, 644 L{removeDirs} or L{removeLinks} methods individually. If you know which 645 patterns you will want to remove ahead of time, you may be better off 646 setting L{excludePatterns} or L{excludeBasenamePatterns} before adding 647 items to the list. 648 649 @note: Unlike when using the exclude lists, the pattern here is I{not} 650 bounded at the front and the back of the string. You can use any pattern 651 you want. 652 653 @param pattern: Regular expression pattern representing entries to remove 654 655 @return: Number of entries removed. 656 @raise ValueError: If the passed-in pattern is not a valid regular expression. 657 """ 658 try: 659 pattern = encodePath(pattern) # use same encoding as filenames 660 compiled = re.compile(pattern) 661 except re.error: 662 raise ValueError("Pattern is not a valid regular expression.") 663 removed = 0 664 for entry in self[:]: 665 if compiled.match(entry): 666 self.remove(entry) 667 logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 668 removed += 1 669 logger.debug("Removed a total of %d entries.", removed) 670 return removed
671
672 - def removeInvalid(self):
673 """ 674 Removes from the list all entries that do not exist on disk. 675 676 This method removes from the list all entries which do not currently 677 exist on disk in some form. No attention is paid to whether the entries 678 are files or directories. 679 680 @return: Number of entries removed. 681 """ 682 removed = 0 683 for entry in self[:]: 684 if not os.path.exists(entry): 685 self.remove(entry) 686 logger.debug("Removed path [%s] from list.", entry) 687 removed += 1 688 logger.debug("Removed a total of %d entries.", removed) 689 return removed
690 691 692 ################## 693 # Utility methods 694 ################## 695
696 - def normalize(self):
697 """Normalizes the list, ensuring that each entry is unique.""" 698 orig = len(self) 699 self.sort() 700 dups = list(filter(lambda x, self=self: self[x] == self[x+1], list(range(0, len(self) - 1)))) 701 items = list(map(lambda x, self=self: self[x], dups)) 702 list(map(self.remove, items)) 703 new = len(self) 704 logger.debug("Completed normalizing list; removed %d items (%d originally, %d now).", new-orig, orig, new)
705
706 - def verify(self):
707 """ 708 Verifies that all entries in the list exist on disk. 709 @return: C{True} if all entries exist, C{False} otherwise. 710 """ 711 for entry in self: 712 if not os.path.exists(entry): 713 logger.debug("Path [%s] is invalid; list is not valid.", entry) 714 return False 715 logger.debug("All entries in list are valid.") 716 return True
717
718 719 ######################################################################## 720 # SpanItem class definition 721 ######################################################################## 722 723 -class SpanItem(object): # pylint: disable=R0903
724 """ 725 Item returned by L{BackupFileList.generateSpan}. 726 """
727 - def __init__(self, fileList, size, capacity, utilization):
728 """ 729 Create object. 730 @param fileList: List of files 731 @param size: Size (in bytes) of files 732 @param utilization: Utilization, as a percentage (0-100) 733 """ 734 self.fileList = fileList 735 self.size = size 736 self.capacity = capacity 737 self.utilization = utilization
738
739 740 ######################################################################## 741 # BackupFileList class definition 742 ######################################################################## 743 744 -class BackupFileList(FilesystemList): # pylint: disable=R0904
745 746 ###################### 747 # Class documentation 748 ###################### 749 750 """ 751 List of files to be backed up. 752 753 A BackupFileList is a L{FilesystemList} containing a list of files to be 754 backed up. It only contains files, not directories (soft links are treated 755 like files). On top of the generic functionality provided by 756 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 757 for each file in the list, and it also provides a method to calculate the 758 total size of the files in the list and a way to export the list into tar 759 form. 760 761 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 762 generateFitted, generateTarfile, removeUnchanged 763 """ 764 765 ############## 766 # Constructor 767 ############## 768
769 - def __init__(self):
770 """Initializes a list with no configured exclusions.""" 771 FilesystemList.__init__(self)
772 773 774 ################################ 775 # Overridden superclass methods 776 ################################ 777
778 - def addDir(self, path):
779 """ 780 Adds a directory to the list. 781 782 Note that this class does not allow directories to be added by themselves 783 (a backup list contains only files). However, since links to directories 784 are technically files, we allow them to be added. 785 786 This method is implemented in terms of the superclass method, with one 787 additional validation: the superclass method is only called if the 788 passed-in path is both a directory and a link. All of the superclass's 789 existing validations and restrictions apply. 790 791 @param path: Directory path to be added to the list 792 @type path: String representing a path on disk 793 794 @return: Number of items added to the list. 795 796 @raise ValueError: If path is not a directory or does not exist. 797 @raise ValueError: If the path could not be encoded properly. 798 """ 799 path = encodePath(path) 800 path = normalizeDir(path) 801 if os.path.isdir(path) and not os.path.islink(path): 802 return 0 803 else: 804 return FilesystemList.addDir(self, path)
805 806 807 ################## 808 # Utility methods 809 ################## 810
811 - def totalSize(self):
812 """ 813 Returns the total size among all files in the list. 814 Only files are counted. 815 Soft links that point at files are ignored. 816 Entries which do not exist on disk are ignored. 817 @return: Total size, in bytes 818 """ 819 total = 0.0 820 for entry in self: 821 if os.path.isfile(entry) and not os.path.islink(entry): 822 total += float(os.stat(entry).st_size) 823 return total
824
825 - def generateSizeMap(self):
826 """ 827 Generates a mapping from file to file size in bytes. 828 The mapping does include soft links, which are listed with size zero. 829 Entries which do not exist on disk are ignored. 830 @return: Dictionary mapping file to file size 831 """ 832 table = { } 833 for entry in self: 834 if os.path.islink(entry): 835 table[entry] = 0.0 836 elif os.path.isfile(entry): 837 table[entry] = float(os.stat(entry).st_size) 838 return table
839
840 - def generateDigestMap(self, stripPrefix=None):
841 """ 842 Generates a mapping from file to file digest. 843 844 Currently, the digest is an SHA hash, which should be pretty secure. In 845 the future, this might be a different kind of hash, but we guarantee that 846 the type of the hash will not change unless the library major version 847 number is bumped. 848 849 Entries which do not exist on disk are ignored. 850 851 Soft links are ignored. We would end up generating a digest for the file 852 that the soft link points at, which doesn't make any sense. 853 854 If C{stripPrefix} is passed in, then that prefix will be stripped from 855 each key when the map is generated. This can be useful in generating two 856 "relative" digest maps to be compared to one another. 857 858 @param stripPrefix: Common prefix to be stripped from paths 859 @type stripPrefix: String with any contents 860 861 @return: Dictionary mapping file to digest value 862 @see: L{removeUnchanged} 863 """ 864 table = { } 865 if stripPrefix is not None: 866 for entry in self: 867 if os.path.isfile(entry) and not os.path.islink(entry): 868 table[entry.replace(stripPrefix, "", 1)] = BackupFileList._generateDigest(entry) 869 else: 870 for entry in self: 871 if os.path.isfile(entry) and not os.path.islink(entry): 872 table[entry] = BackupFileList._generateDigest(entry) 873 return table
874 875 @staticmethod
876 - def _generateDigest(path):
877 """ 878 Generates an SHA digest for a given file on disk. 879 880 The original code for this function used this simplistic implementation, 881 which requires reading the entire file into memory at once in order to 882 generate a digest value:: 883 884 sha.new(open(path).read()).hexdigest() 885 886 Not surprisingly, this isn't an optimal solution. The U{Simple file 887 hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>} 888 Python Cookbook recipe describes how to incrementally generate a hash 889 value by reading in chunks of data rather than reading the file all at 890 once. The recipe relies on the the C{update()} method of the various 891 Python hashing algorithms. 892 893 In my tests using a 110 MB file on CD, the original implementation 894 requires 111 seconds. This implementation requires only 40-45 seconds, 895 which is a pretty substantial speed-up. 896 897 Experience shows that reading in around 4kB (4096 bytes) at a time yields 898 the best performance. Smaller reads are quite a bit slower, and larger 899 reads don't make much of a difference. The 4kB number makes me a little 900 suspicious, and I think it might be related to the size of a filesystem 901 read at the hardware level. However, I've decided to just hardcode 4096 902 until I have evidence that shows it's worthwhile making the read size 903 configurable. 904 905 @param path: Path to generate digest for. 906 907 @return: ASCII-safe SHA digest for the file. 908 @raise OSError: If the file cannot be opened. 909 """ 910 # pylint: disable=C0103,E1101 911 s = hashlib.sha1() 912 with open(path, mode="rb") as f: 913 readBytes = 4096 # see notes above 914 while readBytes > 0: 915 readString = f.read(readBytes) 916 s.update(readString) 917 readBytes = len(readString) 918 digest = s.hexdigest() 919 logger.debug("Generated digest [%s] for file [%s].", digest, path) 920 return digest
921
922 - def generateFitted(self, capacity, algorithm="worst_fit"):
923 """ 924 Generates a list of items that fit in the indicated capacity. 925 926 Sometimes, callers would like to include every item in a list, but are 927 unable to because not all of the items fit in the space available. This 928 method returns a copy of the list, containing only the items that fit in 929 a given capacity. A copy is returned so that we don't lose any 930 information if for some reason the fitted list is unsatisfactory. 931 932 The fitting is done using the functions in the knapsack module. By 933 default, the first fit algorithm is used, but you can also choose 934 from best fit, worst fit and alternate fit. 935 936 @param capacity: Maximum capacity among the files in the new list 937 @type capacity: Integer, in bytes 938 939 @param algorithm: Knapsack (fit) algorithm to use 940 @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit" 941 942 @return: Copy of list with total size no larger than indicated capacity 943 @raise ValueError: If the algorithm is invalid. 944 """ 945 table = self._getKnapsackTable() 946 function = BackupFileList._getKnapsackFunction(algorithm) 947 return function(table, capacity)[0]
948
949 - def generateSpan(self, capacity, algorithm="worst_fit"):
950 """ 951 Splits the list of items into sub-lists that fit in a given capacity. 952 953 Sometimes, callers need split to a backup file list into a set of smaller 954 lists. For instance, you could use this to "span" the files across a set 955 of discs. 956 957 The fitting is done using the functions in the knapsack module. By 958 default, the first fit algorithm is used, but you can also choose 959 from best fit, worst fit and alternate fit. 960 961 @note: If any of your items are larger than the capacity, then it won't 962 be possible to find a solution. In this case, a value error will be 963 raised. 964 965 @param capacity: Maximum capacity among the files in the new list 966 @type capacity: Integer, in bytes 967 968 @param algorithm: Knapsack (fit) algorithm to use 969 @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit" 970 971 @return: List of L{SpanItem} objects. 972 973 @raise ValueError: If the algorithm is invalid. 974 @raise ValueError: If it's not possible to fit some items 975 """ 976 spanItems = [] 977 function = BackupFileList._getKnapsackFunction(algorithm) 978 table = self._getKnapsackTable(capacity) 979 iteration = 0 980 while len(table) > 0: 981 iteration += 1 982 fit = function(table, capacity) 983 if len(fit[0]) == 0: 984 # Should never happen due to validations in _convertToKnapsackForm(), but let's be safe 985 raise ValueError("After iteration %d, unable to add any new items." % iteration) 986 removeKeys(table, fit[0]) 987 utilization = (float(fit[1])/float(capacity))*100.0 988 item = SpanItem(fit[0], fit[1], capacity, utilization) 989 spanItems.append(item) 990 return spanItems
991
992 - def _getKnapsackTable(self, capacity=None):
993 """ 994 Converts the list into the form needed by the knapsack algorithms. 995 @return: Dictionary mapping file name to tuple of (file path, file size). 996 """ 997 table = { } 998 for entry in self: 999 if os.path.islink(entry): 1000 table[entry] = (entry, 0.0) 1001 elif os.path.isfile(entry): 1002 size = float(os.stat(entry).st_size) 1003 if capacity is not None: 1004 if size > capacity: 1005 raise ValueError("File [%s] cannot fit in capacity %s." % (entry, displayBytes(capacity))) 1006 table[entry] = (entry, size) 1007 return table
1008 1009 @staticmethod
1010 - def _getKnapsackFunction(algorithm):
1011 """ 1012 Returns a reference to the function associated with an algorithm name. 1013 Algorithm name must be one of "first_fit", "best_fit", "worst_fit", "alternate_fit" 1014 @param algorithm: Name of the algorithm 1015 @return: Reference to knapsack function 1016 @raise ValueError: If the algorithm name is unknown. 1017 """ 1018 if algorithm == "first_fit": 1019 return firstFit 1020 elif algorithm == "best_fit": 1021 return bestFit 1022 elif algorithm == "worst_fit": 1023 return worstFit 1024 elif algorithm == "alternate_fit": 1025 return alternateFit 1026 else: 1027 raise ValueError("Algorithm [%s] is invalid." % algorithm)
1028
1029 - def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
1030 """ 1031 Creates a tar file containing the files in the list. 1032 1033 By default, this method will create uncompressed tar files. If you pass 1034 in mode C{'targz'}, then it will create gzipped tar files, and if you 1035 pass in mode C{'tarbz2'}, then it will create bzipped tar files. 1036 1037 The tar file will be created as a GNU tar archive, which enables extended 1038 file name lengths, etc. Since GNU tar is so prevalent, I've decided that 1039 the extra functionality out-weighs the disadvantage of not being 1040 "standard". 1041 1042 If you pass in C{flat=True}, then a "flat" archive will be created, and 1043 all of the files will be added to the root of the archive. So, the file 1044 C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}. 1045 1046 By default, the whole method call fails if there are problems adding any 1047 of the files to the archive, resulting in an exception. Under these 1048 circumstances, callers are advised that they might want to call 1049 L{removeInvalid()} and then attempt to extract the tar file a second 1050 time, since the most common cause of failures is a missing file (a file 1051 that existed when the list was built, but is gone again by the time the 1052 tar file is built). 1053 1054 If you want to, you can pass in C{ignore=True}, and the method will 1055 ignore errors encountered when adding individual files to the archive 1056 (but not errors opening and closing the archive itself). 1057 1058 We'll always attempt to remove the tarfile from disk if an exception will 1059 be thrown. 1060 1061 @note: No validation is done as to whether the entries in the list are 1062 files, since only files or soft links should be in an object like this. 1063 However, to be safe, everything is explicitly added to the tar archive 1064 non-recursively so it's safe to include soft links to directories. 1065 1066 @note: The Python C{tarfile} module, which is used internally here, is 1067 supposed to deal properly with long filenames and links. In my testing, 1068 I have found that it appears to be able to add long really long filenames 1069 to archives, but doesn't do a good job reading them back out, even out of 1070 an archive it created. Fortunately, all Cedar Backup does is add files 1071 to archives. 1072 1073 @param path: Path of tar file to create on disk 1074 @type path: String representing a path on disk 1075 1076 @param mode: Tar creation mode 1077 @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'} 1078 1079 @param ignore: Indicates whether to ignore certain errors. 1080 @type ignore: Boolean 1081 1082 @param flat: Creates "flat" archive by putting all items in root 1083 @type flat: Boolean 1084 1085 @raise ValueError: If mode is not valid 1086 @raise ValueError: If list is empty 1087 @raise ValueError: If the path could not be encoded properly. 1088 @raise TarError: If there is a problem creating the tar file 1089 """ 1090 # pylint: disable=E1101 1091 path = encodePath(path) 1092 if len(self) == 0: raise ValueError("Empty list cannot be used to generate tarfile.") 1093 if mode == 'tar': tarmode = "w:" 1094 elif mode == 'targz': tarmode = "w:gz" 1095 elif mode == 'tarbz2': tarmode = "w:bz2" 1096 else: raise ValueError("Mode [%s] is not valid." % mode) 1097 try: 1098 tar = tarfile.open(path, tarmode) 1099 try: 1100 tar.format = tarfile.GNU_FORMAT 1101 except AttributeError: 1102 tar.posix = False 1103 for entry in self: 1104 try: 1105 if flat: 1106 tar.add(entry, arcname=os.path.basename(entry), recursive=False) 1107 else: 1108 tar.add(entry, recursive=False) 1109 except tarfile.TarError as e: 1110 if not ignore: 1111 raise e 1112 logger.info("Unable to add file [%s]; going on anyway.", entry) 1113 except OSError as e: 1114 if not ignore: 1115 raise tarfile.TarError(e) 1116 logger.info("Unable to add file [%s]; going on anyway.", entry) 1117 tar.close() 1118 except tarfile.ReadError as e: 1119 try: tar.close() 1120 except: pass 1121 if os.path.exists(path): 1122 try: os.remove(path) 1123 except: pass 1124 raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path) 1125 except tarfile.TarError as e: 1126 try: tar.close() 1127 except: pass 1128 if os.path.exists(path): 1129 try: os.remove(path) 1130 except: pass 1131 raise e
1132
1133 - def removeUnchanged(self, digestMap, captureDigest=False):
1134 """ 1135 Removes unchanged entries from the list. 1136 1137 This method relies on a digest map as returned from L{generateDigestMap}. 1138 For each entry in C{digestMap}, if the entry also exists in the current 1139 list I{and} the entry in the current list has the same digest value as in 1140 the map, the entry in the current list will be removed. 1141 1142 This method offers a convenient way for callers to filter unneeded 1143 entries from a list. The idea is that a caller will capture a digest map 1144 from C{generateDigestMap} at some point in time (perhaps the beginning of 1145 the week), and will save off that map using C{pickle} or some other 1146 method. Then, the caller could use this method sometime in the future to 1147 filter out any unchanged files based on the saved-off map. 1148 1149 If C{captureDigest} is passed-in as C{True}, then digest information will 1150 be captured for the entire list before the removal step occurs using the 1151 same rules as in L{generateDigestMap}. The check will involve a lookup 1152 into the complete digest map. 1153 1154 If C{captureDigest} is passed in as C{False}, we will only generate a 1155 digest value for files we actually need to check, and we'll ignore any 1156 entry in the list which isn't a file that currently exists on disk. 1157 1158 The return value varies depending on C{captureDigest}, as well. To 1159 preserve backwards compatibility, if C{captureDigest} is C{False}, then 1160 we'll just return a single value representing the number of entries 1161 removed. Otherwise, we'll return a tuple of C{(entries removed, digest 1162 map)}. The returned digest map will be in exactly the form returned by 1163 L{generateDigestMap}. 1164 1165 @note: For performance reasons, this method actually ends up rebuilding 1166 the list from scratch. First, we build a temporary dictionary containing 1167 all of the items from the original list. Then, we remove items as needed 1168 from the dictionary (which is faster than the equivalent operation on a 1169 list). Finally, we replace the contents of the current list based on the 1170 keys left in the dictionary. This should be transparent to the caller. 1171 1172 @param digestMap: Dictionary mapping file name to digest value. 1173 @type digestMap: Map as returned from L{generateDigestMap}. 1174 1175 @param captureDigest: Indicates that digest information should be captured. 1176 @type captureDigest: Boolean 1177 1178 @return: Results as discussed above (format varies based on arguments) 1179 """ 1180 if captureDigest: 1181 removed = 0 1182 table = {} 1183 captured = {} 1184 for entry in self: 1185 if os.path.isfile(entry) and not os.path.islink(entry): 1186 table[entry] = BackupFileList._generateDigest(entry) 1187 captured[entry] = table[entry] 1188 else: 1189 table[entry] = None 1190 for entry in list(digestMap.keys()): 1191 if entry in table: 1192 if table[entry] is not None: # equivalent to file/link check in other case 1193 digest = table[entry] 1194 if digest == digestMap[entry]: 1195 removed += 1 1196 del table[entry] 1197 logger.debug("Discarded unchanged file [%s].", entry) 1198 self[:] = list(table.keys()) 1199 return (removed, captured) 1200 else: 1201 removed = 0 1202 table = {} 1203 for entry in self: 1204 table[entry] = None 1205 for entry in list(digestMap.keys()): 1206 if entry in table: 1207 if os.path.isfile(entry) and not os.path.islink(entry): 1208 digest = BackupFileList._generateDigest(entry) 1209 if digest == digestMap[entry]: 1210 removed += 1 1211 del table[entry] 1212 logger.debug("Discarded unchanged file [%s].", entry) 1213 self[:] = list(table.keys()) 1214 return removed
1215
1216 1217 ######################################################################## 1218 # PurgeItemList class definition 1219 ######################################################################## 1220 1221 -class PurgeItemList(FilesystemList): # pylint: disable=R0904
1222 1223 ###################### 1224 # Class documentation 1225 ###################### 1226 1227 """ 1228 List of files and directories to be purged. 1229 1230 A PurgeItemList is a L{FilesystemList} containing a list of files and 1231 directories to be purged. On top of the generic functionality provided by 1232 L{FilesystemList}, this class adds functionality to remove items that are 1233 too young to be purged, and to actually remove each item in the list from 1234 the filesystem. 1235 1236 The other main difference is that when you add a directory's contents to a 1237 purge item list, the directory itself is not added to the list. This way, 1238 if someone asks to purge within in C{/opt/backup/collect}, that directory 1239 doesn't get removed once all of the files within it is gone. 1240 """ 1241 1242 ############## 1243 # Constructor 1244 ############## 1245
1246 - def __init__(self):
1247 """Initializes a list with no configured exclusions.""" 1248 FilesystemList.__init__(self)
1249 1250 1251 ############## 1252 # Add methods 1253 ############## 1254
1255 - def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
1256 """ 1257 Adds the contents of a directory to the list. 1258 1259 The path must exist and must be a directory or a link to a directory. 1260 The contents of the directory (but I{not} the directory path itself) will 1261 be recursively added to the list, subject to any exclusions that are in 1262 place. If you only want the directory and its contents to be added, then 1263 pass in C{recursive=False}. 1264 1265 @note: If a directory's absolute path matches an exclude pattern or path, 1266 or if the directory contains the configured ignore file, then the 1267 directory and all of its contents will be recursively excluded from the 1268 list. 1269 1270 @note: If the passed-in directory happens to be a soft link, it will be 1271 recursed. However, the linkDepth parameter controls whether any soft 1272 links I{within} the directory will be recursed. The link depth is 1273 maximum depth of the tree at which soft links should be followed. So, a 1274 depth of 0 does not follow any soft links, a depth of 1 follows only 1275 links within the passed-in directory, a depth of 2 follows the links at 1276 the next level down, etc. 1277 1278 @note: Any invalid soft links (i.e. soft links that point to 1279 non-existent items) will be silently ignored. 1280 1281 @note: The L{excludeDirs} flag only controls whether any given soft link 1282 path itself is added to the list once it has been discovered. It does 1283 I{not} modify any behavior related to directory recursion. 1284 1285 @note: The L{excludeDirs} flag only controls whether any given directory 1286 path itself is added to the list once it has been discovered. It does 1287 I{not} modify any behavior related to directory recursion. 1288 1289 @note: If you call this method I{on a link to a directory} that link will 1290 never be dereferenced (it may, however, be followed). 1291 1292 @param path: Directory path whose contents should be added to the list 1293 @type path: String representing a path on disk 1294 1295 @param recursive: Indicates whether directory contents should be added recursively. 1296 @type recursive: Boolean value 1297 1298 @param addSelf: Ignored in this subclass. 1299 1300 @param linkDepth: Depth of soft links that should be followed 1301 @type linkDepth: Integer value, where zero means not to follow any soft links 1302 1303 @param dereference: Indicates whether soft links, if followed, should be dereferenced 1304 @type dereference: Boolean value 1305 1306 @return: Number of items recursively added to the list 1307 1308 @raise ValueError: If path is not a directory or does not exist. 1309 @raise ValueError: If the path could not be encoded properly. 1310 """ 1311 path = encodePath(path) 1312 path = normalizeDir(path) 1313 return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth, dereference)
1314 1315 1316 ################## 1317 # Utility methods 1318 ################## 1319
1320 - def removeYoungFiles(self, daysOld):
1321 """ 1322 Removes from the list files younger than a certain age (in days). 1323 1324 Any file whose "age" in days is less than (C{<}) the value of the 1325 C{daysOld} parameter will be removed from the list so that it will not be 1326 purged later when L{purgeItems} is called. Directories and soft links 1327 will be ignored. 1328 1329 The "age" of a file is the amount of time since the file was last used, 1330 per the most recent of the file's C{st_atime} and C{st_mtime} values. 1331 1332 @note: Some people find the "sense" of this method confusing or 1333 "backwards". Keep in mind that this method is used to remove items 1334 I{from the list}, not from the filesystem! It removes from the list 1335 those items that you would I{not} want to purge because they are too 1336 young. As an example, passing in C{daysOld} of zero (0) would remove 1337 from the list no files, which would result in purging all of the files 1338 later. I would be happy to make a synonym of this method with an 1339 easier-to-understand "sense", if someone can suggest one. 1340 1341 @param daysOld: Minimum age of files that are to be kept in the list. 1342 @type daysOld: Integer value >= 0. 1343 1344 @return: Number of entries removed 1345 """ 1346 removed = 0 1347 daysOld = int(daysOld) 1348 if daysOld < 0: 1349 raise ValueError("Days old value must be an integer >= 0.") 1350 for entry in self[:]: 1351 if os.path.isfile(entry) and not os.path.islink(entry): 1352 try: 1353 ageInDays = calculateFileAge(entry) 1354 ageInWholeDays = math.floor(ageInDays) 1355 if ageInWholeDays < 0: ageInWholeDays = 0 1356 if ageInWholeDays < daysOld: 1357 removed += 1 1358 self.remove(entry) 1359 except OSError: 1360 pass 1361 return removed
1362
1363 - def purgeItems(self):
1364 """ 1365 Purges all items in the list. 1366 1367 Every item in the list will be purged. Directories in the list will 1368 I{not} be purged recursively, and hence will only be removed if they are 1369 empty. Errors will be ignored. 1370 1371 To faciliate easy removal of directories that will end up being empty, 1372 the delete process happens in two passes: files first (including soft 1373 links), then directories. 1374 1375 @return: Tuple containing count of (files, dirs) removed 1376 """ 1377 files = 0 1378 dirs = 0 1379 for entry in self: 1380 if os.path.exists(entry) and (os.path.isfile(entry) or os.path.islink(entry)): 1381 try: 1382 os.remove(entry) 1383 files += 1 1384 logger.debug("Purged file [%s].", entry) 1385 except OSError: 1386 pass 1387 for entry in self: 1388 if os.path.exists(entry) and os.path.isdir(entry) and not os.path.islink(entry): 1389 try: 1390 os.rmdir(entry) 1391 dirs += 1 1392 logger.debug("Purged empty directory [%s].", entry) 1393 except OSError: 1394 pass 1395 return (files, dirs)
1396
1397 1398 ######################################################################## 1399 # Public functions 1400 ######################################################################## 1401 1402 ########################## 1403 # normalizeDir() function 1404 ########################## 1405 1406 -def normalizeDir(path):
1407 """ 1408 Normalizes a directory name. 1409 1410 For our purposes, a directory name is normalized by removing the trailing 1411 path separator, if any. This is important because we want directories to 1412 appear within lists in a consistent way, although from the user's 1413 perspective passing in C{/path/to/dir/} and C{/path/to/dir} are equivalent. 1414 1415 @param path: Path to be normalized. 1416 @type path: String representing a path on disk 1417 1418 @return: Normalized path, which should be equivalent to the original. 1419 """ 1420 if path != os.sep and path[-1:] == os.sep: 1421 return path[:-1] 1422 return path
1423
1424 1425 ############################# 1426 # compareContents() function 1427 ############################# 1428 1429 -def compareContents(path1, path2, verbose=False):
1430 """ 1431 Compares the contents of two directories to see if they are equivalent. 1432 1433 The two directories are recursively compared. First, we check whether they 1434 contain exactly the same set of files. Then, we check to see every given 1435 file has exactly the same contents in both directories. 1436 1437 This is all relatively simple to implement through the magic of 1438 L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix 1439 off the front of each entry in the mapping it generates. This makes our 1440 comparison as simple as creating a list for each path, then generating a 1441 digest map for each path and comparing the two. 1442 1443 If no exception is thrown, the two directories are considered identical. 1444 1445 If the C{verbose} flag is C{True}, then an alternate (but slower) method is 1446 used so that any thrown exception can indicate exactly which file caused the 1447 comparison to fail. The thrown C{ValueError} exception distinguishes 1448 between the directories containing different files, and containing the same 1449 files with differing content. 1450 1451 @note: Symlinks are I{not} followed for the purposes of this comparison. 1452 1453 @param path1: First path to compare. 1454 @type path1: String representing a path on disk 1455 1456 @param path2: First path to compare. 1457 @type path2: String representing a path on disk 1458 1459 @param verbose: Indicates whether a verbose response should be given. 1460 @type verbose: Boolean 1461 1462 @raise ValueError: If a directory doesn't exist or can't be read. 1463 @raise ValueError: If the two directories are not equivalent. 1464 @raise IOError: If there is an unusual problem reading the directories. 1465 """ 1466 try: 1467 path1List = BackupFileList() 1468 path1List.addDirContents(path1) 1469 path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1)) 1470 path2List = BackupFileList() 1471 path2List.addDirContents(path2) 1472 path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2)) 1473 compareDigestMaps(path1Digest, path2Digest, verbose) 1474 except IOError as e: 1475 logger.error("I/O error encountered during consistency check.") 1476 raise e
1477
1478 -def compareDigestMaps(digest1, digest2, verbose=False):
1479 """ 1480 Compares two digest maps and throws an exception if they differ. 1481 1482 @param digest1: First digest to compare. 1483 @type digest1: Digest as returned from BackupFileList.generateDigestMap() 1484 1485 @param digest2: Second digest to compare. 1486 @type digest2: Digest as returned from BackupFileList.generateDigestMap() 1487 1488 @param verbose: Indicates whether a verbose response should be given. 1489 @type verbose: Boolean 1490 1491 @raise ValueError: If the two directories are not equivalent. 1492 """ 1493 if not verbose: 1494 if digest1 != digest2: 1495 raise ValueError("Consistency check failed.") 1496 else: 1497 list1 = UnorderedList(list(digest1.keys())) 1498 list2 = UnorderedList(list(digest2.keys())) 1499 if list1 != list2: 1500 raise ValueError("Directories contain a different set of files.") 1501 for key in list1: 1502 if digest1[key] != digest2[key]: 1503 raise ValueError("File contents for [%s] vary between directories." % key)
1504