Package CedarBackup3 :: Module filesystem
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup3.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008,2010,2015 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python 3 (>= 3.4) 
  29  # Project  : Cedar Backup, release 3 
  30  # Purpose  : Provides filesystem-related objects. 
  31  # 
  32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  33   
  34  ######################################################################## 
  35  # Module documentation 
  36  ######################################################################## 
  37   
  38  """ 
  39  Provides filesystem-related objects. 
  40  @sort: FilesystemList, BackupFileList, PurgeItemList 
  41  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  42  """ 
  43   
  44   
  45  ######################################################################## 
  46  # Imported modules 
  47  ######################################################################## 
  48   
  49  # System modules 
  50  import os 
  51  import re 
  52  import math 
  53  import logging 
  54  import tarfile 
  55  import hashlib 
  56   
  57  # Cedar Backup modules 
  58  from CedarBackup3.knapsack import firstFit, bestFit, worstFit, alternateFit 
  59  from CedarBackup3.util import AbsolutePathList, UnorderedList, RegexList 
  60  from CedarBackup3.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink 
  61   
  62   
  63  ######################################################################## 
  64  # Module-wide variables 
  65  ######################################################################## 
  66   
  67  logger = logging.getLogger("CedarBackup3.log.filesystem") 
68 69 70 ######################################################################## 71 # FilesystemList class definition 72 ######################################################################## 73 74 -class FilesystemList(list):
75 76 ###################### 77 # Class documentation 78 ###################### 79 80 """ 81 Represents a list of filesystem items. 82 83 This is a generic class that represents a list of filesystem items. Callers 84 can add individual files or directories to the list, or can recursively add 85 the contents of a directory. The class also allows for up-front exclusions 86 in several forms (all files, all directories, all items matching a pattern, 87 all items whose basename matches a pattern, or all directories containing a 88 specific "ignore file"). Symbolic links are typically backed up 89 non-recursively, i.e. the link to a directory is backed up, but not the 90 contents of that link (we don't want to deal with recursive loops, etc.). 91 92 The custom methods such as L{addFile} will only add items if they exist on 93 the filesystem and do not match any exclusions that are already in place. 94 However, since a FilesystemList is a subclass of Python's standard list 95 class, callers can also add items to the list in the usual way, using 96 methods like C{append()} or C{insert()}. No validations apply to items 97 added to the list in this way; however, many list-manipulation methods deal 98 "gracefully" with items that don't exist in the filesystem, often by 99 ignoring them. 100 101 Once a list has been created, callers can remove individual items from the 102 list using standard methods like C{pop()} or C{remove()} or they can use 103 custom methods to remove specific types of entries or entries which match a 104 particular pattern. 105 106 @note: Regular expression patterns that apply to paths are assumed to be 107 bounded at front and back by the beginning and end of the string, i.e. they 108 are treated as if they begin with C{^} and end with C{$}. This is true 109 whether we are matching a complete path or a basename. 110 111 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 112 removeLinks, removeMatch, removeInvalid, normalize, 113 excludeFiles, excludeDirs, excludeLinks, excludePaths, 114 excludePatterns, excludeBasenamePatterns, ignoreFile 115 """ 116 117 118 ############## 119 # Constructor 120 ############## 121
122 - def __init__(self):
123 """Initializes a list with no configured exclusions.""" 124 list.__init__(self) 125 self._excludeFiles = False 126 self._excludeDirs = False 127 self._excludeLinks = False 128 self._excludePaths = None 129 self._excludePatterns = None 130 self._excludeBasenamePatterns = None 131 self._ignoreFile = None 132 self.excludeFiles = False 133 self.excludeLinks = False 134 self.excludeDirs = False 135 self.excludePaths = [] 136 self.excludePatterns = RegexList() 137 self.excludeBasenamePatterns = RegexList() 138 self.ignoreFile = None
139 140 141 ############# 142 # Properties 143 ############# 144
145 - def _setExcludeFiles(self, value):
146 """ 147 Property target used to set the exclude files flag. 148 No validations, but we normalize the value to C{True} or C{False}. 149 """ 150 if value: 151 self._excludeFiles = True 152 else: 153 self._excludeFiles = False
154
155 - def _getExcludeFiles(self):
156 """ 157 Property target used to get the exclude files flag. 158 """ 159 return self._excludeFiles
160
161 - def _setExcludeDirs(self, value):
162 """ 163 Property target used to set the exclude directories flag. 164 No validations, but we normalize the value to C{True} or C{False}. 165 """ 166 if value: 167 self._excludeDirs = True 168 else: 169 self._excludeDirs = False
170
171 - def _getExcludeDirs(self):
172 """ 173 Property target used to get the exclude directories flag. 174 """ 175 return self._excludeDirs
176 186 192
193 - def _setExcludePaths(self, value):
194 """ 195 Property target used to set the exclude paths list. 196 A C{None} value is converted to an empty list. 197 Elements do not have to exist on disk at the time of assignment. 198 @raise ValueError: If any list element is not an absolute path. 199 """ 200 self._excludePaths = AbsolutePathList() 201 if value is not None: 202 self._excludePaths.extend(value)
203
204 - def _getExcludePaths(self):
205 """ 206 Property target used to get the absolute exclude paths list. 207 """ 208 return self._excludePaths
209
210 - def _setExcludePatterns(self, value):
211 """ 212 Property target used to set the exclude patterns list. 213 A C{None} value is converted to an empty list. 214 """ 215 self._excludePatterns = RegexList() 216 if value is not None: 217 self._excludePatterns.extend(value)
218
219 - def _getExcludePatterns(self):
220 """ 221 Property target used to get the exclude patterns list. 222 """ 223 return self._excludePatterns
224
225 - def _setExcludeBasenamePatterns(self, value):
226 """ 227 Property target used to set the exclude basename patterns list. 228 A C{None} value is converted to an empty list. 229 """ 230 self._excludeBasenamePatterns = RegexList() 231 if value is not None: 232 self._excludeBasenamePatterns.extend(value)
233
235 """ 236 Property target used to get the exclude basename patterns list. 237 """ 238 return self._excludeBasenamePatterns
239
240 - def _setIgnoreFile(self, value):
241 """ 242 Property target used to set the ignore file. 243 The value must be a non-empty string if it is not C{None}. 244 @raise ValueError: If the value is an empty string. 245 """ 246 if value is not None: 247 if len(value) < 1: 248 raise ValueError("The ignore file must be a non-empty string.") 249 self._ignoreFile = value
250
251 - def _getIgnoreFile(self):
252 """ 253 Property target used to get the ignore file. 254 """ 255 return self._ignoreFile
256 257 excludeFiles = property(_getExcludeFiles, _setExcludeFiles, None, "Boolean indicating whether files should be excluded.") 258 excludeDirs = property(_getExcludeDirs, _setExcludeDirs, None, "Boolean indicating whether directories should be excluded.") 259 excludeLinks = property(_getExcludeLinks, _setExcludeLinks, None, "Boolean indicating whether soft links should be excluded.") 260 excludePaths = property(_getExcludePaths, _setExcludePaths, None, "List of absolute paths to be excluded.") 261 excludePatterns = property(_getExcludePatterns, _setExcludePatterns, None, 262 "List of regular expression patterns (matching complete path) to be excluded.") 263 excludeBasenamePatterns = property(_getExcludeBasenamePatterns, _setExcludeBasenamePatterns, 264 None, "List of regular expression patterns (matching basename) to be excluded.") 265 ignoreFile = property(_getIgnoreFile, _setIgnoreFile, None, "Name of file which will cause directory contents to be ignored.") 266 267 268 ############## 269 # Add methods 270 ############## 271
272 - def addFile(self, path):
273 """ 274 Adds a file to the list. 275 276 The path must exist and must be a file or a link to an existing file. It 277 will be added to the list subject to any exclusions that are in place. 278 279 @param path: File path to be added to the list 280 @type path: String representing a path on disk 281 282 @return: Number of items added to the list. 283 284 @raise ValueError: If path is not a file or does not exist. 285 @raise ValueError: If the path could not be encoded properly. 286 """ 287 path = encodePath(path) 288 if not os.path.exists(path) or not os.path.isfile(path): 289 logger.debug("Path [%s] is not a file or does not exist on disk.", path) 290 raise ValueError("Path is not a file or does not exist on disk.") 291 if self.excludeLinks and os.path.islink(path): 292 logger.debug("Path [%s] is excluded based on excludeLinks.", path) 293 return 0 294 if self.excludeFiles: 295 logger.debug("Path [%s] is excluded based on excludeFiles.", path) 296 return 0 297 if path in self.excludePaths: 298 logger.debug("Path [%s] is excluded based on excludePaths.", path) 299 return 0 300 for pattern in self.excludePatterns: 301 pattern = encodePath(pattern) # use same encoding as filenames 302 if re.compile(r"^%s$" % pattern).match(path): # safe to assume all are valid due to RegexList 303 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 304 return 0 305 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 306 pattern = encodePath(pattern) # use same encoding as filenames 307 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 308 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 309 return 0 310 self.append(path) 311 logger.debug("Added file to list: [%s]", path) 312 return 1
313
314 - def addDir(self, path):
315 """ 316 Adds a directory to the list. 317 318 The path must exist and must be a directory or a link to an existing 319 directory. It will be added to the list subject to any exclusions that 320 are in place. The L{ignoreFile} does not apply to this method, only to 321 L{addDirContents}. 322 323 @param path: Directory path to be added to the list 324 @type path: String representing a path on disk 325 326 @return: Number of items added to the list. 327 328 @raise ValueError: If path is not a directory or does not exist. 329 @raise ValueError: If the path could not be encoded properly. 330 """ 331 path = encodePath(path) 332 path = normalizeDir(path) 333 if not os.path.exists(path) or not os.path.isdir(path): 334 logger.debug("Path [%s] is not a directory or does not exist on disk.", path) 335 raise ValueError("Path is not a directory or does not exist on disk.") 336 if self.excludeLinks and os.path.islink(path): 337 logger.debug("Path [%s] is excluded based on excludeLinks.", path) 338 return 0 339 if self.excludeDirs: 340 logger.debug("Path [%s] is excluded based on excludeDirs.", path) 341 return 0 342 if path in self.excludePaths: 343 logger.debug("Path [%s] is excluded based on excludePaths.", path) 344 return 0 345 for pattern in self.excludePatterns: # safe to assume all are valid due to RegexList 346 pattern = encodePath(pattern) # use same encoding as filenames 347 if re.compile(r"^%s$" % pattern).match(path): 348 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 349 return 0 350 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 351 pattern = encodePath(pattern) # use same encoding as filenames 352 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 353 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 354 return 0 355 self.append(path) 356 logger.debug("Added directory to list: [%s]", path) 357 return 1
358
359 - def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
360 """ 361 Adds the contents of a directory to the list. 362 363 The path must exist and must be a directory or a link to a directory. 364 The contents of the directory (as well as the directory path itself) will 365 be recursively added to the list, subject to any exclusions that are in 366 place. If you only want the directory and its immediate contents to be 367 added, then pass in C{recursive=False}. 368 369 @note: If a directory's absolute path matches an exclude pattern or path, 370 or if the directory contains the configured ignore file, then the 371 directory and all of its contents will be recursively excluded from the 372 list. 373 374 @note: If the passed-in directory happens to be a soft link, it will be 375 recursed. However, the linkDepth parameter controls whether any soft 376 links I{within} the directory will be recursed. The link depth is 377 maximum depth of the tree at which soft links should be followed. So, a 378 depth of 0 does not follow any soft links, a depth of 1 follows only 379 links within the passed-in directory, a depth of 2 follows the links at 380 the next level down, etc. 381 382 @note: Any invalid soft links (i.e. soft links that point to 383 non-existent items) will be silently ignored. 384 385 @note: The L{excludeDirs} flag only controls whether any given directory 386 path itself is added to the list once it has been discovered. It does 387 I{not} modify any behavior related to directory recursion. 388 389 @note: If you call this method I{on a link to a directory} that link will 390 never be dereferenced (it may, however, be followed). 391 392 @param path: Directory path whose contents should be added to the list 393 @type path: String representing a path on disk 394 395 @param recursive: Indicates whether directory contents should be added recursively. 396 @type recursive: Boolean value 397 398 @param addSelf: Indicates whether the directory itself should be added to the list. 399 @type addSelf: Boolean value 400 401 @param linkDepth: Maximum depth of the tree at which soft links should be followed 402 @type linkDepth: Integer value, where zero means not to follow any soft links 403 404 @param dereference: Indicates whether soft links, if followed, should be dereferenced 405 @type dereference: Boolean value 406 407 @return: Number of items recursively added to the list 408 409 @raise ValueError: If path is not a directory or does not exist. 410 @raise ValueError: If the path could not be encoded properly. 411 """ 412 path = encodePath(path) 413 path = normalizeDir(path) 414 return self._addDirContentsInternal(path, addSelf, recursive, linkDepth, dereference)
415
416 - def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
417 """ 418 Internal implementation of C{addDirContents}. 419 420 This internal implementation exists due to some refactoring. Basically, 421 some subclasses have a need to add the contents of a directory, but not 422 the directory itself. This is different than the standard C{FilesystemList} 423 behavior and actually ends up making a special case out of the first 424 call in the recursive chain. Since I don't want to expose the modified 425 interface, C{addDirContents} ends up being wholly implemented in terms 426 of this method. 427 428 The linkDepth parameter controls whether soft links are followed when we 429 are adding the contents recursively. Any recursive calls reduce the 430 value by one. If the value zero or less, then soft links will just be 431 added as directories, but will not be followed. This means that links 432 are followed to a I{constant depth} starting from the top-most directory. 433 434 There is one difference between soft links and directories: soft links 435 that are added recursively are not placed into the list explicitly. This 436 is because if we do add the links recursively, the resulting tar file 437 gets a little confused (it has a link and a directory with the same 438 name). 439 440 @note: If you call this method I{on a link to a directory} that link will 441 never be dereferenced (it may, however, be followed). 442 443 @param path: Directory path whose contents should be added to the list. 444 @param includePath: Indicates whether to include the path as well as contents. 445 @param recursive: Indicates whether directory contents should be added recursively. 446 @param linkDepth: Depth of soft links that should be followed 447 @param dereference: Indicates whether soft links, if followed, should be dereferenced 448 449 @return: Number of items recursively added to the list 450 451 @raise ValueError: If path is not a directory or does not exist. 452 """ 453 added = 0 454 if not os.path.exists(path) or not os.path.isdir(path): 455 logger.debug("Path [%s] is not a directory or does not exist on disk.", path) 456 raise ValueError("Path is not a directory or does not exist on disk.") 457 if path in self.excludePaths: 458 logger.debug("Path [%s] is excluded based on excludePaths.", path) 459 return added 460 for pattern in self.excludePatterns: # safe to assume all are valid due to RegexList 461 pattern = encodePath(pattern) # use same encoding as filenames 462 if re.compile(r"^%s$" % pattern).match(path): 463 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 464 return added 465 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 466 pattern = encodePath(pattern) # use same encoding as filenames 467 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 468 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 469 return added 470 if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)): 471 logger.debug("Path [%s] is excluded based on ignore file.", path) 472 return added 473 if includePath: 474 added += self.addDir(path) # could actually be excluded by addDir, yet 475 for entry in os.listdir(path): 476 entrypath = os.path.join(path, entry) 477 if os.path.isfile(entrypath): 478 if linkDepth > 0 and dereference: 479 derefpath = dereferenceLink(entrypath) 480 if derefpath != entrypath: 481 added += self.addFile(derefpath) 482 added += self.addFile(entrypath) 483 elif os.path.isdir(entrypath): 484 if os.path.islink(entrypath): 485 if recursive: 486 if linkDepth > 0: 487 newDepth = linkDepth - 1 488 if dereference: 489 derefpath = dereferenceLink(entrypath) 490 if derefpath != entrypath: 491 added += self._addDirContentsInternal(derefpath, True, recursive, newDepth, dereference) 492 added += self.addDir(entrypath) 493 else: 494 added += self._addDirContentsInternal(entrypath, False, recursive, newDepth, dereference) 495 else: 496 added += self.addDir(entrypath) 497 else: 498 added += self.addDir(entrypath) 499 else: 500 if recursive: 501 newDepth = linkDepth - 1 502 added += self._addDirContentsInternal(entrypath, True, recursive, newDepth, dereference) 503 else: 504 added += self.addDir(entrypath) 505 return added
506 507 508 ################# 509 # Remove methods 510 ################# 511
512 - def removeFiles(self, pattern=None):
513 """ 514 Removes file entries from the list. 515 516 If C{pattern} is not passed in or is C{None}, then all file entries will 517 be removed from the list. Otherwise, only those file entries matching 518 the pattern will be removed. Any entry which does not exist on disk 519 will be ignored (use L{removeInvalid} to purge those entries). 520 521 This method might be fairly slow for large lists, since it must check the 522 type of each item in the list. If you know ahead of time that you want 523 to exclude all files, then you will be better off setting L{excludeFiles} 524 to C{True} before adding items to the list. 525 526 @param pattern: Regular expression pattern representing entries to remove 527 528 @return: Number of entries removed 529 @raise ValueError: If the passed-in pattern is not a valid regular expression. 530 """ 531 removed = 0 532 if pattern is None: 533 for entry in self[:]: 534 if os.path.exists(entry) and os.path.isfile(entry): 535 self.remove(entry) 536 logger.debug("Removed path [%s] from list.", entry) 537 removed += 1 538 else: 539 try: 540 pattern = encodePath(pattern) # use same encoding as filenames 541 compiled = re.compile(pattern) 542 except re.error: 543 raise ValueError("Pattern is not a valid regular expression.") 544 for entry in self[:]: 545 if os.path.exists(entry) and os.path.isfile(entry): 546 if compiled.match(entry): 547 self.remove(entry) 548 logger.debug("Removed path [%s] from list.", entry) 549 removed += 1 550 logger.debug("Removed a total of %d entries.", removed) 551 return removed
552
553 - def removeDirs(self, pattern=None):
554 """ 555 Removes directory entries from the list. 556 557 If C{pattern} is not passed in or is C{None}, then all directory entries 558 will be removed from the list. Otherwise, only those directory entries 559 matching the pattern will be removed. Any entry which does not exist on 560 disk will be ignored (use L{removeInvalid} to purge those entries). 561 562 This method might be fairly slow for large lists, since it must check the 563 type of each item in the list. If you know ahead of time that you want 564 to exclude all directories, then you will be better off setting 565 L{excludeDirs} to C{True} before adding items to the list (note that this 566 will not prevent you from recursively adding the I{contents} of 567 directories). 568 569 @param pattern: Regular expression pattern representing entries to remove 570 571 @return: Number of entries removed 572 @raise ValueError: If the passed-in pattern is not a valid regular expression. 573 """ 574 removed = 0 575 if pattern is None: 576 for entry in self[:]: 577 if os.path.exists(entry) and os.path.isdir(entry): 578 self.remove(entry) 579 logger.debug("Removed path [%s] from list.", entry) 580 removed += 1 581 else: 582 try: 583 pattern = encodePath(pattern) # use same encoding as filenames 584 compiled = re.compile(pattern) 585 except re.error: 586 raise ValueError("Pattern is not a valid regular expression.") 587 for entry in self[:]: 588 if os.path.exists(entry) and os.path.isdir(entry): 589 if compiled.match(entry): 590 self.remove(entry) 591 logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 592 removed += 1 593 logger.debug("Removed a total of %d entries.", removed) 594 return removed
595 636
637 - def removeMatch(self, pattern):
638 """ 639 Removes from the list all entries matching a pattern. 640 641 This method removes from the list all entries which match the passed in 642 C{pattern}. Since there is no need to check the type of each entry, it 643 is faster to call this method than to call the L{removeFiles}, 644 L{removeDirs} or L{removeLinks} methods individually. If you know which 645 patterns you will want to remove ahead of time, you may be better off 646 setting L{excludePatterns} or L{excludeBasenamePatterns} before adding 647 items to the list. 648 649 @note: Unlike when using the exclude lists, the pattern here is I{not} 650 bounded at the front and the back of the string. You can use any pattern 651 you want. 652 653 @param pattern: Regular expression pattern representing entries to remove 654 655 @return: Number of entries removed. 656 @raise ValueError: If the passed-in pattern is not a valid regular expression. 657 """ 658 try: 659 pattern = encodePath(pattern) # use same encoding as filenames 660 compiled = re.compile(pattern) 661 except re.error: 662 raise ValueError("Pattern is not a valid regular expression.") 663 removed = 0 664 for entry in self[:]: 665 if compiled.match(entry): 666 self.remove(entry) 667 logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 668 removed += 1 669 logger.debug("Removed a total of %d entries.", removed) 670 return removed
671
672 - def removeInvalid(self):
673 """ 674 Removes from the list all entries that do not exist on disk. 675 676 This method removes from the list all entries which do not currently 677 exist on disk in some form. No attention is paid to whether the entries 678 are files or directories. 679 680 @return: Number of entries removed. 681 """ 682 removed = 0 683 for entry in self[:]: 684 if not os.path.exists(entry): 685 self.remove(entry) 686 logger.debug("Removed path [%s] from list.", entry) 687 removed += 1 688 logger.debug("Removed a total of %d entries.", removed) 689 return removed
690 691 692 ################## 693 # Utility methods 694 ################## 695
696 - def normalize(self):
697 """Normalizes the list, ensuring that each entry is unique.""" 698 orig = len(self) 699 self.sort() 700 dups = list(filter(lambda x, self=self: self[x] == self[x+1], list(range(0, len(self) - 1)))) 701 items = list(map(lambda x, self=self: self[x], dups)) 702 list(map(self.remove, items)) 703 new = len(self) 704 logger.debug("Completed normalizing list; removed %d items (%d originally, %d now).", new-orig, orig, new)
705
706 - def verify(self):
707 """ 708 Verifies that all entries in the list exist on disk. 709 @return: C{True} if all entries exist, C{False} otherwise. 710 """ 711 for entry in self: 712 if not os.path.exists(entry): 713 logger.debug("Path [%s] is invalid; list is not valid.", entry) 714 return False 715 logger.debug("All entries in list are valid.") 716 return True
717
718 719 ######################################################################## 720 # SpanItem class definition 721 ######################################################################## 722 723 -class SpanItem(object): # pylint: disable=R0903
724 """ 725 Item returned by L{BackupFileList.generateSpan}. 726 """
727 - def __init__(self, fileList, size, capacity, utilization):
728 """ 729 Create object. 730 @param fileList: List of files 731 @param size: Size (in bytes) of files 732 @param utilization: Utilization, as a percentage (0-100) 733 """ 734 self.fileList = fileList 735 self.size = size 736 self.capacity = capacity 737 self.utilization = utilization
738
739 740 ######################################################################## 741 # BackupFileList class definition 742 ######################################################################## 743 744 -class BackupFileList(FilesystemList): # pylint: disable=R0904
745 746 ###################### 747 # Class documentation 748 ###################### 749 750 """ 751 List of files to be backed up. 752 753 A BackupFileList is a L{FilesystemList} containing a list of files to be 754 backed up. It only contains files, not directories (soft links are treated 755 like files). On top of the generic functionality provided by 756 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 757 for each file in the list, and it also provides a method to calculate the 758 total size of the files in the list and a way to export the list into tar 759 form. 760 761 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 762 generateFitted, generateTarfile, removeUnchanged 763 """ 764 765 ############## 766 # Constructor 767 ############## 768
769 - def __init__(self):
770 """Initializes a list with no configured exclusions.""" 771 FilesystemList.__init__(self)
772 773 774 ################################ 775 # Overridden superclass methods 776 ################################ 777
778 - def addDir(self, path):
779 """ 780 Adds a directory to the list. 781 782 Note that this class does not allow directories to be added by themselves 783 (a backup list contains only files). However, since links to directories 784 are technically files, we allow them to be added. 785 786 This method is implemented in terms of the superclass method, with one 787 additional validation: the superclass method is only called if the 788 passed-in path is both a directory and a link. All of the superclass's 789 existing validations and restrictions apply. 790 791 @param path: Directory path to be added to the list 792 @type path: String representing a path on disk 793 794 @return: Number of items added to the list. 795 796 @raise ValueError: If path is not a directory or does not exist. 797 @raise ValueError: If the path could not be encoded properly. 798 """ 799 path = encodePath(path) 800 path = normalizeDir(path) 801 if os.path.isdir(path) and not os.path.islink(path): 802 return 0 803 else: 804 return FilesystemList.addDir(self, path)
805 806 807 ################## 808 # Utility methods 809 ################## 810
811 - def totalSize(self):
812 """ 813 Returns the total size among all files in the list. 814 Only files are counted. 815 Soft links that point at files are ignored. 816 Entries which do not exist on disk are ignored. 817 @return: Total size, in bytes 818 """ 819 total = 0.0 820 for entry in self: 821 if os.path.isfile(entry) and not os.path.islink(entry): 822 total += float(os.stat(entry).st_size) 823 return total
824
825 - def generateSizeMap(self):
826 """ 827 Generates a mapping from file to file size in bytes. 828 The mapping does include soft links, which are listed with size zero. 829 Entries which do not exist on disk are ignored. 830 @return: Dictionary mapping file to file size 831 """ 832 table = { } 833 for entry in self: 834 if os.path.islink(entry): 835 table[entry] = 0.0 836 elif os.path.isfile(entry): 837 table[entry] = float(os.stat(entry).st_size) 838 return table
839
840 - def generateDigestMap(self, stripPrefix=None):
841 """ 842 Generates a mapping from file to file digest. 843 844 Currently, the digest is an SHA hash, which should be pretty secure. In 845 the future, this might be a different kind of hash, but we guarantee that 846 the type of the hash will not change unless the library major version 847 number is bumped. 848 849 Entries which do not exist on disk are ignored. 850 851 Soft links are ignored. We would end up generating a digest for the file 852 that the soft link points at, which doesn't make any sense. 853 854 If C{stripPrefix} is passed in, then that prefix will be stripped from 855 each key when the map is generated. This can be useful in generating two 856 "relative" digest maps to be compared to one another. 857 858 @param stripPrefix: Common prefix to be stripped from paths 859 @type stripPrefix: String with any contents 860 861 @return: Dictionary mapping file to digest value 862 @see: L{removeUnchanged} 863 """ 864 table = { } 865 if stripPrefix is not None: 866 for entry in self: 867 if os.path.isfile(entry) and not os.path.islink(entry): 868 table[entry.replace(stripPrefix, "", 1)] = BackupFileList._generateDigest(entry) 869 else: 870 for entry in self: 871 if os.path.isfile(entry) and not os.path.islink(entry): 872 table[entry] = BackupFileList._generateDigest(entry) 873 return table
874 875 @staticmethod
876 - def _generateDigest(path):
877 """ 878 Generates an SHA digest for a given file on disk. 879 880 The original code for this function used this simplistic implementation, 881 which requires reading the entire file into memory at once in order to 882 generate a digest value:: 883 884 sha.new(open(path).read()).hexdigest() 885 886 Not surprisingly, this isn't an optimal solution. The U{Simple file 887 hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>} 888 Python Cookbook recipe describes how to incrementally generate a hash 889 value by reading in chunks of data rather than reading the file all at 890 once. The recipe relies on the the C{update()} method of the various 891 Python hashing algorithms. 892 893 In my tests using a 110 MB file on CD, the original implementation 894 requires 111 seconds. This implementation requires only 40-45 seconds, 895 which is a pretty substantial speed-up. 896 897 Experience shows that reading in around 4kB (4096 bytes) at a time yields 898 the best performance. Smaller reads are quite a bit slower, and larger 899 reads don't make much of a difference. The 4kB number makes me a little 900 suspicious, and I think it might be related to the size of a filesystem 901 read at the hardware level. However, I've decided to just hardcode 4096 902 until I have evidence that shows it's worthwhile making the read size 903 configurable. 904 905 @param path: Path to generate digest for. 906 907 @return: ASCII-safe SHA digest for the file. 908 @raise OSError: If the file cannot be opened. 909 """ 910 # pylint: disable=C0103,E1101 911 s = hashlib.sha1() 912 f = open(path, mode="rb") # in case platform cares about binary reads 913 readBytes = 4096 # see notes above 914 while readBytes > 0: 915 readString = f.read(readBytes) 916 s.update(readString) 917 readBytes = len(readString) 918 f.close() 919 digest = s.hexdigest() 920 logger.debug("Generated digest [%s] for file [%s].", digest, path) 921 return digest
922
923 - def generateFitted(self, capacity, algorithm="worst_fit"):
924 """ 925 Generates a list of items that fit in the indicated capacity. 926 927 Sometimes, callers would like to include every item in a list, but are 928 unable to because not all of the items fit in the space available. This 929 method returns a copy of the list, containing only the items that fit in 930 a given capacity. A copy is returned so that we don't lose any 931 information if for some reason the fitted list is unsatisfactory. 932 933 The fitting is done using the functions in the knapsack module. By 934 default, the first fit algorithm is used, but you can also choose 935 from best fit, worst fit and alternate fit. 936 937 @param capacity: Maximum capacity among the files in the new list 938 @type capacity: Integer, in bytes 939 940 @param algorithm: Knapsack (fit) algorithm to use 941 @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit" 942 943 @return: Copy of list with total size no larger than indicated capacity 944 @raise ValueError: If the algorithm is invalid. 945 """ 946 table = self._getKnapsackTable() 947 function = BackupFileList._getKnapsackFunction(algorithm) 948 return function(table, capacity)[0]
949
950 - def generateSpan(self, capacity, algorithm="worst_fit"):
951 """ 952 Splits the list of items into sub-lists that fit in a given capacity. 953 954 Sometimes, callers need split to a backup file list into a set of smaller 955 lists. For instance, you could use this to "span" the files across a set 956 of discs. 957 958 The fitting is done using the functions in the knapsack module. By 959 default, the first fit algorithm is used, but you can also choose 960 from best fit, worst fit and alternate fit. 961 962 @note: If any of your items are larger than the capacity, then it won't 963 be possible to find a solution. In this case, a value error will be 964 raised. 965 966 @param capacity: Maximum capacity among the files in the new list 967 @type capacity: Integer, in bytes 968 969 @param algorithm: Knapsack (fit) algorithm to use 970 @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit" 971 972 @return: List of L{SpanItem} objects. 973 974 @raise ValueError: If the algorithm is invalid. 975 @raise ValueError: If it's not possible to fit some items 976 """ 977 spanItems = [] 978 function = BackupFileList._getKnapsackFunction(algorithm) 979 table = self._getKnapsackTable(capacity) 980 iteration = 0 981 while len(table) > 0: 982 iteration += 1 983 fit = function(table, capacity) 984 if len(fit[0]) == 0: 985 # Should never happen due to validations in _convertToKnapsackForm(), but let's be safe 986 raise ValueError("After iteration %d, unable to add any new items." % iteration) 987 removeKeys(table, fit[0]) 988 utilization = (float(fit[1])/float(capacity))*100.0 989 item = SpanItem(fit[0], fit[1], capacity, utilization) 990 spanItems.append(item) 991 return spanItems
992
993 - def _getKnapsackTable(self, capacity=None):
994 """ 995 Converts the list into the form needed by the knapsack algorithms. 996 @return: Dictionary mapping file name to tuple of (file path, file size). 997 """ 998 table = { } 999 for entry in self: 1000 if os.path.islink(entry): 1001 table[entry] = (entry, 0.0) 1002 elif os.path.isfile(entry): 1003 size = float(os.stat(entry).st_size) 1004 if capacity is not None: 1005 if size > capacity: 1006 raise ValueError("File [%s] cannot fit in capacity %s." % (entry, displayBytes(capacity))) 1007 table[entry] = (entry, size) 1008 return table
1009 1010 @staticmethod
1011 - def _getKnapsackFunction(algorithm):
1012 """ 1013 Returns a reference to the function associated with an algorithm name. 1014 Algorithm name must be one of "first_fit", "best_fit", "worst_fit", "alternate_fit" 1015 @param algorithm: Name of the algorithm 1016 @return: Reference to knapsack function 1017 @raise ValueError: If the algorithm name is unknown. 1018 """ 1019 if algorithm == "first_fit": 1020 return firstFit 1021 elif algorithm == "best_fit": 1022 return bestFit 1023 elif algorithm == "worst_fit": 1024 return worstFit 1025 elif algorithm == "alternate_fit": 1026 return alternateFit 1027 else: 1028 raise ValueError("Algorithm [%s] is invalid." % algorithm)
1029
1030 - def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
1031 """ 1032 Creates a tar file containing the files in the list. 1033 1034 By default, this method will create uncompressed tar files. If you pass 1035 in mode C{'targz'}, then it will create gzipped tar files, and if you 1036 pass in mode C{'tarbz2'}, then it will create bzipped tar files. 1037 1038 The tar file will be created as a GNU tar archive, which enables extended 1039 file name lengths, etc. Since GNU tar is so prevalent, I've decided that 1040 the extra functionality out-weighs the disadvantage of not being 1041 "standard". 1042 1043 If you pass in C{flat=True}, then a "flat" archive will be created, and 1044 all of the files will be added to the root of the archive. So, the file 1045 C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}. 1046 1047 By default, the whole method call fails if there are problems adding any 1048 of the files to the archive, resulting in an exception. Under these 1049 circumstances, callers are advised that they might want to call 1050 L{removeInvalid()} and then attempt to extract the tar file a second 1051 time, since the most common cause of failures is a missing file (a file 1052 that existed when the list was built, but is gone again by the time the 1053 tar file is built). 1054 1055 If you want to, you can pass in C{ignore=True}, and the method will 1056 ignore errors encountered when adding individual files to the archive 1057 (but not errors opening and closing the archive itself). 1058 1059 We'll always attempt to remove the tarfile from disk if an exception will 1060 be thrown. 1061 1062 @note: No validation is done as to whether the entries in the list are 1063 files, since only files or soft links should be in an object like this. 1064 However, to be safe, everything is explicitly added to the tar archive 1065 non-recursively so it's safe to include soft links to directories. 1066 1067 @note: The Python C{tarfile} module, which is used internally here, is 1068 supposed to deal properly with long filenames and links. In my testing, 1069 I have found that it appears to be able to add long really long filenames 1070 to archives, but doesn't do a good job reading them back out, even out of 1071 an archive it created. Fortunately, all Cedar Backup does is add files 1072 to archives. 1073 1074 @param path: Path of tar file to create on disk 1075 @type path: String representing a path on disk 1076 1077 @param mode: Tar creation mode 1078 @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'} 1079 1080 @param ignore: Indicates whether to ignore certain errors. 1081 @type ignore: Boolean 1082 1083 @param flat: Creates "flat" archive by putting all items in root 1084 @type flat: Boolean 1085 1086 @raise ValueError: If mode is not valid 1087 @raise ValueError: If list is empty 1088 @raise ValueError: If the path could not be encoded properly. 1089 @raise TarError: If there is a problem creating the tar file 1090 """ 1091 # pylint: disable=E1101 1092 path = encodePath(path) 1093 if len(self) == 0: raise ValueError("Empty list cannot be used to generate tarfile.") 1094 if mode == 'tar': tarmode = "w:" 1095 elif mode == 'targz': tarmode = "w:gz" 1096 elif mode == 'tarbz2': tarmode = "w:bz2" 1097 else: raise ValueError("Mode [%s] is not valid." % mode) 1098 try: 1099 tar = tarfile.open(path, tarmode) 1100 try: 1101 tar.format = tarfile.GNU_FORMAT 1102 except AttributeError: 1103 tar.posix = False 1104 for entry in self: 1105 try: 1106 if flat: 1107 tar.add(entry, arcname=os.path.basename(entry), recursive=False) 1108 else: 1109 tar.add(entry, recursive=False) 1110 except tarfile.TarError as e: 1111 if not ignore: 1112 raise e 1113 logger.info("Unable to add file [%s]; going on anyway.", entry) 1114 except OSError as e: 1115 if not ignore: 1116 raise tarfile.TarError(e) 1117 logger.info("Unable to add file [%s]; going on anyway.", entry) 1118 tar.close() 1119 except tarfile.ReadError as e: 1120 try: tar.close() 1121 except: pass 1122 if os.path.exists(path): 1123 try: os.remove(path) 1124 except: pass 1125 raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path) 1126 except tarfile.TarError as e: 1127 try: tar.close() 1128 except: pass 1129 if os.path.exists(path): 1130 try: os.remove(path) 1131 except: pass 1132 raise e
1133
1134 - def removeUnchanged(self, digestMap, captureDigest=False):
1135 """ 1136 Removes unchanged entries from the list. 1137 1138 This method relies on a digest map as returned from L{generateDigestMap}. 1139 For each entry in C{digestMap}, if the entry also exists in the current 1140 list I{and} the entry in the current list has the same digest value as in 1141 the map, the entry in the current list will be removed. 1142 1143 This method offers a convenient way for callers to filter unneeded 1144 entries from a list. The idea is that a caller will capture a digest map 1145 from C{generateDigestMap} at some point in time (perhaps the beginning of 1146 the week), and will save off that map using C{pickle} or some other 1147 method. Then, the caller could use this method sometime in the future to 1148 filter out any unchanged files based on the saved-off map. 1149 1150 If C{captureDigest} is passed-in as C{True}, then digest information will 1151 be captured for the entire list before the removal step occurs using the 1152 same rules as in L{generateDigestMap}. The check will involve a lookup 1153 into the complete digest map. 1154 1155 If C{captureDigest} is passed in as C{False}, we will only generate a 1156 digest value for files we actually need to check, and we'll ignore any 1157 entry in the list which isn't a file that currently exists on disk. 1158 1159 The return value varies depending on C{captureDigest}, as well. To 1160 preserve backwards compatibility, if C{captureDigest} is C{False}, then 1161 we'll just return a single value representing the number of entries 1162 removed. Otherwise, we'll return a tuple of C{(entries removed, digest 1163 map)}. The returned digest map will be in exactly the form returned by 1164 L{generateDigestMap}. 1165 1166 @note: For performance reasons, this method actually ends up rebuilding 1167 the list from scratch. First, we build a temporary dictionary containing 1168 all of the items from the original list. Then, we remove items as needed 1169 from the dictionary (which is faster than the equivalent operation on a 1170 list). Finally, we replace the contents of the current list based on the 1171 keys left in the dictionary. This should be transparent to the caller. 1172 1173 @param digestMap: Dictionary mapping file name to digest value. 1174 @type digestMap: Map as returned from L{generateDigestMap}. 1175 1176 @param captureDigest: Indicates that digest information should be captured. 1177 @type captureDigest: Boolean 1178 1179 @return: Results as discussed above (format varies based on arguments) 1180 """ 1181 if captureDigest: 1182 removed = 0 1183 table = {} 1184 captured = {} 1185 for entry in self: 1186 if os.path.isfile(entry) and not os.path.islink(entry): 1187 table[entry] = BackupFileList._generateDigest(entry) 1188 captured[entry] = table[entry] 1189 else: 1190 table[entry] = None 1191 for entry in list(digestMap.keys()): 1192 if entry in table: 1193 if table[entry] is not None: # equivalent to file/link check in other case 1194 digest = table[entry] 1195 if digest == digestMap[entry]: 1196 removed += 1 1197 del table[entry] 1198 logger.debug("Discarded unchanged file [%s].", entry) 1199 self[:] = list(table.keys()) 1200 return (removed, captured) 1201 else: 1202 removed = 0 1203 table = {} 1204 for entry in self: 1205 table[entry] = None 1206 for entry in list(digestMap.keys()): 1207 if entry in table: 1208 if os.path.isfile(entry) and not os.path.islink(entry): 1209 digest = BackupFileList._generateDigest(entry) 1210 if digest == digestMap[entry]: 1211 removed += 1 1212 del table[entry] 1213 logger.debug("Discarded unchanged file [%s].", entry) 1214 self[:] = list(table.keys()) 1215 return removed
1216
1217 1218 ######################################################################## 1219 # PurgeItemList class definition 1220 ######################################################################## 1221 1222 -class PurgeItemList(FilesystemList): # pylint: disable=R0904
1223 1224 ###################### 1225 # Class documentation 1226 ###################### 1227 1228 """ 1229 List of files and directories to be purged. 1230 1231 A PurgeItemList is a L{FilesystemList} containing a list of files and 1232 directories to be purged. On top of the generic functionality provided by 1233 L{FilesystemList}, this class adds functionality to remove items that are 1234 too young to be purged, and to actually remove each item in the list from 1235 the filesystem. 1236 1237 The other main difference is that when you add a directory's contents to a 1238 purge item list, the directory itself is not added to the list. This way, 1239 if someone asks to purge within in C{/opt/backup/collect}, that directory 1240 doesn't get removed once all of the files within it is gone. 1241 """ 1242 1243 ############## 1244 # Constructor 1245 ############## 1246
1247 - def __init__(self):
1248 """Initializes a list with no configured exclusions.""" 1249 FilesystemList.__init__(self)
1250 1251 1252 ############## 1253 # Add methods 1254 ############## 1255
1256 - def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
1257 """ 1258 Adds the contents of a directory to the list. 1259 1260 The path must exist and must be a directory or a link to a directory. 1261 The contents of the directory (but I{not} the directory path itself) will 1262 be recursively added to the list, subject to any exclusions that are in 1263 place. If you only want the directory and its contents to be added, then 1264 pass in C{recursive=False}. 1265 1266 @note: If a directory's absolute path matches an exclude pattern or path, 1267 or if the directory contains the configured ignore file, then the 1268 directory and all of its contents will be recursively excluded from the 1269 list. 1270 1271 @note: If the passed-in directory happens to be a soft link, it will be 1272 recursed. However, the linkDepth parameter controls whether any soft 1273 links I{within} the directory will be recursed. The link depth is 1274 maximum depth of the tree at which soft links should be followed. So, a 1275 depth of 0 does not follow any soft links, a depth of 1 follows only 1276 links within the passed-in directory, a depth of 2 follows the links at 1277 the next level down, etc. 1278 1279 @note: Any invalid soft links (i.e. soft links that point to 1280 non-existent items) will be silently ignored. 1281 1282 @note: The L{excludeDirs} flag only controls whether any given soft link 1283 path itself is added to the list once it has been discovered. It does 1284 I{not} modify any behavior related to directory recursion. 1285 1286 @note: The L{excludeDirs} flag only controls whether any given directory 1287 path itself is added to the list once it has been discovered. It does 1288 I{not} modify any behavior related to directory recursion. 1289 1290 @note: If you call this method I{on a link to a directory} that link will 1291 never be dereferenced (it may, however, be followed). 1292 1293 @param path: Directory path whose contents should be added to the list 1294 @type path: String representing a path on disk 1295 1296 @param recursive: Indicates whether directory contents should be added recursively. 1297 @type recursive: Boolean value 1298 1299 @param addSelf: Ignored in this subclass. 1300 1301 @param linkDepth: Depth of soft links that should be followed 1302 @type linkDepth: Integer value, where zero means not to follow any soft links 1303 1304 @param dereference: Indicates whether soft links, if followed, should be dereferenced 1305 @type dereference: Boolean value 1306 1307 @return: Number of items recursively added to the list 1308 1309 @raise ValueError: If path is not a directory or does not exist. 1310 @raise ValueError: If the path could not be encoded properly. 1311 """ 1312 path = encodePath(path) 1313 path = normalizeDir(path) 1314 return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth, dereference)
1315 1316 1317 ################## 1318 # Utility methods 1319 ################## 1320
1321 - def removeYoungFiles(self, daysOld):
1322 """ 1323 Removes from the list files younger than a certain age (in days). 1324 1325 Any file whose "age" in days is less than (C{<}) the value of the 1326 C{daysOld} parameter will be removed from the list so that it will not be 1327 purged later when L{purgeItems} is called. Directories and soft links 1328 will be ignored. 1329 1330 The "age" of a file is the amount of time since the file was last used, 1331 per the most recent of the file's C{st_atime} and C{st_mtime} values. 1332 1333 @note: Some people find the "sense" of this method confusing or 1334 "backwards". Keep in mind that this method is used to remove items 1335 I{from the list}, not from the filesystem! It removes from the list 1336 those items that you would I{not} want to purge because they are too 1337 young. As an example, passing in C{daysOld} of zero (0) would remove 1338 from the list no files, which would result in purging all of the files 1339 later. I would be happy to make a synonym of this method with an 1340 easier-to-understand "sense", if someone can suggest one. 1341 1342 @param daysOld: Minimum age of files that are to be kept in the list. 1343 @type daysOld: Integer value >= 0. 1344 1345 @return: Number of entries removed 1346 """ 1347 removed = 0 1348 daysOld = int(daysOld) 1349 if daysOld < 0: 1350 raise ValueError("Days old value must be an integer >= 0.") 1351 for entry in self[:]: 1352 if os.path.isfile(entry) and not os.path.islink(entry): 1353 try: 1354 ageInDays = calculateFileAge(entry) 1355 ageInWholeDays = math.floor(ageInDays) 1356 if ageInWholeDays < daysOld: 1357 removed += 1 1358 self.remove(entry) 1359 except OSError: 1360 pass 1361 return removed
1362
1363 - def purgeItems(self):
1364 """ 1365 Purges all items in the list. 1366 1367 Every item in the list will be purged. Directories in the list will 1368 I{not} be purged recursively, and hence will only be removed if they are 1369 empty. Errors will be ignored. 1370 1371 To faciliate easy removal of directories that will end up being empty, 1372 the delete process happens in two passes: files first (including soft 1373 links), then directories. 1374 1375 @return: Tuple containing count of (files, dirs) removed 1376 """ 1377 files = 0 1378 dirs = 0 1379 for entry in self: 1380 if os.path.exists(entry) and (os.path.isfile(entry) or os.path.islink(entry)): 1381 try: 1382 os.remove(entry) 1383 files += 1 1384 logger.debug("Purged file [%s].", entry) 1385 except OSError: 1386 pass 1387 for entry in self: 1388 if os.path.exists(entry) and os.path.isdir(entry) and not os.path.islink(entry): 1389 try: 1390 os.rmdir(entry) 1391 dirs += 1 1392 logger.debug("Purged empty directory [%s].", entry) 1393 except OSError: 1394 pass 1395 return (files, dirs)
1396
1397 1398 ######################################################################## 1399 # Public functions 1400 ######################################################################## 1401 1402 ########################## 1403 # normalizeDir() function 1404 ########################## 1405 1406 -def normalizeDir(path):
1407 """ 1408 Normalizes a directory name. 1409 1410 For our purposes, a directory name is normalized by removing the trailing 1411 path separator, if any. This is important because we want directories to 1412 appear within lists in a consistent way, although from the user's 1413 perspective passing in C{/path/to/dir/} and C{/path/to/dir} are equivalent. 1414 1415 @param path: Path to be normalized. 1416 @type path: String representing a path on disk 1417 1418 @return: Normalized path, which should be equivalent to the original. 1419 """ 1420 if path != os.sep and path[-1:] == os.sep: 1421 return path[:-1] 1422 return path
1423
1424 1425 ############################# 1426 # compareContents() function 1427 ############################# 1428 1429 -def compareContents(path1, path2, verbose=False):
1430 """ 1431 Compares the contents of two directories to see if they are equivalent. 1432 1433 The two directories are recursively compared. First, we check whether they 1434 contain exactly the same set of files. Then, we check to see every given 1435 file has exactly the same contents in both directories. 1436 1437 This is all relatively simple to implement through the magic of 1438 L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix 1439 off the front of each entry in the mapping it generates. This makes our 1440 comparison as simple as creating a list for each path, then generating a 1441 digest map for each path and comparing the two. 1442 1443 If no exception is thrown, the two directories are considered identical. 1444 1445 If the C{verbose} flag is C{True}, then an alternate (but slower) method is 1446 used so that any thrown exception can indicate exactly which file caused the 1447 comparison to fail. The thrown C{ValueError} exception distinguishes 1448 between the directories containing different files, and containing the same 1449 files with differing content. 1450 1451 @note: Symlinks are I{not} followed for the purposes of this comparison. 1452 1453 @param path1: First path to compare. 1454 @type path1: String representing a path on disk 1455 1456 @param path2: First path to compare. 1457 @type path2: String representing a path on disk 1458 1459 @param verbose: Indicates whether a verbose response should be given. 1460 @type verbose: Boolean 1461 1462 @raise ValueError: If a directory doesn't exist or can't be read. 1463 @raise ValueError: If the two directories are not equivalent. 1464 @raise IOError: If there is an unusual problem reading the directories. 1465 """ 1466 try: 1467 path1List = BackupFileList() 1468 path1List.addDirContents(path1) 1469 path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1)) 1470 path2List = BackupFileList() 1471 path2List.addDirContents(path2) 1472 path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2)) 1473 compareDigestMaps(path1Digest, path2Digest, verbose) 1474 except IOError as e: 1475 logger.error("I/O error encountered during consistency check.") 1476 raise e
1477
1478 -def compareDigestMaps(digest1, digest2, verbose=False):
1479 """ 1480 Compares two digest maps and throws an exception if they differ. 1481 1482 @param digest1: First digest to compare. 1483 @type digest1: Digest as returned from BackupFileList.generateDigestMap() 1484 1485 @param digest2: Second digest to compare. 1486 @type digest2: Digest as returned from BackupFileList.generateDigestMap() 1487 1488 @param verbose: Indicates whether a verbose response should be given. 1489 @type verbose: Boolean 1490 1491 @raise ValueError: If the two directories are not equivalent. 1492 """ 1493 if not verbose: 1494 if digest1 != digest2: 1495 raise ValueError("Consistency check failed.") 1496 else: 1497 list1 = UnorderedList(list(digest1.keys())) 1498 list2 = UnorderedList(list(digest2.keys())) 1499 if list1 != list2: 1500 raise ValueError("Directories contain a different set of files.") 1501 for key in list1: 1502 if digest1[key] != digest2[key]: 1503 raise ValueError("File contents for [%s] vary between directories." % key)
1504