Package CedarBackup3 :: Package tools :: Module amazons3
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup3.tools.amazons3

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2014-2016 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python 3 (>= 3.4) 
  29  # Project  : Cedar Backup, release 3 
  30  # Purpose  : Cedar Backup tool to synchronize an Amazon S3 bucket. 
  31  # 
  32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  33   
  34  ######################################################################## 
  35  # Notes 
  36  ######################################################################## 
  37   
  38  """ 
  39  Synchronizes a local directory with an Amazon S3 bucket. 
  40   
  41  No configuration is required; all necessary information is taken from the 
  42  command-line.  The only thing configuration would help with is the path 
  43  resolver interface, and it doesn't seem worth it to require configuration just 
  44  to get that. 
  45   
  46  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  47  """ 
  48   
  49  ######################################################################## 
  50  # Imported modules and constants 
  51  ######################################################################## 
  52   
  53  # System modules 
  54  import sys 
  55  import os 
  56  import logging 
  57  import getopt 
  58  import json 
  59  import warnings 
  60  from functools import total_ordering 
  61  from pathlib import Path 
  62  import chardet 
  63   
  64  # Cedar Backup modules 
  65  from CedarBackup3.release import AUTHOR, EMAIL, VERSION, DATE, COPYRIGHT 
  66  from CedarBackup3.filesystem import FilesystemList 
  67  from CedarBackup3.cli import setupLogging, DEFAULT_LOGFILE, DEFAULT_OWNERSHIP, DEFAULT_MODE 
  68  from CedarBackup3.util import Diagnostics, splitCommandLine, encodePath 
  69  from CedarBackup3.util import executeCommand 
  70   
  71   
  72  ######################################################################## 
  73  # Module-wide constants and variables 
  74  ######################################################################## 
  75   
  76  logger = logging.getLogger("CedarBackup3.log.tools.amazons3") 
  77   
  78  AWS_COMMAND   = [ "aws" ] 
  79   
  80  SHORT_SWITCHES     = "hVbql:o:m:OdsDvw" 
  81  LONG_SWITCHES      = [ 'help', 'version', 'verbose', 'quiet', 
  82                         'logfile=', 'owner=', 'mode=', 
  83                         'output', 'debug', 'stack', 'diagnostics', 
  84                         'verifyOnly', 'ignoreWarnings', ] 
#######################################################################
# Options class
#######################################################################

@total_ordering
class Options(object):

   ######################
   # Class documentation
   ######################

   """
   Class representing command-line options for the cback3-amazons3-sync script.

   The C{Options} class is a Python object representation of the command-line
   options of the cback3-amazons3-sync script.

   The object representation is two-way: a command line string or a list of
   command line arguments can be used to create an C{Options} object, and then
   changes to the object can be propagated back to a list of command-line
   arguments or to a command-line string.  An C{Options} object can even be
   created from scratch programmatically (if you have a need for that).

   There are two main levels of validation in the C{Options} class.  The first
   is field-level validation.  Field-level validation comes into play when a
   given field in an object is assigned to or updated.  We use Python's
   C{property} functionality to enforce specific validations on field values.
   You should expect to catch a C{ValueError} exception when making
   assignments to fields if you are programmatically filling an object.

   The second level of validation is post-completion validation.  Certain
   validations don't make sense until an object representation of options is
   fully "complete".  We don't want these validations to apply all of the time,
   because it would make building up a valid object from scratch a real pain.
   For instance, we might have to do things in the right order to keep from
   throwing exceptions, etc.

   All of these post-completion validations are encapsulated in the
   L{Options.validate} method.  This method can be called at any time by a
   client, and will always be called immediately after creating a C{Options}
   object from a command line and before exporting a C{Options} object back to
   a command line.  This way, we get acceptable ease-of-use but we also don't
   accept or emit invalid command lines.

   @note: Defining C{__eq__} without C{__hash__} makes instances unhashable,
   which is appropriate because they are mutable.

   @sort: __init__, __repr__, __str__, __cmp__, __eq__, __lt__, __gt__
   """

   ##############
   # Constructor
   ##############

   def __init__(self, argumentList=None, argumentString=None, validate=True):
      """
      Initializes an options object.

      If you initialize the object without passing either C{argumentList} or
      C{argumentString}, the object will be empty and will be invalid until it
      is filled in properly.  In that case nothing is parsed and
      L{Options.validate} is not called, regardless of C{validate}.

      No reference to the original arguments is saved off by this class.  Once
      the data has been parsed (successfully or not) this original information
      is discarded.

      The argument list is assumed to be a list of arguments, not including the
      name of the command, something like C{sys.argv[1:]}.  If you pass
      C{sys.argv} instead, things are not going to work.

      The argument string will be parsed into an argument list by the
      L{util.splitCommandLine} function (see the documentation for that
      function for some important notes about its limitations).  There is an
      assumption that the resulting list will be equivalent to C{sys.argv[1:]},
      just like C{argumentList}.

      Unless the C{validate} argument is C{False}, the L{Options.validate}
      method will be called after successfully parsing any passed-in command
      line.  Keep in mind that even if C{validate} is C{False}, it might not be
      possible to parse the passed-in command line, so an exception might still
      be raised.

      @note: The command line format is specified by the L{_usage} function.
      Call L{_usage} to see a usage statement for the cback3-amazons3-sync script.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid command line arguments.

      @param argumentList: Command line for a program.
      @type argumentList: List of arguments, i.e. C{sys.argv[1:]}

      @param argumentString: Command line for a program.
      @type argumentString: String, i.e. "--verbose /home/myuser s3://example.com-backup/myuser"

      @param validate: Validate the command line after parsing it.
      @type validate: Boolean true/false.

      @raise getopt.GetoptError: If the command-line arguments could not be parsed.
      @raise ValueError: If the command-line arguments are invalid, or if both
      C{argumentList} and C{argumentString} are supplied.
      """
      self._help = False
      self._version = False
      self._verbose = False
      self._quiet = False
      self._logfile = None
      self._owner = None
      self._mode = None
      self._output = False
      self._debug = False
      self._stacktrace = False
      self._diagnostics = False
      self._verifyOnly = False
      self._ignoreWarnings = False
      self._sourceDir = None
      self._s3BucketUrl = None
      if argumentList is not None and argumentString is not None:
         raise ValueError("Use either argumentList or argumentString, but not both.")
      if argumentString is not None:
         argumentList = splitCommandLine(argumentString)
      if argumentList is not None:
         self._parseArgumentList(argumentList)
         if validate:
            self.validate()


   #########################
   # String representations
   #########################

   def __repr__(self):
      """
      Official string representation for class instance.
      This is the (unvalidated) command-line string for the options.
      """
      return self.buildArgumentString(validate=False)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()


   #############################
   # Standard comparison method
   #############################

   def __eq__(self, other):
      """Equals operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) == 0

   def __lt__(self, other):
      """Less-than operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) < 0

   def __gt__(self, other):
      """Greater-than operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) > 0

   def __cmp__(self, other):
      """
      Original Python 2 comparison operator.

      Fields are compared one at a time in a fixed order; the first field that
      differs decides the result.  Flags compare as booleans, the mode compares
      as an integer (C{None} counts as 0), and everything else compares by its
      string form (C{None} counts as the empty string).

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1

      def asFlag(value):
         return value

      def asText(value):
         return str(value or "")

      def asNumber(value):
         return int(value or 0)

      # The order of this table is the comparison precedence.
      precedence = (
         ("help", asFlag),
         ("version", asFlag),
         ("verbose", asFlag),
         ("quiet", asFlag),
         ("logfile", asText),
         ("owner", asText),
         ("mode", asNumber),
         ("output", asFlag),
         ("debug", asFlag),
         ("stacktrace", asFlag),
         ("diagnostics", asFlag),
         ("verifyOnly", asFlag),
         ("ignoreWarnings", asFlag),
         ("sourceDir", asText),
         ("s3BucketUrl", asText),
      )
      for name, sortKey in precedence:
         mine = getattr(self, name)
         theirs = getattr(other, name)
         if mine != theirs:
            # Values that differ but whose sort keys do not order as "less"
            # are reported as greater, exactly as the unrolled form did.
            return -1 if sortKey(mine) < sortKey(theirs) else 1
      return 0


   #############
   # Properties
   #############

   def _setHelp(self, value):
      """
      Property target used to set the help flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._help = bool(value)

   def _getHelp(self):
      """
      Property target used to get the help flag.
      """
      return self._help

   def _setVersion(self, value):
      """
      Property target used to set the version flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._version = bool(value)

   def _getVersion(self):
      """
      Property target used to get the version flag.
      """
      return self._version

   def _setVerbose(self, value):
      """
      Property target used to set the verbose flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verbose = bool(value)

   def _getVerbose(self):
      """
      Property target used to get the verbose flag.
      """
      return self._verbose

   def _setQuiet(self, value):
      """
      Property target used to set the quiet flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._quiet = bool(value)

   def _getQuiet(self):
      """
      Property target used to get the quiet flag.
      """
      return self._quiet

   def _setLogfile(self, value):
      """
      Property target used to set the logfile parameter.
      The stored value is whatever C{encodePath} returns for C{value}
      (C{encodePath} is also called when C{value} is C{None}).
      @raise ValueError: If the value is empty or cannot be encoded properly.
      """
      if value is not None and len(value) < 1:
         raise ValueError("The logfile parameter must be a non-empty string.")
      self._logfile = encodePath(value)

   def _getLogfile(self):
      """
      Property target used to get the logfile parameter.
      """
      return self._logfile

   def _setOwner(self, value):
      """
      Property target used to set the owner parameter.
      If not C{None}, the owner must be a C{(user,group)} tuple or list.
      Strings (and inherited children of strings) are explicitly disallowed.
      The value will be normalized to a tuple.
      @raise ValueError: If the value is not valid.
      """
      if value is None:
         self._owner = None
         return
      if isinstance(value, str) or len(value) != 2:
         raise ValueError("Must specify user and group tuple for owner parameter.")
      user, group = value[0], value[1]
      if len(user) < 1 or len(group) < 1:
         raise ValueError("User and group tuple values must be non-empty strings.")
      self._owner = (user, group)

   def _getOwner(self):
      """
      Property target used to get the owner parameter.
      The parameter is a tuple of C{(user, group)}.
      """
      return self._owner

   def _setMode(self, value):
      """
      Property target used to set the mode parameter.
      Strings are interpreted as octal (base 8); any other value is passed
      through C{int()}.  The stored value is an integer, or C{None}.
      @raise ValueError: If the value cannot be converted or is negative.
      """
      if value is None:
         self._mode = None
         return
      try:
         mode = int(value, 8) if isinstance(value, str) else int(value)
      except (TypeError, ValueError):
         # int() signals a malformed string with ValueError, not TypeError;
         # catch both so the caller always sees the message below.
         raise ValueError("Mode must be an octal integer >= 0, i.e. 644.")
      if mode < 0:
         raise ValueError("Mode must be an octal integer >= 0. i.e. 644.")
      self._mode = mode

   def _getMode(self):
      """
      Property target used to get the mode parameter.
      """
      return self._mode

   def _setOutput(self, value):
      """
      Property target used to set the output flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._output = bool(value)

   def _getOutput(self):
      """
      Property target used to get the output flag.
      """
      return self._output

   def _setDebug(self, value):
      """
      Property target used to set the debug flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._debug = bool(value)

   def _getDebug(self):
      """
      Property target used to get the debug flag.
      """
      return self._debug

   def _setStacktrace(self, value):
      """
      Property target used to set the stacktrace flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._stacktrace = bool(value)

   def _getStacktrace(self):
      """
      Property target used to get the stacktrace flag.
      """
      return self._stacktrace

   def _setDiagnostics(self, value):
      """
      Property target used to set the diagnostics flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._diagnostics = bool(value)

   def _getDiagnostics(self):
      """
      Property target used to get the diagnostics flag.
      """
      return self._diagnostics

   def _setVerifyOnly(self, value):
      """
      Property target used to set the verifyOnly flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verifyOnly = bool(value)

   def _getVerifyOnly(self):
      """
      Property target used to get the verifyOnly flag.
      """
      return self._verifyOnly

   def _setIgnoreWarnings(self, value):
      """
      Property target used to set the ignoreWarnings flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._ignoreWarnings = bool(value)

   def _getIgnoreWarnings(self):
      """
      Property target used to get the ignoreWarnings flag.
      """
      return self._ignoreWarnings

   def _setSourceDir(self, value):
      """
      Property target used to set the sourceDir parameter.
      @raise ValueError: If the value is empty.
      """
      if value is not None and len(value) < 1:
         raise ValueError("The sourceDir parameter must be a non-empty string.")
      self._sourceDir = value

   def _getSourceDir(self):
      """
      Property target used to get the sourceDir parameter.
      """
      return self._sourceDir

   def _setS3BucketUrl(self, value):
      """
      Property target used to set the s3BucketUrl parameter.
      @raise ValueError: If the value is empty.
      """
      if value is not None and len(value) < 1:
         raise ValueError("The s3BucketUrl parameter must be a non-empty string.")
      self._s3BucketUrl = value

   def _getS3BucketUrl(self):
      """
      Property target used to get the s3BucketUrl parameter.
      """
      return self._s3BucketUrl

   help = property(_getHelp, _setHelp, None, "Command-line help (C{-h,--help}) flag.")
   version = property(_getVersion, _setVersion, None, "Command-line version (C{-V,--version}) flag.")
   verbose = property(_getVerbose, _setVerbose, None, "Command-line verbose (C{-b,--verbose}) flag.")
   quiet = property(_getQuiet, _setQuiet, None, "Command-line quiet (C{-q,--quiet}) flag.")
   logfile = property(_getLogfile, _setLogfile, None, "Command-line logfile (C{-l,--logfile}) parameter.")
   owner = property(_getOwner, _setOwner, None, "Command-line owner (C{-o,--owner}) parameter, as tuple C{(user,group)}.")
   mode = property(_getMode, _setMode, None, "Command-line mode (C{-m,--mode}) parameter.")
   output = property(_getOutput, _setOutput, None, "Command-line output (C{-O,--output}) flag.")
   debug = property(_getDebug, _setDebug, None, "Command-line debug (C{-d,--debug}) flag.")
   stacktrace = property(_getStacktrace, _setStacktrace, None, "Command-line stacktrace (C{-s,--stack}) flag.")
   diagnostics = property(_getDiagnostics, _setDiagnostics, None, "Command-line diagnostics (C{-D,--diagnostics}) flag.")
   verifyOnly = property(_getVerifyOnly, _setVerifyOnly, None, "Command-line verifyOnly (C{-v,--verifyOnly}) flag.")
   ignoreWarnings = property(_getIgnoreWarnings, _setIgnoreWarnings, None, "Command-line ignoreWarnings (C{-w,--ignoreWarnings}) flag.")
   sourceDir = property(_getSourceDir, _setSourceDir, None, "Command-line sourceDir, source of sync.")
   s3BucketUrl = property(_getS3BucketUrl, _setS3BucketUrl, None, "Command-line s3BucketUrl, target of sync.")


   ##################
   # Utility methods
   ##################

   def validate(self):
      """
      Validates command-line options represented by the object.

      Unless C{--help}, C{--version} or C{--diagnostics} is set, both the
      source directory and the S3 bucket URL must be specified.  Other
      validations (as for allowed values for particular options) will be
      taken care of at assignment time by the properties functionality.

      @note: The command line format is specified by the L{_usage} function.
      Call L{_usage} to see a usage statement for the cback3-amazons3-sync script.

      @raise ValueError: If one of the validations fails.
      """
      if self.help or self.version or self.diagnostics:
         return
      if self.sourceDir is None or self.s3BucketUrl is None:
         raise ValueError("Source directory and S3 bucket URL are both required.")

   def buildArgumentList(self, validate=True):
      """
      Extracts options into a list of command line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument list.  Besides that, the argument list is normalized to use the
      long option names (i.e. --version rather than -V).  The resulting list
      will be suitable for passing back to the constructor in the
      C{argumentList} parameter.  Unlike L{buildArgumentString}, string
      arguments are not quoted here, because there is no need for it.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called against the options before extracting the command
      line.  If the options are not valid, then an argument list will not be
      extracted.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to extract an
      invalid command line.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: List representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      argumentList = []
      if self.help:
         argumentList.append("--help")
      if self.version:
         argumentList.append("--version")
      if self.verbose:
         argumentList.append("--verbose")
      if self.quiet:
         argumentList.append("--quiet")
      if self.logfile is not None:
         argumentList.extend(["--logfile", self.logfile])
      if self.owner is not None:
         argumentList.extend(["--owner", "%s:%s" % (self.owner[0], self.owner[1])])
      if self.mode is not None:
         argumentList.extend(["--mode", "%o" % self.mode])  # emitted in octal, as parsed
      if self.output:
         argumentList.append("--output")
      if self.debug:
         argumentList.append("--debug")
      if self.stacktrace:
         argumentList.append("--stack")
      if self.diagnostics:
         argumentList.append("--diagnostics")
      if self.verifyOnly:
         argumentList.append("--verifyOnly")
      if self.ignoreWarnings:
         argumentList.append("--ignoreWarnings")
      # Positional arguments always go last: sourceDir, then s3BucketUrl.
      if self.sourceDir is not None:
         argumentList.append(self.sourceDir)
      if self.s3BucketUrl is not None:
         argumentList.append(self.s3BucketUrl)
      return argumentList

   def buildArgumentString(self, validate=True):
      """
      Extracts options into a string of command-line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument string.  Besides that, the argument string is normalized to use
      the long option names (i.e. --version rather than -V) and to quote all
      string arguments with double quotes (C{"}).  Embedded double quotes in a
      value are not escaped.  Every emitted argument is followed by a single
      space, so a non-empty result ends with a trailing space.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called against the options before extracting the command
      line.  If the options are not valid, then an argument string will not be
      extracted.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to extract an
      invalid command line.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: String representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      pieces = []
      if self.help:
         pieces.append("--help ")
      if self.version:
         pieces.append("--version ")
      if self.verbose:
         pieces.append("--verbose ")
      if self.quiet:
         pieces.append("--quiet ")
      if self.logfile is not None:
         pieces.append("--logfile \"%s\" " % self.logfile)
      if self.owner is not None:
         pieces.append("--owner \"%s:%s\" " % (self.owner[0], self.owner[1]))
      if self.mode is not None:
         pieces.append("--mode %o " % self.mode)
      if self.output:
         pieces.append("--output ")
      if self.debug:
         pieces.append("--debug ")
      if self.stacktrace:
         pieces.append("--stack ")
      if self.diagnostics:
         pieces.append("--diagnostics ")
      if self.verifyOnly:
         pieces.append("--verifyOnly ")
      if self.ignoreWarnings:
         pieces.append("--ignoreWarnings ")
      if self.sourceDir is not None:
         pieces.append("\"%s\" " % self.sourceDir)
      if self.s3BucketUrl is not None:
         pieces.append("\"%s\" " % self.s3BucketUrl)
      return "".join(pieces)

   def _parseArgumentList(self, argumentList):
      """
      Internal method to parse a list of command-line arguments.

      Most of the validation we do here has to do with whether the arguments
      can be parsed and whether any values which exist are valid.  We don't do
      any validation as to whether required elements exist or whether elements
      exist in the proper combination (instead, that's the job of the
      L{validate} method).

      For any of the options which supply parameters, if the option is
      duplicated with long and short switches (i.e. C{-l} and a C{--logfile})
      then the long switch is used.  If the same option is duplicated with the
      same switch (long or short), then the last entry on the command line is
      used.

      The two positional arguments are only taken when exactly two are
      present; any other count leaves C{sourceDir} and C{s3BucketUrl} untouched
      so that L{validate} can report the problem.  When two are present, an
      invalid value (for instance an empty string) raises C{ValueError} rather
      than being silently dropped.

      @param argumentList: List of arguments to a command.
      @type argumentList: List of arguments to a command, i.e. C{sys.argv[1:]}

      @raise getopt.GetoptError: If the switches cannot be parsed.
      @raise ValueError: If a parsed value is rejected by its property.
      """
      opts, remaining = getopt.getopt(argumentList, SHORT_SWITCHES, LONG_SWITCHES)
      switches = dict(opts)  # later duplicates of the same switch win

      def given(shortSwitch, longSwitch):
         return shortSwitch in switches or longSwitch in switches

      if given("-h", "--help"):
         self.help = True
      if given("-V", "--version"):
         self.version = True
      if given("-b", "--verbose"):
         self.verbose = True
      if given("-q", "--quiet"):
         self.quiet = True
      # For valued switches the short form is applied first so that the long
      # form, when also present, overrides it.
      if "-l" in switches:
         self.logfile = switches["-l"]
      if "--logfile" in switches:
         self.logfile = switches["--logfile"]
      if "-o" in switches:
         self.owner = switches["-o"].split(":", 1)
      if "--owner" in switches:
         self.owner = switches["--owner"].split(":", 1)
      if "-m" in switches:
         self.mode = switches["-m"]
      if "--mode" in switches:
         self.mode = switches["--mode"]
      if given("-O", "--output"):
         self.output = True
      if given("-d", "--debug"):
         self.debug = True
      if given("-s", "--stack"):
         self.stacktrace = True
      if given("-D", "--diagnostics"):
         self.diagnostics = True
      if given("-v", "--verifyOnly"):
         self.verifyOnly = True
      if given("-w", "--ignoreWarnings"):
         self.ignoreWarnings = True
      # An explicit count check replaces a blanket "except ValueError: pass",
      # which also hid ValueErrors raised by the property setters themselves.
      if len(remaining) == 2:
         self.sourceDir, self.s3BucketUrl = remaining
822
#######################################################################
# Public functions
#######################################################################

#################
# cli() function
#################

def cli():
   """
   Implements the command-line interface for the C{cback3-amazons3-sync} script.

   Essentially, this is the "main routine" for the cback3-amazons3-sync script.  It does
   all of the argument processing for the script, and then also implements the
   tool functionality.

   This function looks pretty similar to C{CedarBackup3.cli.cli()}.  It's not
   easy to refactor this code to make it reusable and also readable, so I've
   decided to just live with the duplication.

   A different error code is returned for each type of failure:

      - C{1}: The Python interpreter version is < 3.4
      - C{2}: Error processing command-line arguments
      - C{3}: Error configuring logging
      - C{5}: Backup was interrupted with a CTRL-C or similar
      - C{6}: Error executing other parts of the script

   C{0} is returned on success, and also after handling C{--help},
   C{--version} or C{--diagnostics}, each of which exits before logging is
   configured or any sync is attempted.

   When the C{--stack} option is set, exceptions raised while configuring
   logging or executing the action are allowed to propagate (so that a stack
   trace is produced) instead of being converted into codes 3, 5 or 6.

   @note: This script uses print rather than logging to the INFO level, because
   it is interactive.  Underlying Cedar Backup functionality uses the logging
   mechanism exclusively.

   @return: Error code as described above.
   """
   # The "except ... as e" syntax used below cannot be parsed by interpreters
   # old enough to lack sys.version_info, so the attribute can be compared
   # directly; no exception guard is needed around this check.
   if tuple(sys.version_info[:2]) < (3, 4):
      sys.stderr.write("Python 3 version 3.4 or greater required.\n")
      return 1

   try:
      options = Options(argumentList=sys.argv[1:])
   except Exception as e:
      _usage()
      sys.stderr.write(" *** Error: %s\n" % e)
      return 2

   if options.help:
      _usage()
      return 0
   if options.version:
      _version()
      return 0
   if options.diagnostics:
      _diagnostics()
      return 0

   if options.stacktrace:
      # Let any failure surface with its full stack trace.
      logfile = setupLogging(options)
   else:
      try:
         logfile = setupLogging(options)
      except Exception as e:
         sys.stderr.write("Error setting up logging: %s\n" % e)
         return 3

   logger.info("Cedar Backup Amazon S3 sync run started.")
   logger.info("Options were [%s]", options)
   logger.info("Logfile is [%s]", logfile)
   Diagnostics().logDiagnostics(method=logger.info)

   if options.stacktrace:
      # Let any failure (including CTRL-C) surface with its full stack trace.
      _executeAction(options)
   else:
      try:
         _executeAction(options)
      except KeyboardInterrupt:
         logger.error("Backup interrupted.")
         logger.info("Cedar Backup Amazon S3 sync run completed with status 5.")
         return 5
      except Exception as e:
         logger.error("Error executing backup: %s", e)
         logger.info("Cedar Backup Amazon S3 sync run completed with status 6.")
         return 6

   logger.info("Cedar Backup Amazon S3 sync run completed with status 0.")
   return 0
914
#######################################################################
# Utility functions
#######################################################################

####################
# _usage() function
####################

def _usage(fd=sys.stderr):
   """
   Writes the usage/help listing for the cback3-amazons3-sync script.

   The listing is held as an ordered table of lines and emitted with one
   C{fd.write()} call per line.

   @param fd: File-like object (anything with a C{write()} method) that receives the listing.
   @note: The C{fd} is used rather than C{print} to facilitate unit testing.
   """
   listing = (
      "\n",
      " Usage: cback3-amazons3-sync [switches] sourceDir s3bucketUrl\n",
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      "\n",
      " This Cedar Backup utility synchronizes a local directory to an Amazon S3\n",
      " bucket. After the sync is complete, a validation step is taken. An\n",
      " error is reported if the contents of the bucket do not match the\n",
      " source directory, or if the indicated size for any file differs.\n",
      " This tool is a wrapper over the AWS CLI command-line tool.\n",
      "\n",
      " The following arguments are required:\n",
      "\n",
      " sourceDir The local source directory on disk (must exist)\n",
      " s3BucketUrl The URL to the target Amazon S3 bucket\n",
      "\n",
      " The following switches are accepted:\n",
      "\n",
      " -h, --help Display this usage/help listing\n",
      " -V, --version Display version information\n",
      " -b, --verbose Print verbose output as well as logging to disk\n",
      " -q, --quiet Run quietly (display no output to the screen)\n",
      " -l, --logfile Path to logfile (default: %s)\n" % DEFAULT_LOGFILE,
      " -o, --owner Logfile ownership, user:group (default: %s:%s)\n" % (DEFAULT_OWNERSHIP[0], DEFAULT_OWNERSHIP[1]),
      " -m, --mode Octal logfile permissions mode (default: %o)\n" % DEFAULT_MODE,
      " -O, --output Record some sub-command (i.e. aws) output to the log\n",
      " -d, --debug Write debugging information to the log (implies --output)\n",
      " -s, --stack Dump Python stack trace instead of swallowing exceptions\n", # exactly 80 characters in width!
      " -D, --diagnostics Print runtime diagnostics to the screen and exit\n",
      " -v, --verifyOnly Only verify the S3 bucket contents, do not make changes\n",
      " -w, --ignoreWarnings Ignore warnings about problematic filename encodings\n",
      "\n",
      " Typical usage would be something like:\n",
      "\n",
      " cback3-amazons3-sync /home/myuser s3://example.com-backup/myuser\n",
      "\n",
      " This will sync the contents of /home/myuser into the indicated bucket.\n",
      "\n",
   )
   for text in listing:
      fd.write(text)
968
######################
# _version() function
######################

def _version(fd=sys.stdout):
   """
   Writes the version banner for the cback3-amazons3-sync script.

   The banner is assembled as an ordered table of lines and emitted with one
   C{fd.write()} call per line.

   @param fd: File-like object (anything with a C{write()} method) that receives the banner.
   @note: The C{fd} is used rather than C{print} to facilitate unit testing.
   """
   banner = (
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      " Included with Cedar Backup version %s, released %s.\n" % (VERSION, DATE),
      "\n",
      " Copyright (c) %s %s <%s>.\n" % (COPYRIGHT, AUTHOR, EMAIL),
      " See CREDITS for a list of included code and other contributors.\n",
      " This is free software; there is NO warranty. See the\n",
      " GNU General Public License version 2 for copying conditions.\n",
      "\n",
      " Use the --help option for usage information.\n",
      "\n",
   )
   for text in banner:
      fd.write(text)
991
##########################
# _diagnostics() function
##########################

def _diagnostics(fd=sys.stdout):
   """
   Writes runtime diagnostics information.

   A short heading is written first, then C{Diagnostics.printDiagnostics()}
   writes its own output to the same target, followed by a closing blank line.

   @param fd: File-like object (anything with a C{write()} method) that receives the output.
   @note: The C{fd} is used rather than C{print} to facilitate unit testing.
   """
   for heading in ("\n", "Diagnostics:\n", "\n"):
      fd.write(heading)
   reporter = Diagnostics()
   reporter.printDiagnostics(fd=fd, prefix=" ")
   fd.write("\n")
1008
############################
# _executeAction() function
############################

def _executeAction(options):
   """
   Runs the cback3-amazons3-sync workflow from start to finish.

   The steps, in order: build the list of source files, check their filename
   encodings (skipped when C{options.ignoreWarnings} is set), synchronize the
   bucket (skipped when C{options.verifyOnly} is set), and finally verify the
   bucket contents against the source files.

   @param options: Program command-line options.
   @type options: Options object.

   @raise Exception: Under many generic error conditions
   """
   localDir = options.sourceDir
   files = _buildSourceFiles(localDir)

   checkEncodings = not options.ignoreWarnings
   if checkEncodings:
      _checkSourceFiles(localDir, files)

   uploadFirst = not options.verifyOnly
   if uploadFirst:
      _synchronizeBucket(localDir, options.s3BucketUrl)

   # Verification always runs, whether or not anything was uploaded
   _verifyBucketContents(localDir, files, options.s3BucketUrl)
1029
################################
# _buildSourceFiles() function
################################

def _buildSourceFiles(sourceDir):
   """
   Collect the contents of a source directory into a filesystem list.
   @param sourceDir: Local source directory
   @return: FilesystemList with contents of source directory
   @raise ValueError: If C{sourceDir} is not an existing directory
   """
   if os.path.isdir(sourceDir):
      contents = FilesystemList()
      contents.addDirContents(sourceDir)
      return contents
   raise ValueError("Source directory does not exist on disk.")
1046
###############################
# _checkSourceFiles() function
###############################

def _checkSourceFiles(sourceDir, sourceFiles):
   """
   Check source files, trying to guess which ones will have encoding problems.

   Each entry is converted to bytes and passed to C{chardet.detect()}.  The
   bytes are then decoded twice: once with the detected encoding and once with
   the encoding reported by C{Diagnostics().encoding}.  An entry is logged as
   inconsistent when either decode fails or when the two results differ.

   @param sourceDir: Local source directory (not used by the check itself)
   @param sourceFiles: Iterable of source file paths to check
   @raises ValueError: If a problem file is found
   @see U{http://opensourcehacker.com/2011/09/16/fix-linux-filename-encodings-with-python/}
   @see U{http://serverfault.com/questions/82821/how-to-tell-the-language-encoding-of-a-filename-on-linux}
   @see U{http://randysofia.com/2014/06/06/aws-cli-and-your-locale/}
   """
   with warnings.catch_warnings():
      encoding = Diagnostics().encoding

      # Note: this was difficult to fully test.  As of the original Python 2
      # implementation, I had a bunch of files on disk that had inconsistent
      # encodings, so I was able to prove that the check warned about these
      # files initially, and then didn't warn after I fixed them.  I didn't
      # save off those files for a unit test (ugh) so by the time of the Python
      # 3 conversion -- which is subtly different because of the different way
      # Python 3 handles unicode strings -- I had to contrive some tests.  I
      # think the tests I wrote are consistent with the earlier problems, and I
      # do get the same result for those tests in both CedarBackup 2 and Cedar
      # Backup 3.  However, I can't be certain the implementation is
      # equivalent.  If someone runs into a situation that this code doesn't
      # handle, you may need to revisit the implementation.

      failed = False
      for entry in sourceFiles:
         path = bytes(Path(entry))
         result = chardet.detect(path)
         try:
            # The detected encoding is only a guess: chardet may report no
            # encoding at all (None), one with no matching codec, or one that
            # cannot actually decode these bytes.  Each of those is an
            # inconsistent filename rather than a fatal error, so the
            # detected-encoding decode is guarded along with the other one.
            source = path.decode(result["encoding"])
            target = path.decode(encoding)
            consistent = (source == target)
         except Exception:
            consistent = False
         if not consistent:
            logger.error("Inconsistent encoding for [%s]: got %s, but need %s", path, result["encoding"], encoding)
            failed = True

      if not failed:
         logger.info("Completed checking source filename encoding (no problems found).")
      else:
         logger.error("Some filenames have inconsistent encodings and will likely cause sync problems.")
         logger.error("You may be able to fix this by setting a more sensible locale in your environment.")
         logger.error("Aternately, you can rename the problem files to be valid in the indicated locale.")
         logger.error("To ignore this warning and proceed anyway, use --ignoreWarnings")
         raise ValueError("Some filenames have inconsistent encodings and will likely cause sync problems.")
1100
################################
# _synchronizeBucket() function
################################

def _synchronizeBucket(sourceDir, s3BucketUrl):
   """
   Push the contents of a local directory up to an Amazon S3 bucket.

   Runs the AWS CLI C{s3 sync} sub-command with C{--delete} and
   C{--recursive}, without capturing its output.

   @param sourceDir: Local source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the AWS CLI command returns a non-zero status
   """
   logger.info("Synchronizing local source directory up to Amazon S3.")
   command = ["s3", "sync", sourceDir, s3BucketUrl, "--delete", "--recursive"]
   status = executeCommand(AWS_COMMAND, command, returnOutput=False)[0]
   if status == 0:
      return
   raise IOError("Error [%d] calling AWS CLI synchronize bucket." % status)
1117
###################################
# _verifyBucketContents() function
###################################

def _relativeName(name, root):
   """
   Reduce a local path or S3 key to its name relative to a root.

   Only a leading occurrence of C{root} is removed; text matching C{root}
   further along in the name is left alone.  Any "/" characters remaining at
   the front are then dropped, so that names compare equal regardless of
   whether the root was given with a trailing slash (or was empty).

   @param name: Full local path or S3 key
   @param root: Leading part to remove (source directory or S3 prefix)
   @return: Name relative to the root, with no leading "/"
   """
   if root and name.startswith(root):
      name = name[len(root):]
   return name.lstrip("/")


def _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl):
   """
   Verify that a source directory is equivalent to an Amazon S3 bucket.

   The bucket is listed with the AWS CLI C{s3api list-objects} sub-command,
   and every regular file in C{sourceFiles} must be present in that listing
   with the same size in bytes.  Objects that exist only in the bucket are
   not reported.

   @param sourceDir: Local source directory
   @param sourceFiles: Filesystem list containing contents of source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the AWS CLI command returns a non-zero status
   @raise ValueError: If a source file is missing from the bucket or differs in size
   """
   # As of this writing, the documentation for the S3 API that we're using
   # below says that up to 1000 elements at a time are returned, and that we
   # have to manually handle pagination by looking for the IsTruncated element.
   # However, in practice, this is not true.  I have been testing with
   # "aws-cli/1.4.4 Python/2.7.3 Linux/3.2.0-4-686-pae", installed through PIP.
   # No matter how many items exist in my bucket and prefix, I get back a
   # single JSON result.  I've tested with buckets containing nearly 6000
   # elements.
   #
   # If I turn on debugging, it's clear that underneath, something in the API
   # is executing multiple list-object requests against AWS, and stitching
   # results together to give me back the final JSON result.  The debug output
   # clearly includes multiple requests, and each XML response (except for the
   # final one) contains <IsTruncated>true</IsTruncated>.
   #
   # This feature is not mentioned in the official changelog for any of the
   # releases going back to 1.0.0.  It appears to happen in the botocore
   # library, but I'll admit I can't actually find the code that implements it.
   # For now, all I can do is rely on this behavior and hope that the
   # documentation is out-of-date.  I'm not going to write code that tries to
   # parse out IsTruncated if I can't actually test that code.

   # partition() rather than split(): a URL naming only the bucket has no "/"
   # after the bucket name, and must yield an empty prefix instead of failing
   # to unpack.
   (bucket, _, prefix) = s3BucketUrl.replace("s3://", "").partition("/")

   query = "Contents[].{Key: Key, Size: Size}"
   args = [ "s3api", "list-objects", "--bucket", bucket, "--prefix", prefix, "--query", query, ]
   (result, data) = executeCommand(AWS_COMMAND, args, returnOutput=True)
   if result != 0:
      raise IOError("Error [%d] calling AWS CLI verify bucket contents." % result)

   # When nothing exists under the prefix the query evaluates to JSON null
   # (or the command may print nothing at all); treat both as an empty
   # listing so that every source file is reported as missing below.
   output = "".join(data).strip()
   listing = json.loads(output) if output else None
   if not listing:
      listing = [ ]

   contents = { }
   for entry in listing:
      contents[_relativeName(entry["Key"], prefix)] = int(entry["Size"])

   failed = False
   for entry in sourceFiles:
      if not os.path.isfile(entry):
         continue  # only regular files are compared against the bucket
      key = _relativeName(entry, sourceDir)
      size = int(os.stat(entry).st_size)
      if key not in contents:
         logger.error("File was apparently not uploaded: [%s]", entry)
         failed = True
      elif size != contents[key]:
         logger.error("File size differs [%s]: expected %s bytes but got %s bytes", entry, size, contents[key])
         failed = True

   if not failed:
      logger.info("Completed verifying Amazon S3 bucket contents (no problems found).")
   else:
      logger.error("There were differences between source directory and target S3 bucket.")
      raise ValueError("There were differences between source directory and target S3 bucket.")
#########################################################################
# Main routine
########################################################################

# When run as a script, hand the status returned by cli() to the shell
if __name__ == "__main__":
   status = cli()
   sys.exit(status)