|
19 | 19 | from dateutil.tz import tzlocal
|
20 | 20 |
|
21 | 21 | from awscli.compat import queue
|
| 22 | +from awscli.customizations.s3.fileinfo import VersionedFileInfo |
22 | 23 | from awscli.customizations.s3.utils import (
|
23 | 24 | EPOCH_TIME,
|
24 | 25 | BucketLister,
|
25 | 26 | create_warning,
|
26 | 27 | find_bucket_key,
|
27 | 28 | find_dest_path_comp_key,
|
28 | 29 | get_file_stat,
|
| 30 | + split_s3_bucket_key, |
29 | 31 | )
|
30 | 32 |
|
31 | 33 | _open = open
|
@@ -406,3 +408,109 @@ def _list_single_object(self, s3_path):
|
406 | 408 | response['LastModified'] = last_update.astimezone(tzlocal())
|
407 | 409 | response['ETag'] = response.pop('ETag', None)
|
408 | 410 | return s3_path, response
|
| 411 | + |
| 412 | + |
class VersionedFileGenerator:
    """
    Generate ``VersionedFileInfo`` objects for every version of S3 objects.

    Versions are listed through ``BucketLister.list_object_versions`` and
    each listed entry is wrapped in a ``VersionedFileInfo`` that carries the
    entry's version ID alongside the usual source/destination details.
    """

    def __init__(
        self,
        client,
        operation_name,
        follow_symlinks=True,
        page_size=None,
        result_queue=None,
        request_parameters=None,
    ):
        """
        Initialize a new VersionedFileGenerator.

        :param client: The S3 client; also handed to ``BucketLister``.
        :param operation_name: The operation name copied onto every
            generated ``VersionedFileInfo``.
        :param follow_symlinks: Stored on the instance. It is not read by
            any method of this class.
        :param page_size: Forwarded as ``page_size`` to the version listing.
        :param result_queue: Queue for results and warnings. A new
            ``queue.Queue`` is created only when this is ``None``.
        :param request_parameters: Mapping of extra request parameters; the
            ``'ListObjectVersions'`` entry is forwarded to the listing call.
            Defaults to an empty dict.
        """
        self._client = client
        self.operation_name = operation_name
        self.follow_symlinks = follow_symlinks
        self.page_size = page_size
        # Compare against None instead of relying on truthiness: a
        # caller-supplied queue-like object that happens to evaluate falsy
        # (e.g. one that defines __len__ and is currently empty) must be
        # kept, not silently replaced by a fresh queue.
        if result_queue is None:
            result_queue = queue.Queue()
        self.result_queue = result_queue
        if request_parameters is None:
            request_parameters = {}
        self.request_parameters = request_parameters
        self._version_lister = BucketLister(client)

    def call(self, files):
        """
        Generate VersionedFileInfo objects for all versions of objects.

        :param files: Dictionary describing the transfer. This method reads
            ``files['src']['path']``, ``files['src']['type']``,
            ``files['dest']['type']`` and ``files['dir_op']``, and passes the
            whole dictionary to ``find_dest_path_comp_key``.
        :yields: One VersionedFileInfo per listed object version.
        """
        source = files['src']['path']
        src_type = files['src']['type']
        dest_type = files['dest']['type']

        for src_path, content, version_id in self.list_object_versions(
            source, files['dir_op']
        ):
            dest_path, compare_key = find_dest_path_comp_key(files, src_path)

            yield VersionedFileInfo(
                src=src_path,
                dest=dest_path,
                compare_key=compare_key,
                # Fall back to 0 when the listing entry carries no 'Size'
                # (presumably delete markers -- TODO confirm).
                size=content.get('Size', 0),
                last_update=content.get('LastModified'),
                src_type=src_type,
                dest_type=dest_type,
                operation_name=self.operation_name,
                associated_response_data=content,
                version_id=version_id,
            )

    def list_object_versions(self, s3_path, dir_op):
        """
        Yield the object versions found for ``s3_path``.

        For a directory operation every version listed under the key prefix
        is yielded. Otherwise only versions whose source path is exactly
        ``s3_path`` are yielded.

        :param s3_path: The S3 path to list versions for.
        :param dir_op: Whether this is a directory operation.
        :yields: Tuples of (source_path, content, version_id)
        """
        bucket, key = split_s3_bucket_key(s3_path)

        # There is deliberately no short-circuit for a single, fully
        # specified key: the set of versions of an object is only known
        # after listing, so the listing call is always made.
        extra_args = self.request_parameters.get('ListObjectVersions', {})
        versions = self._version_lister.list_object_versions(
            bucket=bucket,
            prefix=key,
            page_size=self.page_size,
            extra_args=extra_args,
        )

        for src_path, content, version_id in versions:
            # The listing is by prefix, so it can return keys that merely
            # start with the requested key. For a non-directory operation
            # keep only exact matches (similar to the behavior in
            # FileGenerator.list_objects).
            if dir_op or src_path == s3_path:
                yield src_path, content, version_id
0 commit comments