#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2024-09-05 12:18:36 +0200 (Thu, 05 Sep 2024)
#
# https://github.com/HariSekhon/DevOps-Bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#

# Strict mode: exit on unhandled errors, on use of unset variables, and when any pipeline stage fails
set -euo pipefail
# Trace every command when DEBUG is set to a non-empty value
[ -n "${DEBUG:-}" ] && set -x
# Absolute path of the directory holding this script, used to locate the library below
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# shellcheck disable=SC1090,SC1091
. "$srcdir/lib/aws.sh"

# Help text for this script - presumably printed by usage() in the sourced library (not visible here)
# $usage_aws_cli_required is not defined in this file - expected to come from lib/aws.sh, hence SC2154
# shellcheck disable=SC2034,SC2154
usage_description="
Syncs multiple AWS S3 URLs from file lists

Needed because clients often request copies of data ranges of multiple directories between environment buckets for test data

For multiple source and destinations specify text files containing the paths, one line per path

For convenience:

- ignores hash # comment lines
- strips leading and trailing whitespaces
- validates each S3 URL's format
- validates the source and destination list lengths are the same
- validates each source and destination path suffix is the same
  - can disable this by 'export AWS_S3_SYNC_DIFFERENT_PATHS=true' before running this script if you really intend for
    the destination paths to be different to the source paths

These last two checks help prevent off-by-one human errors missing one path and spraying data to the wrong directories

You can populate the source and destination path files using native Bash like this:

    echo s3://prod-landing-bucket/transactions/2023-06-{20..30} | tr ' ' '\n' > sources.txt

    echo s3://uat-landing-bucket/transactions/2023-06-{20..30} | tr ' ' '\n' > destinations.txt


Consider adding the --dryrun option to the end of the script args when running it the first time


$usage_aws_cli_required
"

# used by usage() in lib/utils.sh
# shellcheck disable=SC2034
usage_args="<sources.txt> <destinations.txt> [<aws_cli_options>]"

# help_usage / min_args are not defined in this file - presumably provided by the sourced library
help_usage "$@"

min_args 2 "$@"

# The first two positional args are the list files
sources_file="$1"
destinations_file="$2"
# Drop the two file args so that "$@" holds only the remaining options given to 'aws s3 sync'
shift || :
shift || :

# Populated from the list files, one S3 URL per element
sources=()
destinations=()

# Prints the contents of the file given as $1 to stdout with:
#   - everything from the first '#' on each line removed
#   - leading and trailing whitespace stripped
#   - lines that are empty after the above dropped
decomment(){
    # '--' stops option parsing so a filename beginning with '-' is not taken as a sed option
    sed -e 's/#.*$//' \
        -e 's/^[[:space:]]*//' \
        -e 's/[[:space:]]*$//' \
        -e '/^[[:space:]]*$/d' \
        -- "$1"
}

# Checks the URL given as $1 with is_s3_url() and calls die() with an error message if it is rejected
# is_s3_url / die are not defined in this file - presumably provided by the sourced library
validate_s3_url(){
    local url="$1"
    if ! is_s3_url "$url"; then
        die "Invalid S3 URL given: $url"
    fi
}

# initially deduplicated this to a load_file() function but it turns out mapfile is only Bash 4+
# and Bash 3 has no native array passing, requiring array pass-by-name string and ugly evals
if ! [ -f "$sources_file" ]; then
    die "File not found: $sources_file"
fi
timestamp "Loading sources from file '$sources_file'"
# '|| [ -n "$line" ]' keeps the final path when it arrives without a trailing newline
# (GNU sed does not add one if the input file lacks it) - plain 'read' would silently drop it
while IFS= read -r line || [ -n "$line" ]; do
    validate_s3_url "$line"
    sources+=("$line")
done < <(decomment "$sources_file")
sources_len="${#sources[@]}"
timestamp "$sources_len sources loaded"
echo

if ! [ -f "$destinations_file" ]; then
    die "File not found: $destinations_file"
fi
timestamp "Loading destinations from file '$destinations_file'"
# same guard as for the sources list so the last unterminated line is not lost
while IFS= read -r line || [ -n "$line" ]; do
    validate_s3_url "$line"
    destinations+=("$line")
done < <(decomment "$destinations_file")
destinations_len="${#destinations[@]}"
timestamp "$destinations_len destinations loaded"
echo

timestamp "Sanity check: Verifying source and destination list lengths are the same"
if [ "$sources_len" != "$destinations_len" ]; then
    die "ERROR: length of sources and destinations arrays of paths are not equal in length: sources ($sources_len) vs destinations ($destinations_len)"
fi

# Prints the part of the S3 URL given as $1 that follows 's3://<bucket>/'
# Prints an empty line for a bucket-only URL with no slash after the bucket name, so that
# the bucket name itself is never mistaken for the path suffix
_s3_url_path_suffix(){
    local path="${1#s3://}"
    if [[ "$path" == */* ]]; then
        printf '%s\n' "${path#*/}"
    else
        printf '\n'
    fi
}

# Skipped entirely when the user has exported AWS_S3_SYNC_DIFFERENT_PATHS=true
if [ "${AWS_S3_SYNC_DIFFERENT_PATHS:-}" != true ]; then
    timestamp "Sanity check: Verifying source and destination suffix paths are the same"
    for ((i=0; i < sources_len; i++)); do
        src="${sources[i]}"
        dest="${destinations[i]}"
        # compare only what comes after the bucket name, since the buckets are expected to differ
        src_path="$(_s3_url_path_suffix "$src")"
        dest_path="$(_s3_url_path_suffix "$dest")"
        if [ "$src_path" != "$dest_path" ]; then
            echo
            error "Source path suffix '$src' does not match destination path suffix '$dest'"
            echo
            die "If this is really intentional, 'export AWS_S3_SYNC_DIFFERENT_PATHS=true' before running this script"
        fi
    done
    echo
fi

# Sync each source to the destination at the same index in the other list
for ((i=0; i < sources_len; i++)); do
    src="${sources[i]}"
    dest="${destinations[i]}"

    timestamp "Syncing AWS S3 '$src' to '$dest'"
    # "$@" holds whatever args remained after the two list files, eg. --dryrun
    aws s3 sync "$src" "$dest" "$@"
done

echo
# we've already verified above that $sources_len and $destinations_len are the same
timestamp "AWS S3 Sync completed for $sources_len S3 URL paths"