You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DevOps-Bash-tools/github_mirror_repos_to_aws_...

207 lines
7.6 KiB
Bash

#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2022-03-29 19:19:43 +0100 (Tue, 29 Mar 2022)
#
# https://github.com/HariSekhon/DevOps-Bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#
# strict mode: abort on unhandled errors, unset variables and failures anywhere in a pipeline
set -o errexit -o nounset -o pipefail

# opt-in command tracing for troubleshooting
if [ -n "${DEBUG:-}" ]; then
    set -x
fi

# absolute (not relative) path to this script's directory, so that $srcdir references
# keep working after later directory changes (cd / pushd)
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# shellcheck disable=SC1090,SC1091
source "$srcdir/lib/github.sh"
# Help text describing what this script does and which environment variables it needs.
# It is not referenced again in this file (hence the SC2034 suppression) - presumably it is
# read by the usage/help functions of the sourced library; confirm against lib/utils.sh.
# Dollar signs are escaped (\$) so that variable names are shown literally instead of being expanded.
# shellcheck disable=SC2034,SC2154
usage_description="
Mirrors all or given repos from GitHub to AWS CodeCommit via AWS CLI and Git HTTPS mirror clones
Useful to create/sync GitHub repos to AWS CodeCommit for migration or to cron for fast almost free DR purposes
(almost \$0 AWS charges compared to \$100-\$400+ per month for Rewind / BackHub)
Includes repo descriptions and all branches and tags, but not PRs/Wikis/Releases
Especially useful to backup dynamic environments where people are adding new repos all the time, avoids having to maintain configurations as finds and iterates all non-fork repos by default
Can't use Terraform to dynamically create these backups because a simple commented/deleted code mistake would bypass prevent_destroy and delete your backup repos as well as your originals!
https://github.com/hashicorp/terraform/issues/17599
Cron this script as per your preferred backup schedule
If no repos are given, iterates all non-fork repos for the current user or GitHub organization
Each repo will have the same name in AWS as it does on GitHub
For source GitHub accounts, requires:
- \$GITHUB_TOKEN
- \$GITHUB_ORGANIZATION, \$GITHUB_USER or else infers owner of the \$GITHUB_TOKEN
For AWS CodeCommit requires:
- \$AWS_DEFAULT_REGION
- AWS Credentials:
- AWS CLI configured with CodeCommit full access to create repositories (\$AWS_PROFILE, \$AWS_ACCESS_KEY_ID, \$AWS_SECRET_ACCESS_KEY etc.)
- \$AWS_GIT_USER and \$AWS_GIT_PASSWORD
or
- Python Pip git-remote-codecommit module to be installed to use AWS CLI credentials
In a GitHub Organization, only repos the user can read will be mirrored, others won't be returned in the list of GitHub repos to even try (as an outside collaborator user)
If \$CLEAR_CACHE=true, deletes the /tmp cache and uses a fresh clone mirror. This can sometimes clear push errors.
If \$FORCE_MIRROR=true, runs a mirror operation (overwrites refs and deletes removed branches). Not the default for safety.
"
# Synopsis of the positional arguments: an optional list of repo names
# used by usage() in lib/utils.sh
# shellcheck disable=SC2034
usage_args="[<repo1> <repo2> <repo3> ...]"
# The helpers called below (check_env_defined, help_usage, timestamp, get_github_user,
# get_github_repos, is_blank, die) are not defined in this file - presumably they are
# provided by the sourced lib/github.sh and the libraries it pulls in.
check_env_defined "GITHUB_TOKEN"
check_env_defined "AWS_DEFAULT_REGION"

help_usage "$@"

#min_args 1 "$@"

timestamp "Starting GitHub to AWS CodeCommit mirroring"
echo >&2

# the owner is the organization if one is set, otherwise the user
# (explicitly given or looked up via get_github_user)
user="${GITHUB_USER:-$(get_github_user)}"
owner="${GITHUB_ORGANIZATION:-$user}"
if is_blank "$owner"; then
    die "Failed to determine GitHub owner"
fi

# repos is a whitespace separated list of repo names: either the command line args
# or everything returned by get_github_repos for the owner
if [ $# -gt 0 ]; then
    repos="$*"
else
    timestamp "Getting list of all non-fork GitHub repos owned by '$owner'"
    repos="$(get_github_repos "$owner" "${GITHUB_ORGANIZATION:-}")"
    echo >&2
fi

# not using mktemp because we want to reuse this staging area between runs for efficiency
tmpdir="/tmp/github_mirror_to_aws_codecommmit/$owner"

if [ "${CLEAR_CACHE:-}" = true ]; then
    timestamp "Removing cache: $tmpdir"
    # :? aborts instead of expanding to an empty path should tmpdir ever be blank
    rm -fr -- "${tmpdir:?}"
fi

timestamp "Switching to '$tmpdir' directory for mirror staging"
mkdir -p -v "$tmpdir"
# The mirror clones made by mirror_repo are cloned from a URL containing $GITHUB_TOKEN, which git
# records in each clone's config file. /tmp is shared between users, so restrict the staging
# directory to its owner to stop other local users reading the token out of the cache.
chmod 700 "$tmpdir" || die "Failed to restrict permissions on staging directory '$tmpdir'"
cd "$tmpdir"
echo >&2

# counters for the final summary
succeeded=0
failed=0
# Syncs the mirror clone in the current working directory to AWS CodeCommit and copies the
# GitHub default branch setting. Must be called from inside the '<repo>.git' clone directory.
#
# Arguments:
#   $1 - GitHub repo name (without owner prefix)
#   $2 - AWS CodeCommit repo name
#   $3 - 'true' to refresh the existing clone from its origin before pushing
# Globals read:
#   owner, srcdir, AWS_DEFAULT_REGION, AWS_GIT_USER, AWS_GIT_PASSWORD, FORCE_MIRROR
# Returns:
#   0 on success, 1 if the refresh, adding the 'aws' remote, or any push failed
_mirror_repo_sync_clone(){
    local repo="$1"
    local aws_repo="$2"
    local refresh_origin="$3"
    local remotes
    local default_branch
    # kept local so the encoded secret does not linger in the global scope
    local AWS_GIT_PASSWORD_URLENCODED

    if [ "$refresh_origin" = true ]; then
        git remote update origin || return 1
    fi

    # capture the remote names first rather than piping into 'grep -q', which can exit
    # early and fail the whole pipeline under 'set -o pipefail'
    remotes="$(git remote)" || return 1
    if ! grep -Fxq aws <<< "$remotes"; then
        timestamp "Adding AWS remote origin"
        if [ -n "${AWS_GIT_USER:-}" ] &&
           [ -n "${AWS_GIT_PASSWORD:-}" ]; then
            timestamp "Using AWS git user and url encoded password"
            AWS_GIT_PASSWORD_URLENCODED="$("$srcdir/urlencode.sh" <<< "$AWS_GIT_PASSWORD")" || return 1
            git remote add aws "https://$AWS_GIT_USER:$AWS_GIT_PASSWORD_URLENCODED@git-codecommit.$AWS_DEFAULT_REGION.amazonaws.com/v1/repos/$aws_repo" || return 1
        else
            timestamp "Using AWS credentials via git-remote-codecommit"
            git remote add aws "codecommit::$AWS_DEFAULT_REGION://$aws_repo" || return 1
        fi
    fi

    if [ "${FORCE_MIRROR:-}" = true ]; then
        # more dangerous, force overwrites remote repo refs
        timestamp "Force mirroring to AWS CodeCommit (overwrite)"
        git push --mirror aws || return 1
    else
        # errexit is suppressed because mirror_repo is called from an 'if' condition,
        # so every push needs its own explicit failure check
        timestamp "Pushing all branches to AWS CodeCommit"
        git push --all aws || return 1
        timestamp "Pushing all tags to AWS CodeCommit"
        git push --tags aws || return 1
    fi

    # TODO: if AWS CodeCommit supports protected branches in future
    #timestamp "Enabling branch protections on AWS mirror repo '$aws_repo'"
    #"$srcdir/aws_codecommit_protect_branches.sh" "$aws_repo"

    timestamp "Getting GitHub repo '$repo' default branch"
    default_branch="$("$srcdir/github_api.sh" "/repos/$owner/$repo" | jq -r '.default_branch')" || default_branch=""
    # jq -r prints the literal string 'null' when the field is missing
    if [ -z "$default_branch" ] || [ "$default_branch" = null ]; then
        timestamp "WARNING: could not determine default branch of GitHub repo '$repo', leaving AWS default branch unchanged"
    else
        timestamp "Setting AWS CodeCommit repo '$aws_repo' default branch to '$default_branch'"
        # best effort, as before: a failure here does not fail the mirror
        aws codecommit update-default-branch --repository-name "$aws_repo" --default-branch-name "$default_branch" ||
            timestamp "WARNING: failed to set default branch of AWS repo '$aws_repo'"
    fi
}

# Mirrors one GitHub repo to an AWS CodeCommit repo of the same name:
# creates the AWS repo if it is missing, copies the description, clones (or refreshes) a
# mirror clone in the current directory and pushes it to AWS.
#
# Arguments:
#   $1 - GitHub repo name (without owner prefix)
# Globals read:
#   owner, user, srcdir, GITHUB_TOKEN (plus those read by _mirror_repo_sync_clone)
# Globals written:
#   succeeded - incremented on success
# Returns:
#   0 on success, 1 on failure. The working directory and directory stack are always
#   restored to what they were on entry, on failure as well as on success.
mirror_repo(){
    local repo="$1"
    local description
    local aws_repo_list
    local refresh_origin=false
    local rc=0
    # in case we need to mutate the names later, such as working around dots in repo names eg. ".github"
    local aws_repo="$repo"

    timestamp "Checking AWS repo '$aws_repo' exists"
    # capture the list instead of piping into 'grep -q': under pipefail an early grep exit can
    # SIGPIPE the producer and make an existing repo look missing.
    # If listing fails we fall through to the create attempt, which reports the real error.
    aws_repo_list="$(aws codecommit list-repositories | jq -r '.repositories[].repositoryName')" || aws_repo_list=""
    if ! grep -Fxq -- "$aws_repo" <<< "$aws_repo_list"; then
        timestamp "Creating AWS repo '$aws_repo'"
        aws codecommit create-repository --repository-name "$aws_repo" || return 1
        echo >&2
    fi

    timestamp "Checking GitHub repo for description to copy"
    # the helper's output is expected to start with the repo name, which sed strips - best effort
    description="$("$srcdir/github_repo_description.sh" "$owner/$repo" | sed "s/^${repo}[[:space:]]*//")" || :
    if [ -n "$description" ]; then
        timestamp "Setting AWS repo '$aws_repo' description to '$description'"
        aws codecommit update-repository-description --repository-name "$aws_repo" --repository-description "$description" ||
            timestamp "WARNING: failed to set description of AWS repo '$aws_repo'"
    fi

    if [ -d "$repo.git" ]; then
        timestamp "Using existing clone in directory '$repo.git'"
        refresh_origin=true
    else
        timestamp "Cloning GitHub repo to directory '$repo.git'"
        git clone --mirror "https://$user:$GITHUB_TOKEN@github.com/$owner/$repo.git" || return 1
    fi

    pushd "$repo.git" >/dev/null || return 1
    _mirror_repo_sync_clone "$repo" "$aws_repo" "$refresh_origin" || rc=$?
    # always leave the clone directory, otherwise a failure would strand the caller inside it
    popd >/dev/null || return 1
    [ "$rc" -eq 0 ] || return 1

    echo >&2
    succeeded=$((succeeded + 1))
}
# names of the repos that failed both attempts, space separated, for the final summary
failed_repos=""

# $repos is deliberately left unquoted: it is a whitespace separated list of repo names
for repo in $repos; do
    if [[ "$repo" =~ / ]]; then
        die "Repo '$repo' should be specified without owner prefix"
    fi

    if mirror_repo "$repo"; then
        continue
    fi

    # mirror_repo can fail while still inside the repo's clone directory, so go back to the
    # staging directory and reset the directory stack before doing anything else
    cd "$tmpdir" || die "Failed to return to staging directory '$tmpdir'"
    dirs -c
    timestamp "Mirroring failed, clearing cache and trying again"
    rm -fr -- "${tmpdir:?}/$repo.git"

    if mirror_repo "$repo"; then
        continue
    fi

    # same recovery after the retry: without it the next repo would be cloned
    # inside the failed repo's directory
    cd "$tmpdir" || die "Failed to return to staging directory '$tmpdir'"
    dirs -c
    echo >&2
    timestamp "ERROR: Failed to mirror repo '$repo' to AWS"
    failed_repos+=" $repo"
    echo >&2
    failed=$((failed + 1))
done

if [ "$failed" -gt 0 ]; then
    timestamp "ERROR: $failed GitHub repos failed to mirror to AWS ($succeeded succeeded). Failed repos: $failed_repos"
    exit 1
fi

timestamp "GitHub to AWS mirroring completed successfully for $succeeded repos"