#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2022-03-29 19:19:43 +0100 (Tue, 29 Mar 2022)
#
# https://github.com/HariSekhon/DevOps-Bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#
# Strict mode: abort on unhandled errors, unset variables and failures in any pipeline stage
set -euo pipefail

# Set DEBUG to any non-empty value to trace every command.
# NOTE: traces print full command lines, which in this script include git URLs carrying credentials
[ -n "${DEBUG:-}" ] && set -x

# gets absolute rather than relative path, for when we pushd later, otherwise relative $srcdir references will break
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Helper library - the functions called further down that are not defined in this file are expected to come
# from here, directly or via files it sources in turn (not visible from this script)
# shellcheck disable=SC1090
. "$srcdir/lib/github.sh"
# Help text printed by the usage machinery of the sourced library.
# Dollar signs are escaped so that variable names and prices are shown literally rather than expanded.
# shellcheck disable=SC2034,SC2154
usage_description="
Mirrors all or given repos from GitHub to AWS CodeCommit via AWS CLI and Git HTTPS mirror clones

Useful to create/sync GitHub repos to AWS CodeCommit for migration or to cron for fast almost free DR purposes
(almost \$0 AWS charges compared to \$100-\$400+ per month for Rewind / BackHub)

Includes repo descriptions and all branches and tags, but not PRs/Wikis/Releases

Especially useful to backup dynamic environments where people are adding new repos all the time, avoids having to maintain configurations as finds and iterates all non-fork repos by default

Can't use Terraform to dynamically create these backups because a simple commented/deleted code mistake would bypass prevent_destroy and delete your backup repos as well as your originals!

    https://github.com/hashicorp/terraform/issues/17599

Cron this script as per your preferred backup schedule

If no repos are given, iterates all non-fork repos for the current user or GitHub organization

Each repo will have the same name in AWS as it does on GitHub

For source GitHub accounts, requires:

- \$GITHUB_TOKEN
- \$GITHUB_ORGANIZATION, \$GITHUB_USER or else infers owner of the \$GITHUB_TOKEN

For AWS CodeCommit requires:

- \$AWS_DEFAULT_REGION
- AWS Credentials:
  - AWS CLI configured with CodeCommit full access to create repositories (\$AWS_PROFILE, \$AWS_ACCESS_KEY_ID, \$AWS_SECRET_ACCESS_KEY etc.)
  - \$AWS_GIT_USER and \$AWS_GIT_PASSWORD
    or
  - Python Pip git-remote-codecommit module to be installed to use AWS CLI credentials

In a GitHub Organization, only repos the user can read will be mirrored, others won't be returned in the list of GitHub repos to even try (as an outside collaborator user)

If \$CLEAR_CACHE=true, deletes the /tmp cache and uses a fresh clone mirror. This can sometimes clear push errors.

If \$FORCE_MIRROR=true, runs a mirror operation (overwrites refs and deletes removed branches). Not the default for safety.
"

# used by usage() in lib/utils.sh
# shellcheck disable=SC2034
usage_args="[<repo1> <repo2> <repo3> ...]"
# Handle the help switches before validating the environment, so that the usage text can still be
# displayed on a machine where the credentials have not been exported yet.
# NOTE(review): assumes help_usage only reacts to help switches in the arguments - confirm in the library
help_usage "$@"

check_env_defined "GITHUB_TOKEN"
check_env_defined "AWS_DEFAULT_REGION"

#min_args 1 "$@"

timestamp "Starting GitHub to AWS CodeCommit mirroring"
echo >&2

# The owner is the organization if given, otherwise the user, otherwise whatever get_github_user returns
user="${GITHUB_USER:-$(get_github_user)}"
owner="${GITHUB_ORGANIZATION:-$user}"

if is_blank "$owner"; then
    die "Failed to determine GitHub owner"
fi

# $repos is a whitespace separated list of repo names, word-split later by the mirroring loop
if [ $# -gt 0 ]; then
    repos="$*"
else
    timestamp "Getting list of all non-fork GitHub repos owned by '$owner'"
    repos="$(get_github_repos "$owner" "${GITHUB_ORGANIZATION:-}")"
    echo >&2
fi

# not using mktemp because we want to reuse this staging area between runs for efficiency
# (the 'codecommmit' spelling is kept as is so that staging areas from previous runs keep being reused)
tmpdir="/tmp/github_mirror_to_aws_codecommmit/$owner"

if [ "${CLEAR_CACHE:-}" = true ]; then
    timestamp "Removing cache: $tmpdir"
    # :? aborts rather than expanding to nothing should the variable ever end up empty
    rm -fr -- "${tmpdir:?}"
fi

timestamp "Switching to '$tmpdir' directory for mirror staging"
mkdir -p -v "$tmpdir"
cd "$tmpdir"
echo >&2

# counters reported in the final summary
succeeded=0
failed=0
#######################################
# Mirrors a single GitHub repo to an AWS CodeCommit repo of the same name.
#
# Creates the AWS repo if it is not listed yet, copies the GitHub description, clones or refreshes a
# bare mirror clone '<repo>.git' in the current working directory, pushes it to an 'aws' remote and
# finally aligns the AWS default branch with the GitHub one.
#
# Description and default branch syncing are best effort: a failure there is logged as a warning and
# does not fail the repo. Repo creation, clone/fetch, remote setup and pushes are fatal.
#
# Does not change the caller's working directory or directory stack, on success or on failure.
#
# Globals:
#   read:    srcdir, owner, user, GITHUB_TOKEN, AWS_DEFAULT_REGION,
#            AWS_GIT_USER, AWS_GIT_PASSWORD, FORCE_MIRROR (the last three optional)
#   written: succeeded (incremented by one on success)
# Arguments:
#   $1 - repo name without the owner prefix
# Returns:
#   0 on success, 1 if a fatal step failed
#######################################
mirror_repo() {
    local repo="$1"
    local description
    # in case we need to mutate the names later, such as working around dots in repo names eg. ".github"
    local aws_repo="$repo"
    local aws_repos
    local mirror_dir="$repo.git"
    local remotes
    local aws_git_password_urlencoded
    local default_branch

    timestamp "Checking AWS repo '$aws_repo' exists"
    # Capture the listing before searching it: with pipefail, 'grep -q' at the end of the pipeline may exit
    # on the first match while upstream is still writing, the resulting SIGPIPE fails the pipeline and an
    # existing repo is then reported as missing.
    # If listing fails we still fall through to the create attempt, which is the arbiter either way.
    aws_repos="$(aws codecommit list-repositories | jq -r '.repositories[].repositoryName')" || aws_repos=""
    if ! grep -Fxq -- "$aws_repo" <<< "$aws_repos"; then
        timestamp "Creating AWS repo '$aws_repo'"
        aws codecommit create-repository --repository-name "$aws_repo" || return 1
        echo >&2
    fi

    timestamp "Checking GitHub repo for description to copy"
    # the helper script's output is expected to start with the repo name, which is stripped here
    if ! description="$("$srcdir/github_repo_description.sh" "$owner/$repo" | sed "s/^${repo}[[:space:]]*//")"; then
        timestamp "WARNING: failed to get the description of GitHub repo '$repo', not copying description"
        description=""
    fi
    if [ -n "$description" ]; then
        timestamp "Setting AWS repo '$aws_repo' description to '$description'"
        aws codecommit update-repository-description --repository-name "$aws_repo" --repository-description "$description" ||
            timestamp "WARNING: failed to set description of AWS repo '$aws_repo'"
    fi

    # All git operations below use 'git -C' instead of pushd/popd so that an early return can never leave
    # the script sitting inside the clone directory
    if [ -d "$mirror_dir" ]; then
        timestamp "Using existing clone in directory '$repo.git'"
        git -C "$mirror_dir" remote update origin || return 1
    else
        timestamp "Cloning GitHub repo to directory '$repo.git'"
        git clone --mirror "https://$user:$GITHUB_TOKEN@github.com/$owner/$repo.git" "$mirror_dir" || return 1
    fi

    remotes="$(git -C "$mirror_dir" remote)" || return 1
    if ! grep -Fxq aws <<< "$remotes"; then
        timestamp "Adding AWS remote origin"
        if [ -n "${AWS_GIT_USER:-}" ] &&
           [ -n "${AWS_GIT_PASSWORD:-}" ]; then
            timestamp "Using AWS git user and url encoded password"
            aws_git_password_urlencoded="$("$srcdir/urlencode.sh" <<< "$AWS_GIT_PASSWORD")" || return 1
            git -C "$mirror_dir" remote add aws "https://$AWS_GIT_USER:$aws_git_password_urlencoded@git-codecommit.$AWS_DEFAULT_REGION.amazonaws.com/v1/repos/$aws_repo" || return 1
        else
            timestamp "Using AWS credentials via git-remote-codecommit"
            git -C "$mirror_dir" remote add aws "codecommit::$AWS_DEFAULT_REGION://$aws_repo" || return 1
        fi
    fi

    # XXX: the explicit '|| return 1' are required - this function is called as an 'if' condition, where
    #      Bash suppresses 'set -e' for everything executed inside the function
    if [ "${FORCE_MIRROR:-}" = true ]; then
        # more dangerous, force overwrites remote repo refs
        timestamp "Force mirroring to AWS CodeCommit (overwrite)"
        git -C "$mirror_dir" push --mirror aws || return 1
    else
        timestamp "Pushing all branches to AWS CodeCommit"
        git -C "$mirror_dir" push --all aws || return 1
        timestamp "Pushing all tags to AWS CodeCommit"
        git -C "$mirror_dir" push --tags aws || return 1
    fi

    # TODO: if AWS CodeCommit supports protected branches in future
    #timestamp "Enabling branch protections on AWS mirror repo '$aws_repo'"
    #"$srcdir/aws_codecommit_protect_branches.sh" "$aws_repo"

    timestamp "Getting GitHub repo '$repo' default branch"
    default_branch="$("$srcdir/github_api.sh" "/repos/$owner/$repo" | jq -r '.default_branch')" || default_branch=""
    # jq -r prints the literal string 'null' when the field is missing - never send that to AWS as a branch name
    if [ -z "$default_branch" ] || [ "$default_branch" = null ]; then
        timestamp "WARNING: failed to determine default branch of GitHub repo '$repo', leaving AWS default branch unchanged"
    else
        timestamp "Setting AWS CodeCommit repo '$aws_repo' default branch to '$default_branch'"
        aws codecommit update-default-branch --repository-name "$aws_repo" --default-branch-name "$default_branch" ||
            timestamp "WARNING: failed to set default branch of AWS repo '$aws_repo' to '$default_branch'"
    fi

    echo >&2
    succeeded=$((succeeded + 1))
}
# Mirror every repo, retrying once from a fresh clone, then report the totals.
# Exits 1 if any repo still failed after its retry.
failed_repos=""

# $repos is deliberately unquoted: it is a whitespace separated list of repo names
for repo in $repos; do
    if [[ "$repo" =~ / ]]; then
        die "Repo '$repo' should be specified without owner prefix"
    fi
    # Always start from the staging directory with an empty directory stack: a failed attempt may have
    # returned while still inside a clone directory, and cloning the next repo from there would nest it
    # inside the previous repo's git directory
    dirs -c
    cd "$tmpdir"
    if ! mirror_repo "$repo"; then
        dirs -c
        cd "$tmpdir"
        timestamp "Mirroring failed, clearing cache and trying again"
        rm -fr -- "${tmpdir:?}/$repo.git"
        if ! mirror_repo "$repo"; then
            echo >&2
            timestamp "ERROR: Failed to mirror repo '$repo' to AWS"
            failed_repos+=" $repo"
            echo >&2
            failed=$((failed + 1))
        fi
    fi
done

if [ "$failed" -gt 0 ]; then
    timestamp "ERROR: $failed GitHub repos failed to mirror to AWS ($succeeded succeeded). Failed repos:$failed_repos"
    exit 1
fi

timestamp "GitHub to AWS mirroring completed successfully for $succeeded repos"