You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DevOps-Bash-tools/github_mirror_repos_to_gitl...

202 lines
7.7 KiB
Bash

#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2022-03-22 10:47:11 +0000 (Tue, 22 Mar 2022)
#
# https://github.com/HariSekhon/DevOps-Bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#
# Strict mode: stop on command failure, on use of an unset variable, and on a failure in any pipeline stage
set -o errexit -o nounset -o pipefail
# Trace every command when $DEBUG is set to any non-empty value
if [ -n "${DEBUG:-}" ]; then
    set -x
fi
# Resolve this script's directory to an absolute path so that $srcdir references
# keep working after the working directory changes later on (cd / pushd)
srcdir="$(dirname "${BASH_SOURCE[0]}")"
srcdir="$(cd "$srcdir" && pwd)"
# shellcheck disable=SC1090
source "$srcdir/lib/github.sh"
# Help text describing what the script does, its prerequisites and the environment variables it honours
# (presumably printed by the same usage() helper that consumes usage_args below - confirm in lib/utils.sh)
# shellcheck disable=SC2034,SC2154
usage_description="
Mirrors all or given repos from GitHub to GitLab via APIs and HTTPS mirror clones
Useful to create/sync GitHub repos to GitLab for migration or to cron for fast free DR purposes
Includes repo descriptions and all branches and tags, but not PRs/Wikis/Releases
Especially useful to backup dynamic environments where people are adding new repos all the time, avoids having to maintain configurations as finds and iterates all non-fork repos by default
Can't use Terraform to dynamically create these backups because a simple commented/deleted code mistake would bypass prevent_destroy and delete your backup repos as well as your originals!
https://github.com/hashicorp/terraform/issues/17599
Cron this script as per your preferred backup schedule
If no repos are given, iterates all non-fork repos for the current user or GitHub organization
Each repo will have the same name in GitLab as it does on GitHub, but characters other than alphanumeric/dash/underscores will be replaced by underscore,
and any leading special characters will be removed to meet GitLab's repo naming requirements eg. a repo called '.test' on GitHub will be mirrored to just 'test' on GitLab
Requires \$GITHUB_TOKEN AND \$GITLAB_TOKEN to be set
In a GitHub Organization, only repos the user can read will be mirrored, others won't be returned in the list of GitHub repos to even try (as an outside collaborator user)
Source GitHub and Destination GitLab accounts, in order of priority:
\$GITHUB_ORGANIZATION, \$GITHUB_USER or owner of the \$GITHUB_TOKEN
\$GITLAB_OWNER, \$GITLAB_USER or the owner of the \$GITLAB_TOKEN
If \$CLEAR_CACHE=true, deletes the /tmp cache and uses a fresh clone mirror. This can sometimes clear push errors.
If \$FORCE_MIRROR=true, runs a mirror operation (overwrites refs and deletes removed branches). Not the default for safety.
"
# Argument synopsis: zero or more repo names, no owner prefix
# used by usage() in lib/utils.sh
# shellcheck disable=SC2034
usage_args="[<repo1> <repo2> <repo3> ...]"
# Both API tokens are required (see usage text); check_env_defined comes from the sourced library
# and presumably aborts when the named variable is unset - confirm in lib
check_env_defined "GITHUB_TOKEN"
check_env_defined "GITLAB_TOKEN"
help_usage "$@"
#min_args 1 "$@"
timestamp "Starting GitHub to GitLab mirroring"
echo >&2
# Source account: $GITHUB_ORGANIZATION wins over $GITHUB_USER, which wins over whatever get_github_user returns
user="${GITHUB_USER:-$(get_github_user)}"
owner="${GITHUB_ORGANIZATION:-$user}"
# Destination account: $GITLAB_OWNER, then $GITLAB_USER, then the username reported by the GitLab /user endpoint.
# '// empty' makes jq print nothing rather than the literal string 'null' when the response has no
# username field, so that the is_blank check below can actually catch a failed lookup
gitlab_owner="${GITLAB_OWNER:-${GITLAB_USER:-$("$srcdir/gitlab_api.sh" /user | jq -r '.username // empty')}}"
if is_blank "$owner"; then
    die "Failed to determine GitHub owner"
fi
if is_blank "$gitlab_owner"; then
    die "Failed to determine GitLab owner"
fi
#timestamp "Getting GitLab id in case we need to create any repos in GitLab"
#gitlab_id="$("$srcdir/gitlab_api.sh" "/users?username=$gitlab_owner" | jq -r '.[0].id')"
#echo >&2
#if is_blank "$gitlab_id"; then
# die "Failed to determine GitLab id"
#fi
# Repo names given on the command line take precedence; with no arguments the
# list is fetched via get_github_repos for the resolved owner instead.
# Either way $repos ends up as one whitespace-separated string of repo names.
if (( $# == 0 )); then
    timestamp "Getting list of all non-fork GitHub repos owned by '$owner'"
    repos="$(get_github_repos "$owner" "${GITHUB_ORGANIZATION:-}")"
    echo >&2
else
    repos="$*"
fi
# not using mktemp because we want to reuse this staging area between runs for efficiency
tmpdir="/tmp/github_mirror_to_gitlab/$owner"
# CLEAR_CACHE=true throws away all previous clones for this owner so that every repo is cloned afresh
if [ "${CLEAR_CACHE:-}" = true ]; then
    timestamp "Removing cache: $tmpdir"
    rm -fr -- "$tmpdir"
fi
timestamp "Switching to '$tmpdir' directory for mirror staging"
mkdir -p -v "$tmpdir"
# the mirror clones are created from / pushed to URLs with the API tokens embedded, and git keeps
# remote URLs in each clone's config file, so restrict the staging area to the current user
chmod 700 "$tmpdir"
cd "$tmpdir"
echo >&2
# running totals, reported in the final summary
succeeded=0
failed=0
# Pushes the mirror clone in the current directory to its 'gitlab' remote,
# adding that remote first if the clone does not have one yet
#
# Globals read: gitlab_owner, GITLAB_TOKEN, FORCE_MIRROR
# Arguments:    $1 - sanitized GitLab repo name
# Returns:      0 if every push succeeded, 1 otherwise
_mirror_repo_push(){
    local gitlab_repo="$1"
    if ! git remote -v | awk '{print $1}' | grep -Fxq gitlab; then
        timestamp "Adding GitLab remote origin"
        git remote add gitlab "https://$gitlab_owner:$GITLAB_TOKEN@gitlab.com/$gitlab_owner/$gitlab_repo.git"
    fi
    if [ "${FORCE_MIRROR:-}" = true ]; then
        # more dangerous, force overwrites remote repo refs
        timestamp "Force mirroring to GitLab (overwrite)"
        git push --mirror gitlab || return 1
    else
        timestamp "Pushing all branches to GitLab"
        git push --all gitlab || return 1  # XXX: without return 1 the function ignores errors, even with set -e inside the function
        timestamp "Pushing all tags to GitLab"
        git push --tags gitlab || return 1
    fi
}

# Mirrors one GitHub repo to GitLab: creates the GitLab project if it is missing, copies the description,
# clones (or updates) a local mirror clone, pushes it to GitLab, enables branch protections and
# copies the default branch setting
#
# Must be called from the staging directory since the clone path '<repo>.git' is relative.
# The working directory is restored before returning, on failure as well as on success,
# so a failed repo cannot cause later repos to be staged inside its clone directory.
#
# Globals read:    srcdir, user, owner, gitlab_owner, GITHUB_TOKEN, GITLAB_TOKEN, FORCE_MIRROR
# Globals written: succeeded (incremented once per successfully mirrored repo)
# Arguments:       $1 - GitHub repo name without the owner prefix
# Returns:         0 on success, 1 if project creation, clone, fetch or push failed
mirror_repo(){
    local repo="$1"
    local gitlab_repo
    local gitlab_owner_repo
    local default_branch
    local rc=0
    # GitLab doesn't allow repo name like .github, only alnum, dashes and underscores, and not starting with unusual characters either
    gitlab_repo="$(sed 's/[^[:alnum:]_-]/_/g; s/^[^[:alnum:]]*//' <<< "$repo")"
    # URL-encoded 'owner/repo' form for use as the project identifier in GitLab API paths
    gitlab_owner_repo="$("$srcdir/urlencode.sh" <<< "$gitlab_owner/$gitlab_repo")"
    timestamp "Checking GitLab repo '$gitlab_owner/$gitlab_repo' exists"
    if ! "$srcdir/gitlab_api.sh" "/projects/$gitlab_owner_repo" >/dev/null; then
        timestamp "Creating GitLab repo '$gitlab_owner/$gitlab_repo'"
        # only available for admins
        #"$srcdir/gitlab_api.sh" "/projects/user/$gitlab_id" -X POST -d "{ \"name\": \"$gitlab_repo\", \"visibility\": \"private\" }" >/dev/null
        "$srcdir/gitlab_api.sh" "/projects" -X POST -d "{ \"name\": \"$gitlab_repo\", \"visibility\": \"private\" }" >/dev/null || return 1
        echo >&2
    fi
    timestamp "Checking GitHub repo for description to copy"
    # timestamp not needed before the last stage as gitlab_project_set_description.sh will output if it is setting the repo description
    # the sed swaps a leading GitHub repo name for the sanitized GitLab one before handing the line on
    "$srcdir/github_repo_description.sh" "$owner/$repo" |
    sed "s/^$repo/$gitlab_repo/" |
    "$srcdir/gitlab_project_set_description.sh"
    if [ -d "$repo.git" ]; then
        timestamp "Using existing clone in directory '$repo.git'"
        pushd "$repo.git" >/dev/null || return 1
        # record the failure instead of returning straight away so that the popd below still runs
        git remote update origin || rc=1
    else
        timestamp "Cloning GitHub repo to directory '$repo.git'"
        git clone --mirror "https://$user:$GITHUB_TOKEN@github.com/$owner/$repo.git" || return 1
        pushd "$repo.git" >/dev/null || return 1
    fi
    if [ "$rc" -eq 0 ]; then
        _mirror_repo_push "$gitlab_repo" || rc=1
    fi
    if [ "$rc" -ne 0 ]; then
        # step back out of the clone directory before reporting the failure to the caller
        popd >/dev/null || :
        return 1
    fi
    timestamp "Enabling branch protections on GitLab mirror repo '$gitlab_owner/$gitlab_repo'"
    "$srcdir/gitlab_project_protect_branches.sh" "$gitlab_owner/$gitlab_repo"
    timestamp "Getting GitHub repo '$repo' default branch"
    # '// empty' yields an empty string instead of the literal 'null' when the field is missing
    default_branch="$("$srcdir/github_api.sh" "/repos/$owner/$repo" | jq -r '.default_branch // empty')"
    if [ -n "$default_branch" ]; then
        timestamp "Setting GitLab repo '$gitlab_owner/$gitlab_repo' default branch to '$default_branch'"
        "$srcdir/gitlab_api.sh" "/projects/$gitlab_owner_repo" -X PUT -d '{"default_branch": "'"$default_branch"'"}' >/dev/null
    else
        timestamp "WARNING: could not determine default branch of GitHub repo '$repo', leaving GitLab default branch unchanged"
    fi
    popd >/dev/null || return 1
    echo >&2
    ((succeeded+=1))
}
# Mirror every repo in $repos, retrying each failure once from a fresh clone,
# then print a summary and exit 1 if any repo still failed after its retry
failed_repos=""
# $repos is a whitespace-separated list of repo names, so the unquoted expansion is intentional
for repo in $repos; do
    if [[ "$repo" =~ / ]]; then
        die "Repo '$repo' should be specified without owner prefix"
    fi
    if ! mirror_repo "$repo"; then
        # a failed attempt can leave the shell inside the clone directory with a stale directory stack,
        # so reset both rather than relying on a popd that may or may not have something to pop
        dirs -c
        cd "$tmpdir"
        timestamp "Mirroring failed, clearing cache and trying again"
        rm -fr -- "$tmpdir/$repo.git"
        if ! mirror_repo "$repo"; then
            # same reset after the retry, otherwise the next repo would be cloned inside this repo's directory
            dirs -c
            cd "$tmpdir"
            echo >&2
            timestamp "ERROR: Failed to mirror repo '$repo' to GitLab"
            failed_repos+=" $repo"
            echo >&2
            ((failed+=1))
        fi
    fi
done
if [ "$failed" -gt 0 ]; then
    timestamp "ERROR: $failed GitHub repos failed to mirror to GitLab ($succeeded succeeded). Failed repos: $failed_repos"
    exit 1
fi
timestamp "GitHub to GitLab mirroring completed successfully for $succeeded repos"