#!/usr/bin/env bash
#  vim:ts=4:sts=4:sw=4:et
#
#  Author: Hari Sekhon
#  Date: 2019-12-06 11:10:26 +0000 (Fri, 06 Dec 2019)
#
#  https://github.com/HariSekhon/DevOps-Bash-tools
#
#  License: see accompanying Hari Sekhon LICENSE file
#
#  If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
#  https://www.linkedin.com/in/HariSekhon
#

# Strict mode: abort on command failure, on unset variables and on a failure
# in any stage of a pipeline.
set -eu -o pipefail

# Setting DEBUG to any non-empty value enables command tracing.
[ -n "${DEBUG:-}" ] && set -x

# Absolute directory containing this script, used to locate the shared
# library and the sibling script regardless of the caller's working directory.
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# shellcheck disable=SC1090,SC1091
. "$srcdir/lib/utils.sh"

# shellcheck disable=SC2034,SC2154
usage_description="
Get row counts for all Hive tables in all databases via beeline

TSV Output format:

<database>.<table>     <row_count>

FILTER environment variable will restrict to matching fully qualified tables (<db>.<table>)

Tested on Hive 1.1.0 on CDH 5.10, 5.16

For a better version written in Python see DevOps Python tools repo:

    https://github.com/HariSekhon/DevOps-Python-tools

you will need to comment out / remove '-o pipefail' below to skip errors if you aren't authorized to use
any of the databases to avoid the script exiting early upon encountering any authorization error such:

ERROR: AuthorizationException: User '<user>@<domain>' does not have privileges to access: default Default Hive database.*.*
"

# used by usage() in lib/utils.sh
# shellcheck disable=SC2034
usage_args="[<beeline_options>]"

# help_usage comes from lib/utils.sh; presumably it prints the usage text
# above and exits when a help switch is passed - confirm against lib/utils.sh.
help_usage "$@"

# Hand over to the sibling per-table runner, replacing this shell process.
# {db} and {table} are passed through literally; presumably
# hive_foreach_table.sh substitutes them for each table - confirm there.
# All command line arguments are forwarded unchanged.
exec "$srcdir/hive_foreach_table.sh" "SELECT COUNT(*) FROM {db}.{table}" "$@"