#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2019-12-06 11:10:26 +0000 (Fri, 06 Dec 2019)
#
# https://github.com/harisekhon/bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/harisekhon
#
# Script to more easily connect to HiveServer2 without having to specify the big JDBC connection string and all options like Kerberos principal, SSL etc.
#
# Tested on Hive 1.1.0 on CDH 5.10
# useful options for scripting:
#
# --silent=true
# --outputformat=tsv2 (tsv is deprecated and single-quotes results, tsv2 is recommended and cleaner)
#
# See adjacent hive_*.sh scripts for slightly better versions of these quick command line examples, including better escaping
#
# list all databases
#
# ./beeline.sh --silent=true --outputformat=tsv2 -e 'show databases' | tail -n +2
#
# list all tables in all databases
#
# opts="--silent=true --outputformat=tsv2"; ./beeline.sh $opts -e 'show databases' | tail -n +2 | while read db; do ./beeline.sh $opts -e "show tables from $db" | sed "s/^/$db./"; done
#
# row counts of all tables in all databases:
#
# opts="--silent=true --outputformat=tsv2"; ./beeline.sh $opts -e 'show databases' | tail -n +2 | while read db; do ./beeline.sh $opts -e "show tables from $db" | sed "s/^/$db./"; done | tail -n +2 | while read table; do printf "%s\t" "$table"; ./beeline.sh $opts -e "select count(*) from $table" | tail -n +2; done | tee row_counts_hive.tsv
#
# See also:
#
# https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients#HiveServer2Clients-Usinghive-site.xmltoautomaticallyconnecttoHiveServer2
#
# hive_foreach_table.py / impala_foreach_table.py and similar tools in DevOps Python Tools repo - https://github.com/harisekhon/devops-python-tools
# strict mode: abort on command failure, on use of an unset variable,
# and when any stage of a pipeline fails
set -euo pipefail

# setting DEBUG to any non-empty value traces every command from here on
if [ -n "${DEBUG:-}" ]; then
    set -x
fi

# directory part of the path this script was invoked as
srcdir="$(dirname "$0")"
# If either HIVE_HA or HIVE_ZOOKEEPERS is set to a non-empty value, hand over
# to the adjacent beeline_zk.sh: exec replaces this process, so nothing below
# runs in that case, and all command line arguments are passed through as-is
if [ -n "${HIVE_HA:-}" ] || [ -n "${HIVE_ZOOKEEPERS:-}" ]; then
    exec "$srcdir/beeline_zk.sh" "$@"
fi
# not listed in hive-site.xml on edge nodes nor https://github.com/apache/hive/blob/master/data/conf/hive-site.xml
# must specify in your environment / .bashrc or similar
#
# Falls back to prompting for the address when HIVESERVER2_HOST is unset or empty
if [ -z "${HIVESERVER2_HOST:-}" ]; then
    # diagnostics go to stderr so that stdout stays clean for callers piping query results
    echo "HIVESERVER2_HOST environment variable not set" >&2
    # '|| :' - read fails on EOF; let the explicit check below report that
    # instead of 'set -e' terminating the script without any message
    read -r -p "Enter HiveServer2 address (FQDN): " HIVESERVER2_HOST || :
    if [ -z "${HIVESERVER2_HOST:-}" ]; then
        echo "No HiveServer2 address given, aborting" >&2
        exit 1
    fi
fi
# Extra JDBC URL options accumulated in a single string, each one prefixed
# with ';'. Starts out empty, or as ";$BEELINE_OPTS" when the caller supplies
# their own options through the BEELINE_OPTS environment variable
opts=""
if [ -n "${BEELINE_OPTS:-}" ]; then
    opts=";$BEELINE_OPTS"
fi
# Succeeds (returns 0) when the connection should use SSL, which is when either:
#   - HIVESERVER2_SSL is set to any non-empty value, or
#   - hive-site.xml contains 'true' on the hive.server2.use.SSL line or on the line right after it
#
# $1 - path to hive-site.xml (optional, defaults to /etc/hive/conf/hive-site.xml)
#
# Implemented without a pipeline so 'set -o pipefail' never needs to be switched off for it
#
# xq equivalent of the lookup:
# xq -r < hive-site.xml '.configuration.property[] | select(.name == "hive.server2.use.SSL") | .value'
hiveserver2_ssl_enabled() {
    local hive_site="${1:-/etc/hive/conf/hive-site.xml}"
    local property_lines
    if [ -n "${HIVESERVER2_SSL:-}" ]; then
        return 0
    fi
    # -F matches the property name literally (its dots are not regex wildcards)
    # a missing or unreadable file just means SSL is not configured there, hence 2>/dev/null
    property_lines="$(grep -F -A1 'hive.server2.use.SSL' "$hive_site" 2>/dev/null)" || return 1
    [[ "$property_lines" == *true* ]]
}

if hiveserver2_ssl_enabled; then
    opts="$opts;ssl=true"
    # works without this but enable if you need
    #set +o pipefail
    #trust_file="$(find /opt/cloudera/security/jks -maxdepth 1 -name '*-trust.jks' 2>/dev/null | head -n1)"
    #set -o pipefail
    #if [ -f "$trust_file" ]; then
    #    opts="$opts;sslTrustStore=$trust_file"
    #fi
fi
# Prints the HiveServer2 JDBC URL, including the hive service Kerberos principal.
#   $1 - HiveServer2 host (FQDN)
#   $2 - extra JDBC options to append, each already prefixed with ';' (optional)
# The realm part of the principal is whatever follows the first dot of $1,
# ie. the host's domain is used as the Kerberos realm.
# The port is HIVESERVER2_PORT if set and non-empty, otherwise 10000.
hiveserver2_jdbc_url() {
    local host="$1"
    local jdbc_opts="${2:-}"
    local realm="${host#*.}"
    printf '%s\n' "jdbc:hive2://${host}:${HIVESERVER2_PORT:-10000}/default;principal=hive/_HOST@${realm}${jdbc_opts}"
}

jdbc_url="$(hiveserver2_jdbc_url "$HIVESERVER2_HOST" "$opts")"

# setting VERBOSE to any non-empty value traces the final beeline command line
[ -n "${VERBOSE:-}" ] && set -x
# all script arguments are passed straight through to beeline
beeline -u "$jdbc_url" "$@"