You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DevOps-Bash-tools/beeline.sh

85 lines
3.2 KiB
Bash

#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2019-12-06 11:10:26 +0000 (Fri, 06 Dec 2019)
#
# https://github.com/harisekhon/bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/harisekhon
#
# Script to more easily connect to HiveServer2 without having to specify the big JDBC connection string and all options like kerberos principal, ssl etc
#
# Tested on Hive 1.1.0 on CDH 5.10
# useful options for scripting:
#
# --silent=true
# --outputformat=tsv2 (tsv is deprecated and wraps results in single quotes; tsv2 is recommended and cleaner)
#
# See adjacent hive_*.sh scripts for slightly better versions of these quick command line examples, including better escaping
#
# list all databases
#
# ./beeline.sh --silent=true --outputformat=tsv2 -e 'show databases' | tail -n +2
#
# list all tables in all databases
#
# opts="--silent=true --outputformat=tsv2"; ./beeline.sh $opts -e 'show databases' | tail -n +2 | while read db; do ./beeline.sh $opts -e "show tables from $db" | sed "s/^/$db./"; done
#
# row counts of all tables in all databases:
#
# opts="--silent=true --outputformat=tsv2"; ./beeline.sh $opts -e 'show databases' | tail -n +2 | while read db; do ./beeline.sh $opts -e "show tables from $db" | sed "s/^/$db./"; done | tail -n +2 | while read table; do printf "%s\t" "$table"; ./beeline.sh $opts -e "select count(*) from $table" | tail -n +2; done | tee row_counts_hive.tsv
#
# See also:
#
# https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients#HiveServer2Clients-Usinghive-site.xmltoautomaticallyconnecttoHiveServer2
#
# hive_foreach_table.py / impala_foreach_table.py and similar tools in DevOps Python Tools repo - https://github.com/harisekhon/devops-python-tools
set -euo pipefail
if [ -n "${DEBUG:-}" ]; then
    set -x
fi
srcdir="$(dirname "$0")"

# Environment variables read by this script:
#
#   HIVE_HA / HIVE_ZOOKEEPERS - if either is non-empty, hand over to the adjacent beeline_zk.sh with the same args
#   HIVESERVER2_HOST          - HiveServer2 FQDN, prompted for if not set
#   HIVESERVER2_PORT          - HiveServer2 port, defaults to 10000
#   HIVESERVER2_SSL           - if non-empty, appends ssl=true to the JDBC url
#   BEELINE_OPTS              - extra ';' separated settings appended to the JDBC url
#   DEBUG                     - if non-empty, enables shell tracing from the start

if [ -n "${HIVE_HA:-}" ] || [ -n "${HIVE_ZOOKEEPERS:-}" ]; then
    exec "$srcdir/beeline_zk.sh" "$@"
fi

# not listed in hive-site.xml on edge nodes nor https://github.com/apache/hive/blob/master/data/conf/hive-site.xml
# must specify in your environment / .bashrc or similar
if [ -z "${HIVESERVER2_HOST:-}" ]; then
    # diagnostics go to stderr so that stdout stays clean for callers piping query results
    echo "HIVESERVER2_HOST environment variable not set" >&2
    # '|| :' lets EOF on stdin fall through to the explicit check below instead of a silent exit under 'set -e'
    read -r -p "Enter HiveServer2 address (FQDN): " HIVESERVER2_HOST || :
    if [ -z "${HIVESERVER2_HOST:-}" ]; then
        echo "HiveServer2 address not specified, aborting" >&2
        exit 1
    fi
fi

# 10000 was previously hard-coded, kept as the default
port="${HIVESERVER2_PORT:-10000}"

opts=""
if [ -n "${BEELINE_OPTS:-}" ]; then
    opts="$opts;$BEELINE_OPTS"
fi

# pipefail is switched off around the detection pipeline so that only the final 'grep -q' decides the result,
# not the exit status of the first grep
set +o pipefail
# xq -r < hive-site.xml '.configuration.property[] | select(.name == "hive.server2.use.SSL") | .value'
if [ -n "${HIVESERVER2_SSL:-}" ] ||
   grep -A1 'hive.server2.use.SSL' /etc/hive/conf/hive-site.xml 2>/dev/null |
   grep -q true; then
    opts="$opts;ssl=true"
    # works without this but enable if you need
    #set +o pipefail
    #trust_file="$(find /opt/cloudera/security/jks -maxdepth 1 -name '*-trust.jks' 2>/dev/null | head -n1)"
    #set -o pipefail
    #if [ -f "$trust_file" ]; then
    #    opts="$opts;sslTrustStore=$trust_file"
    #fi
fi
# restore the strict mode set at the top
set -o pipefail

# realm is taken to be everything after the first dot of the host name
# NOTE(review): a host name without a dot yields the whole host name as the realm, and the case is used as-is
#               whereas Kerberos realms are conventionally upper case - confirm against your KDC setup
realm="${HIVESERVER2_HOST#*.}"

# trace the final command so the full JDBC url in use is visible on stderr
set -x
beeline -u "jdbc:hive2://$HIVESERVER2_HOST:$port/default;principal=hive/_HOST@${realm}${opts}" "$@"