#!/usr/bin/env bash
#  vim:ts=4:sts=4:sw=4:et
#
#  Author: Hari Sekhon
#  Date: 2019-12-06 11:10:26 +0000 (Fri, 06 Dec 2019)
#
#  https://github.com/harisekhon/bash-tools
#
#  License: see accompanying Hari Sekhon LICENSE file
#
#  If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
#  https://www.linkedin.com/in/harisekhon
#

# Script to more easily connect to HiveServer2 without having to specify the big JDBC connection string and all options like kerberos principal, ssl etc
#
# Tested on Hive 1.1.0 on CDH 5.10
#
# Environment variables read by this script:
#
#   HIVESERVER2_HOST            HiveServer2 address (FQDN) - prompted for if unset or empty
#   HIVESERVER2_PORT            HiveServer2 port - defaults to 10000
#   HIVESERVER2_SSL             if non-empty, adds ssl=true to the JDBC URL
#                               (also added if /etc/hive/conf/hive-site.xml appears to enable hive.server2.use.SSL)
#   BEELINE_OPTS                extra JDBC URL options, appended to the URL after a ';'
#   HIVE_HA / HIVE_ZOOKEEPERS   if either is non-empty, the adjacent beeline_zk.sh is exec'd instead
#   DEBUG                       if non-empty, enables shell tracing for the whole script
#   VERBOSE                     if non-empty, enables shell tracing just before beeline is launched

# useful options for scripting:
#
#   --silent=true
#   --outputformat=tsv2     (tsv is deprecated and single quotes results, tsv2 is recommended and cleaner)
#
# See adjacent hive_*.sh scripts for slightly better versions of these quick command line examples, including better escaping
#
# list all databases
#
#   ./beeline.sh --silent=true --outputformat=tsv2 -e 'show databases' | tail -n +2
#
# list all tables in all databases
#
#   opts="--silent=true --outputformat=tsv2"; ./beeline.sh $opts -e 'show databases' | tail -n +2 | while read db; do ./beeline.sh $opts -e "show tables from $db" | sed "s/^/$db./"; done
#
# row counts of all tables in all databases:
#
#   opts="--silent=true --outputformat=tsv2"; ./beeline.sh $opts -e 'show databases' | tail -n +2 | while read db; do ./beeline.sh $opts -e "show tables from $db" | sed "s/^/$db./"; done | tail -n +2 | while read table; do printf "%s\t" "$table"; ./beeline.sh $opts -e "select count(*) from $table" | tail -n +2; done | tee row_counts_hive.tsv
#
# See also:
#
#   https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients#HiveServer2Clients-Usinghive-site.xmltoautomaticallyconnecttoHiveServer2
#
#   hive_foreach_table.py / impala_foreach_table.py and similar tools in DevOps Python Tools repo - https://github.com/harisekhon/devops-python-tools

set -euo pipefail
[ -n "${DEBUG:-}" ] && set -x
srcdir="$(dirname "$0")"

# HA / ZooKeeper discovery is handled by the sibling script, hand over all arguments to it
if [ -n "${HIVE_HA:-}" ] || [ -n "${HIVE_ZOOKEEPERS:-}" ]; then
    exec "$srcdir/beeline_zk.sh" "$@"
fi

# not listed in hive-site.xml on edge nodes nor https://github.com/apache/hive/blob/master/data/conf/hive-site.xml
# must specify in your environment / .bashrc or similar
if [ -z "${HIVESERVER2_HOST:-}" ]; then
    # diagnostics go to stderr so they never end up in query output that is being piped to other commands
    echo "HIVESERVER2_HOST environment variable not set" >&2
    # don't let 'set -e' kill the script silently on EOF - the explicit check below reports the problem instead
    read -r -p "Enter HiveServer2 address (FQDN): " HIVESERVER2_HOST || :
    if [ -z "${HIVESERVER2_HOST:-}" ]; then
        echo "HiveServer2 address not provided, aborting" >&2
        exit 1
    fi
fi

# extra JDBC URL options, each one prefixed by ';' so they can be appended straight on to the URL
opts=""
if [ -n "${BEELINE_OPTS:-}" ]; then
    opts="$opts;$BEELINE_OPTS"
fi

# presumably disabled because 'grep -q' can exit on its first match while the upstream grep is still writing,
# which pipefail would report as a failure of the whole pipeline
set +o pipefail
# xq -r < hive-site.xml '.configuration.property[] | select(.name == "hive.server2.use.SSL") | .value'
if [ -n "${HIVESERVER2_SSL:-}" ] ||
   grep -A1 'hive.server2.use.SSL' /etc/hive/conf/hive-site.xml 2>/dev/null | grep -q true; then
    opts="$opts;ssl=true"
    # works without this but enable if you need
    #set +o pipefail
    #trust_file="$(find /opt/cloudera/security/jks -maxdepth 1 -name '*-trust.jks' 2>/dev/null | head -n1)"
    #set -o pipefail
    #if [ -f "$trust_file" ]; then
    #    opts="$opts;sslTrustStore=$trust_file"
    #fi
fi

# realm for the kerberos principal is the host address with everything up to and including the first dot removed
# (an address containing no dot is used unchanged)
realm="${HIVESERVER2_HOST#*.}"

# HiveServer2 port, overridable via the environment
port="${HIVESERVER2_PORT:-10000}"

[ -n "${VERBOSE:-}" ] && set -x
exec beeline -u "jdbc:hive2://$HIVESERVER2_HOST:$port/default;principal=hive/_HOST@${realm}${opts}" "$@"