#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
# shellcheck disable=SC2016
#
# Author: Hari Sekhon
# Date: 2019-12-10 11:33:52 +0000 (Tue, 10 Dec 2019)
#
# https://github.com/harisekhon/bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/harisekhon
#

# Abort on any unhandled command failure (-e) and on use of unset variables (-u).
# 'pipefail' is intentionally left off - the usage text further down explains why.
set -eu # -o pipefail

# Trace every command when DEBUG is set to a non-empty value.
[ -n "${DEBUG:-}" ] && set -x

# Directory containing this script; sibling scripts and lib/ are resolved relative to it.
srcdir="$(dirname "${BASH_SOURCE[0]}")"

# Shared helpers - presumably where help_usage and min_args (called further down) are defined.
# shellcheck disable=SC1090
. "$srcdir/lib/utils.sh"

# Help text for this script. It is not read anywhere in this file - presumably
# it is consumed by the usage helpers sourced from lib/utils.sh.
#
# The value is built from two adjacent quoted strings that the shell joins:
#   - a double-quoted description, and
#   - a single-quoted sample error dump, single-quoted so that the '$' in the
#     Java class names (e.g. ...$ClientNamenodeProtocol$2...) stays literal
#     instead of being expanded as a shell variable.
# shellcheck disable=SC2034,SC2154
usage_description="
Run SQL query against all Impala tables in all databases via impala-shell

Query can contain {db} and {table} placeholders which will be replaced for each table

FILTER environment variable will restrict to matching fully qualified tables (<db>.<table>)


Tested on Impala 2.7.0, 2.12.0 on CDH 5.10, 5.16 with Kerberos and SSL


For more documentation see the comments at the top of impala_shell.sh

For a better version written in Python see DevOps Python tools repo:

https://github.com/harisekhon/devops-python-tools

'set -o pipefail' is not enabled in order to skip authorization errors such as that documented in impala_list_tables.sh
and also ignore errors from the 'select count(*)' in the loop as Impala often has metadata errors such as:

ERROR: AnalysisException: Failed to load metadata for table: '<table>'
CAUSED BY: TableLoadingException: Unsupported type 'void' in column '<column>' of table '<table>'

============================================================================ #
"'
WARNINGS: Disk I/O error: Failed to open HDFS file hdfs://nameservice1/user/hive/warehouse/<database>.db/<table>/1234a5678b90cd1-ef23a45678901234_5678901234_data.10.parq
Error(2): No such file or directory
Root cause: RemoteException: File does not exist: /user/hive/warehouse/<database>.db/<table>/1234a5678b90cd1-ef23a45678901234_5678901234_data.10.parq
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:66)
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:56)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:2157)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2127)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2040)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:583)
at org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.getBlockLocations(AuthorizationProviderProxyClientProtocol.java:94)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:377)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2278)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2274)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2272)
'

# Argument synopsis: a mandatory quoted query followed by optional impala-shell options.
# used by usage() in lib/utils.sh
# shellcheck disable=SC2034
usage_args="\"<query>\" [<impala_shell_options>]"

# Helper from outside this file - presumably prints the usage text and exits
# when a help switch is among the arguments (confirm in lib/utils.sh).
help_usage "$@"

# Helper from outside this file - presumably rejects invocations with fewer
# than one argument (confirm in lib/utils.sh).
min_args 1 "$@"

# The first argument is the SQL template; whatever remains in "$@" after the
# shift is forwarded unchanged to the sibling scripts called further down.
query_template="$1"
# '|| :' keeps a failed shift from aborting the script under 'set -e'.
shift || :

# exit the loop subshell if you Control-C
trap 'exit 130' INT

# Run the query template once per table.
#
# impala_list_tables.sh is given the remaining command line arguments and its
# output is read line by line: the first whitespace-separated field becomes
# $db and the rest of the line becomes $table.
#
# For every table:
#   1. print "<db>.<table>" followed by a tab (no newline), so that the query
#      output which follows lands on the same line
#   2. substitute every {db} / {table} placeholder in the template with the
#      backtick-quoted database / table name
#   3. run the query via impala_shell.sh, forwarding the same extra arguments
#
# NOTE(review): the file enables 'set -e' near the top, so a non-zero exit from
# impala_shell.sh would end this loop at the first failing table, which seems
# at odds with the usage text about ignoring per-table errors - confirm the
# intended behaviour before changing it.
"$srcdir/impala_list_tables.sh" "$@" |
while read -r db table; do
    printf '%s.%s\t' "$db" "$table"
    query="${query_template//\{db\}/\`$db\`}"
    query="${query//\{table\}/\`$table\`}"
    # stdin is redirected from /dev/null so the child process can never consume
    # the table list that this loop is reading from the pipe
    "$srcdir/impala_shell.sh" --quiet -Bq "USE \`$db\`; $query" "$@" </dev/null
done