4 years ago · 7de020941a
parent e3f0d7f4ca
commit 7de020941a
1 changed file with 10 additions and 1 deletion
--- a/impala_tables_column_counts.sh
+++ b/impala_tables_column_counts.sh
@@ -25,8 +25,14 @@ srcdir="$(dirname "${BASH_SOURCE[0]}")"
 usage_description="
 Print each table's number of columns

+Output Format:
+
+<db>.<table>    <column_count>
+
+
 FILTER environment variable will restrict to matching fully qualified tables (<db>.<table>)

+
 Caveats:

    Hive is more reliable as Impala breaks on some table metadata definitions where Hive doesn't
@@ -34,8 +40,10 @@ Caveats:
    Impala is faster than Hive for the first ~1000 tables but then slows down
    so if you have a lot of tables I recommend you use the Hive version of this instead

+
 Tested on Impala 2.7.0, 2.12.0 on CDH 5.10, 5.16 with Kerberos and SSL

+
 For more documentation see the comments at the top of impala_shell.sh

 For a better version written in Python see DevOps Python tools repo:
@@ -65,5 +73,6 @@ while read -r db table; do
        echo "UNKNOWN"
    fi |
    awk '{if(NF == 2){print}}' |
-    wc -l
+    wc -l |
+    sed 's/[[:space:]]*//g'
 done