#!/usr/bin/env bash
#  vim:ts=4:sts=4:sw=4:et
#
#  Author: Hari Sekhon
#  Date: 2023-04-24 22:04:05 +0100 (Mon, 24 Apr 2023)
#
#  https://github.com/HariSekhon/DevOps-Bash-tools
#
#  License: see accompanying Hari Sekhon LICENSE file
#
#  If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
#  https://www.linkedin.com/in/HariSekhon
#

# Word frequency filter: reads text from the given files (or stdin when no
# arguments are given) and prints "<count> <word>" lines to stdout, most
# frequent words first. Words are lowercased before counting.

set -euo pipefail
[ -n "${DEBUG:-}" ] && set -x
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# shellcheck disable=SC1090,SC1091
. "$srcdir/lib/utils.sh"

# shellcheck disable=SC2034,SC2154
usage_description="
Creates a word frequency list ranked by most used words at the top

Works like a standard unix filter program - pass in stdin or give it a filename, and outputs to stdout, so you can continue to pipe or redirect to a file as usual
"

# used by usage() in lib/utils.sh
# shellcheck disable=SC2034
usage_args="[<files>]"

# help_usage comes from lib/utils.sh - presumably prints the usage above and
# exits when a help switch is passed (verify against lib/utils.sh)
help_usage "$@"

#min_args 1 "$@"

#filename="$1"

# let interactive users know why the script appears to hang when run without args
if [ $# -eq 0 ]; then
    echo "Reading from stdin" >&2
fi

#output_file="$filename.word_frequency.txt"

# Pipeline stages:
#   1. cat  - one of the few legit uses of cat: tr can't take filename args, and
#             cat "$@" transparently falls back to stdin when there are no args
#   2. tr   - put every word on its own line, splitting on ANY whitespace
#             (spaces, tabs, carriage returns from CRLF files...) rather than
#             only on literal spaces, and squeezing runs of separators
#   3. sed  - drop empty / whitespace-only lines (e.g. from leading whitespace)
#             Punctuation-only tokens are deliberately kept because sometimes
#             you want to see the occurrence of emojis in WhatsApp chats;
#             to drop them too, add this sed command:  /^[^[:alnum:]]*$/d
#   4. tr   - lowercase so that 'The' and 'the' are counted as the same word
#   5. sort | uniq -c - group identical words and prefix each with its count
#   6. sort - order numerically by count, highest first
cat "$@" |
tr -s '[:space:]' '\n' |
sed '/^[[:space:]]*$/d' |
tr '[:upper:]' '[:lower:]' |
sort |
uniq -c |
sort -k1nr
# > "$output_file"

#head -n "$LINES" "$output_file"