#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
# false positives
# shellcheck disable=SC2178,SC2128
#
# Author: Hari Sekhon
# Date: 2019-03-05 18:18:13 +0000 (Tue, 05 Mar 2019)
#
# https://github.com/harisekhon/bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/harisekhon
#
# Abort on any failing command, on use of an unset variable, and on a failure
# anywhere inside a pipeline
set -euo pipefail

# Trace every command when $DEBUG is set to a non-empty value
# (an '&&' list is exempt from 'set -e', so an unset $DEBUG does not abort here)
[ -n "${DEBUG:-}" ] && set -x

# Absolute path of the directory holding this script, used to locate the library below
srcdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# shellcheck source=lib/utils.sh
. "$srcdir/lib/utils.sh"
# Prints an optional message followed by the usage text, then exits
#
# Arguments: $@ - optional message, printed followed by a blank line before the usage text
# Outputs:   message and usage text on stdout
# Exits:     always, with status 3
usage(){
    if [ -n "$*" ]; then
        echo "$@"
        echo
    fi
    # unquoted heredoc delimiter: ${0##*/} expands to the script's basename,
    # while \$PARTS is escaped so the variable name is printed literally
    cat <<EOF
Splits big file(s) in to \$PARTS parts (defaults to the number of CPU cores)
Useful for easy parallelizing things that don't easily lend themselves to parallelization like
anonymize.py from DevOps Python Tools which needs successive ordered anonymization rules
usage: ${0##*/} <files>
-p --parts Number of parts to split files in to (\$PARTS, defaults to number of CPU cores)
-h --help Show usage and exit
EOF
    exit 3
}
# Prints how many lines each part must hold so that a file of $1 lines is split
# in to at most $2 parts: ceiling division, and never less than 1 because
# 'split -l 0' is rejected
#
# Arguments: $1 - line count of the file, $2 - number of parts (positive integer)
# Outputs:   lines per part on stdout
split_lines_per_part(){
    local linecount="$1"
    local num_parts="$2"
    local lines_per_part=$(( (linecount + num_parts - 1) / num_parts ))
    if [ "$lines_per_part" -lt 1 ]; then
        lines_per_part=1
    fi
    echo "$lines_per_part"
}

if [ $# -eq 0 ]; then
    usage "no file arguments given"
fi

# Honour -h / --help wherever it appears, before doing any other work
for arg in "$@"; do
    case "$arg" in
        -h|--help)  usage
                    ;;
    esac
done

# check_bin and cpu_count are not defined in this file - presumably provided by lib/utils.sh
check_bin split
#check_bin parallel

# $PARTS from the environment is the default, falling back to the CPU count;
# -p / --parts below overrides both
parts="${PARTS:-}"
if [ -z "$parts" ]; then
    parts="$(cpu_count)"
fi

# Collect files in an array so names containing spaces or glob characters survive intact
files=()
while [ $# -gt 0 ]; do
    case "$1" in
        -p|--parts) # without this check a trailing -p dies on an unbound \$2 under 'set -u'
                    if [ $# -lt 2 ]; then
                        usage "option $1 requires an argument"
                    fi
                    parts="$2"
                    shift
                    ;;
      -h|--help|-*) usage
                    ;;
                 *) files+=("$1")
                    ;;
    esac
    shift
done

if ! [[ "$parts" =~ ^[1-9][0-9]*$ ]]; then
    usage "number of parts must be a positive integer, got '$parts'"
fi

# Also keeps the loop below from expanding an empty array, which older bash
# versions treat as an unbound variable under 'set -u'
if [ "${#files[@]}" -eq 0 ]; then
    usage "no file arguments given"
fi

# Loop invariant, so determine the platform once
os="$(uname -s)"

for filename in "${files[@]}"; do
    echo "Splitting $filename in to $parts parts"
    if [ "$os" = "Darwin" ]; then
        # This branch splits by line count rather than 'split -n', so convert
        # parts to lines per part. Kept in its own variable: overwriting
        # \$parts would corrupt the calculation for every subsequent file
        linecount="$(wc -l < "$filename" | awk '{print $1}')"
        lines_per_part="$(split_lines_per_part "$linecount" "$parts")"
        split -l "$lines_per_part" "$filename" "$filename."
    else
        split -d -n "l/$parts" "$filename" "$filename."
    fi
done