added http_duplicate_urls.sh
parent
7bd725ec64
commit
c47f05e5da
@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2023-12-31 17:45:56 +0000 (Sun, 31 Dec 2023)
#
# https://github.com/HariSekhon/DevOps-Bash-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#

# Strict mode: abort on command failure, on unset variables, and when any
# stage of a pipeline fails
set -euo pipefail
# Trace every command when DEBUG is set to a non-empty value
[ -n "${DEBUG:-}" ] && set -x
# Absolute path of the directory containing this script, used to locate lib/
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Shared helper library - help_usage and min_args called below are not defined
# in this file, so presumably they come from here
# shellcheck disable=SC1090,SC1091
. "$srcdir/lib/utils.sh"

# Not referenced again in this script - presumably read by usage() in
# lib/utils.sh, like usage_args below
# shellcheck disable=SC2034,SC2154
usage_description="
Finds duplicate URLs in a given web page
"

# used by usage() in lib/utils.sh
# shellcheck disable=SC2034
usage_args="https://www.domain.com"

# NOTE(review): presumably prints the usage and exits when a help switch is
# among the arguments - confirm against lib/utils.sh
help_usage "$@"

# NOTE(review): presumably exits with usage unless at least 1 argument was
# given - confirm against lib/utils.sh
min_args 1 "$@"

# The web page to scan for duplicate URLs
url="$1"

# Reads page content on stdin, extracts every http(s) URL and prints the ones
# that occur more than once in 'uniq -c' format (count then URL), ordered by
# ascending count
#
# Prints nothing and returns 0 when there are no URLs or no duplicates
# Returns non-zero only if one of the tools genuinely fails
find_duplicate_urls(){
    # A URL is the scheme followed by everything up to whitespace, a quote or
    # an angle bracket
    #
    # grep exits 1 when nothing matches, which under pipefail would fail the
    # whole pipeline - tolerate exactly that status and let real errors
    # (status 2 and above) through
    #
    # The final awk keeps only the lines whose count field is greater than 1
    { grep -Eo 'https?://[^[:space:]"'"'"'<>]+' || [ $? -eq 1 ]; } |
    sort |
    uniq -c |
    sort -k1n |
    awk '$1 > 1'
}

# -s  no progress meter
# -S  but still print errors
# -f  fail on HTTP errors instead of parsing the error page
# -L  follow redirects to reach the real page
#
# Not masked with '|| :' so that a failed fetch fails the script rather than
# silently reporting that there are no duplicates
curl -sSfL "$url" |
find_duplicate_urls
|
Loading…
Reference in New Issue