about | summary | refs | log | tree | commit | diff
path: root/bin/htmlurls
diff options
context:
space:
mode:
author Tom Ryder <tom@sanctum.geek.nz> 2016-06-18 14:03:41 +1200
committer Tom Ryder <tom@sanctum.geek.nz> 2016-06-18 14:03:41 +1200
commit c04d328b1a04cd637762c21c5d6d9425bf1ad46d (patch)
tree 735c57c25638270a2bb5aa86dcb9ab59bc3b47eb /bin/htmlurls
parent Commenting for urlcheck (diff)
download dotfiles-c04d328b1a04cd637762c21c5d6d9425bf1ad46d.tar.gz
dotfiles-c04d328b1a04cd637762c21c5d6d9425bf1ad46d.zip
Nice comments for URLs scripts
Diffstat (limited to 'bin/htmlurls')
-rwxr-xr-x bin/htmlurls 17
1 files changed, 17 insertions, 0 deletions
diff --git a/bin/htmlurls b/bin/htmlurls
index b99667de..fc13bac8 100755
--- a/bin/htmlurls
+++ b/bin/htmlurls
@@ -1,6 +1,23 @@
#!/usr/bin/env bash
+
+#
+# Extract <a href="..."> URLs from an HTML document or documents.
+#
+# Author: Tom Ryder <tom@sanctum.geek.nz>
+# Copyright: 2016
+# License: Public domain
+#
+
+# Check we have the programs we need
hash pup || exit
+
+# Emit the content of the args, or stdin
cat -- "${@:-/dev/stdin}" | ## shellcheck disable=SC2002
+
+# Pipe it through a pup filter to get all the values of the a href elements
pup 'a attr{href}' |
+
+# Sort it; use a plain locale to force a proper bytewise sort so that
+# punctuation is not ignored
LANG=C.UTF-8 sort | # skipping punctuation in a locale sort is unacceptable
uniq