aboutsummaryrefslogtreecommitdiff
path: root/bin/hurl
diff options
context:
space:
mode:
author: Tom Ryder <tom@sanctum.geek.nz> 2016-06-22 10:13:10 +1200
committer: Tom Ryder <tom@sanctum.geek.nz> 2016-06-22 10:13:10 +1200
commit: c83042d6701f275bd792fb15c889e780deddb14a (patch)
tree: cc6f6615267ce1ce8aae52e59477d8cea4dd8e73 /bin/hurl
parent: Remove reference to GitHub dotfiles pages (diff)
download: dotfiles-c83042d6701f275bd792fb15c889e780deddb14a.tar.gz
dotfiles-c83042d6701f275bd792fb15c889e780deddb14a.zip
Rename binscripts more tersely
Diffstat (limited to 'bin/hurl')
-rwxr-xr-x bin/hurl 25
1 files changed, 25 insertions, 0 deletions
diff --git a/bin/hurl b/bin/hurl
new file mode 100755
index 00000000..23dc7dcc
--- /dev/null
+++ b/bin/hurl
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+#
+# Extract <a href="..."> URLs from an HTML document or documents.
+#
+# Author: Tom Ryder <tom@sanctum.geek.nz>
+# Copyright: 2016
+# License: Public domain
+#
+
+# Set a sensible locale so that sort(1) doesn't act dumbly
+LANG=C.UTF-8
+export LANG
+
+# Check we have the programs we need
+hash pup || exit
+
+# Emit the content of the args, or stdin
+cat -- "${@:-/dev/stdin}" | ## shellcheck disable=SC2002
+
+# Pipe it through a pup filter to get all the values of the a href elements
+pup 'a attr{href}' |
+
+# Sort it uniquely
+sort | uniq