summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-x4car65
1 files changed, 65 insertions, 0 deletions
diff --git a/4car b/4car
new file mode 100755
index 0000000..0417d21
--- /dev/null
+++ b/4car
@@ -0,0 +1,65 @@
#!/bin/bash

#######################################
# Print a one-line usage synopsis and abort the script.
# The synopsis goes to stderr and the exit status is non-zero, because this
# is only reached when the script was invoked incorrectly.
# Globals:   none
# Arguments: none
# Outputs:   usage line on stderr
# Returns:   never returns; exits the shell with status 1
#######################################
print_usage() {
  printf 'Usage: %s <html file>\n' "$0" >&2
  exit 1
}

#######################################
# Write a diagnostic message to stderr.
# printf is used instead of echo so that a message such as "-n" or "-e" is
# printed verbatim rather than being taken as an echo option.
# Arguments: $1 - message text
# Outputs:   the message followed by a newline, on stderr
#######################################
error() {
  printf '%s\n' "$1" >&2
}

#######################################
# Abort the script unless the named command is available.
# Arguments: $1 - command name to look up
# Outputs:   on failure, a diagnostic via error()
# Returns:   0 if the command is found; otherwise exits the shell with 1
#######################################
assert_command() {
  # Redirection order matters: stdout must be pointed at /dev/null first so
  # that 2>&1 duplicates the already-redirected descriptor. 'command -v' is
  # the portable lookup and, unlike 'which', is a shell builtin.
  if ! command -v "$1" > /dev/null 2>&1; then
    error "$0 requires $1 to be installed"
    exit 1
  fi
}

#######################################
# Copy stdin to stdout unchanged while also showing each line to the user.
# The user-visible copy is prefixed with one space and written to the
# controlling terminal; when no terminal can be opened it goes to stderr
# instead, so the pipeline still works in non-interactive runs.
# Arguments: none (reads stdin)
# Outputs:   every input line on stdout; an indented copy on /dev/tty or
#            stderr
#######################################
split_pipe() {
  local line use_tty=0

  # A permission test is not enough: /dev/tty can exist yet fail to open when
  # the process has no controlling terminal, so probe by actually opening it.
  if { : >> /dev/tty; } 2> /dev/null; then
    use_tty=1
  fi

  # IFS= and -r keep leading whitespace and backslashes intact; the extra
  # -n test forwards a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if (( use_tty )); then
      printf ' %s\n' "$line" >> /dev/tty
    else
      printf ' %s\n' "$line" >&2
    fi
    printf '%s\n' "$line"
  done
}

#######################################
# Extract full-size image URLs from the text on stdin.
# Recognised hosts: i.4cdn.org, is<N>.4chan.org and img.fireden.net.
# Arguments: none (reads stdin)
# Outputs:   one matching URL per line on stdout, in order of appearance
# Returns:   grep's status (1 when nothing matched)
#######################################
filter_image_urls() {
  # Extended regex, so '/' needs no escaping; a backslash before '/' is a
  # stray escape that newer GNU grep releases warn about on stderr.
  grep -oE \
    -e 'https?://i\.4cdn\.org/[a-z0-9]+/[0-9]+\.[a-zA-Z0-9]+' \
    -e 'https?://is[0-9]+\.4chan\.org/[a-z0-9]+/[0-9]+\.[a-zA-Z0-9]+' \
    -e 'https?://img\.fireden\.net/[a-z0-9]+/image/[0-9]+/[0-9]+/[0-9]+\.[a-zA-Z0-9]+'
}

# Check dependencies
assert_command sponge
assert_command wget
assert_command file

if [[ -z "$1" ]]; then
  print_usage
fi

# For every saved page given on the command line: download the full-size
# images it links to into the page's "<name>_files" directory and rewrite the
# links in the page to point at the local copies.
for file in "$@"; do
  dir="${file%.html}_files"

  # Links inside the page are resolved relative to the page itself, so only
  # the directory's own name belongs in the rewritten URL, not the path that
  # was typed to reach it. Backslash-escape the characters that are special
  # inside a ';'-delimited sed replacement.
  link_dir="$(printf '%s\n' "${dir##*/}" | sed 's/[\\&;]/\\&/g')"

  if [[ ! -f "$file" ]] || [[ "$(file -b --mime-type -- "$file")" != text/html ]]; then
    error "Not a valid webpage file: $file"
    continue
  elif [[ ! -d "$dir" ]]; then
    error "Not a valid webpage directory: $dir"
    continue
  fi

  echo "Downloading images for $file"

  # Single pipeline: download every image and rewrite the HTML in one pass.
  #   1. collect the unique image URLs found in the page
  #   2. let wget fetch them into $dir; its log (stdout+stderr) is echoed to
  #      the user by split_pipe and scanned again for the URLs it mentions
  #   3. turn each such URL into one sed substitution command
  #   4. apply the generated script to the page; sponge buffers all output
  #      before writing, which makes rewriting "$file" in place safe
  filter_image_urls < "$file" |
    sort -u |
    wget -i - -P "$dir" -nv |&
    split_pipe |
    filter_image_urls |
    while IFS= read -r url; do
      # filter_image_urls only emits letters, digits, ':', '/' and '.', so
      # '.' is the sole regex metacharacter to neutralise; '[.]' does that
      # without introducing backslashes.
      printf 's;%s;./%s/%s;g\n' "${url//./[.]}" "$link_dir" "${url##*/}"
    done |
    sed -f - -- "$file" |
    sponge "$file"
done