#!/bin/bash
#
# 4car - make saved imageboard thread pages self-contained.
#
# For every saved HTML page given on the command line, the full-size images
# it links to on i.4cdn.org, is<N>.4chan.org and img.fireden.net are
# downloaded into the page's companion directory ("<page>_files", i.e. the
# file name with a trailing ".html" removed and "_files" appended), and the
# page is rewritten in place so those links point at the local copies.
#
# Usage: 4car <file.html>...
# Requires: wget, sponge, file (plus grep, sed, sort).
#
# Provenance: commit 491313a1a372d85f0b39a69dd87f203c8787daeb
# ("Initial commit", Jake Mannens, 2019-09-02).

# Print the command-line synopsis on stderr. The caller decides how to exit.
print_usage() {
  echo "Usage: $0 <file.html>..." >&2
}

# Print a diagnostic message ($1) on stderr.
error() {
  echo "$1" >&2
}

# Check that the command named by $1 can be found.
# Returns 0 when available; otherwise reports the problem and returns 1.
assert_command() {
  # `command -v` is the shell builtin lookup; both streams are silenced so
  # only our own message is shown.
  if ! command -v "$1" > /dev/null 2>&1; then
    error "$0 requires $1 to be installed"
    return 1
  fi
}

# Tee-like filter: copy every stdin line to stdout unchanged and also show
# it, indented by one space, on stderr so the user sees progress while
# stdout is consumed by the next pipeline stage.
split_pipe() {
  local line
  # IFS= and -r keep whitespace and backslashes intact; the `|| [[ -n ]]`
  # part handles a final line without a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    printf ' %s\n' "$line" >&2
    printf '%s\n' "$line"
  done
}

# Extract the supported full-size image URLs from stdin, one per line.
# Thumbnails (e.g. "123s.jpg") do not match because the file name must be
# digits followed directly by the extension.
filter_image_urls() {
  # ERE avoids the stray `\/` escapes, which newer GNU grep versions warn
  # about.
  grep -oE \
    -e 'https?://i\.4cdn\.org/[a-z0-9]+/[0-9]+\.[a-zA-Z0-9]+' \
    -e 'https?://is[0-9]+\.4chan\.org/[a-z0-9]+/[0-9]+\.[a-zA-Z0-9]+' \
    -e 'https?://img\.fireden\.net/[a-z0-9]+/image/[0-9]+/[0-9]+/[0-9]+\.[a-zA-Z0-9]+'
}

# Turn a list of image URLs into a sed script that rewrites them to local
# links.
#
# Arguments: $1 - directory the images were downloaded into
# Input:     URLs on stdin, one per line
# Output:    one "s;<url-regex>;./<dir-name>/<file-name>;g" line per URL
#            whose file actually exists in $1
build_rewrite_script() {
  local dir=$1
  local link url name

  # The link is relative to the HTML file, so only the directory's own name
  # is used. Escape the characters that are special in a sed replacement
  # (backslash, ampersand) and the ';' delimiter.
  link=$(printf '%s\n' "${dir##*/}" | sed 's/[\&;]/\\&/g')

  while IFS= read -r url || [[ -n "$url" ]]; do
    [[ -n "$url" ]] || continue
    name=${url##*/}

    # A URL appearing in wget's log does not guarantee that a file was
    # saved (it may also show up in error lines), so only rewrite links
    # whose target exists.
    if [[ -f "$dir/$name" ]]; then
      # '.' is the only regex metacharacter filter_image_urls lets through;
      # "[.]" matches it literally without needing backslashes.
      printf 's;%s;./%s/%s;g\n' "${url//./[.]}" "$link" "$name"
    fi
  done
}

# Entry point: validate the environment and arguments, then process each
# page. Returns 0 on success, 1 on misuse, a missing dependency, or when at
# least one argument had to be skipped.
main() {
  local dep file dir
  local status=0

  # Check dependencies
  for dep in sponge wget file; do
    assert_command "$dep" || return 1
  done

  if [[ -z "${1-}" ]]; then
    print_usage
    return 1
  fi

  for file in "$@"; do
    dir="${file%.html}_files"

    if [[ ! -f "$file" ]] ||
      [[ "$(file -b --mime-type -- "$file")" != text/html ]]; then
      error "Not a valid webpage file: $file"
      status=1
      continue
    elif [[ ! -d "$dir" ]]; then
      error "Not a valid webpage directory: $dir"
      status=1
      continue
    fi

    echo "Downloading images for $file"

    # Single pass: collect the image URLs, download them, derive a sed
    # script from the URLs wget reports, and apply it to the page. sponge
    # buffers the whole result before overwriting the file that sed is
    # still reading.
    filter_image_urls < "$file" |
      sort -u |
      wget -i - -P "$dir" -nv 2>&1 |
      split_pipe |
      filter_image_urls |
      build_rewrite_script "$dir" |
      sed -f - -- "$file" |
      sponge "$file"
  done

  return "$status"
}

# Last command in the file, so the script's exit status is main's status.
main "$@"