diff options
| author | Jake Mannens <jakem_5@hotmail.com> | 2019-09-02 15:55:47 +1000 |
|---|---|---|
| committer | Jake Mannens <jakem_5@hotmail.com> | 2019-09-02 15:55:47 +1000 |
| commit | 491313a1a372d85f0b39a69dd87f203c8787daeb (patch) | |
| tree | c9bf11b35e51456d1b3e3e69bbc5e82fc4c81f31 | |
Initial commit
| -rwxr-xr-x | 4car | 65 |
1 files changed, 65 insertions, 0 deletions
#!/bin/bash
#
# For every saved HTML page given on the command line, download the images it
# references on i.4cdn.org, isN.4chan.org or img.fireden.net into the page's
# "<name>_files" directory, then rewrite the page in place so that those
# references point at the downloaded copies.
#
# Requires: file, sponge, wget.

# Print a usage summary to stderr and abort with a non-zero status, so that a
# missing argument is reported as a failure to the caller.
print_usage() {
  echo "Usage: $0 <html file>" >&2
  exit 1
}

# Print the message in $1 to stderr.
error() {
  echo "$1" >&2
}

# Abort the script with status 1 unless the command named in $1 can be found.
assert_command() {
  # stdout is redirected first and stderr then duplicated onto it; the
  # reverse order would leave stderr attached to the terminal.
  if ! command -v "$1" > /dev/null 2>&1; then
    error "$0 requires $1 to be installed"
    exit 1
  fi
}

# Copy stdin to stdout line by line, echoing every line (indented by two
# spaces) to stderr as progress output. stderr is used rather than /dev/tty so
# the script also works without a controlling terminal.
split_pipe() {
  local line
  # The "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    printf '  %s\n' "$line" >&2
    printf '%s\n' "$line"
  done
}

# Print, one per line, every supported image URL found on stdin.
filter_image_urls() {
  grep -oe 'http[s]*:\/\/i\.4cdn\.org\/[a-z0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+' \
    -e 'http[s]*:\/\/is[0-9]\+\.4chan\.org\/[a-z0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+' \
    -e 'http[s]*:\/\/img\.fireden\.net\/[a-z0-9]\+\/image\/[0-9]\+\/[0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+'
}

# Read image URLs on stdin and print one sed command per URL that replaces the
# URL with "./<dir>/<last path component of the URL>".
# Arguments: $1 - directory name to use in the rewritten links
make_rewrite_script() {
  local link_dir url
  # Escape the characters that are special in a sed replacement (\ and &) as
  # well as ';', which serves as the delimiter of the generated s commands.
  link_dir=$(printf '%s\n' "$1" | sed 's/[\\&;]/\\&/g')
  while IFS= read -r url; do
    # URLs accepted by filter_image_urls contain no regex metacharacter other
    # than '.', so escaping the dots makes the pattern match literally.
    printf 's;%s;./%s/%s;g\n' "${url//./\\.}" "$link_dir" "${url##*/}"
  done
}

main() {
  local page dir

  # Check dependencies
  assert_command file
  assert_command sponge
  assert_command wget

  if [[ -z "${1:-}" ]]; then
    print_usage
  fi

  for page in "$@"; do
    dir="${page%.html}_files"

    if ! [[ -f "$page" ]] || [[ "$(file -b --mime-type -- "$page")" != text/html ]]; then
      error "Not a valid webpage file: $page"
      continue
    elif ! [[ -d "$dir" ]]; then
      error "Not a valid webpage directory: $dir"
      continue
    fi

    echo "Downloading images for $page"

    # Download and rewrite in a single pipeline: the URLs found in the page
    # are fed to wget, the URLs wget mentions in its log are turned into a sed
    # script, and that script is applied to the page. sponge buffers the
    # result so the page can be overwritten while sed is still reading it.
    # Links use the base name of $dir because they are resolved relative to
    # the page, which sits next to that directory.
    filter_image_urls < "$page" |
      sort -u |
      wget -i - -P "$dir" -nv 2>&1 |
      split_pipe |
      filter_image_urls |
      make_rewrite_script "${dir##*/}" |
      sed -f - -- "$page" |
      sponge "$page"
  done
}

main "$@"
