diff options
| author | Jake Mannens <jakem_5@hotmail.com> | 2019-09-02 15:56:44 +1000 |
|---|---|---|
| committer | Jake Mannens <jakem_5@hotmail.com> | 2019-09-02 15:56:44 +1000 |
| commit | 8765a68903561cb990b20747eaf2d4ac85bbc4e2 (patch) | |
| tree | 5177da13d273e89f4cf3268dccff8a9f1a4e83e2 | |
| parent | 491313a1a372d85f0b39a69dd87f203c8787daeb (diff) | |
Patched the regex generator so that it now matches any URL ending in a
downloaded file's name when replacing links in the HTML. This is needed
because some sites host the same file across multiple CDN servers, so a
page can contain several different links to the same file.
| -rwxr-xr-x | 4car | 9 |
1 file changed, 5 insertions, 4 deletions
```diff
@@ -24,9 +24,10 @@ split_pipe() {
 }
 
 filter_image_urls() {
-    grep -oe 'http[s]*:\/\/i\.4cdn\.org\/[a-z0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+' \
-         -e 'http[s]*:\/\/is[0-9]\+\.4chan\.org\/[a-z0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+' \
-         -e 'http[s]*:\/\/img\.fireden\.net\/[a-z0-9]\+\/image\/[0-9]\+\/[0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+'
+    grep -oe 'http[s]\?:\/\/i\.4cdn\.org\/[a-z0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+' \
+         -e 'http[s]\?:\/\/is[0-9]\+\.4chan\.org\/[a-z0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+' \
+         -e 'http[s]\?:\/\/img\.fireden\.net\/[a-z0-9]\+\/image\/[0-9]\+\/[0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+' \
+         -e 'http[s]\?:\/\/s[0-9]\+\.desu-usergeneratedcontent\.xyz\/[a-z0-9]\+\/image\/[0-9]\+\/[0-9]\+\/[0-9]\+\.[a-zA-Z0-9]\+'
 }
 
 # Check dependencies
@@ -59,7 +60,7 @@ for i in "$@"; do
     wget -i - -P "$dir" -nv |&
     split_pipe |
     filter_image_urls |
-    sed "s/\(.*\)\/\(.*\)/s;\1\/\2;.\/$rdir\/\2;g/" |
+    sed "s/\(.*\)\/\(.*\)/s;http[s]\\\\{0,1\\\\}:\/\/[^ \"]*\/\2;.\/$rdir\/\2;g/" |
     sed -f - "$file" |
     sponge "$file"
 done
```
