#!/bin/sh
# bulk_webdl - download large amounts of content from the web
# usage: bulk_webdl baseurl [delay [suffix]]
# Reads lines from standard input and makes requests accordingly,
# saving response files to the working directory. Each request URL starts with
# the first command-line argument, with the line of input appended.
# The second argument is the delay between requests, default 1s.
# The third argument is the suffix to add to output files.
#
# This makes many requests in quick succession. Please use responsibly.
#
# Changelog:
# 2021-165 and before: main development
# 2021-230 improved documentation
# 2021-248 add option to control output suffix
# 2021-252 remove leading slash for full URLs
# 2021-345 fix bug in deduplicator
# 2022-003 optimise by replacing basename with shell operations, enhance output
# 2023-236 reformat/redocument a tad
#
# https://dkl9.net/scripts/bulk_webdl.html

# Separator placed between the base URL ($1) and each input line:
# "/" when a non-empty base URL was given, empty otherwise.
SLASH="${1:+/}"

# Delay between requests ($2). The default is a bare number of seconds,
# the only operand form POSIX sleep guarantees.
DELAY="${2:-1}"

# Handle one request per line of standard input.
# -r keeps backslashes in the line literal; the default IFS still trims
# surrounding whitespace. The `|| [ -n ... ]` clause processes a final line
# that has no trailing newline instead of dropping it.
while read -r INPURL || [ -n "$INPURL" ]
do
    # extract filename at end of URL: drop one trailing slash, keep what
    # follows the last remaining slash, then append the suffix ($3)
    FNAME="${INPURL%/}"
    FNAME="${FNAME##*/}${3}"
    FURL="${1}${SLASH}${INPURL}"
    if [ -z "$FNAME" ]
    then
        # blank line or bare "/" with no suffix: nothing to name the file
        printf 'Skipping %s : no output filename.\n' "$FURL" >&2
    elif [ -f "$FNAME" ]
    then
        printf '%s already downloaded to %s .\n' "$FURL" "$FNAME"
    else
        printf 'Downloading %s to %s ...\n' "$FURL" "$FNAME"
        if ! curl -Lo "$FNAME" "$FURL"
        then
            printf 'Download of %s failed.\n' "$FURL" >&2
        fi
        # pause after every attempted request, successful or not
        sleep "$DELAY"
    fi
done
