diff --git a/SCRAPE_FILEPRO.SH b/SCRAPE_FILEPRO.SH
index 0ad3816..ff691f8 100644
--- a/SCRAPE_FILEPRO.SH
+++ b/SCRAPE_FILEPRO.SH
@@ -7,6 +7,45 @@ echo -e "-=- Lillian Skinner
 echo -e "-=- -=-"
 echo -e "-========================================================================-"
 
+# Download one URL to a local file, skipping the transfer when the server
+# reports that the local copy is still current.
+# Globals:   WGET_UA (read) - passed to curl as the User-Agent string
+# Arguments: $1 - URL to fetch
+#            $2 - destination file path
+# Outputs:   a status line on stdout; a failure line on stderr
+# Returns:   0 on download (HTTP 200) or not-modified (HTTP 304), 1 otherwise
+download_helper() {
+    local url="$1"
+    local out="$2"
+    local part="${out}.part"
+    local code
+    local rc=0
+
+    # -z "$out" makes the request conditional on the existing file's timestamp.
+    # The body is written to a side file so that an error page or a truncated
+    # transfer never replaces a good copy of "$out".
+    code=$(curl -L -k -A "$WGET_UA" -sS -w "%{http_code}" --retry 3 --retry-delay 2 -z "$out" -o "$part" "$url") || rc=$?
+    case "$rc:$code" in
+        0:200)
+            if ! mv -f -- "$part" "$out"; then
+                rm -f -- "$part"
+                echo "FAILED! $code: $out | $url" >&2
+                return 1
+            fi
+            echo "Downloaded."
+            ;;
+        0:304)
+            rm -f -- "$part"
+            echo "Already exists! Skipping."
+            ;;
+        *)
+            rm -f -- "$part"
+            echo "FAILED! $code: $out | $url" >&2
+            return 1
+            ;;
+    esac
+}
+
 download_folder() {
     local tmp_index
 
@@ -27,9 +66,9 @@ download_folder() {
         LINE_TITLE=$(echo $LINE | sed 's/.*data-title="\([^"]*\)".*/\1/g' | sed 's/&/\&/g' | sed 's/'/'\''/g' | sed 's/'/'\''/g')
         LINE_TYPE=$(echo $LINE | sed 's/.*data-type="\([^"]*\)".*/\1/g')
         if [[ "$LINE_TYPE" == "document" ]]; then
-            echo "Found document: $LINE_ID --- $LINE_TITLE.pdf... downloading..."
+            echo "Found document: $LINE_ID : $LINE_TITLE.pdf... downloading..."
             mkdir -p "$tmp_dir"
-            wget --no-check-certificate --user-agent="$WGET_UA" "${START_URL}/document/$LINE_ID" -O "$tmp_dir/$LINE_TITLE.pdf" --no-hsts -N -q
+            download_helper "${START_URL}/document/$LINE_ID" "$tmp_dir/$LINE_TITLE.pdf"
         elif [[ "$LINE_TYPE" == "folder" ]]; then
             download_folder "$tmp_dir/$LINE_TITLE" "${START_URL}/filepro/documents/$LINE_ID" "$3/$LINE_ID"
         fi
@@ -38,7 +77,6 @@ download_folder() {
     rm -f $tmp_index
 }
 
-# Example, can be any CivicWeb URL if it has FilePro. Or do they all have FilePro????
 START_URL="https://aylmer.civicweb.net"
 
 while (true); do