#!/usr/bin/env bash
#
# scrape_filepro.sh - download every FilePro (CivicWeb) document of a site.
#
# Usage: scrape_filepro.sh [OUTPUT_DIR]
#
# Arguments:
#   OUTPUT_DIR  directory the folder tree is mirrored into
#               (default: ./FilePro_Dump)
#
# Environment:
#   START_URL   base URL of the CivicWeb site
#               (default: https://aylmer.civicweb.net)
#   WGET_UA     User-Agent string sent by both wget and curl
#
# Requires: curl, wget, sed, mktemp.

# The original script used WGET_UA without ever defining it, so requests went
# out with an empty User-Agent. Keep any value supplied by the environment and
# fall back to a generic one otherwise.
WGET_UA="${WGET_UA:-Mozilla/5.0 (compatible; scrape_filepro.sh)}"
START_URL="${START_URL:-https://aylmer.civicweb.net}"

#######################################
# Print the start-up banner.
#######################################
print_banner() {
  cat <<'EOF'

-========================================================================-
-=- -=-
-=- SCRAPE_FILEPRO.SH: Downloads all FilePro (CivicWeb) documents -=-
-=- -=-
-=- Lillian Skinner -=-
-=- -=-
-========================================================================-
EOF
}

#######################################
# Download one URL to a file, skipping the transfer when the server reports
# that the local copy is still current.
# Globals:   WGET_UA (read)
# Arguments: $1 - URL to fetch
#            $2 - destination file path (its directory must already exist)
# Outputs:   progress message on stdout, failure message on stderr
# Returns:   0 on HTTP 200 or 304, 1 on anything else
#######################################
download_helper() {
  local url="$1"
  local out="$2"
  local part="${out}.part"
  local code
  local -a time_cond=()

  # Only send If-Modified-Since when there is a local copy to compare with;
  # otherwise curl would try to parse the (missing) path as a date string.
  if [[ -e "$out" ]]; then
    time_cond=(-z "$out")
  fi

  # Write to a side file so that an error page never replaces (or poses as)
  # a real document.
  code=$(curl -L -k -A "$WGET_UA" -sS -w '%{http_code}' \
    --retry 3 --retry-delay 2 \
    "${time_cond[@]}" -o "$part" "$url")

  case "$code" in
    200)
      mv -f -- "$part" "$out" || return 1
      echo "Downloaded."
      ;;
    304)
      rm -f -- "$part"
      echo "Already exists! Skipping."
      ;;
    *)
      rm -f -- "$part"
      echo "FAILED! $code: $out | $url" >&2
      return 1
      ;;
  esac
}

#######################################
# Print the value of a data-<name>="..." attribute found in a line of HTML.
# Prints nothing when the attribute is absent. When the attribute occurs more
# than once on the line, the last occurrence wins (greedy leading ".*").
# Arguments: $1 - line of HTML
#            $2 - attribute suffix (id, title or type)
#######################################
extract_attr() {
  printf '%s\n' "$1" | sed -n "s/.*data-$2=\"\([^\"]*\)\".*/\1/p"
}

#######################################
# Decode the HTML entities expected in titles: apostrophes and ampersands.
# NOTE(review): the exact entity list of the original script could not be
# recovered; this set is a best guess - extend it if other entities show up.
# Arguments: $1 - text to decode
#######################################
decode_entities() {
  # &amp; is handled last so "&amp;#39;" is not decoded twice.
  printf '%s\n' "$1" | sed \
    -e "s/&#39;/'/g" \
    -e "s/&#x27;/'/g" \
    -e "s/&apos;/'/g" \
    -e 's/&amp;/\&/g'
}

#######################################
# Turn a raw data-title value into a single safe path component.
# Arguments: $1 - raw title (may contain HTML entities)
#            $2 - fallback name used when the title is empty
#######################################
entry_name() {
  local name
  name=$(decode_entities "$1")
  # A slash would silently split the title into several path components.
  name=${name//\//_}
  printf '%s\n' "${name:-$2}"
}

#######################################
# Recursively mirror one FilePro folder.
# Globals:   START_URL, WGET_UA (read)
# Arguments: $1 - local directory to download into
#            $2 - URL of the folder's index page
#            $3 - slash-separated chain of folder IDs (used for logging only)
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 when everything was fetched, 1 if any fetch failed
#######################################
download_folder() {
  local dest_dir="$1"
  local index_url="$2"
  local crumb="$3"
  local tmp_index line entry_id entry_title entry_type
  local status=0

  tmp_index=$(mktemp) || return 1

  echo "Looking in folder $crumb"
  echo "Download to $dest_dir/"

  if ! wget --no-check-certificate --user-agent="$WGET_UA" --no-hsts -q \
    -O "$tmp_index" "$index_url"; then
    echo "FAILED! could not fetch folder index: $index_url" >&2
    rm -f -- "$tmp_index"
    return 1
  fi

  while IFS= read -r line || [[ -n "$line" ]]; do
    # Cheap pre-filter: most lines of the page are not entries at all.
    [[ "$line" == *data-type=* ]] || continue

    entry_type=$(extract_attr "$line" type)
    entry_id=$(extract_attr "$line" id)
    [[ -n "$entry_id" ]] || continue
    entry_title=$(entry_name "$(extract_attr "$line" title)" "$entry_id")

    case "$entry_type" in
      document)
        echo "Found document: $entry_id : $entry_title.pdf... downloading..."
        mkdir -p -- "$dest_dir"
        # Keep going after a failed document; just remember the failure.
        download_helper "${START_URL}/document/$entry_id" \
          "$dest_dir/$entry_title.pdf" || status=1
        ;;
      folder)
        download_folder "$dest_dir/$entry_title" \
          "${START_URL}/filepro/documents/$entry_id" \
          "$crumb/$entry_id" || status=1
        ;;
    esac
  done < "$tmp_index"

  rm -f -- "$tmp_index"
  return "$status"
}

#######################################
# Entry point: print the banner and mirror the whole document tree.
# Arguments: $1 - output directory (optional, default ./FilePro_Dump)
#######################################
main() {
  print_banner
  download_folder "${1:-./FilePro_Dump}" "${START_URL}/filepro/documents" "0"
}

main "$@"