#!/usr/bin/env bash
#
# SCRAPE_FILEPRO.SH: Downloads all FilePro (CivicWeb) documents.
#
# Author: Lillian Skinner
# Origin: introduced by commit 6f4a6b70ac0a5ceff4fb679b2ddb71700266a412
#         ("Add SCRAPE_FILEPRO.SH").
#
# Usage:
#   SCRAPE_FILEPRO.SH [BASE_URL [DEST_DIR]]
#
#   BASE_URL  Root of the CivicWeb site (default: https://aylmer.civicweb.net).
#   DEST_DIR  Directory that receives the mirrored tree (default: ./FilePro_Dump).
#
# Environment:
#   WGET_UA   Optional User-Agent string handed to wget. When unset or empty,
#             wget is given an empty --user-agent value.
#
# Exit status:
#   0 when every listing and document was fetched, non-zero otherwise.
#   Individual failures are reported on stderr and do not stop the crawl.

# No `set -e`: the crawl is best-effort and every critical command is checked
# explicitly below.
set -uo pipefail

# Default site. Any CivicWeb site that exposes FilePro should work.
# TODO: confirm whether every CivicWeb site has FilePro.
readonly DEFAULT_START_URL='https://aylmer.civicweb.net'
readonly DEFAULT_DEST_DIR='./FilePro_Dump'

#######################################
# Print the start-up banner to stdout.
#######################################
print_banner() {
  printf '%s\n' \
    '' \
    '-========================================================================-' \
    '-=- -=-' \
    '-=- SCRAPE_FILEPRO.SH: Downloads all FilePro (CivicWeb) documents -=-' \
    '-=- -=-' \
    '-=- Lillian Skinner -=-' \
    '-=- -=-' \
    '-========================================================================-'
}

#######################################
# Print a diagnostic to stderr.
# Arguments: message words
#######################################
_filepro_warn() {
  printf 'SCRAPE_FILEPRO: %s\n' "$*" >&2
}

#######################################
# Run wget with the options shared by every request.
# Globals:   WGET_UA (read, optional)
# Arguments: extra wget arguments (output target, URL, ...)
# Returns:   wget's exit status
#######################################
_filepro_wget() {
  # NOTE(review): --no-check-certificate turns off TLS certificate validation.
  # It is kept because the script has always passed it; drop it if the target
  # site's certificate chain validates.
  wget --no-check-certificate --no-hsts -q \
    --user-agent="${WGET_UA:-}" "$@"
}

#######################################
# Decode the HTML entities handled for titles: amp, #39, #x27, apos, quot.
# Arguments: $1 - text taken from an HTML attribute
# Outputs:   decoded text on stdout (no trailing newline)
#######################################
_filepro_decode_entities() {
  local text=$1
  # Entities are assembled from pieces on purpose: the sed expressions that
  # used to do this had lost their entity names, and spelling them out
  # literally would make the file fragile under HTML-aware tooling again.
  local amp='&'
  local squote="'"
  local dquote='"'

  text=${text//"${amp}#39;"/"${squote}"}
  text=${text//"${amp}#x27;"/"${squote}"}
  text=${text//"${amp}apos;"/"${squote}"}
  text=${text//"${amp}quot;"/"${dquote}"}
  # The ampersand goes last so an escaped entity is not decoded twice. The
  # replacement is quoted so bash never treats it as "the matched text".
  text=${text//"${amp}amp;"/"${amp}"}

  printf '%s' "$text"
}

#######################################
# Turn a remote title into a single safe path component.
# Arguments: $1 - decoded title, $2 - fallback name (the entry id)
# Outputs:   safe name on stdout (no trailing newline)
#######################################
_filepro_safe_name() {
  local name=$1
  local fallback=$2

  # Titles come from the remote page: a slash would silently address another
  # directory, so it is flattened.
  name=${name//\//_}
  case "$name" in
    '' | . | ..) name=$fallback ;;
  esac

  printf '%s' "$name"
}

#######################################
# Recursively mirror one FilePro folder.
#
# Fetches the folder listing, then for every line carrying data-id /
# data-title / data-type attributes either downloads the document as
# "<title>.pdf" or recurses into the sub-folder.
#
# Globals:   START_URL (read) - site root used to build document/folder URLs
#            WGET_UA   (read, optional)
# Arguments: $1 - local directory that receives this folder's documents
#            $2 - URL of the folder listing
#            $3 - slash-separated id path of the folder, used for logging
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 if everything below this folder was fetched, 1 if anything
#            failed, 2 on wrong usage
#######################################
download_folder() {
  if (($# < 2)); then
    _filepro_warn 'usage: download_folder DEST_DIR INDEX_URL [ID_PATH]'
    return 2
  fi
  : "${START_URL:?download_folder: START_URL is not set}"

  local dest_dir=$1
  local index_url=$2
  local id_path=${3:-}

  local index_html
  local line
  local entry_id
  local entry_title
  local entry_type
  local target
  local status=0

  # The leading ".*" makes each pattern pick the last occurrence on the line,
  # the same choice the greedy sed expressions made.
  local id_re='.*data-id="([^"]*)"'
  local title_re='.*data-title="([^"]*)"'
  local type_re='.*data-type="([^"]*)"'

  # The listing is held in a variable, so there is no temp file to clean up.
  if ! index_html=$(_filepro_wget -O - -- "$index_url"); then
    _filepro_warn "could not fetch folder listing: $index_url"
    return 1
  fi

  printf 'Looking in folder %s\n' "$id_path"
  printf 'Download to %s/\n' "$dest_dir"

  # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ $line =~ $type_re ]] || continue
    entry_type=${BASH_REMATCH[1]}
    [[ "$entry_type" == "document" || "$entry_type" == "folder" ]] || continue

    # An entry without an id cannot be addressed; skip it.
    [[ $line =~ $id_re ]] || continue
    entry_id=${BASH_REMATCH[1]}
    [[ -n "$entry_id" ]] || continue

    entry_title=''
    if [[ $line =~ $title_re ]]; then
      entry_title=$(_filepro_decode_entities "${BASH_REMATCH[1]}")
    fi
    entry_title=$(_filepro_safe_name "$entry_title" "$entry_id")

    case "$entry_type" in
      document)
        echo "Found document: $entry_id --- $entry_title.pdf... downloading..."
        # Directories are created lazily so that empty folders leave no trace.
        if ! mkdir -p -- "$dest_dir"; then
          _filepro_warn "could not create directory: $dest_dir"
          status=1
          continue
        fi
        target="${dest_dir}/${entry_title}.pdf"
        if ! _filepro_wget -O "$target" -- "${START_URL}/document/${entry_id}"; then
          _filepro_warn "download failed for document ${entry_id} (${target})"
          # wget -O leaves a truncated file behind on failure.
          rm -f -- "$target"
          status=1
        fi
        ;;
      folder)
        download_folder \
          "${dest_dir}/${entry_title}" \
          "${START_URL}/filepro/documents/${entry_id}" \
          "${id_path}/${entry_id}" ||
          status=1
        ;;
    esac
  done <<<"$index_html"

  return "$status"
}

#######################################
# Entry point: print the banner and mirror the whole FilePro tree.
# Globals:   START_URL (written)
# Arguments: $1 - optional site root, $2 - optional destination directory
# Returns:   status of the top-level download_folder call
#######################################
main() {
  print_banner

  if ! command -v wget >/dev/null; then
    _filepro_warn 'wget is required but was not found in PATH'
    return 127
  fi

  START_URL=${1:-$DEFAULT_START_URL}
  # Avoid "//" in the URLs built from the root.
  START_URL=${START_URL%/}

  download_folder "${2:-$DEFAULT_DEST_DIR}" "${START_URL}/filepro/documents" "0"
}

main "$@"