Add SCRAPE_FILEPRO.SH
This commit is contained in:
commit
6f4a6b70ac
47
SCRAPE_FILEPRO.SH
Normal file
47
SCRAPE_FILEPRO.SH
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Startup banner: one blank line followed by a framed title block naming the
# script and its author. Emitted with a single printf call; the format string
# is applied once per argument, so each argument becomes one output line.
printf '\n'
printf '%s\n' \
  "-========================================================================-" \
  "-=- -=-" \
  "-=- SCRAPE_FILEPRO.SH: Downloads all FilePro (CivicWeb) documents -=-" \
  "-=- -=-" \
  "-=- Lillian Skinner -=-" \
  "-=- -=-" \
  "-========================================================================-"
|
||||||
|
|
||||||
|
#######################################
# Extract the value of one HTML attribute from a line of markup.
# Arguments: $1 - line of HTML
#            $2 - attribute name (e.g. data-id)
# Outputs:   the attribute value on stdout; nothing when the attribute is
#            absent from the line. If the attribute occurs more than once,
#            the last occurrence wins (the leading .* is greedy).
#######################################
_filepro_attr() {
  printf '%s\n' "$1" | sed -n 's/.*'"$2"'="\([^"]*\)".*/\1/p'
}

#######################################
# Recursively download one FilePro folder listing into a local directory.
#
# Fetches the listing at $2, then scans it line by line for elements that
# carry data-id / data-title / data-type attributes. Entries of type
# "document" are saved as "<title>.pdf" inside $1; entries of type "folder"
# are descended into, using "<$1>/<title>" as the new target directory.
#
# Globals:   START_URL (read) - site base URL used to build document and
#                               sub-folder URLs
#            WGET_UA   (read) - User-Agent string passed to wget; treated as
#                               empty when unset
# Arguments: $1 - local directory to download into (created on demand)
#            $2 - URL of the folder listing to fetch
#            $3 - folder path label, used only for progress output
# Outputs:   progress messages on stdout, failure messages on stderr
# Returns:   0 when everything was fetched, 1 if the listing or any
#            document / sub-folder below it could not be fetched
#######################################
download_folder() {
  local dest_dir="$1"
  local index_url="$2"
  local folder_path="$3"
  local tmp_index
  local line line_id line_title line_type
  local status=0

  tmp_index=$(mktemp) || return 1

  echo "Looking in folder $folder_path"
  echo "Download to $dest_dir/"

  if ! wget --no-check-certificate --user-agent="${WGET_UA:-}" --no-hsts -q \
    -O "$tmp_index" "$index_url"; then
    echo "Failed to fetch folder listing: $index_url" >&2
    rm -f -- "$tmp_index"
    return 1
  fi

  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Cheap pre-filter: avoid forking sed for lines that cannot be entries.
    [[ "$line" == *'data-type="'* ]] || continue

    line_type=$(_filepro_attr "$line" data-type)
    [[ "$line_type" == "document" || "$line_type" == "folder" ]] || continue

    line_id=$(_filepro_attr "$line" data-id)
    [[ -n "$line_id" ]] || continue

    # Decode the HTML entities that can appear inside the attribute value.
    # NOTE(review): the entity list (&#39; &apos; &amp;) is reconstructed from
    # a garbled copy of the script - confirm against real listings.
    # &amp; is decoded last so "&amp;#39;" is not decoded twice.
    line_title=$(
      _filepro_attr "$line" data-title \
        | sed -e "s/&#39;/'/g" -e "s/&apos;/'/g" -e 's/&amp;/\&/g'
    )

    # A title is used as a single path component: neutralise slashes and
    # fall back to the id when the title is unusable as a file name.
    line_title=${line_title//\//_}
    if [[ -z "$line_title" || "$line_title" == "." || "$line_title" == ".." ]]; then
      line_title="$line_id"
    fi

    case "$line_type" in
      document)
        echo "Found document: $line_id --- $line_title.pdf... downloading..."
        if ! mkdir -p -- "$dest_dir"; then
          echo "Cannot create directory: $dest_dir" >&2
          status=1
          continue
        fi
        # No -N here: timestamping has no effect together with -O.
        if ! wget --no-check-certificate --user-agent="${WGET_UA:-}" --no-hsts -q \
          -O "$dest_dir/$line_title.pdf" "${START_URL}/document/$line_id"; then
          echo "Failed to download document $line_id ($line_title.pdf)" >&2
          status=1
        fi
        ;;
      folder)
        download_folder "$dest_dir/$line_title" \
          "${START_URL}/filepro/documents/$line_id" \
          "$folder_path/$line_id" || status=1
        ;;
    esac
  done < "$tmp_index"

  rm -f -- "$tmp_index"
  return "$status"
}
|
||||||
|
|
||||||
|
# Base URL of the CivicWeb site to scrape. This value is only an example: any
# CivicWeb site that exposes FilePro can be substituted. (Open question from
# the author: it is not known whether every CivicWeb site has FilePro.)
START_URL="https://aylmer.civicweb.net"

# Download everything reachable from the top-level listing into ./FilePro_Dump.
# download_folder is called directly rather than from a run-once loop, so the
# script's exit status is whatever download_folder returns.
download_folder "./FilePro_Dump" "${START_URL}/filepro/documents" "0"
|
||||||
Loading…
Reference in New Issue
Block a user