Update SCRAPE_FILEPRO.SH
This commit is contained in:
parent
6f4a6b70ac
commit
5a2217cede
@ -7,6 +7,26 @@ echo -e "-=- Lillian Skinner
|
|||||||
echo -e "-=- -=-"
|
echo -e "-=- -=-"
|
||||||
echo -e "-========================================================================-"
|
echo -e "-========================================================================-"
|
||||||
|
|
||||||
|
#######################################
# Fetch one URL into a local file with curl, skipping the transfer when the
# server answers 304 for the existing local copy.
# Globals:   WGET_UA (read) - sent as the User-Agent header
# Arguments: $1 - URL to fetch
#            $2 - destination file path (its directory must already exist)
# Outputs:   a one-line status on stdout; failure details on stderr
# Returns:   0 on HTTP 200 or 304, 1 on any other status code
#######################################
download_helper() {
  local url="$1"
  local out="$2"
  local part="${out}.part"
  local code
  local -a curl_args=(
    -L -k -A "$WGET_UA" -sS -w "%{http_code}"
    --retry 3 --retry-delay 2
  )

  # A time condition only makes sense when there is a local copy to compare
  # against; without one, request the document unconditionally.
  if [[ -f "$out" ]]; then
    curl_args+=(-z "$out")
  fi

  # Stage the response body in a side file so that an HTTP error page can
  # never replace a previously downloaded document or pose as a new one.
  code=$(curl "${curl_args[@]}" -o "$part" "$url")

  case "$code" in
    200)
      if ! mv -f -- "$part" "$out"; then
        rm -f -- "$part"
        echo "FAILED! $code: $out | $url" >&2
        return 1
      fi
      echo "Downloaded."
      ;;
    304)
      rm -f -- "$part"
      echo "Already exists! Skipping."
      ;;
    *)
      # Includes "000", which curl prints when no HTTP response was received.
      rm -f -- "$part"
      echo "FAILED! $code: $out | $url" >&2
      return 1
      ;;
  esac
}
|
||||||
|
|
||||||
download_folder() {
|
download_folder() {
|
||||||
|
|
||||||
local tmp_index
|
local tmp_index
|
||||||
@ -27,9 +47,9 @@ download_folder() {
|
|||||||
LINE_TITLE=$(echo $LINE | sed 's/.*data-title="\([^"]*\)".*/\1/g' | sed 's/&/\&/g' | sed 's/'/'\''/g' | sed 's/'/'\''/g')
|
LINE_TITLE=$(echo $LINE | sed 's/.*data-title="\([^"]*\)".*/\1/g' | sed 's/&/\&/g' | sed 's/'/'\''/g' | sed 's/'/'\''/g')
|
||||||
LINE_TYPE=$(echo $LINE | sed 's/.*data-type="\([^"]*\)".*/\1/g')
|
LINE_TYPE=$(echo $LINE | sed 's/.*data-type="\([^"]*\)".*/\1/g')
|
||||||
if [[ "$LINE_TYPE" == "document" ]]; then
|
if [[ "$LINE_TYPE" == "document" ]]; then
|
||||||
echo "Found document: $LINE_ID --- $LINE_TITLE.pdf... downloading..."
|
echo "Found document: $LINE_ID : $LINE_TITLE.pdf... downloading..."
|
||||||
mkdir -p "$tmp_dir"
|
mkdir -p "$tmp_dir"
|
||||||
wget --no-check-certificate --user-agent="$WGET_UA" "${START_URL}/document/$LINE_ID" -O "$tmp_dir/$LINE_TITLE.pdf" --no-hsts -N -q
|
download_helper "${START_URL}/document/$LINE_ID" "$tmp_dir/$LINE_TITLE.pdf"
|
||||||
elif [[ "$LINE_TYPE" == "folder" ]]; then
|
elif [[ "$LINE_TYPE" == "folder" ]]; then
|
||||||
download_folder "$tmp_dir/$LINE_TITLE" "${START_URL}/filepro/documents/$LINE_ID" "$3/$LINE_ID"
|
download_folder "$tmp_dir/$LINE_TITLE" "${START_URL}/filepro/documents/$LINE_ID" "$3/$LINE_ID"
|
||||||
fi
|
fi
|
||||||
@ -38,7 +58,6 @@ download_folder() {
|
|||||||
rm -f $tmp_index
|
rm -f $tmp_index
|
||||||
}
|
}
|
||||||
|
|
||||||
# Example URL. Any CivicWeb site that exposes FilePro can be used here (unconfirmed whether every CivicWeb site has FilePro).
|
|
||||||
START_URL="https://aylmer.civicweb.net"
|
START_URL="https://aylmer.civicweb.net"
|
||||||
|
|
||||||
while (true); do
|
while (true); do
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user