#!/usr/bin/env bash echo -e "\n-========================================================================-" echo -e "-=- -=-" echo -e "-=- SCRAPE_LONDON.SH: Downloads committee videos and agendas -=-" echo -e "-=- -=-" echo -e "-=- Lillian Skinner -=-" echo -e "-=- -=-" echo -e "-========================================================================-" source ./functions/.functions # Warning to all who read this script: # It is bad. I know it is bad, but I am tired okay, and sometimes sloppy just works. # London seems to have recently blocked unusual user agents. Can't use wget or even ping. Thankfully pretend to be a real person! WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87" TEMP_DIR="./tmp/" INDEX_PAGE="./tmp/index.html" SEARCH_PAGE="./tmp/search.html" AGENDA_HTML="./tmp/work.html" ADDENDUM_HTML="./tmp/addendum.html" #VIDEO_TIMESTAMP_JSON="./tmp/time.json" current_year=$(date +%Y) current_month=$(date +%m) current_day=$(date +%d) SUPPORT_PAST="" if [ -d "$TEMP_DIR" ]; then rm -r $TEMP_DIR fi rm -f $INDEX_PAGE rm -f $SEARCH_PAGE rm -f $AGENDA_HTML mkdir $TEMP_DIR while IFS="," read -r INDEX_URL_PRE CITY_ARCHIVE_NAME_PRE CALENDAR_NAME_PRE; do INDEX_URL=$(echo "$INDEX_URL_PRE" | sed 's/\"//g' | sed 's/,//g' | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//') CITY_ARCHIVE_NAME=$(echo "$CITY_ARCHIVE_NAME_PRE" | sed 's/\"//g' | sed 's/\,//g' | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//') CALENDAR_NAME=$(echo "$CALENDAR_NAME_PRE" | sed 's/\"//g' | sed 's/\,//g' | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//') INDEX_END="FALSE" while [[ $INDEX_END == "FALSE" ]]; do echo "SCRAPE_ESCRIBE: Downloading eScribe index..." wget --no-check-certificate --user-agent="$WGET_UA" $INDEX_URL -O $INDEX_PAGE --no-hsts --show-progress if [ $? -ne 8 ]; then FOUNDLIST="FALSE" while IFS= read -r LINE; do if [[ "TRUE" == $FOUNDLIST ]]; then GREPENDLIST=$(echo $LINE | grep '