commit ec46f77bc4886aafcc3b77a6888c36f24bc7a437 Author: Lillian Skinner Date: Tue Apr 7 18:22:03 2026 -0400 Upload files to "/" diff --git a/SCRAPE_ESCRIBE.SH b/SCRAPE_ESCRIBE.SH new file mode 100644 index 0000000..ad97b53 --- /dev/null +++ b/SCRAPE_ESCRIBE.SH @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +echo -e "\n-========================================================================-" +echo -e "-=- -=-" +echo -e "-=- SCRAPE_ESCRIBE.SH: Download eScribe meetings JSONs -=-" +echo -e "-=- -=-" +echo -e "-=- Lillian Skinner -=-" +echo -e "-=- -=-" +echo -e "-========================================================================-" + +# Warning to all who read this script: +# It is bad. I know it is bad, but I am tired okay, and sometimes sloppy just works. + +# London seems to have recently blocked unusual user agents. Can't use wget or even ping. Thankfully pretend to be a real person! +WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87" + +TEMP_DIR="./tmp/" +INDEX_PAGE="./tmp/index_cal.html" +SEARCH_PAGE="./tmp/search.html" +AGENDA_HTML="./tmp/work.html" +ADDENDUM_HTML="./tmp/addendum.html" +#VIDEO_TIMESTAMP_JSON="./tmp/time_cal.json" + +current_year=$(date +%Y) +current_month=$(date +%m) +current_day=$(date +%d) + +if [ -d "$TEMP_DIR" ]; then + rm -r $TEMP_DIR +fi +rm -f $INDEX_PAGE +rm -f $SEARCH_PAGE +rm -f $AGENDA_HTML + +mkdir $TEMP_DIR + +while read -r INDEX_URL_PRE CITY_ARCHIVE_NAME_PRE CALENDAR_NAME_PRE; do + INDEX_URL=$(echo $INDEX_URL_PRE | sed 's/\"//g' | sed 's/,//g') + CITY_ARCHIVE_NAME=$(echo $CITY_ARCHIVE_NAME_PRE | sed 's/\"//g' | sed 's/\,//g') + CALENDAR_NAME=$(echo $CALENDAR_NAME_PRE | sed 's/\"//g' | sed 's/\,//g') + + INDEX_END="FALSE" + while [[ $INDEX_END == "FALSE" ]]; do + echo "SCRAPE_ESCRIBE: Downloading eScribe index..." + wget --no-check-certificate --user-agent="$WGET_UA" $INDEX_URL -O $INDEX_PAGE --show-progress + if [ $? -ne 8 ]; then + FOUNDLIST="FALSE" + while IFS= read -r LINE; do + if [[ "TRUE" == $FOUNDLIST ]]; then + GREPENDLIST=$(echo $LINE | grep '