- $PDFREFNAME
#!/usr/bin/env bash echo -e "\n-========================================================================-" echo -e "-=- -=-" echo -e "-=- SCRAPE_LTC.SH: Downloads LTC committee agendas and minutes -=-" echo -e "-=- -=-" echo -e "-=- Lillian Skinner -=-" echo -e "-=- -=-" echo -e "-========================================================================-" MEETINGS_PAGE="./tmp.html" # London seems to have recently blocked unusual user agents. Can't use wget or even ping. Thankfully pretend to be a real person! WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87" mkdir "./LondonArchive" mkdir "./LondonArchive/LTC" mkdir "./tmp" wget --user-agent="$WGET_UA" "https://www.londontransit.ca/agendas-and-minutes/" -O "./tmp/index.html" -q #--show-progress current_year=$(date +%Y) current_month=$(date +%m) current_day=$(date +%d) # If I don't set these values then "10#: invalid integer constant" MEETING_YEAR="0000" MEETING_MONTH="00" MEETING_DAY="00" while IFS= read -r LINE_PRE; do LINE=$(echo $LINE_PRE | sed 's/\xC2\xA0/ /') # Only