mirror of
https://gist.github.com/1b471e5f5215c368fd78d9aba05f8dc2.git
synced 2026-01-26 13:42:45 -05:00
This commit is contained in:
parent
b612db330c
commit
4db585f9fd
194
SCRAPE_LONDON.SH
194
SCRAPE_LONDON.SH
@ -1,194 +0,0 @@
|
||||
#!/bin/bash
# SCRAPE_LONDON.SH — downloads City of London committee meeting videos and
# agendas from london.ca into ./Agenda, ./Video and ./tmp.
# BUGFIX: the original first line was `#/bash`, which is an ordinary comment,
# not a shebang — the script uses [[ ]] and (( )) and must run under bash.
echo -e "\n-========================================================================-"
echo -e "-=- -=-"
echo -e "-=- SCRAPE_LONDON.SH: Downloads committee videos and agendas -=-"
echo -e "-=- -=-"
echo -e "-=- https://gist.github.com/rvtr/1b471e5f5215c368fd78d9aba05f8dc2 -=-"
echo -e "-=- Lillian Skinner -=-"
echo -e "-=- -=-"
echo -e "-========================================================================-"

# Warning to all who read this script:
# It is bad. I know it is bad, but I am tired okay, and sometimes sloppy just works.

# London seems to have recently blocked unusual user agents. Can't use wget or even ping. Thankfully pretend to be a real person!
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"

# Working/output locations. Everything below is wiped at startup so each run
# produces a fresh archive.
TEMP_DIR="./tmp/"
SEARCH_PAGE="./tmp/index.html"
AGENDA_DIR="./Agenda/"
AGENDA_HTML="./tmp/work.html"
VIDEO_DIR="./Video/"
#VIDEO_TIMESTAMP_JSON="./tmp/time.json"

if [ -d "$TEMP_DIR" ]; then
  rm -r "$TEMP_DIR"
fi
if [ -d "$AGENDA_DIR" ]; then
  rm -r "$AGENDA_DIR"
fi
if [ -d "$VIDEO_DIR" ]; then
  rm -r "$VIDEO_DIR"
fi
mkdir "$TEMP_DIR"
mkdir "$AGENDA_DIR"
mkdir "$VIDEO_DIR"

SEARCH_URL="https://london.ca/government/council-civic-administration/council-committee-meetings/meetings"
# Need to confirm. When stacking params does the date need to be f[1]?
# NOTE(review): only SEARCH_URL is used by the pager below; the three
# SEARCH_PARAM_* values appear to be kept for future filtered searches.
SEARCH_PARAM_COMMITTEE="f[0]=meeting_type%3A"
SEARCH_PARAM_DATE="f[0]=meeting_date%3A"
SEARCH_PARAM_QUERY="search=query&sort_by=field_meeting_date"

# Page counter and termination flag for the search-result pager loop.
i=0
SEARCH_END="FALSE"
|
||||
# Walk the search-result pager one page at a time. wget exit code 8
# ("server issued an error response") ends the walk; so does a page that
# loads but contains "No results found.".
while [[ $SEARCH_END == "FALSE" ]]; do
  echo "Downloading search results... Page: $i"
  wget --user-agent="$WGET_UA" "$SEARCH_URL?page=$i" -O "$SEARCH_PAGE" -q #--show-progress
  if [ $? -ne 8 ]; then
    FOUNDMEETING="FALSE"

    GREP404=$(grep "No results found." "$SEARCH_PAGE")
    if [[ "$GREP404" == "" ]]; then
      while IFS= read -r LINE; do

        # All meeting items in the search results are formatted like so:
        # - One line with the name
        # - Second line with all other info including links
        #
        # We can find the first line by the class "views-field-field-meeting-notes"
        # FOUNDMEETING=TRUE will show that the first line has been found, and so
        # the next line read will be "confirmed" as line 2 of the meeting info
        # The first two links of every second line are (in order) the PDF and HTML agendas

        if [[ "TRUE" == $FOUNDMEETING ]]; then
          FOUNDMEETING="FALSE"
          echo "-========================================================================-"
          echo " Working on $MEETING_NAME"
          echo "-========================================================================-"

          # Grab meeting item links
          echo "$LINE" | sed 's/href=./\nhref="/g' | grep 'href="https' | sed 's/.*href="\([^"]*\)".*/\1/p' | uniq > "./tmp/meeting_urls"
          # Grab meeting item types
          echo "$LINE" | sed 's/rel=.noreferrer.>/\nrel="noreferrer">/g' | grep 'rel="noreferrer">' | sed 's/.*rel="noreferrer">\([^<]*\)<.*/\1/p' | uniq > "./tmp/meeting_types"

          AGENDA_HTML_URL=""
          AGENDA_PDF_URL=""
          AGENDA_REVISE_HTML_URL=""
          AGENDA_REVISE_PDF_URL=""
          MINUTES_HTML_URL=""
          MINUTES_PDF_URL=""
          MINUTES_ATTACH_PDF_URL=""

          # Pair each URL (stdin) with its link label (fd 3) and bucket by
          # label. NOTE(review): the labels appear to carry a trailing space
          # in the page markup, hence the '... (PDF) ' patterns below.
          echo "Found the following documents:"
          while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
            echo " - $LINEA2"

            case "$LINEA2" in
              "Agenda (HTML) ")
                AGENDA_HTML_URL="$LINEA1" ;;
              "Agenda (PDF) ")
                AGENDA_PDF_URL="$LINEA1" ;;
              "Revised Agenda (HTML) ")
                AGENDA_REVISE_HTML_URL="$LINEA1" ;;
              "Revised Agenda (PDF) ")
                AGENDA_REVISE_PDF_URL="$LINEA1" ;;
              "Minutes (HTML) ")
                MINUTES_HTML_URL="$LINEA1" ;;
              "Minutes (PDF) ")
                MINUTES_PDF_URL="$LINEA1" ;;
              "Minutes with Attachments (PDF) ")
                MINUTES_ATTACH_PDF_URL="$LINEA1" ;;
            esac

          done < ./tmp/meeting_urls 3< ./tmp/meeting_types

          # Always prefer Revised Agendas
          # BUGFIX: ERROR is now reset for every meeting and expanded in the
          # test below. The original `[[ ERROR="FALSE" ]]` tested the literal
          # (always non-empty) string "ERROR=FALSE" and was therefore always
          # true, so meetings with no HTML agenda were still processed against
          # a stale work.html from the previous meeting.
          ERROR="FALSE"
          echo "Downloading agenda HTML..."
          if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
            wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$AGENDA_HTML" -q #--show-progress
          elif [[ $AGENDA_HTML_URL != "" ]]; then
            wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$AGENDA_HTML" -q #--show-progress
          else
            ERROR="TRUE"
          fi

          if [[ $ERROR == "FALSE" ]]; then
            mkdir -p "./tmp/$MEETING_NAME/"
            mkdir -p "./tmp/$MEETING_NAME/Attachments/"

            # Direct video links is always "video.isilive.ca/<REGION>/<NAME>"
            # There are some eScribe ones, but those are in m3u8s and are really annoying to work with

            # ...not annoying as more sed though.
            VIDEO_URL=$(grep 'id="isi_player"' "$AGENDA_HTML" | sed -n 's/.*data-stream_name="\([^"]*\)".*/\1/p' | sed 's/ /%20/g')

            if [[ $VIDEO_URL != "" ]]; then
              echo "Found meeting recording."
              echo "https://video.isilive.ca/london/$VIDEO_URL" > "./tmp/$MEETING_NAME/RecordingLink.txt"
            fi

            # Get attachment links
            grep "AgendaItemAgendaItem1TitleHeader" "$AGENDA_HTML" | sed 's/href=.filestream\.ashx/\nhref="filestream\.ashx/g' | grep 'filestream.ashx' | sed 's/href=.\([^/]*\)".*/\1/p' | awk '!x[$0]++' > "./tmp/attachment_urls"
            # Get attachment names
            grep "AgendaItemAgendaItem1TitleHeader" "$AGENDA_HTML" | sed 's/data-original-title=./\ndata-original-title='\''/g' | grep 'data-original-title' | sed 's/data-original-title=.\([^'\''/]*\)'\''.*/\1/p' | awk '!x[$0]++' > "./tmp/attachment_names"
            # Download attachment and use the name grabbed above
            echo "Found the following agenda attachments:"
            while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
              echo " - $LINEA2"
              wget --user-agent="$WGET_UA" "https://pub-london.escribemeetings.com/$LINEA1" -O "./tmp/$MEETING_NAME/Attachments/$LINEA2" -q #--show-progress
            done < ./tmp/attachment_urls 3< ./tmp/attachment_names
            echo "All attachments saved."

            # Save the agenda as PDF when any PDF exists; fall back to HTML.
            if [[ $AGENDA_REVISE_PDF_URL != "" ]] || [[ $AGENDA_PDF_URL != "" ]]; then
              if [[ $AGENDA_REVISE_PDF_URL != "" ]]; then
                echo "Saving revised agenda..."
                wget --user-agent="$WGET_UA" "$AGENDA_REVISE_PDF_URL" -O "./tmp/$MEETING_NAME/Agenda_Revised.pdf" -q #--show-progress
              fi
              if [[ $AGENDA_PDF_URL != "" ]]; then
                echo "Saving regular agenda..."
                wget --user-agent="$WGET_UA" "$AGENDA_PDF_URL" -O "./tmp/$MEETING_NAME/Agenda.pdf" -q #--show-progress
              fi
            else
              if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
                echo "Saving revised agenda as HTML (no PDF found!)"
                wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "./tmp/$MEETING_NAME/Agenda_Revised.html" -q #--show-progress
              fi
              if [[ $AGENDA_HTML_URL != "" ]]; then
                echo "Saving regular agenda as HTML (no PDF found!)"
                wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "./tmp/$MEETING_NAME/Agenda.html" -q #--show-progress
              fi
            fi

            # Same preference order for minutes.
            if [[ $MINUTES_ATTACH_PDF_URL != "" ]] || [[ $MINUTES_PDF_URL != "" ]]; then
              if [[ $MINUTES_ATTACH_PDF_URL != "" ]]; then
                echo "Saving minutes with attachments..."
                wget --user-agent="$WGET_UA" "$MINUTES_ATTACH_PDF_URL" -O "./tmp/$MEETING_NAME/Minutes_With_Attachments.pdf" -q #--show-progress
              fi
              if [[ $MINUTES_PDF_URL != "" ]]; then
                echo "Saving minutes..."
                wget --user-agent="$WGET_UA" "$MINUTES_PDF_URL" -O "./tmp/$MEETING_NAME/Minutes.pdf" -q #--show-progress
              fi
            else
              if [[ $MINUTES_HTML_URL != "" ]]; then
                echo "Saving minutes as HTML (no PDF found!)"
                wget --user-agent="$WGET_UA" "$MINUTES_HTML_URL" -O "./tmp/$MEETING_NAME/Minutes.html" -q #--show-progress
              fi
            fi

          fi
        fi

        # Line 1 of a meeting entry: remember that the *next* line read holds
        # the links, and pull the display name out of this one.
        GREPMEETING=$(echo "$LINE" | grep "views-field-field-meeting-notes")
        if [[ "$GREPMEETING" != "" ]]; then
          FOUNDMEETING="TRUE"
          MEETING_NAME=$(echo "$LINE" | sed -n 's/.*<div class="meeting__date">\([^<]*\)<\/div>.*/\1/p')
        fi
      done < "$SEARCH_PAGE"
    else
      SEARCH_END="TRUE"
    fi
  else
    SEARCH_END="TRUE"
  fi
  ((i++))
done
|
||||
257
SCRAPE_MEETINGS.SH
Normal file
257
SCRAPE_MEETINGS.SH
Normal file
@ -0,0 +1,257 @@
|
||||
#!/bin/bash
# SCRAPE_MEETINGS.SH — incremental scraper: archives recent City of London
# committee meeting videos, agendas and minutes into ./LondonArchive.
# BUGFIX: the original first line was `#/bash`, which is an ordinary comment,
# not a shebang — the script uses [[ ]] and (( )) and must run under bash.
#
# Hey folks, please do not run this script more than necessary.
# Too many search requests will temporarily block searches for everyone, not just you.
# I do not want to DDOS London. I just want to allow for personal backups. Cheers!
echo -e "\n-========================================================================-"
echo -e "-=- -=-"
echo -e "-=- SCRAPE_MEETINGS.SH: Downloads committee videos and agendas -=-"
echo -e "-=- -=-"
echo -e "-=- https://gist.github.com/rvtr/1b471e5f5215c368fd78d9aba05f8dc2 -=-"
echo -e "-=- Lillian Skinner (2025) -=-"
echo -e "-=- -=-"
echo -e "-========================================================================-"

echo "Starting job: SCRAPE_MEETINGS: $(date)"

# Warning to all who read this script:
# It is badly written. I know it is bad, but I am tired okay, and sometimes sloppy just works.

# London seems to have recently blocked unusual user agents. Can't use wget or even ping. Thankfully pretend to be a real person!
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"

# Scratch locations; wiped at startup so every run starts clean.
TEMP_DIR="./tmp/"
SEARCH_PAGE="./tmp/index.html"
AGENDA_HTML="./tmp/work.html"
#VIDEO_TIMESTAMP_JSON="./tmp/time.json"

if [ -d "$TEMP_DIR" ]; then
  rm -r "$TEMP_DIR"
fi
rm -f "$SEARCH_PAGE"
rm -f "$AGENDA_HTML"

mkdir "$TEMP_DIR"

SEARCH_URL="https://london.ca/government/council-civic-administration/council-committee-meetings/meetings"
# Need to confirm. When stacking params does the type need to be f[1]?
SEARCH_FORMAT_COMMITTEE="f[1]=meeting_type%3A"
SEARCH_FORMAT_DATE="f[0]=meeting_date%3A"
SEARCH_FORMAT_QUERY="search=query&sort_by=field_meeting_date"

# As far as I'm aware there are no meetings prior to 2011.
# The year loop runs from i (this year) up to but not including x
# (next year), i.e. only the current year is scraped on each run.
current_year=$(date +%Y)
current_month=$(date +%m)
current_day=$(date +%d)
i=$(date +%Y)
x=$((i + 1))
echo $x
SEARCH_END="FALSE"
|
||||
# Decode the HTML entities that the search page embeds in href attributes.
# NOTE(review): this pipeline was garbled by shell quoting in the published
# copy (`sed 's/'/'\''/g'` is not valid sed); the reconstruction below
# follows the apparent intent — &amp; -> & and &#039; -> ' — confirm against
# the live page markup.
decode_entities() {
  sed 's/&amp;/\&/g' | sed "s/&#039;/'/g"
}

# Outer loop: one pass per year from i (current year) up to x-1.
# Inner loop: page through that year's search results until none are left
# (wget exit code 8, "No results found.", or a meeting older than last month).
while (( i < x )); do
  j=0
  SEARCH_END="FALSE"
  while [[ $SEARCH_END == "FALSE" ]]; do
    echo "SCRAPE_MEETINGS: Downloading search results... Page $j of $i"
    wget --user-agent="$WGET_UA" "$SEARCH_URL?$SEARCH_FORMAT_DATE$i&page=$j" -O "$SEARCH_PAGE" -q #--show-progress
    if [ $? -ne 8 ]; then
      FOUNDMEETING="FALSE"

      GREP404=$(grep "No results found." "$SEARCH_PAGE")
      if [[ "$GREP404" == "" ]]; then
        while IFS= read -r LINE; do

          # All meeting items in the search results are formatted like so:
          # - One line with the name
          # - Second line with all other info including links
          #
          # We can find the first line by the class "views-field-field-meeting-notes"
          # FOUNDMEETING=TRUE will show that the first line has been found, and so
          # the next line read will be "confirmed" as line 2 of the meeting info
          # The first two links of every second line are (in order) the PDF and HTML agendas

          if [[ "TRUE" == $FOUNDMEETING ]]; then
            FOUNDMEETING="FALSE"
            echo "SCRAPE_MEETINGS: -========================================================================-"
            echo "SCRAPE_MEETINGS: Working on $MEETING_NAME ($MEETING_YEAR/$MEETING_MONTH/$MEETING_DAY)"
            echo "SCRAPE_MEETINGS: All files to be saved as $MEETING_NAME/$MEETING_YEAR/$MEETING_MONTH-$MEETING_DAY/"
            echo "SCRAPE_MEETINGS: -========================================================================-"
            echo "SCRAPE_MEETINGS: Task starting on: $(date)"

            #echo "CANCEL NOW!!!"
            #sleep 5

            # Grab meeting item links
            echo "$LINE" | sed 's/href=./\nhref="/g' | grep 'href="https' | sed 's/.*href="\([^"]*\)".*/\1/p' | uniq > "./tmp/meeting_urls"
            # Grab meeting item types
            echo "$LINE" | sed 's/rel=.noreferrer.>/\nrel="noreferrer">/g' | grep 'rel="noreferrer">' | sed 's/.*rel="noreferrer">\([^<]*\)<.*/\1/p' | uniq > "./tmp/meeting_types"

            AGENDA_HTML_URL=""
            AGENDA_PDF_URL=""
            AGENDA_REVISE_HTML_URL=""
            AGENDA_REVISE_PDF_URL=""
            MINUTES_HTML_URL=""
            MINUTES_PDF_URL=""
            MINUTES_ATTACH_PDF_URL=""

            # Pair each URL (stdin) with its link label (fd 3) and bucket by
            # label, decoding HTML entities in the stored URL.
            echo "SCRAPE_MEETINGS: Found the following documents:"
            while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
              echo "SCRAPE_MEETINGS: - $LINEA2"

              case "$LINEA2" in
                "Agenda (HTML) ")
                  AGENDA_HTML_URL=$(echo "$LINEA1" | decode_entities) ;;
                "Agenda (PDF) ")
                  AGENDA_PDF_URL=$(echo "$LINEA1" | decode_entities) ;;
                "Revised Agenda (HTML) ")
                  AGENDA_REVISE_HTML_URL=$(echo "$LINEA1" | decode_entities) ;;
                "Revised Agenda (PDF) ")
                  AGENDA_REVISE_PDF_URL=$(echo "$LINEA1" | decode_entities) ;;
                "Minutes (HTML) ")
                  MINUTES_HTML_URL=$(echo "$LINEA1" | decode_entities) ;;
                "Minutes (PDF) ")
                  MINUTES_PDF_URL=$(echo "$LINEA1" | decode_entities) ;;
                "Minutes with Attachments (PDF) ")
                  MINUTES_ATTACH_PDF_URL=$(echo "$LINEA1" | decode_entities) ;;
              esac

            done < ./tmp/meeting_urls 3< ./tmp/meeting_types

            # Always prefer Revised Agendas
            # BUGFIX: ERROR is now reset for every meeting and expanded in the
            # test below. The original `[[ ERROR="FALSE" ]]` tested a literal
            # non-empty string and was always true, so meetings with no HTML
            # agenda were processed against a stale work.html.
            ERROR="FALSE"
            echo "SCRAPE_MEETINGS: Downloading agenda HTML..."
            if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
              wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$AGENDA_HTML" -q #--show-progress
            elif [[ $AGENDA_HTML_URL != "" ]]; then
              wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$AGENDA_HTML" -q #--show-progress
            else
              ERROR="TRUE"
            fi

            if [[ $ERROR == "FALSE" ]]; then

              # mkdir -p creates the whole archive path in one quiet step; the
              # original chain of unconditional mkdirs printed "File exists"
              # errors on every meeting after the first.
              MEETING_DIR=$(printf "./LondonArchive/Meetings/%s/%s/%s-%s" "$MEETING_NAME" "$MEETING_YEAR" "$MEETING_MONTH" "$MEETING_DAY")
              mkdir -p "$MEETING_DIR/"
              mkdir -p "$MEETING_DIR/Attachments/"

              # Direct video links is always "video.isilive.ca/<REGION>/<NAME>"
              # There are some eScribe ones, but those are in m3u8s and are really annoying to work with

              # ...not annoying as more sed though.
              VIDEO_URL=$(grep 'id="isi_player"' "$AGENDA_HTML" | sed -n 's/.*data-stream_name="\([^"]*\)".*/\1/p' | sed 's/ /%20/g')

              if [[ $VIDEO_URL != "" ]]; then
                echo "SCRAPE_MEETINGS: Saving recording URL..."
                echo "https://video.isilive.ca/london/$VIDEO_URL" > "$MEETING_DIR/RecordingLink.txt"
              fi

              # Get attachment links
              grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" "$AGENDA_HTML" | sed 's/href=.filestream\.ashx/\nhref="filestream\.ashx/g' | grep 'filestream.ashx' | sed 's/. data-toggle/\" data-toggle/p' | sed 's/href=.\([^/]*\)".*/\1/p' | awk '!x[$0]++' > "./tmp/attachment_urls"
              # Get attachment names
              grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" "$AGENDA_HTML" | sed 's/data-original-title=./\ndata-original-title='\''/g' | grep 'data-original-title' | sed 's/data-original-title=.//p' | sed 's/.pdf['\'':"].*/.pdf/g' | awk '!x[$0]++' > "./tmp/attachment_names"
              # Download attachment and use the name grabbed above
              echo "SCRAPE_MEETINGS: Found the following agenda attachments:"
              while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
                echo "SCRAPE_MEETINGS: - $LINEA2"
                wget --user-agent="$WGET_UA" "https://pub-london.escribemeetings.com/$LINEA1" -O "$MEETING_DIR/Attachments/$LINEA2" -q #--show-progress
              done < ./tmp/attachment_urls 3< ./tmp/attachment_names
              echo "SCRAPE_MEETINGS: All attachments saved."

              # Save the agenda as PDF when any PDF exists; fall back to HTML.
              if [[ $AGENDA_REVISE_PDF_URL != "" ]] || [[ $AGENDA_PDF_URL != "" ]]; then
                if [[ $AGENDA_REVISE_PDF_URL != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving revised agenda as PDF..."
                  wget --user-agent="$WGET_UA" "$AGENDA_REVISE_PDF_URL" -O "$MEETING_DIR/Agenda_Revised.pdf" -q #--show-progress
                fi
                if [[ $AGENDA_PDF_URL != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving regular agenda as PDF..."
                  wget --user-agent="$WGET_UA" "$AGENDA_PDF_URL" -O "$MEETING_DIR/Agenda.pdf" -q #--show-progress
                fi
              else
                if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving revised agenda as HTML... (no PDF found!)"
                  wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$MEETING_DIR/Agenda_Revised.html" -q #--show-progress
                fi
                if [[ $AGENDA_HTML_URL != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving regular agenda as HTML... (no PDF found!)"
                  wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$MEETING_DIR/Agenda.html" -q #--show-progress
                fi
              fi

              # Same preference order for minutes.
              if [[ $MINUTES_ATTACH_PDF_URL != "" ]] || [[ $MINUTES_PDF_URL != "" ]]; then
                if [[ $MINUTES_ATTACH_PDF_URL != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving minutes with attachments as PDF..."
                  wget --user-agent="$WGET_UA" "$MINUTES_ATTACH_PDF_URL" -O "$MEETING_DIR/Minutes_With_Attachments.pdf" -q #--show-progress
                fi
                if [[ $MINUTES_PDF_URL != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving minutes as PDF..."
                  wget --user-agent="$WGET_UA" "$MINUTES_PDF_URL" -O "$MEETING_DIR/Minutes.pdf" -q #--show-progress
                fi
              else
                if [[ $MINUTES_HTML_URL != "" ]]; then
                  echo "SCRAPE_MEETINGS: Saving minutes as HTML... (no PDF found!)"
                  wget --user-agent="$WGET_UA" "$MINUTES_HTML_URL" -O "$MEETING_DIR/Minutes.html" -q #--show-progress
                fi
              fi

            fi
            echo "SCRAPE_MEETINGS: All files from this meeting have been saved."
          fi

          # Line 1 of a meeting entry: parse "Month D, YYYY - Committee Name"
          # into year/month/day/name; the *next* line read holds the links.
          GREPMEETING=$(echo "$LINE" | grep "views-field-field-meeting-notes")
          if [[ "$GREPMEETING" != "" ]]; then
            MEETING_INFO=$(echo "$LINE" | sed -n 's/.*<div class="meeting__date">\([^<]*\)<\/div>.*/\1/p')

            MEETING_MONTH_WORD=$(echo "$MEETING_INFO" | sed -E 's/^([A-Za-z]+) .*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
            MEETING_DAY_SHORT=$(echo "$MEETING_INFO" | sed -E 's/^[A-Za-z]+ ([0-9]+),.*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
            MEETING_DAY=$(printf "%02d" "$MEETING_DAY_SHORT")
            MEETING_YEAR=$(echo "$MEETING_INFO" | sed -E 's/^[A-Za-z]+ [0-9]+, ([0-9]+).*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
            MEETING_NAME=$(echo "$MEETING_INFO" | sed -E 's/^[A-Za-z]+ [0-9]+, [0-9]+ - (.*)/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
            case "$MEETING_MONTH_WORD" in
              January) MEETING_MONTH="01" ;;
              February) MEETING_MONTH="02" ;;
              March) MEETING_MONTH="03" ;;
              April) MEETING_MONTH="04" ;;
              May) MEETING_MONTH="05" ;;
              June) MEETING_MONTH="06" ;;
              July) MEETING_MONTH="07" ;;
              August) MEETING_MONTH="08" ;;
              September) MEETING_MONTH="09" ;;
              October) MEETING_MONTH="10" ;;
              November) MEETING_MONTH="11" ;;
              December) MEETING_MONTH="12" ;;
              *) MEETING_MONTH="--" ;;
            esac

            # Only scrape meetings from last month onward; results are sorted
            # by date, so the first older meeting ends the year's scan.
            # NOTE(review): if MEETING_MONTH is "--" (unparsed month word) the
            # 10#-- arithmetic below errors out, and a December/January year
            # boundary is not handled — confirm whether either matters.
            if (( 10#$MEETING_YEAR >= 10#$current_year )) && (( 10#$MEETING_MONTH >= $((10#$current_month - 1)) )); then
              #echo "date is greater"
              FOUNDMEETING="TRUE"
            else
              echo "SCRAPE_MEETINGS: Dates are in the past! Abort."
              SEARCH_END="TRUE"
              break
            fi

          fi
        done < "$SEARCH_PAGE"
      else
        SEARCH_END="TRUE"
        echo "SCRAPE_MEETINGS: No more pages!"
      fi
    else
      SEARCH_END="TRUE"
      echo "SCRAPE_MEETINGS: No more pages!"
    fi
    ((j++))
  done
  ((i++))
done
echo "Done job: SCRAPE_MEETINGS: $(date)"
|
||||
Loading…
Reference in New Issue
Block a user