From 4db585f9fd50c9bd136cd99b6d5bd15ab122b300 Mon Sep 17 00:00:00 2001
From: Lillian Skinner <56081713+rvtr@users.noreply.github.com>
Date: Tue, 17 Jun 2025 17:18:00 -0400
Subject: [PATCH]

---
 Review note: the SCRAPE_MEETINGS.SH hunk below was revised by hand, so the
 blob id on its "index" line is no longer the hash of the added content.
 Indentation in both hunks was re-created from a whitespace-collapsed copy.

 SCRAPE_LONDON.SH   | 194 --------------------------
 SCRAPE_MEETINGS.SH | 332 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 332 insertions(+), 194 deletions(-)
 delete mode 100644 SCRAPE_LONDON.SH
 create mode 100644 SCRAPE_MEETINGS.SH

diff --git a/SCRAPE_LONDON.SH b/SCRAPE_LONDON.SH
deleted file mode 100644
index 8a11314..0000000
--- a/SCRAPE_LONDON.SH
+++ /dev/null
@@ -1,194 +0,0 @@
-#/bash
-echo -e "\n-========================================================================-"
-echo -e "-=- -=-"
-echo -e "-=- SCRAPE_LONDON.SH: Downloads committee videos and agendas -=-"
-echo -e "-=- -=-"
-echo -e "-=- https://gist.github.com/rvtr/1b471e5f5215c368fd78d9aba05f8dc2 -=-"
-echo -e "-=- Lillian Skinner -=-"
-echo -e "-=- -=-"
-echo -e "-========================================================================-"
-
-# Warning to all who read this script:
-# It is bad. I know it is bad, but I am tired okay, and sometimes sloppy just works.
-
-# London seems to have recently blocked unusual user agents. Can't use wget or even ping. Thankfully pretend to be a real person!
-WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"
-
-TEMP_DIR="./tmp/"
-SEARCH_PAGE="./tmp/index.html"
-AGENDA_DIR="./Agenda/"
-AGENDA_HTML="./tmp/work.html"
-VIDEO_DIR="./Video/"
-#VIDEO_TIMESTAMP_JSON="./tmp/time.json"
-
-if [ -d "$TEMP_DIR" ]; then
-    rm -r $TEMP_DIR
-fi
-if [ -d "$AGENDA_DIR" ]; then
-    rm -r $AGENDA_DIR
-fi
-if [ -d "$VIDEO_DIR" ]; then
-    rm -r $VIDEO_DIR
-fi
-mkdir $TEMP_DIR
-mkdir $AGENDA_DIR
-mkdir $VIDEO_DIR
-
-SEARCH_URL="https://london.ca/government/council-civic-administration/council-committee-meetings/meetings"
-# Need to confirm. When stacking params does the date need to be f[1]?
-SEARCH_PARAM_COMMITTEE="f[0]=meeting_type%3A"
-SEARCH_PARAM_DATE="f[0]=meeting_date%3A"
-SEARCH_PARAM_QUERY="search=query&sort_by=field_meeting_date"
-
-i=0
-SEARCH_END="FALSE"
-while [[ $SEARCH_END == "FALSE" ]]; do
-    echo "Downloading search results... Page: $i"
-    wget --user-agent="$WGET_UA" $SEARCH_URL"?page=$i" -O $SEARCH_PAGE -q #--show-progress
-    if [ $? -ne 8 ]; then
-        FOUNDMEETING="FALSE"
-
-        GREP404=$(cat $SEARCH_PAGE | grep "No results found.")
-        if [[ "$GREP404" == "" ]]; then
-            while IFS= read -r LINE; do
-
-                # All meeting items in the search results are formatted like so:
-                # - One line with the name
-                # - Second line with all other info including links
-                #
-                # We can find the first line by the class "views-field-field-meeting-notes"
-                # FOUNDMEETING=TRUE will show that the first line has been found, and so the next line read will be "confirmed" as line 2 of the meeting info
-                # The first two links of every second line are (in order) the PDF and HTML agendas
-
-                if [[ "TRUE" == $FOUNDMEETING ]]; then
-                    FOUNDMEETING="FALSE"
-                    echo "-========================================================================-"
-                    echo " Working on $MEETING_NAME"
-                    echo "-========================================================================-"
-
-                    # Grab meeting item links
-                    echo $LINE | sed 's/href=./\nhref="/g' | grep 'href="https' | sed 's/.*href="\([^"]*\)".*/\1/p' | uniq > "./tmp/meeting_urls"
-                    # Grab meeting item types
-                    echo $LINE | sed 's/rel=.noreferrer.>/\nrel="noreferrer">/g' | grep 'rel="noreferrer">' | sed 's/.*rel="noreferrer">\([^<]*\)<.*/\1/p' | uniq > "./tmp/meeting_types"
-
-                    AGENDA_HTML_URL=""
-                    AGENDA_PDF_URL=""
-                    AGENDA_REVISE_HTML_URL=""
-                    AGENDA_REVISE_PDF_URL=""
-                    MINUTES_HTML_URL=""
-                    MINUTES_PDF_URL=""
-                    MINUTES_ATTACH_PDF_URL=""
-
-                    echo "Found the following documents:"
-                    while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
-                        echo " - $LINEA2"
-
-                        case "$LINEA2" in
-                            "Agenda (HTML) ")
-                                AGENDA_HTML_URL="$LINEA1" ;;
-                            "Agenda (PDF) ")
-                                AGENDA_PDF_URL="$LINEA1" ;;
-                            "Revised Agenda (HTML) ")
-                                AGENDA_REVISE_HTML_URL="$LINEA1" ;;
-                            "Revised Agenda (PDF) ")
-                                AGENDA_REVISE_PDF_URL="$LINEA1" ;;
-                            "Minutes (HTML) ")
-                                MINUTES_HTML_URL="$LINEA1" ;;
-                            "Minutes (PDF) ")
-                                MINUTES_PDF_URL="$LINEA1" ;;
-                            "Minutes with Attachments (PDF) ")
-                                MINUTES_ATTACH_PDF_URL="$LINEA1" ;;
-                        esac
-
-                    done < ./tmp/meeting_urls 3< ./tmp/meeting_types
-
-                    # Always prefer Revised Agendas
-                    echo "Downloading agenda HTML..."
-                    if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
-                        wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O $AGENDA_HTML -q #--show-progress
-                    elif [[ $AGENDA_HTML_URL != "" ]]; then
-                        wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O $AGENDA_HTML -q #--show-progress
-                    else
-                        ERROR="TRUE"
-                    fi
-
-                    if [[ ERROR="FALSE" ]]; then
-                        mkdir "./tmp/$MEETING_NAME/"
-                        mkdir "./tmp/$MEETING_NAME/Attachments/"
-
-                        # Direct video links is always "video.isilive.ca//"
-                        # There are some eScribe ones, but those are in m3u8s and are really annoying to work with
-
-                        # ...not annoying as more sed though.
-                        VIDEO_URL=$(grep 'id="isi_player"' ./tmp/work.html | sed -n 's/.*data-stream_name="\([^"]*\)".*/\1/p' | sed 's/ /%20/g')
-
-                        if [[ $VIDEO_URL != "" ]]; then
-                            echo "Found meeting recording."
-                            echo "https://video.isilive.ca/london/"$VIDEO_URL > "./tmp/$MEETING_NAME/RecordingLink.txt"
-                        fi
-
-                        # Get attachment links
-                        cat $AGENDA_HTML | grep "AgendaItemAgendaItem1TitleHeader" | sed 's/href=.filestream\.ashx/\nhref="filestream\.ashx/g' | grep 'filestream.ashx' | sed 's/href=.\([^/]*\)".*/\1/p' | awk '!x[$0]++' > "./tmp/attachment_urls"
-                        # Get attachment names
-                        cat $AGENDA_HTML | grep "AgendaItemAgendaItem1TitleHeader" | sed 's/data-original-title=./\ndata-original-title='\''/g' | grep 'data-original-title' | sed 's/data-original-title=.\([^'\''/]*\)'\''.*/\1/p' | awk '!x[$0]++' > "./tmp/attachment_names"
-                        # Download attachment and use the name grabbed above
-                        echo "Found the following agenda attachments:"
-                        while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
-                            echo " - $LINEA2"
-                            wget --user-agent="$WGET_UA" "https://pub-london.escribemeetings.com/$LINEA1" -O "./tmp/$MEETING_NAME/Attachments/$LINEA2" -q #--show-progress
-                        done < ./tmp/attachment_urls 3< ./tmp/attachment_names
-                        echo "All attachments saved."
-
-                        if [[ $AGENDA_REVISE_PDF_URL != "" ]] || [[ $AGENDA_PDF_URL != "" ]]; then
-                            if [[ $AGENDA_REVISE_PDF_URL != "" ]]; then
-                                echo "Saving revised agenda..."
-                                wget --user-agent="$WGET_UA" "$AGENDA_REVISE_PDF_URL" -O "./tmp/$MEETING_NAME/Agenda_Revised.pdf" -q #--show-progress
-                            fi
-                            if [[ $AGENDA_PDF_URL != "" ]]; then
-                                echo "Saving regular agenda..."
-                                wget --user-agent="$WGET_UA" "$AGENDA_PDF_URL" -O "./tmp/$MEETING_NAME/Agenda.pdf" -q #--show-progress
-                            fi
-                        else
-                            if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
-                                echo "Saving revised agenda as HTML (no PDF found!)"
-                                wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "./tmp/$MEETING_NAME/Agenda_Revised.html" -q #--show-progress
-                            fi
-                            if [[ $AGENDA_HTML_URL != "" ]]; then
-                                echo "Saving regular agenda as HTML (no PDF found!)"
-                                wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "./tmp/$MEETING_NAME/Agenda.html" -q #--show-progress
-                            fi
-                        fi
-
-                        if [[ $MINUTES_ATTACH_PDF_URL != "" ]] || [[ $MINUTES_PDF_URL != "" ]]; then
-                            if [[ $MINUTES_ATTACH_PDF_URL != "" ]]; then
-                                echo "Saving minutes with attachments..."
-                                wget --user-agent="$WGET_UA" "$MINUTES_ATTACH_PDF_URL" -O "./tmp/$MEETING_NAME/Minutes_With_Attachments.pdf" -q #--show-progress
-                            fi
-                            if [[ $MINUTES_PDF_URL != "" ]]; then
-                                echo "Saving minutes..."
-                                wget --user-agent="$WGET_UA" "$MINUTES_PDF_URL" -O "./tmp/$MEETING_NAME/Minutes.pdf" -q #--show-progress
-                            fi
-                        else
-                            if [[ $MINUTES_HTML_URL != "" ]]; then
-                                echo "Saving minutes as HTML (no PDF found!)"
-                                wget --user-agent="$WGET_UA" "$MINUTES_HTML_URL" -O "./tmp/$MEETING_NAME/Minutes.html" -q #--show-progress
-                            fi
-                        fi
-
-                    fi
-                fi
-
-                GREPMEETING=$(echo $LINE | grep "views-field-field-meeting-notes")
-                if [[ "$GREPMEETING" != "" ]]; then
-                    FOUNDMEETING="TRUE"
-                    MEETING_NAME=$(echo $LINE | sed -n 's/.*\([^<]*\)<\/div>.*/\1/p')
-                fi
-            done < $SEARCH_PAGE
-        else
-            SEARCH_END="TRUE"
-        fi
-    else
-        SEARCH_END="TRUE"
-    fi
-    ((i++))
-done
diff --git a/SCRAPE_MEETINGS.SH b/SCRAPE_MEETINGS.SH
new file mode 100644
index 0000000..b8cc1b4
--- /dev/null
+++ b/SCRAPE_MEETINGS.SH
@@ -0,0 +1,332 @@
+#!/usr/bin/env bash
+#
+# SCRAPE_MEETINGS.SH: Downloads committee videos and agendas
+#
+# Walks the london.ca meeting search for the current year, page by page. For
+# every meeting dated in the previous calendar month or later it saves the
+# agenda, the minutes, the agenda attachments and the recording link under
+#   ./LondonArchive/Meetings/<meeting name>/<year>/<month>-<day>/
+# Scratch files live in ./tmp/, which is wiped at the start of every run.
+#
+# Needs bash, wget and GNU sed ("\n" is used in sed replacements).
+#
+# Hey folks, please do not run this script more than necessary.
+# Too many search requests will temporarily block searches for everyone, not just you.
+# I do not want to DDOS London. I just want to allow for personal backups. Cheers!
+echo -e "\n-========================================================================-"
+echo -e "-=- -=-"
+echo -e "-=- SCRAPE_MEETINGS.SH: Downloads committee videos and agendas -=-"
+echo -e "-=- -=-"
+echo -e "-=- https://gist.github.com/rvtr/1b471e5f5215c368fd78d9aba05f8dc2 -=-"
+echo -e "-=- Lillian Skinner (2025) -=-"
+echo -e "-=- -=-"
+echo -e "-========================================================================-"
+
+echo "Starting job: SCRAPE_MEETINGS: $(date)"
+
+# Warning to all who read this script:
+# It is badly written. I know it is bad, but I am tired okay, and sometimes sloppy just works.
+
+# London seems to have recently blocked unusual user agents, so a plain wget (or even
+# ping) gets refused. Thankfully we can pretend to be a real person!
+WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"
+
+TEMP_DIR="./tmp/"
+SEARCH_PAGE="./tmp/index.html"
+AGENDA_HTML="./tmp/work.html"
+MEETING_URLS="./tmp/meeting_urls"
+MEETING_TYPES="./tmp/meeting_types"
+ATTACHMENT_URLS="./tmp/attachment_urls"
+ATTACHMENT_NAMES="./tmp/attachment_names"
+ARCHIVE_DIR="./LondonArchive/Meetings"
+#VIDEO_TIMESTAMP_JSON="./tmp/time.json"
+
+SEARCH_URL="https://london.ca/government/council-civic-administration/council-committee-meetings/meetings"
+# Need to confirm. When stacking params does the type need to be f[1]?
+# Only the date facet is used so far; the other two are kept for reference.
+SEARCH_FORMAT_COMMITTEE="f[1]=meeting_type%3A"
+SEARCH_FORMAT_DATE="f[0]=meeting_date%3A"
+SEARCH_FORMAT_QUERY="search=query&sort_by=field_meeting_date"
+
+# log MESSAGE...: print a progress line carrying the job prefix.
+log() {
+    printf '%s\n' "SCRAPE_MEETINGS: $*"
+}
+
+# fetch URL DEST: download URL into DEST while posing as a browser.
+# Returns wget's exit status (0 = OK, 8 = the server answered with an error).
+fetch() {
+    wget --user-agent="$WGET_UA" "$1" -O "$2" -q #--show-progress
+}
+
+# squeeze_ws: filter that turns every run of whitespace into one space and
+# trims both ends of each line.
+squeeze_ws() {
+    sed -e 's/[[:space:]]\{1,\}/ /g' -e 's/^ //' -e 's/ $//'
+}
+
+# decode_entities: filter that undoes the HTML escaping of "&" and "'" found
+# in link targets copied out of the page source.
+decode_entities() {
+    sed -e 's/&amp;/\&/g' -e "s/&#0*39;/'/g"
+}
+
+# month_number NAME: print the two-digit number of an English month name,
+# or "--" when NAME is not a month.
+month_number() {
+    case "$1" in
+        January) echo "01" ;;
+        February) echo "02" ;;
+        March) echo "03" ;;
+        April) echo "04" ;;
+        May) echo "05" ;;
+        June) echo "06" ;;
+        July) echo "07" ;;
+        August) echo "08" ;;
+        September) echo "09" ;;
+        October) echo "10" ;;
+        November) echo "11" ;;
+        December) echo "12" ;;
+        *) echo "--" ;;
+    esac
+}
+
+# save_document URL DEST MESSAGE: when URL is set, announce MESSAGE and save
+# the document as DEST. wget -O creates DEST even when the download fails, so
+# a failed download is removed again rather than left behind as an empty file.
+save_document() {
+    [[ -n "$1" ]] || return 0
+    log "$3"
+    if ! fetch "$1" "$2"; then
+        log "WARNING: could not download $1"
+        rm -f -- "$2"
+    fi
+}
+
+# parse_meeting_info LINE: read "<Month> <day>, <year> - <meeting name>" out of
+# the search result line that carries the "views-field-field-meeting-notes"
+# class. Sets MEETING_NAME, MEETING_YEAR, MEETING_MONTH and MEETING_DAY, or
+# returns 1 and leaves them alone when no such date can be found.
+#
+# NOTE(review): the text is taken to be whatever is left of the line once all
+# tags are stripped - confirm this against the live page markup.
+parse_meeting_info() {
+    local info month_word day year name month
+    local pattern='([A-Za-z]+) +([0-9]+), +([0-9]+) +- +(.*)$'
+
+    info=$(printf '%s\n' "$1" | sed 's/<[^>]*>//g' | squeeze_ws)
+    [[ $info =~ $pattern ]] || return 1
+    month_word=${BASH_REMATCH[1]}
+    day=${BASH_REMATCH[2]}
+    year=${BASH_REMATCH[3]}
+    name=${BASH_REMATCH[4]}
+
+    month=$(month_number "$month_word")
+    [[ $month != "--" && -n $name ]] || return 1
+
+    MEETING_MONTH=$month
+    # 10# keeps a zero-padded day such as "08" from being read as octal.
+    MEETING_DAY=$(printf '%02d' "$((10#$day))")
+    MEETING_YEAR=$year
+    # The name becomes a directory name, so it must not contain a slash.
+    MEETING_NAME=${name//\//-}
+}
+
+# process_meeting LINE: LINE is the search result line holding a meeting's
+# document links (the line after the one parse_meeting_info understands).
+# Saves everything linked from it into the archive directory of the meeting
+# described by MEETING_NAME, MEETING_YEAR, MEETING_MONTH and MEETING_DAY.
+process_meeting() {
+    local line url type name video_url
+    local agenda_html_url="" agenda_pdf_url=""
+    local agenda_revise_html_url="" agenda_revise_pdf_url=""
+    local minutes_html_url="" minutes_pdf_url="" minutes_attach_pdf_url=""
+    local have_agenda_html="FALSE"
+
+    log "-========================================================================-"
+    log "Working on $MEETING_NAME ($MEETING_YEAR/$MEETING_MONTH/$MEETING_DAY)"
+    log "All files to be saved as $MEETING_NAME/$MEETING_YEAR/$MEETING_MONTH-$MEETING_DAY/"
+    log "-========================================================================-"
+    log "Task starting on: $(date)"
+
+    line=$(printf '%s\n' "$1" | squeeze_ws)
+
+    # Grab meeting item links, one per line...
+    printf '%s\n' "$line" | sed 's/href=./\nhref="/g' | grep 'href="https' \
+        | sed 's/.*href="\([^"]*\)".*/\1/' | uniq > "$MEETING_URLS"
+    # ...and, in the same order, the text of each link (the item type).
+    printf '%s\n' "$line" | sed 's/rel=.noreferrer.>/\nrel="noreferrer">/g' | grep 'rel="noreferrer">' \
+        | sed 's/.*rel="noreferrer">\([^<]*\)<.*/\1/' | uniq > "$MEETING_TYPES"
+
+    log "Found the following documents:"
+    while IFS= read -r url && IFS= read -r type <&3; do
+        log "- $type"
+        url=$(printf '%s\n' "$url" | decode_entities)
+        # The link text carries trailing whitespace, hence the trailing "*".
+        case "$type" in
+            "Agenda (HTML)"*) agenda_html_url=$url ;;
+            "Agenda (PDF)"*) agenda_pdf_url=$url ;;
+            "Revised Agenda (HTML)"*) agenda_revise_html_url=$url ;;
+            "Revised Agenda (PDF)"*) agenda_revise_pdf_url=$url ;;
+            "Minutes (HTML)"*) minutes_html_url=$url ;;
+            "Minutes (PDF)"*) minutes_pdf_url=$url ;;
+            "Minutes with Attachments (PDF)"*) minutes_attach_pdf_url=$url ;;
+        esac
+    done < "$MEETING_URLS" 3< "$MEETING_TYPES"
+
+    MEETING_DIR="$ARCHIVE_DIR/$MEETING_NAME/$MEETING_YEAR/$MEETING_MONTH-$MEETING_DAY"
+    if ! mkdir -p -- "$MEETING_DIR/Attachments"; then
+        log "WARNING: cannot create $MEETING_DIR, skipping this meeting."
+        return 1
+    fi
+
+    # Always prefer Revised Agendas. The scratch copy is removed first so that a
+    # meeting without an HTML agenda can never pick up the recording link and
+    # attachments of the meeting handled before it.
+    log "Downloading agenda HTML..."
+    rm -f -- "$AGENDA_HTML"
+    if [[ -n "$agenda_revise_html_url" ]] && fetch "$agenda_revise_html_url" "$AGENDA_HTML"; then
+        have_agenda_html="TRUE"
+    elif [[ -n "$agenda_html_url" ]] && fetch "$agenda_html_url" "$AGENDA_HTML"; then
+        have_agenda_html="TRUE"
+    fi
+
+    if [[ $have_agenda_html == "TRUE" ]]; then
+        # Direct video links are always "https://video.isilive.ca/london/<stream name>".
+        # There are some eScribe ones, but those are in m3u8s and are really annoying to work with
+        # ...not as annoying as more sed though.
+        video_url=$(grep 'id="isi_player"' "$AGENDA_HTML" \
+            | sed -n 's/.*data-stream_name="\([^"]*\)".*/\1/p' | head -n 1 | sed 's/ /%20/g')
+        if [[ -n "$video_url" ]]; then
+            log "Saving recording URL..."
+            printf '%s\n' "https://video.isilive.ca/london/$video_url" > "$MEETING_DIR/RecordingLink.txt"
+        fi
+
+        # Get attachment links
+        grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" "$AGENDA_HTML" \
+            | sed 's/href=.filestream\.ashx/\nhref="filestream\.ashx/g' | grep 'filestream.ashx' \
+            | sed 's/. data-toggle/\" data-toggle/' | sed 's/href=.\([^/]*\)".*/\1/' \
+            | awk '!x[$0]++' > "$ATTACHMENT_URLS"
+        # Get attachment names
+        grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" "$AGENDA_HTML" \
+            | sed 's/data-original-title=./\ndata-original-title='\''/g' | grep 'data-original-title' \
+            | sed 's/data-original-title=.//' | sed 's/.pdf['\'':"].*/.pdf/g' \
+            | awk '!x[$0]++' > "$ATTACHMENT_NAMES"
+        # Download each attachment under the name grabbed above
+        log "Found the following agenda attachments:"
+        while IFS= read -r url && IFS= read -r name <&3; do
+            log "- $name"
+            url=$(printf '%s\n' "$url" | decode_entities)
+            # A slash in the title would otherwise point outside Attachments/.
+            name=${name//\//_}
+            [[ -n "$name" ]] || continue
+            if ! fetch "https://pub-london.escribemeetings.com/$url" "$MEETING_DIR/Attachments/$name"; then
+                log "WARNING: could not download attachment $name"
+                rm -f -- "$MEETING_DIR/Attachments/$name"
+            fi
+        done < "$ATTACHMENT_URLS" 3< "$ATTACHMENT_NAMES"
+        log "All attachments saved."
+    else
+        log "No agenda HTML available, skipping recording link and attachments."
+    fi
+
+    if [[ -n "$agenda_revise_pdf_url" || -n "$agenda_pdf_url" ]]; then
+        save_document "$agenda_revise_pdf_url" "$MEETING_DIR/Agenda_Revised.pdf" "Saving revised agenda as PDF..."
+        save_document "$agenda_pdf_url" "$MEETING_DIR/Agenda.pdf" "Saving regular agenda as PDF..."
+    else
+        save_document "$agenda_revise_html_url" "$MEETING_DIR/Agenda_Revised.html" "Saving revised agenda as HTML... (no PDF found!)"
+        save_document "$agenda_html_url" "$MEETING_DIR/Agenda.html" "Saving regular agenda as HTML... (no PDF found!)"
+    fi
+
+    if [[ -n "$minutes_attach_pdf_url" || -n "$minutes_pdf_url" ]]; then
+        save_document "$minutes_attach_pdf_url" "$MEETING_DIR/Minutes_With_Attachments.pdf" "Saving minutes with attachments as PDF..."
+        save_document "$minutes_pdf_url" "$MEETING_DIR/Minutes.pdf" "Saving minutes as PDF..."
+    else
+        save_document "$minutes_html_url" "$MEETING_DIR/Minutes.html" "Saving minutes as HTML... (no PDF found!)"
+    fi
+
+    log "All files from this meeting have been saved."
+}
+
+# Start from an empty scratch directory. This also clears any stale copy of
+# $SEARCH_PAGE and $AGENDA_HTML, which both live inside it.
+rm -rf -- "${TEMP_DIR:?}"
+if ! mkdir -p -- "$TEMP_DIR"; then
+    log "Cannot create $TEMP_DIR! Abort." >&2
+    exit 1
+fi
+
+# As far as I'm aware there are no meetings prior to 2011.
+# For now only the current year is searched.
+current_year=$(date +%Y)
+current_month=$(date +%m)
+# Meetings older than the previous calendar month end the search. Counting in
+# months (year * 12 + month) keeps the comparison right across New Year;
+# 10# stops "08" and "09" from being read as octal.
+cutoff_index=$(( 10#$current_year * 12 + 10#$current_month - 1 ))
+
+i=$current_year
+x=$((i + 1))
+while (( i < x )); do
+    j=0
+    SEARCH_END="FALSE"
+    while [[ $SEARCH_END == "FALSE" ]]; do
+        log "Downloading search results... Page $j of $i"
+        fetch "${SEARCH_URL}?${SEARCH_FORMAT_DATE}${i}&page=${j}" "$SEARCH_PAGE"
+        status=$?
+        if (( status == 8 )); then
+            SEARCH_END="TRUE"
+            log "No more pages!"
+        elif (( status != 0 )); then
+            # Anything else (no network, DNS failure, ...) would otherwise leave an
+            # empty page behind and make this loop request page after page forever.
+            SEARCH_END="TRUE"
+            log "Search request failed (wget exit status $status)! Abort."
+        elif grep -q "No results found." "$SEARCH_PAGE"; then
+            SEARCH_END="TRUE"
+            log "No more pages!"
+        else
+            # All meeting items in the search results are formatted like so:
+            # - One line with the date and name
+            # - Second line with all other info including links
+            #
+            # We can find the first line by the class "views-field-field-meeting-notes"
+            # FOUNDMEETING=TRUE shows that the first line has been found, so the next
+            # line read is taken to be line 2 of the meeting info.
+            FOUNDMEETING="FALSE"
+            meetings_on_page=0
+            while IFS= read -r LINE || [[ -n "$LINE" ]]; do
+                if [[ $FOUNDMEETING == "TRUE" ]]; then
+                    FOUNDMEETING="FALSE"
+                    process_meeting "$LINE"
+                fi
+
+                if [[ $LINE == *views-field-field-meeting-notes* ]]; then
+                    meetings_on_page=$((meetings_on_page + 1))
+                    if ! parse_meeting_info "$LINE"; then
+                        log "Could not read the meeting date! Abort."
+                        SEARCH_END="TRUE"
+                        break
+                    fi
+                    if (( 10#$MEETING_YEAR * 12 + 10#$MEETING_MONTH >= cutoff_index )); then
+                        FOUNDMEETING="TRUE"
+                    else
+                        log "Dates are in the past! Abort."
+                        SEARCH_END="TRUE"
+                        break
+                    fi
+                fi
+            done < "$SEARCH_PAGE"
+
+            if (( meetings_on_page == 0 )); then
+                # Not a result page at all: stop instead of requesting page
+                # after page.
+                SEARCH_END="TRUE"
+                log "No meetings on this page! Abort."
+            fi
+        fi
+        j=$((j + 1))
+    done
+    i=$((i + 1))
+done
+echo "Done job: SCRAPE_MEETINGS: $(date)"