diff --git a/SCRAPE_LONDON.SH b/SCRAPE_LONDON.SH
index 9726587..4fe7f05 100644
--- a/SCRAPE_LONDON.SH
+++ b/SCRAPE_LONDON.SH
@@ -8,25 +8,25 @@ echo -e "-=-
echo -e "-========================================================================-"
conv_date() {
- echo "$1"
+ echo "$1"
MEETING_MONTH_WORD=$(echo "$1" | sed -E 's/^([A-Za-z]+) .*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
MEETING_DAY_SHORT=$(echo "$1" | sed -E 's/^[A-Za-z]+ ([0-9]+),.*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
MEETING_DAY=$(printf "%02d" $MEETING_DAY_SHORT)
MEETING_YEAR=$(echo "$1" | sed -E 's/^[A-Za-z]+ [0-9]+, ([0-9]+).*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
case "$MEETING_MONTH_WORD" in
- Jan*) MEETING_MONTH="01" ;;
- Feb*) MEETING_MONTH="02" ;;
- Mar*) MEETING_MONTH="03" ;;
- Apr*) MEETING_MONTH="04" ;;
- May) MEETING_MONTH="05" ;;
- Jun*) MEETING_MONTH="06" ;;
- Jul*) MEETING_MONTH="07" ;;
- Aug*) MEETING_MONTH="08" ;;
- Sep*) MEETING_MONTH="09" ;;
- Oct*) MEETING_MONTH="10" ;;
- Nov*) MEETING_MONTH="11" ;;
- Dec*) MEETING_MONTH="12" ;;
- *) MEETING_MONTH="--" ;;
+ Jan*) MEETING_MONTH="01" ;;
+ Feb*) MEETING_MONTH="02" ;;
+ Mar*) MEETING_MONTH="03" ;;
+ Apr*) MEETING_MONTH="04" ;;
+ May) MEETING_MONTH="05" ;;
+ Jun*) MEETING_MONTH="06" ;;
+ Jul*) MEETING_MONTH="07" ;;
+ Aug*) MEETING_MONTH="08" ;;
+ Sep*) MEETING_MONTH="09" ;;
+ Oct*) MEETING_MONTH="10" ;;
+ Nov*) MEETING_MONTH="11" ;;
+ Dec*) MEETING_MONTH="12" ;;
+ *) MEETING_MONTH="--" ;;
esac
}
@@ -54,130 +54,130 @@ conv_date_alt() {
}
set_agenda_url() {
- case "$1" in
- '"Agenda (HTML)"')
- AGENDA_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Agenda (PDF)"')
- AGENDA_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Revised Agenda (HTML)"')
- AGENDA_REVISE_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Revised Agenda (PDF)"')
- AGENDA_REVISE_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Minutes (HTML)"')
- MINUTES_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Minutes (PDF)"')
- MINUTES_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Minutes with Attachments (PDF)"')
- MINUTES_ATTACH_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
+ case "$1" in
+ '"Agenda (HTML)"')
+ AGENDA_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Agenda (PDF)"')
+ AGENDA_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Revised Agenda (HTML)"')
+ AGENDA_REVISE_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Revised Agenda (PDF)"')
+ AGENDA_REVISE_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Minutes (HTML)"')
+ MINUTES_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Minutes (PDF)"')
+ MINUTES_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Minutes with Attachments (PDF)"')
+ MINUTES_ATTACH_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Agenda Full Package (HTML)"')
- AGENDA_FULL_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Agenda Full Package (PDF)"')
- AGENDA_FULL_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Agenda Cover Page (HTML)"')
- AGENDA_COVER_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Agenda Cover Page (PDF)"')
- AGENDA_COVER_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Post Agenda (HTML)"')
- AGENDA_POST_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Post Agenda (PDF)"')
- AGENDA_POST_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Addendum (HTML)"')
- ADDENDUM_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
- '"Addendum (PDF)"')
- ADDENDUM_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
- esac
+ '"Agenda Full Package (HTML)"')
+ AGENDA_FULL_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Agenda Full Package (PDF)"')
+ AGENDA_FULL_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Agenda Cover Page (HTML)"')
+ AGENDA_COVER_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Agenda Cover Page (PDF)"')
+ AGENDA_COVER_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Post Agenda (HTML)"')
+ AGENDA_POST_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Post Agenda (PDF)"')
+ AGENDA_POST_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Addendum (HTML)"')
+ ADDENDUM_HTML_URL=$(echo $2 | sed 's/\"//g') ;;
+ '"Addendum (PDF)"')
+ ADDENDUM_PDF_URL=$(echo $2 | sed 's/\"//g') ;;
+ esac
}
clear_agenda_url() {
- AGENDA_HTML_URL=""
- AGENDA_PDF_URL=""
- AGENDA_REVISE_HTML_URL=""
- AGENDA_REVISE_PDF_URL=""
- MINUTES_HTML_URL=""
- MINUTES_PDF_URL=""
- MINUTES_ATTACH_PDF_URL=""
+ AGENDA_HTML_URL=""
+ AGENDA_PDF_URL=""
+ AGENDA_REVISE_HTML_URL=""
+ AGENDA_REVISE_PDF_URL=""
+ MINUTES_HTML_URL=""
+ MINUTES_PDF_URL=""
+ MINUTES_ATTACH_PDF_URL=""
- AGENDA_FULL_HTML_URL=""
- AGENDA_FULL_PDF_URL=""
- AGENDA_COVER_HTML_URL=""
- AGENDA_COVER_PDF_URL=""
- AGENDA_POST_HTML_URL=""
- AGENDA_POST_PDF_URL=""
- ADDENDUM_HTML_URL=""
- ADDENDUM_PDF_URL=""
+ AGENDA_FULL_HTML_URL=""
+ AGENDA_FULL_PDF_URL=""
+ AGENDA_COVER_HTML_URL=""
+ AGENDA_COVER_PDF_URL=""
+ AGENDA_POST_HTML_URL=""
+ AGENDA_POST_PDF_URL=""
+ ADDENDUM_HTML_URL=""
+ ADDENDUM_PDF_URL=""
}
download_agendas() {
- if [[ $AGENDA_REVISE_PDF_URL != "" ]] || [[ $AGENDA_PDF_URL != "" ]]; then
+ if [[ $AGENDA_REVISE_PDF_URL != "" ]] || [[ $AGENDA_PDF_URL != "" ]]; then
if [[ $AGENDA_REVISE_PDF_URL != "" ]]; then
- echo "Saving revised agenda as PDF..."
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_REVISE_PDF_URL" -O "$1/Agenda_Revised.pdf" -N -q #--show-progress
+ echo "Saving revised agenda as PDF..."
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_REVISE_PDF_URL" -O "$1/Agenda_Revised.pdf" --no-hsts -N -q #--show-progress
fi
if [[ $AGENDA_PDF_URL != "" ]]; then
- echo "Saving regular agenda as PDF..."
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_PDF_URL" -O "$1/Agenda.pdf" -N -q #--show-progress
+ echo "Saving regular agenda as PDF..."
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_PDF_URL" -O "$1/Agenda.pdf" --no-hsts -N -q #--show-progress
fi
- elif [[ $AGENDA_REVISE_HTML_URL != "" ]] || [[ $AGENDA_HTML_URL != "" ]]; then
+ elif [[ $AGENDA_REVISE_HTML_URL != "" ]] || [[ $AGENDA_HTML_URL != "" ]]; then
if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
- echo "Saving revised agenda as HTML... (no PDF found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$1/Agenda_Revised.html" -N -q #--show-progress
+ echo "Saving revised agenda as HTML... (no PDF found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$1/Agenda_Revised.html" --no-hsts -N -q #--show-progress
fi
if [[ $AGENDA_HTML_URL != "" ]]; then
- echo "Saving regular agenda as HTML... (no PDF found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$1/Agenda.html" -N -q #--show-progress
+ echo "Saving regular agenda as HTML... (no PDF found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$1/Agenda.html" --no-hsts -N -q #--show-progress
fi
- elif [[ $AGENDA_FULL_PDF_URL != "" ]] || [[ $AGENDA_FULL_HTML_URL != "" ]]; then
+ elif [[ $AGENDA_FULL_PDF_URL != "" ]] || [[ $AGENDA_FULL_HTML_URL != "" ]]; then
if [[ $AGENDA_FULL_PDF_URL != "" ]]; then
- echo "Saving full package agenda as PDF... (no HTML found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_FULL_PDF_URL" -O "$1/Agenda_FullPackage.pdf" -N -q #--show-progress
+ echo "Saving full package agenda as PDF... (no HTML found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_FULL_PDF_URL" -O "$1/Agenda_FullPackage.pdf" --no-hsts -N -q #--show-progress
fi
if [[ $AGENDA_FULL_HTML_URL != "" ]]; then
- echo "Saving full package agenda as HTML... (no PDF found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_FULL_HTML_URL" -O "$1/Agenda_FullPackage.html" -N -q #--show-progress
+ echo "Saving full package agenda as HTML... (no PDF found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_FULL_HTML_URL" -O "$1/Agenda_FullPackage.html" --no-hsts -N -q #--show-progress
fi
elif [[ $AGENDA_POST_PDF_URL != "" ]] || [[ $AGENDA_POST_HTML_URL != "" ]]; then
- if [[ $AGENDA_POST_PDF_URL != "" ]]; then
- echo "Saving post agenda as HTML... (no HTML found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_POST_PDF_URL" -O "$1/Agenda_Post.pdf" -N -q #--show-progress
- fi
- if [[ $AGENDA_POST_HTML_URL != "" ]]; then
- echo "Saving post agenda as HTML... (no PDF found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_POST_HTML_URL" -O "$1/Agenda_Post.html" -N -q #--show-progress
- fi
+ if [[ $AGENDA_POST_PDF_URL != "" ]]; then
+ echo "Saving post agenda as PDF... (no HTML found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_POST_PDF_URL" -O "$1/Agenda_Post.pdf" --no-hsts -N -q #--show-progress
+ fi
+ if [[ $AGENDA_POST_HTML_URL != "" ]]; then
+ echo "Saving post agenda as HTML... (no PDF found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_POST_HTML_URL" -O "$1/Agenda_Post.html" --no-hsts -N -q #--show-progress
+ fi
fi
- if [[ $MINUTES_ATTACH_PDF_URL != "" ]] || [[ $MINUTES_PDF_URL != "" ]]; then
+ if [[ $MINUTES_ATTACH_PDF_URL != "" ]] || [[ $MINUTES_PDF_URL != "" ]]; then
if [[ $MINUTES_ATTACH_PDF_URL != "" ]]; then
- echo "Saving minutes with attachments as PDF..."
- wget --no-check-certificate --user-agent="$WGET_UA" "$MINUTES_ATTACH_PDF_URL" -O "$1/Minutes_With_Attachments.pdf" -N -q #--show-progress
+ echo "Saving minutes with attachments as PDF..."
+ wget --no-check-certificate --user-agent="$WGET_UA" "$MINUTES_ATTACH_PDF_URL" -O "$1/Minutes_With_Attachments.pdf" --no-hsts -N -q #--show-progress
fi
if [[ $MINUTES_PDF_URL != "" ]]; then
- echo "Saving minutes as PDF..."
- wget --no-check-certificate --user-agent="$WGET_UA" "$MINUTES_PDF_URL" -O "$1/Minutes.pdf" -N -q #--show-progress
+ echo "Saving minutes as PDF..."
+ wget --no-check-certificate --user-agent="$WGET_UA" "$MINUTES_PDF_URL" -O "$1/Minutes.pdf" --no-hsts -N -q #--show-progress
fi
- else
+ else
if [[ $MINUTES_HTML_URL != "" ]]; then
echo "Saving minutes as HTML... (no PDF found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$MINUTES_HTML_URL" -O "$1/Minutes.html" -N -q #--show-progress
+ wget --no-check-certificate --user-agent="$WGET_UA" "$MINUTES_HTML_URL" -O "$1/Minutes.html" --no-hsts -N -q #--show-progress
fi
- fi
+ fi
if [[ $AGENDA_COVER_PDF_URL != "" ]]; then
- echo "Saving cover agenda as PDF... (no HTML found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_COVER_PDF_URL" -O "$1/Agenda_Cover.pdf" -N -q #--show-progress
+ echo "Saving cover agenda as PDF... (no HTML found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_COVER_PDF_URL" -O "$1/Agenda_Cover.pdf" --no-hsts -N -q #--show-progress
fi
if [[ $AGENDA_COVER_HTML_URL != "" ]]; then
- echo "Saving cover agenda as HTML... (no PDF found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_COVER_HTML_URL" -O "$1/Agenda_Cover.html" -N -q #--show-progress
+ echo "Saving cover agenda as HTML... (no PDF found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$AGENDA_COVER_HTML_URL" -O "$1/Agenda_Cover.html" --no-hsts -N -q #--show-progress
fi
if [[ $ADDENDUM_PDF_URL != "" ]]; then
- echo "Saving addendum as PDF... (no HTML found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$ADDENDUM_PDF_URL" -O "$1/Addendum.pdf" -N -q #--show-progress
+ echo "Saving addendum as PDF... (no HTML found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$ADDENDUM_PDF_URL" -O "$1/Addendum.pdf" --no-hsts -N -q #--show-progress
fi
if [[ $ADDENDUM_HTML_URL != "" ]]; then
- echo "Saving addendum as HTML... (no PDF found!)"
- wget --no-check-certificate --user-agent="$WGET_UA" "$ADDENDUM_HTML_URL" -O "$1/Addendum.html" -N -q #--show-progress
+ echo "Saving addendum as HTML... (no PDF found!)"
+ wget --no-check-certificate --user-agent="$WGET_UA" "$ADDENDUM_HTML_URL" -O "$1/Addendum.html" --no-hsts -N -q #--show-progress
fi
}
@@ -214,209 +214,212 @@ while read -r INDEX_URL_PRE CITY_ARCHIVE_NAME_PRE CALENDAR_NAME_PRE; do
CITY_ARCHIVE_NAME=$(echo $CITY_ARCHIVE_NAME_PRE | sed 's/\"//g' | sed 's/\,//g')
CALENDAR_NAME=$(echo $CALENDAR_NAME_PRE | sed 's/\"//g' | sed 's/\,//g')
- INDEX_END="FALSE"
- while [[ $INDEX_END == "FALSE" ]]; do
- echo "SCRAPE_ESCRIBE: Downloading eScribe index..."
- wget --no-check-certificate --user-agent="$WGET_UA" $INDEX_URL -O $INDEX_PAGE --show-progress
- if [ $? -ne 8 ]; then
- FOUNDLIST="FALSE"
- while IFS= read -r LINE; do
- if [[ "TRUE" == $FOUNDLIST ]]; then
- GREPENDLIST=$(echo $LINE | grep '