mirror of
https://gist.github.com/1b471e5f5215c368fd78d9aba05f8dc2.git
synced 2026-01-26 13:42:45 -05:00
This commit is contained in:
parent
4db585f9fd
commit
e8dbcfbe09
@ -1,4 +1,4 @@
|
||||
#/bash
|
||||
#!/bin/bash
|
||||
# Hey folks, please do not run this script more than necessary.
|
||||
# Too many search requests will temporarily block searches for everyone, not just you.
|
||||
# I do not want to DDOS London. I just want to allow for personal backups. Cheers!
|
||||
@ -39,176 +39,173 @@ SEARCH_FORMAT_DATE="f[0]=meeting_date%3A"
|
||||
SEARCH_FORMAT_QUERY="search=query&sort_by=field_meeting_date"
|
||||
|
||||
# As far as I'm aware there are no meetings prior to 2011.
|
||||
current_year=$(date +%Y)
|
||||
current_month=$(date +%m)
|
||||
current_day=$(date +%d)
|
||||
i=$(date +%Y)
|
||||
i=2011
|
||||
x=$((i + 1))
|
||||
echo $x
|
||||
SEARCH_END="FALSE"
|
||||
while (( i < x )); do
|
||||
j=0
|
||||
SEARCH_END="FALSE"
|
||||
while [[ $SEARCH_END == "FALSE" ]]; do
|
||||
echo "SCRAPE_MEETINGS: Downloading search results... Page $j of $i"
|
||||
wget --user-agent="$WGET_UA" $SEARCH_URL"?$SEARCH_FORMAT_DATE$i&page=$j" -O $SEARCH_PAGE -q #--show-progress
|
||||
if [ $? -ne 8 ]; then
|
||||
FOUNDMEETING="FALSE"
|
||||
j=0
|
||||
SEARCH_END="FALSE"
|
||||
while [[ $SEARCH_END == "FALSE" ]]; do
|
||||
echo "SCRAPE_MEETINGS: Downloading search results... Page $j of $i"
|
||||
wget --user-agent="$WGET_UA" $SEARCH_URL"?$SEARCH_FORMAT_DATE$i&page=$j" -O $SEARCH_PAGE -q #--show-progress
|
||||
if [ $? -ne 8 ]; then
|
||||
FOUNDMEETING="FALSE"
|
||||
|
||||
GREP404=$(cat $SEARCH_PAGE | grep "No results found.")
|
||||
if [[ "$GREP404" == "" ]]; then
|
||||
while IFS= read -r LINE; do
|
||||
GREP404=$(cat $SEARCH_PAGE | grep "No results found.")
|
||||
if [[ "$GREP404" == "" ]]; then
|
||||
while IFS= read -r LINE; do
|
||||
|
||||
# All meeting items in the search results are formatted like so:
|
||||
# - One line with the name
|
||||
# - Second line with all other info including links
|
||||
#
|
||||
# We can find the first line by the class "views-field-field-meeting-notes"
|
||||
# FOUNDMEETING=TRUE will show that the first line has been found, and so the next line read will be "confirmed" as line 2 of the meeting info
|
||||
# The first two links of every second line are (in order) the PDF and HTML agendas
|
||||
# All meeting items in the search results are formatted like so:
|
||||
# - One line with the name
|
||||
# - Second line with all other info including links
|
||||
#
|
||||
# We can find the first line by the class "views-field-field-meeting-notes"
|
||||
# FOUNDMEETING=TRUE will show that the first line has been found, and so the next line read will be "confirmed" as line 2 of the meeting info
|
||||
# The first two links of every second line are (in order) the PDF and HTML agendas
|
||||
|
||||
if [[ "TRUE" == $FOUNDMEETING ]]; then
|
||||
FOUNDMEETING="FALSE"
|
||||
echo "SCRAPE_MEETINGS: -========================================================================-"
|
||||
echo "SCRAPE_MEETINGS: Working on $MEETING_NAME ($MEETING_YEAR/$MEETING_MONTH/$MEETING_DAY)"
|
||||
echo "SCRAPE_MEETINGS: All files to be saved as "$MEETING_NAME"/"$MEETING_YEAR"/"$MEETING_MONTH"-"$MEETING_DAY"/"
|
||||
echo "SCRAPE_MEETINGS: -========================================================================-"
|
||||
echo "SCRAPE_MEETINGS: Task starting on: $(date)"
|
||||
if [[ "TRUE" == $FOUNDMEETING ]]; then
|
||||
FOUNDMEETING="FALSE"
|
||||
echo "SCRAPE_MEETINGS: -========================================================================-"
|
||||
echo "SCRAPE_MEETINGS: Working on $MEETING_NAME ($MEETING_YEAR/$MEETING_MONTH/$MEETING_DAY)"
|
||||
echo "SCRAPE_MEETINGS: All files to be saved as "$MEETING_NAME"/"$MEETING_YEAR"/"$MEETING_MONTH"-"$MEETING_DAY"/"
|
||||
echo "SCRAPE_MEETINGS: -========================================================================-"
|
||||
echo "SCRAPE_MEETINGS: Task starting on: $(date)"
|
||||
|
||||
#echo "CANCEL NOW!!!"
|
||||
#sleep 5
|
||||
#echo "CANCEL NOW!!!"
|
||||
#sleep 5
|
||||
|
||||
# Grab meeting item links
|
||||
echo $LINE | sed 's/href=./\nhref="/g' | grep 'href="https' | sed 's/.*href="\([^"]*\)".*/\1/p' | uniq > "./tmp/meeting_urls"
|
||||
# Grab meeting item types
|
||||
echo $LINE | sed 's/rel=.noreferrer.>/\nrel="noreferrer">/g' | grep 'rel="noreferrer">' | sed 's/.*rel="noreferrer">\([^<]*\)<.*/\1/p' | uniq > "./tmp/meeting_types"
|
||||
# Grab meeting item links
|
||||
echo $LINE | sed 's/href=./\nhref="/g' | grep 'href="https' | sed 's/.*href="\([^"]*\)".*/\1/p' | uniq > "./tmp/meeting_urls"
|
||||
# Grab meeting item types
|
||||
echo $LINE | sed 's/rel=.noreferrer.>/\nrel="noreferrer">/g' | grep 'rel="noreferrer">' | sed 's/.*rel="noreferrer">\([^<]*\)<.*/\1/p' | uniq > "./tmp/meeting_types"
|
||||
|
||||
AGENDA_HTML_URL=""
|
||||
AGENDA_PDF_URL=""
|
||||
AGENDA_REVISE_HTML_URL=""
|
||||
AGENDA_REVISE_PDF_URL=""
|
||||
MINUTES_HTML_URL=""
|
||||
MINUTES_PDF_URL=""
|
||||
MINUTES_ATTACH_PDF_URL=""
|
||||
AGENDA_HTML_URL=""
|
||||
AGENDA_PDF_URL=""
|
||||
AGENDA_REVISE_HTML_URL=""
|
||||
AGENDA_REVISE_PDF_URL=""
|
||||
MINUTES_HTML_URL=""
|
||||
MINUTES_PDF_URL=""
|
||||
MINUTES_ATTACH_PDF_URL=""
|
||||
|
||||
echo "SCRAPE_MEETINGS: Found the following documents:"
|
||||
while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
|
||||
echo "SCRAPE_MEETINGS: - $LINEA2"
|
||||
echo "SCRAPE_MEETINGS: Found the following documents:"
|
||||
while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
|
||||
echo "SCRAPE_MEETINGS: - $LINEA2"
|
||||
|
||||
case "$LINEA2" in
|
||||
"Agenda (HTML) ")
|
||||
AGENDA_HTML_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Agenda (PDF) ")
|
||||
AGENDA_PDF_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Revised Agenda (HTML) ")
|
||||
AGENDA_REVISE_HTML_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Revised Agenda (PDF) ")
|
||||
AGENDA_REVISE_PDF_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Minutes (HTML) ")
|
||||
MINUTES_HTML_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Minutes (PDF) ")
|
||||
MINUTES_PDF_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Minutes with Attachments (PDF) ")
|
||||
MINUTES_ATTACH_PDF_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
esac
|
||||
case "$LINEA2" in
|
||||
"Agenda (HTML) ")
|
||||
AGENDA_HTML_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Agenda (PDF) ")
|
||||
AGENDA_PDF_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Revised Agenda (HTML) ")
|
||||
AGENDA_REVISE_HTML_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Revised Agenda (PDF) ")
|
||||
AGENDA_REVISE_PDF_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Minutes (HTML) ")
|
||||
MINUTES_HTML_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Minutes (PDF) ")
|
||||
MINUTES_PDF_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
"Minutes with Attachments (PDF) ")
|
||||
MINUTES_ATTACH_PDF_URL=$(echo $LINEA1 | sed 's/&/\&/g' | sed 's/'/'\''/g') ;;
|
||||
esac
|
||||
|
||||
done < ./tmp/meeting_urls 3< ./tmp/meeting_types
|
||||
done < ./tmp/meeting_urls 3< ./tmp/meeting_types
|
||||
|
||||
# Always prefer Revised Agendas
|
||||
echo "SCRAPE_MEETINGS: Downloading agenda HTML..."
|
||||
if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O $AGENDA_HTML -q #--show-progress
|
||||
elif [[ $AGENDA_HTML_URL != "" ]]; then
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O $AGENDA_HTML -q #--show-progress
|
||||
else
|
||||
ERROR="TRUE"
|
||||
fi
|
||||
# Always prefer Revised Agendas
|
||||
echo "SCRAPE_MEETINGS: Downloading agenda HTML..."
|
||||
if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O $AGENDA_HTML -q #--show-progress
|
||||
elif [[ $AGENDA_HTML_URL != "" ]]; then
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O $AGENDA_HTML -q #--show-progress
|
||||
else
|
||||
ERROR="TRUE"
|
||||
fi
|
||||
|
||||
if [[ ERROR="FALSE" ]]; then
|
||||
if [[ ERROR="FALSE" ]]; then
|
||||
|
||||
mkdir "./LondonArchive"
|
||||
mkdir "./LondonArchive/Meetings"
|
||||
|
||||
if [ ! -d "./LondonArchive/Meetings/$MEETING_NAME" ]; then
|
||||
mkdir "./LondonArchive/Meetings/$MEETING_NAME/"
|
||||
fi
|
||||
if [ ! -d "./LondonArchive/Meetings/$MEETING_NAME/$MEETING_YEAR" ]; then
|
||||
mkdir "./LondonArchive/Meetings/$MEETING_NAME/$MEETING_YEAR/"
|
||||
fi
|
||||
MEETING_DIR=$(printf "./LondonArchive/Meetings/%s/%s/%s-%s" "$MEETING_NAME" "$MEETING_YEAR" "$MEETING_MONTH" "$MEETING_DAY")
|
||||
if [ ! -d "$MEETING_DIR" ]; then
|
||||
mkdir "$MEETING_DIR/"
|
||||
fi
|
||||
if [ ! -d "$MEETING_DIR/Attachments" ]; then
|
||||
mkdir "$MEETING_DIR/Attachments/"
|
||||
fi
|
||||
|
||||
# Direct video links are always "video.isilive.ca/<REGION>/<NAME>"
|
||||
# There are some eScribe ones, but those are in m3u8s and are really annoying to work with
|
||||
if [ ! -d "./LondonArchive/Meetings/$MEETING_NAME" ]; then
|
||||
mkdir "./LondonArchive/Meetings/$MEETING_NAME/"
|
||||
fi
|
||||
if [ ! -d "./LondonArchive/Meetings/$MEETING_NAME/$MEETING_YEAR" ]; then
|
||||
mkdir "./LondonArchive/Meetings/$MEETING_NAME/$MEETING_YEAR/"
|
||||
fi
|
||||
MEETING_DIR=$(printf "./LondonArchive/Meetings/%s/%s/%s-%s" "$MEETING_NAME" "$MEETING_YEAR" "$MEETING_MONTH" "$MEETING_DAY")
|
||||
if [ ! -d "$MEETING_DIR" ]; then
|
||||
mkdir "$MEETING_DIR/"
|
||||
fi
|
||||
if [ ! -d "$MEETING_DIR/Attachments" ]; then
|
||||
mkdir "$MEETING_DIR/Attachments/"
|
||||
fi
|
||||
|
||||
# Direct video links are always "video.isilive.ca/<REGION>/<NAME>"
|
||||
# There are some eScribe ones, but those are in m3u8s and are really annoying to work with
|
||||
|
||||
# ...not as annoying as more sed, though.
|
||||
VIDEO_URL=$(grep 'id="isi_player"' ./tmp/work.html | sed -n 's/.*data-stream_name="\([^"]*\)".*/\1/p' | sed 's/ /%20/g')
|
||||
|
||||
if [[ $VIDEO_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving recording URL..."
|
||||
echo "https://video.isilive.ca/london/"$VIDEO_URL > "$MEETING_DIR/RecordingLink.txt"
|
||||
fi
|
||||
|
||||
# Get attachment links
|
||||
cat $AGENDA_HTML | grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" | sed 's/href=.filestream\.ashx/\nhref="filestream\.ashx/g' | grep 'filestream.ashx' | sed 's/. data-toggle/\" data-toggle/p' | sed 's/href=.\([^/]*\)".*/\1/p' | awk '!x[$0]++' > "./tmp/attachment_urls"
|
||||
# Get attachment names
|
||||
cat $AGENDA_HTML | grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" | sed 's/data-original-title=./\ndata-original-title='\''/g' | grep 'data-original-title' | sed 's/data-original-title=.//p' | sed 's/.pdf['\'':"].*/.pdf/g' | awk '!x[$0]++' > "./tmp/attachment_names"
|
||||
# Download attachment and use the name grabbed above
|
||||
echo "SCRAPE_MEETINGS: Found the following agenda attachments:"
|
||||
while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
|
||||
echo "SCRAPE_MEETINGS: - $LINEA2"
|
||||
wget --user-agent="$WGET_UA" "https://pub-london.escribemeetings.com/$LINEA1" -O "$MEETING_DIR/Attachments/$LINEA2" -q #--show-progress
|
||||
done < ./tmp/attachment_urls 3< ./tmp/attachment_names
|
||||
echo "SCRAPE_MEETINGS: All attachments saved."
|
||||
# ...not as annoying as more sed, though.
|
||||
VIDEO_URL=$(grep 'id="isi_player"' ./tmp/work.html | sed -n 's/.*data-stream_name="\([^"]*\)".*/\1/p' | sed 's/ /%20/g')
|
||||
|
||||
if [[ $VIDEO_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving recording URL..."
|
||||
echo "https://video.isilive.ca/london/"$VIDEO_URL > "$MEETING_DIR/RecordingLink.txt"
|
||||
fi
|
||||
|
||||
# Get attachment links
|
||||
cat $AGENDA_HTML | grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" | sed 's/href=.filestream\.ashx/\nhref="filestream\.ashx/g' | grep 'filestream.ashx' | sed 's/. data-toggle/\" data-toggle/p' | sed 's/href=.\([^/]*\)".*/\1/p' | awk '!x[$0]++' > "./tmp/attachment_urls"
|
||||
# Get attachment names
|
||||
cat $AGENDA_HTML | grep "AgendaItemAttachment AgendaItemAttachmentNotSelected" | sed 's/data-original-title=./\ndata-original-title='\''/g' | grep 'data-original-title' | sed 's/data-original-title=.//p' | sed 's/.pdf['\'':"].*/.pdf/g' | awk '!x[$0]++' > "./tmp/attachment_names"
|
||||
# Download attachment and use the name grabbed above
|
||||
echo "SCRAPE_MEETINGS: Found the following agenda attachments:"
|
||||
while IFS= read -r LINEA1 && IFS= read -r LINEA2 <&3; do
|
||||
echo "SCRAPE_MEETINGS: - $LINEA2"
|
||||
wget --user-agent="$WGET_UA" "https://pub-london.escribemeetings.com/$LINEA1" -O "$MEETING_DIR/Attachments/$LINEA2" -q #--show-progress
|
||||
done < ./tmp/attachment_urls 3< ./tmp/attachment_names
|
||||
echo "SCRAPE_MEETINGS: All attachments saved."
|
||||
|
||||
if [[ $AGENDA_REVISE_PDF_URL != "" ]] || [[ $AGENDA_PDF_URL != "" ]]; then
|
||||
if [[ $AGENDA_REVISE_PDF_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving revised agenda as PDF..."
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_REVISE_PDF_URL" -O "$MEETING_DIR/Agenda_Revised.pdf" -q #--show-progress
|
||||
fi
|
||||
if [[ $AGENDA_PDF_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving regular agenda as PDF..."
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_PDF_URL" -O "$MEETING_DIR/Agenda.pdf" -q #--show-progress
|
||||
fi
|
||||
else
|
||||
if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving revised agenda as HTML... (no PDF found!)"
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$MEETING_DIR/Agenda_Revised.html" -q #--show-progress
|
||||
fi
|
||||
if [[ $AGENDA_HTML_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving regular agenda as HTML... (no PDF found!)"
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$MEETING_DIR/Agenda.html" -q #--show-progress
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ $MINUTES_ATTACH_PDF_URL != "" ]] || [[ $MINUTES_PDF_URL != "" ]]; then
|
||||
if [[ $MINUTES_ATTACH_PDF_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving minutes with attachments as PDF..."
|
||||
wget --user-agent="$WGET_UA" "$MINUTES_ATTACH_PDF_URL" -O "$MEETING_DIR/Minutes_With_Attachments.pdf" -q #--show-progress
|
||||
fi
|
||||
if [[ $MINUTES_PDF_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving minutes as PDF..."
|
||||
wget --user-agent="$WGET_UA" "$MINUTES_PDF_URL" -O "$MEETING_DIR/Minutes.pdf" -q #--show-progress
|
||||
fi
|
||||
else
|
||||
if [[ $MINUTES_HTML_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving minutes as HTML... (no PDF found!)"
|
||||
wget --user-agent="$WGET_UA" "$MINUTES_HTML_URL" -O "$MEETING_DIR/Minutes.html" -q #--show-progress
|
||||
fi
|
||||
fi
|
||||
if [[ $AGENDA_REVISE_PDF_URL != "" ]] || [[ $AGENDA_PDF_URL != "" ]]; then
|
||||
if [[ $AGENDA_REVISE_PDF_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving revised agenda as PDF..."
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_REVISE_PDF_URL" -O "$MEETING_DIR/Agenda_Revised.pdf" -q #--show-progress
|
||||
fi
|
||||
if [[ $AGENDA_PDF_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving regular agenda as PDF..."
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_PDF_URL" -O "$MEETING_DIR/Agenda.pdf" -q #--show-progress
|
||||
fi
|
||||
else
|
||||
if [[ $AGENDA_REVISE_HTML_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving revised agenda as HTML... (no PDF found!)"
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_REVISE_HTML_URL" -O "$MEETING_DIR/Agenda_Revised.html" -q #--show-progress
|
||||
fi
|
||||
if [[ $AGENDA_HTML_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving regular agenda as HTML... (no PDF found!)"
|
||||
wget --user-agent="$WGET_UA" "$AGENDA_HTML_URL" -O "$MEETING_DIR/Agenda.html" -q #--show-progress
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ $MINUTES_ATTACH_PDF_URL != "" ]] || [[ $MINUTES_PDF_URL != "" ]]; then
|
||||
if [[ $MINUTES_ATTACH_PDF_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving minutes with attachments as PDF..."
|
||||
wget --user-agent="$WGET_UA" "$MINUTES_ATTACH_PDF_URL" -O "$MEETING_DIR/Minutes_With_Attachments.pdf" -q #--show-progress
|
||||
fi
|
||||
if [[ $MINUTES_PDF_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving minutes as PDF..."
|
||||
wget --user-agent="$WGET_UA" "$MINUTES_PDF_URL" -O "$MEETING_DIR/Minutes.pdf" -q #--show-progress
|
||||
fi
|
||||
else
|
||||
if [[ $MINUTES_HTML_URL != "" ]]; then
|
||||
echo "SCRAPE_MEETINGS: Saving minutes as HTML... (no PDF found!)"
|
||||
wget --user-agent="$WGET_UA" "$MINUTES_HTML_URL" -O "$MEETING_DIR/Minutes.html" -q #--show-progress
|
||||
fi
|
||||
fi
|
||||
|
||||
fi
|
||||
echo "SCRAPE_MEETINGS: All files from this meeting have been saved."
|
||||
fi
|
||||
|
||||
GREPMEETING=$(echo $LINE | grep "views-field-field-meeting-notes")
|
||||
if [[ "$GREPMEETING" != "" ]]; then
|
||||
MEETING_INFO=$(echo $LINE | sed -n 's/.*<div class="meeting__date">\([^<]*\)<\/div>.*/\1/p')
|
||||
fi
|
||||
echo "SCRAPE_MEETINGS: All files from this meeting have been saved."
|
||||
fi
|
||||
|
||||
GREPMEETING=$(echo $LINE | grep "views-field-field-meeting-notes")
|
||||
if [[ "$GREPMEETING" != "" ]]; then
|
||||
MEETING_INFO=$(echo $LINE | sed -n 's/.*<div class="meeting__date">\([^<]*\)<\/div>.*/\1/p')
|
||||
|
||||
MEETING_MONTH_WORD=$(echo "$MEETING_INFO" | sed -E 's/^([A-Za-z]+) .*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
|
||||
MEETING_DAY_SHORT=$(echo "$MEETING_INFO" | sed -E 's/^[A-Za-z]+ ([0-9]+),.*/\1/' | sed 's/^[ \t]*//;s/[ \t]*$//')
|
||||
@ -231,27 +228,19 @@ while (( i < x )); do
|
||||
*) MEETING_MONTH="--" ;;
|
||||
esac
|
||||
|
||||
if (( 10#$MEETING_YEAR >= 10#$current_year )) && (( 10#$MEETING_MONTH >= $((10#$current_month - 1)) )); then
|
||||
#echo "date is greater"
|
||||
FOUNDMEETING="TRUE"
|
||||
else
|
||||
echo "SCRAPE_MEETINGS: Dates are in the past! Abort."
|
||||
SEARCH_END="TRUE"
|
||||
break
|
||||
fi
|
||||
|
||||
fi
|
||||
done < $SEARCH_PAGE
|
||||
else
|
||||
SEARCH_END="TRUE"
|
||||
echo "SCRAPE_MEETINGS: No more pages!"
|
||||
fi
|
||||
else
|
||||
SEARCH_END="TRUE"
|
||||
echo "SCRAPE_MEETINGS: No more pages!"
|
||||
fi
|
||||
((j++))
|
||||
done
|
||||
FOUNDMEETING="TRUE"
|
||||
fi
|
||||
done < $SEARCH_PAGE
|
||||
else
|
||||
SEARCH_END="TRUE"
|
||||
echo "SCRAPE_MEETINGS: No more pages!"
|
||||
fi
|
||||
else
|
||||
SEARCH_END="TRUE"
|
||||
echo "SCRAPE_MEETINGS: No more pages!"
|
||||
fi
|
||||
((j++))
|
||||
done
|
||||
((i++))
|
||||
done
|
||||
echo "Done job: SCRAPE_MEETINGS: $(date)"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user