Delete SCRAPE_JOB.SH
This commit is contained in:
parent
c9f009765a
commit
19ffd00d88
127
SCRAPE_JOB.SH
127
SCRAPE_JOB.SH
@ -1,127 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#######################################
# Record the start of a timed step.
# Globals:
#   START_SECONDS  (written) start time as seconds since the epoch
#   START_READABLE (written) same instant as "YYYY-MM-DD HH:MM:SS", local time
# Arguments: none
#######################################
start_timer() {
  # One `date` call feeds both variables so they always describe the same
  # instant; two separate calls can land on different seconds.
  IFS=' ' read -r START_SECONDS START_READABLE \
    <<< "$(date '+%s %Y-%m-%d %H:%M:%S')"
}
|
||||
#######################################
# Record the end of a timed step and compute how long it took.
# Globals:
#   START_SECONDS    (read)    epoch seconds stored by start_timer
#   END_SECONDS      (written) end time as seconds since the epoch
#   END_READABLE     (written) same instant as "YYYY-MM-DD HH:MM:SS", local time
#   ELAPSED_SECONDS  (written) END_SECONDS minus START_SECONDS
#   ELAPSED_READABLE (written) elapsed time formatted as HH:MM:SS
# Arguments: none
# Returns: 0 on success
#######################################
end_timer() {
  # One `date` call keeps the epoch value and the readable stamp in sync.
  IFS=' ' read -r END_SECONDS END_READABLE \
    <<< "$(date '+%s %Y-%m-%d %H:%M:%S')"

  # If start_timer was never called, treat the start as "now" so the result is
  # 00:00:00 instead of the number of seconds since 1970.
  local began="${START_SECONDS:-$END_SECONDS}"
  ELAPSED_SECONDS=$(( END_SECONDS - began ))

  printf -v ELAPSED_READABLE '%02d:%02d:%02d' \
    "$(( ELAPSED_SECONDS / 3600 ))" \
    "$(( (ELAPSED_SECONDS % 3600) / 60 ))" \
    "$(( ELAPSED_SECONDS % 60 ))"
}
|
||||
#######################################
# Append one table row describing a finished step to the HTML log index.
# Globals:
#   CRON_LOG_INDEX   (read) path of the HTML file the row is appended to
#   START_READABLE   (read) start stamp shown in the row
#   END_READABLE     (read) end stamp shown in the row
#   ELAPSED_READABLE (read) elapsed time shown in the row
# Arguments:
#   $1 - path of the step's log file; only its basename is used for the link
#   $2 - label shown for the step
# Returns: 0 on success, non-zero if the index path is unset or not writable
#######################################
push_log() {
  if [[ -z "${CRON_LOG_INDEX:-}" ]]; then
    printf 'push_log: CRON_LOG_INDEX is not set\n' >&2
    return 1
  fi

  # Quote the path: an unquoted expansion word-splits and yields the wrong
  # name for any path that contains whitespace.
  local log_name
  log_name=$(basename -- "$1") || return

  local cell="<td bgcolor='#bababa' style='color:black;'>"

  # The link is closed with </a>; leaving it open produces malformed HTML.
  printf '<tr> %s<a href="./%s">View</a></td> %s%s</td> %s%s</td> %s%s</td> %s%s</td> </tr>\n' \
    "$cell" "$log_name" \
    "$cell" "$2" \
    "$cell" "$START_READABLE" \
    "$cell" "$END_READABLE" \
    "$cell" "$ELAPSED_READABLE" \
    >> "$CRON_LOG_INDEX"
}
|
||||
#######################################
# Escape a string so it can sit between double quotes in a JSON document.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
#######################################
_push_webhook_json_escape() {
  local text=$1
  text=${text//\\/\\\\}    # backslash first, so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

#######################################
# Post a single-embed message to the job's Discord webhook.
# Globals:
#   SCRAPE_WEBHOOK_URL (read, optional) overrides the built-in webhook URL
# Arguments:
#   $1 - embed title
#   $2 - path of a log file; its basename is appended to the public log URL
#   $3 - embed description
#   $4 - start time shown in the footer
#   $5 - elapsed time shown in the footer
# Returns: curl's exit status (non-zero on timeout or HTTP error)
#######################################
push_webhook() {
  # NOTE(review): this URL is a credential committed to source. It remains the
  # default so existing deployments keep working; rotate it and supply the new
  # one through SCRAPE_WEBHOOK_URL.
  local webhook_url="${SCRAPE_WEBHOOK_URL:-https://discord.com/api/webhooks/1472056322886209600/8EtHDzTdVYuaU2mn0-fY6BZZwxW4ZMkNnGzFyTCJhcS6FMHYagjxeyw0rw9o5S-TNRRA}"

  local log_name
  log_name=$(basename -- "$2") || return

  # Escape every interpolated value: a quote or backslash in any argument
  # would otherwise produce a payload that is not valid JSON.
  local title url description footer payload
  title=$(_push_webhook_json_escape "$1")
  url=$(_push_webhook_json_escape "https://mystery-of-the-typical-tiny-gold-iguana.randommeaninglesscharacters.com/lalogs/${log_name}")
  description=$(_push_webhook_json_escape "$3")
  footer=$(_push_webhook_json_escape "Start: $4, time elapsed: $5")

  printf -v payload \
    '{"embeds":[{"color":19712,"title":"%s","url":"%s","description":"%s","footer":{"text":"%s"}}]}' \
    "$title" "$url" "$description" "$footer"

  # --max-time keeps an unresponsive endpoint from stalling the whole job;
  # --fail turns HTTP error responses into a non-zero exit status.
  curl --silent --show-error --fail --max-time 30 \
    -H "Content-Type: application/json" \
    -d "$payload" \
    "$webhook_url"
}
|
||||
# ---------------------------------------------------------------------------
# Main job. Runs each scraper in turn, appends its stdout to a dated log file
# under CRON_LOG_DIR, records a row in the HTML log index, and posts a webhook
# notification per step. All relative paths (./template, ./SCRAPE_*.SH,
# ./LondonArchive) are resolved against the current working directory.
# ---------------------------------------------------------------------------

# Assign first, export second, so a failing `date` is not masked by `export`.
datestamp=$(date +'%Y%m%d')
export datestamp

export CRON_LOG_DIR="/var/www/mystery-of-the-typical-tiny-gold-iguana.randommeaninglesscharacters.com/lalogs"
export CRON_LOG_INDEX="${CRON_LOG_DIR}/index.html"
export CRON_LOG_SCRIPT="${CRON_LOG_DIR}/${datestamp}_script.txt"
export CRON_LOG_MEET="${CRON_LOG_DIR}/${datestamp}_meet.txt"
export CRON_LOG_PLAN="${CRON_LOG_DIR}/${datestamp}_plan.txt"
export CRON_LOG_JSON="${CRON_LOG_DIR}/${datestamp}_json.txt"
export CRON_LOG_LTC="${CRON_LOG_DIR}/${datestamp}_ltc.txt"
export CRON_LOG_LPS="${CRON_LOG_DIR}/${datestamp}_lps.txt"
export CRON_LOG_S3="${CRON_LOG_DIR}/${datestamp}_s3.txt"
export CRON_LOG_SITEMAP="${CRON_LOG_DIR}/${datestamp}_sitemap.txt"

# -p: the directory already exists on every run after the first.
mkdir -p "$CRON_LOG_DIR"

# Seed the log index from the template the first time only.
if [[ ! -f "$CRON_LOG_INDEX" ]]; then
  cp "./template/logdir.html" "$CRON_LOG_INDEX"
fi

# Separate timer for main job.
START_SECONDS_ALL=$(date +%s)
START_READABLE_ALL=$(date "+%Y-%m-%d %H:%M:%S")
push_webhook "Start scrape job" "$CRON_LOG_INDEX" "Starting London Archive scrapers." "$START_READABLE_ALL" "N/A"

# Work directory layout that the steps below sync to S3.
mkdir -p \
  "./LondonArchive" \
  "./LondonArchive/Meetings" \
  "./LondonArchive/Meetings (JSON)" \
  "./LondonArchive/Planning Applications" \
  "./LondonArchive/LTC" \
  "./LondonArchive/LPS"

start_timer
# Back up scripts regularly.
mkdir -p "./SCRIPTS"
# The ./ prefix keeps a file name that starts with '-' from being read as an option.
cp ./*.SH "./SCRIPTS/"
cp ./*.TXT "./SCRIPTS/"
# NOTE(review): the archive password is a secret committed to source and is
# visible in the process list while 7z runs. It remains the default so existing
# archives stay readable; prefer supplying SCRIPTS_ARCHIVE_PASSWORD.
archive_password="${SCRIPTS_ARCHIVE_PASSWORD:-AEF9D58B978A103B04016D600FD4B1E6943A3FF538B98B84F1C177B414F7018}"
7z a "-p${archive_password}" "SCRIPTS.7z" "./SCRIPTS" -mhe=on
rm -r "./SCRIPTS"
mv "SCRIPTS.7z" "./LondonArchive/Log_${datestamp}.7z"
echo "This log is private. However, all other logs are public." >> "$CRON_LOG_SCRIPT"
end_timer && push_log "$CRON_LOG_SCRIPT" "BACK UP SCRIPT"

start_timer
./SCRAPE_MEET.SH >> "$CRON_LOG_MEET"
end_timer && push_log "$CRON_LOG_MEET" "SCRAPE_MEET.SH" && push_webhook "SCRAPE_MEET.SH" "$CRON_LOG_MEET" "Done processing city meetings." "$START_READABLE" "$ELAPSED_READABLE"

# Planning-application scraper is currently disabled.
#start_timer
#./SCRAPE_PLAN.SH >> "$CRON_LOG_PLAN"
#end_timer && push_log "$CRON_LOG_PLAN" "SCRAPE_PLAN.SH" && push_webhook "SCRAPE_PLAN.SH" "$CRON_LOG_PLAN" "Done processing planning applications." "$START_READABLE" "$ELAPSED_READABLE"

start_timer
# NOTE(review): the script run here is SCRAPE_ESCRIBE.SH but the step is
# labelled "SCRAPE_JSON.SH" in the log and webhook; confirm that is intended.
./SCRAPE_ESCRIBE.SH >> "$CRON_LOG_JSON"
end_timer && push_log "$CRON_LOG_JSON" "SCRAPE_JSON.SH" && push_webhook "SCRAPE_JSON.SH" "$CRON_LOG_JSON" "Done backing up eScribe meeting lists." "$START_READABLE" "$ELAPSED_READABLE"

start_timer
./SCRAPE_LTC.SH >> "$CRON_LOG_LTC"
end_timer && push_log "$CRON_LOG_LTC" "SCRAPE_LTC.SH" && push_webhook "SCRAPE_LTC.SH" "$CRON_LOG_LTC" "Done processing LTC meetings." "$START_READABLE" "$ELAPSED_READABLE"

start_timer
./SCRAPE_LPS.SH >> "$CRON_LOG_LPS"
# The index row is labelled SCRAPE_LPS.SH; it was previously logged under the
# LTC label, which made the two rows indistinguishable.
end_timer && push_log "$CRON_LOG_LPS" "SCRAPE_LPS.SH" && push_webhook "SCRAPE_LPS.SH" "$CRON_LOG_LPS" "Done processing LPS meetings." "$START_READABLE" "$ELAPSED_READABLE"

start_timer
# NOTE(review): the sync's exit status is not checked; the work directory is
# removed below and "Done syncing" is reported even if the upload failed.
aws s3 sync ./LondonArchive "s3://public-file-browser-files-0261cd08327d/" --profile london --no-progress --size-only >> "$CRON_LOG_S3"
end_timer && push_log "$CRON_LOG_S3" "AWS S3 SYNC" && push_webhook "AWS S3 SYNC" "$CRON_LOG_S3" "Done syncing files to S3." "$START_READABLE" "$ELAPSED_READABLE"

# Start the sitemap step from an empty work directory.
rm -rf "./LondonArchive"
mkdir "./LondonArchive"

start_timer
# Make/upload sitemap AFTER clearing the work dir. Otherwise everything gets uploaded again.
./MAKE_SITEMAP.SH >> "$CRON_LOG_SITEMAP"
end_timer && push_log "$CRON_LOG_SITEMAP" "MAKE_SITEMAP.SH" && push_webhook "MAKE_SITEMAP.SH" "$CRON_LOG_SITEMAP" "Done updating archive sitemap, requested YaCy indexing." "$START_READABLE" "$ELAPSED_READABLE"

rm -rf "./LondonArchive"

# Total run time, measured with the *_ALL timer started before the first step.
END_SECONDS_ALL=$(date +%s)
ELAPSED_SECONDS_ALL=$(( END_SECONDS_ALL - START_SECONDS_ALL ))
ELAPSED_READABLE_ALL=$(printf "%02d:%02d:%02d" \
  "$(( ELAPSED_SECONDS_ALL / 3600 ))" \
  "$(( (ELAPSED_SECONDS_ALL % 3600) / 60 ))" \
  "$(( ELAPSED_SECONDS_ALL % 60 ))")
push_webhook "Finished scrape job" "$CRON_LOG_INDEX" "Archive is now fully updated." "$START_READABLE_ALL" "$ELAPSED_READABLE_ALL"
|
||||
Loading…
Reference in New Issue
Block a user