This commit is contained in:
Lillian Skinner 2025-06-24 19:08:40 -04:00 committed by GitHub
parent 37827ae399
commit 10c6e45c26

View File

@ -62,7 +62,7 @@ while [[ $SEARCH_END == "FALSE" ]]; do
wget --user-agent="$WGET_UA" $PROJECT_URL -O $PROJECT_PAGE --timestamping -q #--show-progress
# The leading "COVID-19" prefix is stripped because of how projects were named in the 2020s. The step is kept so that Wayback crawls can be revisited.
PROJECT_NAME=$(cat $PROJECT_PAGE | grep "field--name-title" | sed 's/.*<span[^>]*>\([^<]*\)<[\/:-]span>.*/\1/p' | sed 's/&amp;/\&/g' | sed 's/&#039;/'\''/g' | sed 's/^COVID-19//p' | uniq | tr -d '\r' | tr -d '\n' | tr '/' '-')
PROJECT_NAME=$(cat $PROJECT_PAGE | grep "page-title" | grep "field--name-title" | sed 's/.*<span[^>]*>\([^<]*\)<[\/:-]span>.*/\1/p' | sed 's/&amp;/\&/g' | sed 's/&#039;/'\''/g' | sed 's/^COVID-19//p' | uniq | tr -d '\r' | tr -d '\n' | tr '/' '-')
echo "SCRAPE_PLANAPPS: Found project: $PROJECT_NAME"
echo "SCRAPE_PLANAPPS: Finding attachments..."