From 10c6e45c266b23a757121bfd011923bd7971860f Mon Sep 17 00:00:00 2001 From: Lillian Skinner <56081713+rvtr@users.noreply.github.com> Date: Tue, 24 Jun 2025 19:08:40 -0400 Subject: [PATCH] --- SCRAPE_PLANAPPS.SH | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPE_PLANAPPS.SH b/SCRAPE_PLANAPPS.SH index 22d993c..38e5ec0 100644 --- a/SCRAPE_PLANAPPS.SH +++ b/SCRAPE_PLANAPPS.SH @@ -62,7 +62,7 @@ while [[ $SEARCH_END == "FALSE" ]]; do wget --user-agent="$WGET_UA" $PROJECT_URL -O $PROJECT_PAGE --timestamping -q #--show-progress # Removing COVID is due to the naming in the 2020s. Keeping it for revisiting wayback crawls. - PROJECT_NAME=$(cat $PROJECT_PAGE | grep "field--name-title" | sed 's/.*]*>\([^<]*\)<[\/:-]span>.*/\1/p' | sed 's/&/\&/g' | sed 's/'/'\''/g' | sed 's/^COVID-19//p' | uniq | tr -d '\r' | tr -d '\n' | tr '/' '-') + PROJECT_NAME=$(cat $PROJECT_PAGE | grep "page-title" | grep "field--name-title" | sed 's/.*]*>\([^<]*\)<[\/:-]span>.*/\1/p' | sed 's/&/\&/g' | sed 's/'/'\''/g' | sed 's/^COVID-19//p' | uniq | tr -d '\r' | tr -d '\n' | tr '/' '-') echo "SCRAPE_PLANAPPS: Found project: $PROJECT_NAME" echo "SCRAPE_PLANAPPS: Finding attachments..."