mirror of
https://gist.github.com/9cca361f404f07052d5870220748519e.git
synced 2026-01-26 13:42:44 -05:00
This commit is contained in:
parent
37827ae399
commit
10c6e45c26
@@ -62,7 +62,7 @@ while [[ $SEARCH_END == "FALSE" ]]; do
|
|||||||
wget --user-agent="$WGET_UA" $PROJECT_URL -O $PROJECT_PAGE --timestamping -q #--show-progress
|
wget --user-agent="$WGET_UA" $PROJECT_URL -O $PROJECT_PAGE --timestamping -q #--show-progress
|
||||||
|
|
||||||
# Removing COVID is due to the naming in the 2020s. Keeping it for revisiting wayback crawls.
|
# Removing COVID is due to the naming in the 2020s. Keeping it for revisiting wayback crawls.
|
||||||
PROJECT_NAME=$(cat $PROJECT_PAGE | grep "field--name-title" | sed 's/.*<span[^>]*>\([^<]*\)<[\/:-]span>.*/\1/p' | sed 's/&/\&/g' | sed 's/'/'\''/g' | sed 's/^COVID-19//p' | uniq | tr -d '\r' | tr -d '\n' | tr '/' '-')
|
PROJECT_NAME=$(cat $PROJECT_PAGE | grep "page-title" | grep "field--name-title" | sed 's/.*<span[^>]*>\([^<]*\)<[\/:-]span>.*/\1/p' | sed 's/&/\&/g' | sed 's/'/'\''/g' | sed 's/^COVID-19//p' | uniq | tr -d '\r' | tr -d '\n' | tr '/' '-')
|
||||||
echo "SCRAPE_PLANAPPS: Found project: $PROJECT_NAME"
|
echo "SCRAPE_PLANAPPS: Found project: $PROJECT_NAME"
|
||||||
|
|
||||||
echo "SCRAPE_PLANAPPS: Finding attachments..."
|
echo "SCRAPE_PLANAPPS: Finding attachments..."
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user