diff --git a/SCRAPE_AGIS.SH b/SCRAPE_AGIS.SH
new file mode 100644
index 0000000..433809e
--- /dev/null
+++ b/SCRAPE_AGIS.SH
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+#
+# SCRAPE_AGIS.SH: saves every MapServer layer of an ArcGIS REST services
+# directory as GeoJSON and packs the layers of each service into one archive.
+#
+# For each folder in the directory listing, each MapServer service in that
+# folder is asked for its layers; every layer is queried with where=1=1 and
+# the results end up in LondonArchive/ArcGIS/<folder>/<service>/layers.7z.
+#
+# Requires: wget, curl, jq, 7z.
+#
+# NOTE(review): only services inside folders are visited; services listed at
+# the root of the directory are skipped.
+# NOTE(review): one query is issued per layer. If the server limits how many
+# features a single response may carry, large layers could be truncated --
+# confirm against the server's settings.
+
+set -u
+
+echo -e "\n-========================================================================-"
+echo -e "-=- -=-"
+echo -e "-=- SCRAPE_AGIS.SH: Downloads ArcGIS maps -=-"
+echo -e "-=- -=-"
+echo -e "-=- Lillian Skinner -=-"
+echo -e "-=- -=-"
+echo -e "-========================================================================-"
+
+WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"
+
+ARGIS_URL="https://maps.london.ca/server/rest/services"
+
+TMP="./tmp"
+TMP_STAGING="./tmp/layers"
+SERVICELIST_JSON="$TMP/servicelist.json"
+FOLDER_JSON="$TMP/folder.json"
+SERVICE_JSON="$TMP/service.json"
+
+# -p: a tmp directory left over from an earlier run must not abort this one.
+mkdir -p "$TMP" "$TMP_STAGING" || exit 1
+
+if ! wget "$ARGIS_URL?f=json" --user-agent="$WGET_UA" -O "$SERVICELIST_JSON" -q; then
+  echo "Could not fetch the service list from $ARGIS_URL." >&2
+  exit 1
+fi
+
+jq -r '.folders[]?' "$SERVICELIST_JSON" | while IFS= read -r FOLDER; do
+  echo "Looking in $FOLDER"
+  if ! wget "$ARGIS_URL/$FOLDER?f=json" --user-agent="$WGET_UA" -O "$FOLDER_JSON" -q; then
+    echo "Could not list folder $FOLDER, skipping it." >&2
+    continue
+  fi
+
+  jq -r '.services[]?
+    | select(.type=="MapServer")
+    | .name' "$FOLDER_JSON" | while IFS= read -r SERVICE; do
+    echo "Found $SERVICE"
+    echo "$ARGIS_URL/$SERVICE/MapServer"
+    if ! wget "$ARGIS_URL/$SERVICE/MapServer?f=json" --user-agent="$WGET_UA" -O "$SERVICE_JSON" -q; then
+      echo "Could not describe $SERVICE, skipping it." >&2
+      continue
+    fi
+
+    # The request URLs use $SERVICE without prepending $FOLDER, so the name is
+    # expected to carry the folder already ("<folder>/<service>"). Strip that
+    # prefix so the archive path does not repeat the folder.
+    SERVICE_NAME="${SERVICE#"$FOLDER"/}"
+    ARCHIVE_DIR="LondonArchive/ArcGIS/${FOLDER}/${SERVICE_NAME}"
+
+    # Start every service from an empty staging directory.
+    rm -rf -- "${TMP_STAGING:?}"
+    mkdir -p "$TMP_STAGING"
+
+    jq -r '.layers[]? | "\(.id)|\(.name)"' "$SERVICE_JSON" | while IFS='|' read -r LAYERID LAYERNAME; do
+      echo "Downloading $LAYERID-$LAYERNAME..."
+      # A "/" in the layer name would make curl write into a missing directory.
+      LAYER_FILE="layer${LAYERID}-${LAYERNAME//\//_}.geojson"
+      QUERY_URL="$ARGIS_URL/$SERVICE/MapServer/$LAYERID/query"
+      QUERY_URL+="?where=1=1&outFields=*&returnGeometry=true&f=geojson"
+      # Same user agent as the wget calls; --fail keeps HTTP error pages from
+      # being stored as if they were layer data.
+      if ! curl -sS --fail -A "$WGET_UA" "$QUERY_URL" -o "$TMP_STAGING/$LAYER_FILE"; then
+        echo "Could not download layer $LAYERID of $SERVICE." >&2
+      fi
+    done
+
+    mkdir -p "$ARCHIVE_DIR"
+    # stdin is the service list feeding the enclosing loop; keep 7z away from it.
+    7z a "$ARCHIVE_DIR/layers.7z" "$TMP_STAGING" </dev/null
+  done
+done
diff --git a/SCRAPE_OPEN.SH b/SCRAPE_OPEN.SH
index f36f5af..b27fd5a 100644
--- a/SCRAPE_OPEN.SH
+++ b/SCRAPE_OPEN.SH
@@ -10,13 +10,15 @@ echo -e "-======================================================================
 
 WORKDIR="./tmp"
 STAGEDIR="./staging"
-DOCDIR="./LondonArchive_OpenData"
-MAPDIR="./LondonArchive_OpenData/Maps"
+DOCDIR="./LondonArchive/OpenData"
+MAPDIR="./LondonArchive/OpenData/Maps"
 WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"
 
-mkdir $WORKDIR
-mkdir $DOCDIR
-mkdir $MAPDIR
+mkdir -p "$WORKDIR"
+mkdir -p "$DOCDIR"
+mkdir -p "$MAPDIR"
+
+DOWNLOAD_MAPS=0
 
 i=0
 SEARCH_END=0
@@ -54,14 +56,15 @@ while [[ $SEARCH_END == 0 ]]; do
   mkdir $STAGEDIR
 
   if [[ $ITEM_NAME != "" ]] && [[ $ITEM_NAME != "null" ]]; then
-    wget --user-agent="$WGET_UA" "https://www.arcgis.com/sharing/rest/content/items/$ITEM_ID/data" -O "$STAGEDIR/$ITEM_NAME" -c -q
+    wget --user-agent="$WGET_UA" "https://www.arcgis.com/sharing/rest/content/items/$ITEM_ID/data" -O "$DOCDIR/$ITEM_NAME" -c -q
     echo " Downloaded."
 
     echo "Compressing."
-    7z a -pAEF9D58B978A103B04016D600FD4B1E6943A3FF538B98B84F1C177B414F7018 "$DOCDIR/$ITEM_NAME.7z" "$STAGEDIR"
+    # No need to compress non-map data.
+    #7z a "$DOCDIR/$ITEM_NAME.7z" "$STAGEDIR"
   fi
 
-  if [[ $ITEM_URL == *"maps.london.ca/server/rest/services"* ]]; then
+  if [[ $ITEM_URL == *"maps.london.ca/server/rest/services"* ]] && (( DOWNLOAD_MAPS )); then
     MAP_ID="$(echo $ITEM_URL | sed 's/^.*\/MapServer\///')"
     echo " ^^^ Item is map. ($MAP_ID) "
     # https://hub.arcgis.com/api/v3/datasets/$ITEM_ID/downloads/data?format=[csv/shp/geojson/kml]&spatialRefId=$SPATIAL_ID&where=1=1
@@ -85,7 +88,7 @@ while [[ $SEARCH_END == 0 ]]; do
-    echo ' Source URL is $ITEM_URL.'
+    echo " Source URL is $ITEM_URL."
 
     echo "Compressing."
-    7z a -pAEF9D58B978A103B04016D600FD4B1E6943A3FF538B98B84F1C177B414F7018 "$MAPDIR/$ITEM_TITLE.7z" "$STAGEDIR"
+    7z a "$MAPDIR/$ITEM_TITLE.7z" "$STAGEDIR"
   fi
 
 done