LondonScrapers/SCRAPE_AGIS.SH
2026-06-19 23:30:51 -04:00

67 lines
2.9 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# SCRAPE_AGIS.SH: prints a banner, loads the shared helper functions and sets
# up the configuration and scratch directories used by the ArcGIS download
# loop further down. Every path here is relative, so the script has to be
# started from the directory that contains ./functions.

printf '%s\n' \
  "" \
  "-========================================================================-" \
  "-=- -=-" \
  "-=- SCRAPE_AGIS.SH: Downloads ArcGIS maps -=-" \
  "-=- -=-" \
  "-=- Lillian Skinner -=-" \
  "-=- -=-" \
  "-========================================================================-"

# The download loop calls _utils_download_helper, which this file never
# defines; presumably it is provided by this sourced file (verify).
source ./functions/.functions

# User-Agent string sent with the HTTP requests.
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"

# Root of the ArcGIS REST services directory that is walked.
ARGIS_URL="https://maps.london.ca/server/rest/services"

# Scratch locations: JSON responses live in TMP, the files of the layer
# currently being archived live in TMP_STAGING.
TMP="./tmp"
TMP_STAGING="./tmp/layers"
SERVICELIST_JSON="$TMP/servicelist.json"
FOLDER_JSON="$TMP/folder.json"
SERVICE_JSON="$TMP/service.json"
# Not referenced anywhere else in this file; kept in case sourced code reads it.
LAYERQUERY_JSON="$TMP/layer_query.json"

# -p keeps a second run from failing with "File exists" when the scratch
# directories are left over from an earlier run.
mkdir -p "$TMP" "$TMP_STAGING"
# Largest number of features requested per query page. The page size actually
# used is lowered to a service's advertised maxRecordCount when that is smaller.
MAX_REQUESTS=2000

#######################################
# Turns a layer name into text that is safe inside a file name.
# Whitespace runs are collapsed and trimmed, then "/" and "\" become spaces
# and any double spaces this creates are collapsed again.
# Arguments: $1 - raw layer name
# Outputs:   cleaned name on stdout
#######################################
_agis_clean_layer_name() {
  local IFS=$' \t\n'
  local -a words=()
  local name

  # Splitting with read normalises whitespace like an unquoted echo would,
  # but never glob-expands "*"/"?" and never treats "-n"/"-e" as options.
  read -r -a words <<<"$1"
  name="${words[*]}"
  name=${name//\// }
  name=${name//\\/ }
  while [[ "$name" == *"  "* ]]; do
    name=${name//  / }
  done
  printf '%s\n' "$name"
}

#######################################
# Picks the page size for a service.
# Globals:   MAX_REQUESTS (read)
# Arguments: $1 - the service's maxRecordCount (may be empty or non-numeric)
# Outputs:   the smaller of MAX_REQUESTS and $1; MAX_REQUESTS when $1 is unusable
#######################################
_agis_page_size() {
  local limit=$1
  if [[ "$limit" =~ ^[1-9][0-9]*$ ]] && (( 10#$limit < MAX_REQUESTS )); then
    printf '%s\n' "$limit"
  else
    printf '%s\n' "$MAX_REQUESTS"
  fi
}

#######################################
# Computes how many query pages are needed for a layer.
# Arguments: $1 - feature count (non-numeric values are treated as 0)
#            $2 - page size (positive integer)
# Outputs:   page count on stdout; never less than 1, so empty layers and
#            layers with an unknown count still get one request
# Returns:   2 when the page size is not a positive integer
#######################################
_agis_page_count() {
  local count=$1 page_size=$2 pages
  if [[ ! "$page_size" =~ ^[1-9][0-9]*$ ]]; then
    echo "error: invalid page size '$page_size'" >&2
    return 2
  fi
  [[ "$count" =~ ^[0-9]+$ ]] || count=0
  # Ceiling division: an exact multiple of the page size must not produce an
  # extra, empty trailing page.
  pages=$(( (10#$count + 10#$page_size - 1) / 10#$page_size ))
  (( pages > 0 )) || pages=1
  printf '%s\n' "$pages"
}

#######################################
# Downloads every page of one layer as GeoJSON and KMZ into the staging
# directory and packs the staging directory into a .7z archive.
# Globals:   ARGIS_URL, WGET_UA, TMP, TMP_STAGING, MAX_REQUESTS (read)
# Arguments: $1 - service name as used in the URL
#            $2 - layer id
#            $3 - raw layer name
#            $4 - page size (optional, defaults to MAX_REQUESTS)
# Outputs:   progress messages on stdout, warnings on stderr
#######################################
_agis_scrape_layer() {
  local service=$1 layer_id=$2 layer_name=$3 page_size=${4:-$MAX_REQUESTS}
  local query_url="$ARGIS_URL/$service/MapServer/$layer_id/query"
  local count_json="$TMP/count.json"
  local clean_name item_count pages page offset page_query out_base

  # Start from an empty staging directory so the archive only contains this
  # layer. The :? guard stops rm from running with an empty path.
  rm -rf -- "${TMP_STAGING:?TMP_STAGING is not set}"
  mkdir -p -- "$TMP_STAGING"

  clean_name=$(_agis_clean_layer_name "$layer_name")

  # Remove the previous answer first; otherwise a failed request would make
  # jq read the count of the layer processed before this one.
  rm -f -- "$count_json"
  curl -s -A "$WGET_UA" "$query_url?where=1=1&returnCountOnly=true&f=json" -o "$count_json"
  item_count=$(jq -r '.count // empty' "$count_json")
  if [[ ! "$item_count" =~ ^[0-9]+$ ]]; then
    echo "warning: no feature count for layer $layer_id of $service; requesting a single page" >&2
    item_count=0
  fi

  pages=$(_agis_page_count "$item_count" "$page_size") || return
  for (( page = 0; page < pages; page++ )); do
    offset=$(( page * page_size ))
    echo "Downloading $layer_id-${page} $clean_name"
    echo "$offset of $item_count"
    page_query="$query_url?where=1=1&outFields=*&returnGeometry=true&resultOffset=${offset}&resultRecordCount=${page_size}"
    out_base="$TMP_STAGING/Layer ${layer_id}-${page} - ${clean_name}"
    # _utils_download_helper is not defined in this file; it is called with
    # a URL and a destination path.
    _utils_download_helper "${page_query}&f=geojson" "${out_base}.geojson"
    echo "Done GeoJSON!"
    _utils_download_helper "${page_query}&f=kmz" "${out_base}.kmz"
    echo "Done KMZ!"
  done

  7z a "LondonArchive/ArcGIS/${service}/Layer ${layer_id} - ${clean_name}.7z" "$TMP_STAGING"
}

#######################################
# Walks the services directory: every folder, every MapServer service in it,
# every layer of that service.
# Globals:   ARGIS_URL, WGET_UA, SERVICELIST_JSON, FOLDER_JSON, SERVICE_JSON (read)
# Outputs:   progress messages on stdout, warnings/errors on stderr
# Returns:   1 when the top-level service list cannot be downloaded
#######################################
_agis_scrape_all() {
  local FOLDER SERVICE LAYERID LAYERNAME page_size

  if ! wget "$ARGIS_URL?f=json" --user-agent="$WGET_UA" -O "$SERVICELIST_JSON" -q; then
    echo "error: could not download the service list from $ARGIS_URL" >&2
    return 1
  fi

  jq -r '.folders[]?' "$SERVICELIST_JSON" | while read -r FOLDER; do
    if ! wget "$ARGIS_URL/$FOLDER?f=json" --user-agent="$WGET_UA" -O "$FOLDER_JSON" -q; then
      echo "warning: could not list folder $FOLDER; skipping" >&2
      continue
    fi
    echo "Looking in $FOLDER"

    # The service names are used directly under ARGIS_URL, so the folder name
    # is not prepended again. The trailing ? keeps jq quiet when a response
    # has no "services" key.
    jq -r '.services[]? | select(.type=="MapServer") | .name' "$FOLDER_JSON" \
      | while read -r SERVICE; do
        echo "Found $SERVICE"
        echo "$ARGIS_URL/$SERVICE/MapServer"
        if ! wget "$ARGIS_URL/$SERVICE/MapServer?f=json" --user-agent="$WGET_UA" -O "$SERVICE_JSON" -q; then
          echo "warning: could not describe service $SERVICE; skipping" >&2
          continue
        fi
        mkdir -p "LondonArchive/ArcGIS/${SERVICE}"

        # Never step the offset further than the server returns per request,
        # otherwise the records in between would be skipped.
        page_size=$(_agis_page_size "$(jq -r '.maxRecordCount // empty' "$SERVICE_JSON")")

        jq -r '.layers[]? | "\(.id)|\(.name)"' "$SERVICE_JSON" \
          | while IFS='|' read -r LAYERID LAYERNAME; do
            _agis_scrape_layer "$SERVICE" "$LAYERID" "$LAYERNAME" "$page_size"
          done
      done
  done
}

_agis_scrape_all