Created ArcGIS scraper.

This commit is contained in:
Lillian Skinner 2026-04-11 21:51:52 -04:00
parent 770c260bd2
commit f694de0674
2 changed files with 65 additions and 9 deletions

53
SCRAPE_AGIS.SH Normal file
View File

@ -0,0 +1,53 @@
#!/usr/bin/env bash
#
# SCRAPE_AGIS.SH: walks the ArcGIS REST catalogue at $ARGIS_URL and, for every
# MapServer service listed inside a folder, downloads each layer as GeoJSON
# and packs the downloaded layers into
#   LondonArchive/ArcGIS/<folder>/<service>/layers.7z
#
# Requires: wget, curl, jq, 7z.
#
# NOTE(review): each layer is fetched with a single where=1=1 query. ArcGIS
# servers commonly cap the number of features returned per request, so large
# layers may come back truncated -- confirm against the server's limits and
# paginate if needed.
echo -e "\n-========================================================================-"
echo -e "-=- -=-"
echo -e "-=- SCRAPE_AGIS.SH: Downloads ArcGIS maps -=-"
echo -e "-=- -=-"
echo -e "-=- Lillian Skinner -=-"
echo -e "-=- -=-"
echo -e "-========================================================================-"

WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"
ARGIS_URL="https://maps.london.ca/server/rest/services"
TMP="./tmp"
TMP_STAGING="./tmp/layers"
SERVICELIST_JSON="$TMP/servicelist.json"
FOLDER_JSON="$TMP/folder.json"
SERVICE_JSON="$TMP/service.json"
# Not referenced anywhere in this script; kept so the variable set is unchanged.
LAYERQUERY_JSON="$TMP/layer_query.json"

# -p: re-running the scraper must not fail because the directories already exist.
mkdir -p "$TMP" "$TMP_STAGING" || exit 1

# Without the top-level catalogue there is nothing to iterate over.
if ! wget "$ARGIS_URL?f=json" --user-agent="$WGET_UA" -O "$SERVICELIST_JSON" -q; then
  echo "Failed to download the service list from $ARGIS_URL" >&2
  exit 1
fi

# IFS= keeps leading/trailing whitespace in names intact.
jq -r '.folders[]?' "$SERVICELIST_JSON" | while IFS= read -r FOLDER; do
  echo "Looking in $FOLDER"
  if ! wget "$ARGIS_URL/$FOLDER?f=json" --user-agent="$WGET_UA" -O "$FOLDER_JSON" -q; then
    echo "Failed to list folder $FOLDER; skipping." >&2
    continue
  fi

  # '?' on .services[] tolerates responses without a services array
  # (e.g. an error payload), matching the other jq filters in this script.
  jq -r '.services[]?
    | select(.type=="MapServer")
    | .name' "$FOLDER_JSON" | while IFS= read -r SERVICE; do
    echo "Found $SERVICE"
    # NOTE(review): the URL is built from $SERVICE alone, so service names
    # presumably already carry the folder prefix. If so, the archive path
    # below repeats the folder name -- confirm before changing the layout.
    echo "$ARGIS_URL/$SERVICE/MapServer"
    if ! wget "$ARGIS_URL/$SERVICE/MapServer?f=json" --user-agent="$WGET_UA" -O "$SERVICE_JSON" -q; then
      # Skip instead of archiving an empty staging directory.
      echo "Failed to fetch service $SERVICE; skipping." >&2
      continue
    fi

    # Start every service with an empty staging directory so layers from the
    # previous service never leak into this archive. ':?' aborts the expansion
    # if the variable is ever empty, -f tolerates a missing directory.
    rm -rf -- "${TMP_STAGING:?}"
    mkdir -p "$TMP_STAGING"

    jq -r '.layers[]? | "\(.id)|\(.name)"' "$SERVICE_JSON" | while IFS='|' read -r LAYERID LAYERNAME; do
      echo "Downloading $LAYERID-$LAYERNAME..."
      # A '/' in the layer name would point the output file into a
      # sub-directory that does not exist, making curl fail to write it.
      SAFE_LAYERNAME="${LAYERNAME//\//_}"
      QUERY_URL="$ARGIS_URL/$SERVICE/MapServer/$LAYERID/query?where=1=1&outFields=*&returnGeometry=true&f=geojson"
      if ! curl -s "$QUERY_URL" -o "$TMP_STAGING/layer${LAYERID}-${SAFE_LAYERNAME}.geojson"; then
        echo "Failed to download layer $LAYERID of $SERVICE." >&2
      fi
    done

    mkdir -p "LondonArchive/ArcGIS/${FOLDER}/${SERVICE}"
    7z a "LondonArchive/ArcGIS/${FOLDER}/${SERVICE}/layers.7z" "$TMP_STAGING"
  done
done

View File

@ -10,13 +10,15 @@ echo -e "-======================================================================
WORKDIR="./tmp" WORKDIR="./tmp"
STAGEDIR="./staging" STAGEDIR="./staging"
DOCDIR="./LondonArchive_OpenData" DOCDIR="./LondonArchive/OpenData"
MAPDIR="./LondonArchive_OpenData/Maps" MAPDIR="./LondonArchive/OpenData/Maps"
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87" WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"
mkdir $WORKDIR mkdir -p $WORKDIR
mkdir $DOCDIR mkdir -p $DOCDIR
mkdir $MAPDIR mkdir -p $MAPDIR
DOWNLOAD_MAPS=0
i=0 i=0
SEARCH_END=0 SEARCH_END=0
@ -54,14 +56,15 @@ while [[ $SEARCH_END == 0 ]]; do
mkdir $STAGEDIR mkdir $STAGEDIR
if [[ $ITEM_NAME != "" ]] && [[ $ITEM_NAME != "null" ]]; then if [[ $ITEM_NAME != "" ]] && [[ $ITEM_NAME != "null" ]]; then
wget --user-agent="$WGET_UA" "https://www.arcgis.com/sharing/rest/content/items/$ITEM_ID/data" -O "$STAGEDIR/$ITEM_NAME" -c -q wget --user-agent="$WGET_UA" "https://www.arcgis.com/sharing/rest/content/items/$ITEM_ID/data" -O "$DOCDIR/$ITEM_NAME" -c -q
echo " Downloaded." echo " Downloaded."
echo "Compressing." echo "Compressing."
7z a -pAEF9D58B978A103B04016D600FD4B1E6943A3FF538B98B84F1C177B414F7018 "$DOCDIR/$ITEM_NAME.7z" "$STAGEDIR" # No need to compress non-map data.
#7z a "$DOCDIR/$ITEM_NAME.7z" "$STAGEDIR"
fi fi
if [[ $ITEM_URL == *"maps.london.ca/server/rest/services"* ]]; then if [[ $ITEM_URL == *"maps.london.ca/server/rest/services"* ]] && (( DOWNLOAD_MAPS )); then
MAP_ID="$(echo $ITEM_URL | sed 's/^.*\/MapServer\///')" MAP_ID="$(echo $ITEM_URL | sed 's/^.*\/MapServer\///')"
echo " ^^^ Item is map. ($MAP_ID) " echo " ^^^ Item is map. ($MAP_ID) "
# https://hub.arcgis.com/api/v3/datasets/$ITEM_ID/downloads/data?format=[csv/shp/geojson/kml]&spatialRefId=$SPATIAL_ID&where=1=1 # https://hub.arcgis.com/api/v3/datasets/$ITEM_ID/downloads/data?format=[csv/shp/geojson/kml]&spatialRefId=$SPATIAL_ID&where=1=1
@ -85,7 +88,7 @@ while [[ $SEARCH_END == 0 ]]; do
echo ' Source URL is $ITEM_URL.' echo ' Source URL is $ITEM_URL.'
echo "Compressing." echo "Compressing."
7z a -pAEF9D58B978A103B04016D600FD4B1E6943A3FF538B98B84F1C177B414F7018 "$MAPDIR/$ITEM_TITLE.7z" "$STAGEDIR" 7z a "$MAPDIR/$ITEM_TITLE.7z" "$STAGEDIR"
fi fi
done done