Created ArcGIS scraper.
This commit is contained in:
parent
770c260bd2
commit
f694de0674
53
SCRAPE_AGIS.SH
Normal file
53
SCRAPE_AGIS.SH
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/env bash
#
# SCRAPE_AGIS.SH: Downloads ArcGIS maps.
#
# Reads the folder list of the ArcGIS REST services directory at ARGIS_URL.
# For every service of type "MapServer" listed in a folder, each layer is
# queried as GeoJSON into TMP_STAGING, and the staging directory is then added
# to LondonArchive/ArcGIS/<folder>/<service>/layers.7z.
#
# Requires: wget, curl, jq, 7z.

# Abort on use of an unset variable; a typo in a path variable must never
# reach `rm -rf`.
set -u

cat <<'BANNER'

-========================================================================-
-=- -=-
-=- SCRAPE_AGIS.SH: Downloads ArcGIS maps -=-
-=- -=-
-=- Lillian Skinner -=-
-=- -=-
-========================================================================-
BANNER

# Browser-like user agent sent with every request.
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"

# Root of the REST services directory being scraped.
ARGIS_URL="https://maps.london.ca/server/rest/services"

# Scratch locations. TMP_STAGING is wiped and recreated for every service.
TMP="./tmp"
TMP_STAGING="./tmp/layers"
SERVICELIST_JSON="$TMP/servicelist.json"
FOLDER_JSON="$TMP/folder.json"
SERVICE_JSON="$TMP/service.json"
# Currently not referenced anywhere in this script.
LAYERQUERY_JSON="$TMP/layer_query.json"

# Prints a warning to stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Downloads URL $1 into file $2 using the shared user agent.
# Returns wget's exit status so callers can skip on failure.
fetch_json() {
  wget "$1" --user-agent="$WGET_UA" -O "$2" -q
}

# -p so a re-run with an existing ./tmp does not fail.
mkdir -p "$TMP" "$TMP_STAGING"

if ! fetch_json "$ARGIS_URL?f=json" "$SERVICELIST_JSON"; then
  warn "could not download the service list from $ARGIS_URL"
  exit 1
fi

jq -r '.folders[]?' "$SERVICELIST_JSON" | while IFS= read -r FOLDER; do
  echo "Looking in $FOLDER"
  if ! fetch_json "$ARGIS_URL/$FOLDER?f=json" "$FOLDER_JSON"; then
    warn "could not list folder $FOLDER; skipping"
    continue
  fi

  # `?` keeps jq quiet when a folder listing has no "services" array.
  jq -r '.services[]? | select(.type=="MapServer") | .name' "$FOLDER_JSON" \
    | while IFS= read -r SERVICE; do
      echo "Found $SERVICE"
      echo "$ARGIS_URL/$SERVICE/MapServer"
      if ! fetch_json "$ARGIS_URL/$SERVICE/MapServer?f=json" "$SERVICE_JSON"; then
        warn "could not read service $SERVICE; skipping"
        continue
      fi

      # Start each service with an empty staging directory so the archive
      # only contains this service's layers. `:?` refuses an empty path.
      rm -rf -- "${TMP_STAGING:?}"
      mkdir -p "$TMP_STAGING"

      # Each output line is "<id>|<name>"; the last variable of `read`
      # receives the rest of the line, so names containing "|" survive.
      jq -r '.layers[]? | "\(.id)|\(.name)"' "$SERVICE_JSON" \
        | while IFS='|' read -r LAYERID LAYERNAME; do
          echo "Downloading $LAYERID-$LAYERNAME..."

          # A "/" in a layer name would otherwise be read as a directory
          # separator in the output path and make curl fail to write.
          SAFE_LAYERNAME="${LAYERNAME//\//_}"

          # Single-line URL: no continuation lines whose indentation could
          # leak into the request.
          # NOTE(review): the server may cap the number of features returned
          # by one query; this does not page through results - confirm.
          QUERY_URL="$ARGIS_URL/$SERVICE/MapServer/$LAYERID/query?where=1=1&outFields=*&returnGeometry=true&f=geojson"

          # -f: do not save HTTP error pages as .geojson; -S: still report
          # errors despite -s; -A: same user agent as the wget requests.
          if ! curl -fsS -A "$WGET_UA" "$QUERY_URL" \
            -o "$TMP_STAGING/layer${LAYERID}-${SAFE_LAYERNAME}.geojson"; then
            warn "download failed for layer $LAYERID ($LAYERNAME) of $SERVICE"
          fi
        done

      # NOTE(review): the request URL above uses $SERVICE without $FOLDER, so
      # the listing presumably returns names already prefixed with the folder;
      # if so this path repeats the folder. Left unchanged to keep the
      # existing archive layout - confirm before altering.
      mkdir -p "LondonArchive/ArcGIS/${FOLDER}/${SERVICE}"
      # stdin is redirected so 7z can never consume the service list that the
      # enclosing `while read` loop is reading.
      7z a "LondonArchive/ArcGIS/${FOLDER}/${SERVICE}/layers.7z" "$TMP_STAGING" </dev/null
    done
done
|
||||||
@ -10,13 +10,15 @@ echo -e "-======================================================================
|
|||||||
|
|
||||||
WORKDIR="./tmp"
|
WORKDIR="./tmp"
|
||||||
STAGEDIR="./staging"
|
STAGEDIR="./staging"
|
||||||
DOCDIR="./LondonArchive_OpenData"
|
DOCDIR="./LondonArchive/OpenData"
|
||||||
MAPDIR="./LondonArchive_OpenData/Maps"
|
MAPDIR="./LondonArchive/OpenData/Maps"
|
||||||
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"
|
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"
|
||||||
|
|
||||||
mkdir $WORKDIR
|
mkdir -p $WORKDIR
|
||||||
mkdir $DOCDIR
|
mkdir -p $DOCDIR
|
||||||
mkdir $MAPDIR
|
mkdir -p $MAPDIR
|
||||||
|
|
||||||
|
DOWNLOAD_MAPS=0
|
||||||
|
|
||||||
i=0
|
i=0
|
||||||
SEARCH_END=0
|
SEARCH_END=0
|
||||||
@ -54,14 +56,15 @@ while [[ $SEARCH_END == 0 ]]; do
|
|||||||
mkdir $STAGEDIR
|
mkdir $STAGEDIR
|
||||||
|
|
||||||
if [[ $ITEM_NAME != "" ]] && [[ $ITEM_NAME != "null" ]]; then
|
if [[ $ITEM_NAME != "" ]] && [[ $ITEM_NAME != "null" ]]; then
|
||||||
wget --user-agent="$WGET_UA" "https://www.arcgis.com/sharing/rest/content/items/$ITEM_ID/data" -O "$STAGEDIR/$ITEM_NAME" -c -q
|
wget --user-agent="$WGET_UA" "https://www.arcgis.com/sharing/rest/content/items/$ITEM_ID/data" -O "$DOCDIR/$ITEM_NAME" -c -q
|
||||||
echo " Downloaded."
|
echo " Downloaded."
|
||||||
|
|
||||||
echo "Compressing."
|
echo "Compressing."
|
||||||
7z a -pAEF9D58B978A103B04016D600FD4B1E6943A3FF538B98B84F1C177B414F7018 "$DOCDIR/$ITEM_NAME.7z" "$STAGEDIR"
|
# No need to compress non-map data.
|
||||||
|
#7z a "$DOCDIR/$ITEM_NAME.7z" "$STAGEDIR"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $ITEM_URL == *"maps.london.ca/server/rest/services"* ]]; then
|
if [[ $ITEM_URL == *"maps.london.ca/server/rest/services"* ]] && (( DOWNLOAD_MAPS )); then
|
||||||
MAP_ID="$(echo $ITEM_URL | sed 's/^.*\/MapServer\///')"
|
MAP_ID="$(echo $ITEM_URL | sed 's/^.*\/MapServer\///')"
|
||||||
echo " ^^^ Item is map. ($MAP_ID) "
|
echo " ^^^ Item is map. ($MAP_ID) "
|
||||||
# https://hub.arcgis.com/api/v3/datasets/$ITEM_ID/downloads/data?format=[csv/shp/geojson/kml]&spatialRefId=$SPATIAL_ID&where=1=1
|
# https://hub.arcgis.com/api/v3/datasets/$ITEM_ID/downloads/data?format=[csv/shp/geojson/kml]&spatialRefId=$SPATIAL_ID&where=1=1
|
||||||
@ -85,7 +88,7 @@ while [[ $SEARCH_END == 0 ]]; do
|
|||||||
echo ' Source URL is $ITEM_URL.'
|
echo ' Source URL is $ITEM_URL.'
|
||||||
|
|
||||||
echo "Compressing."
|
echo "Compressing."
|
||||||
7z a -pAEF9D58B978A103B04016D600FD4B1E6943A3FF538B98B84F1C177B414F7018 "$MAPDIR/$ITEM_TITLE.7z" "$STAGEDIR"
|
7z a "$MAPDIR/$ITEM_TITLE.7z" "$STAGEDIR"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user