102 lines
4.4 KiB
Bash
102 lines
4.4 KiB
Bash
#!/usr/bin/env bash
#
# scrape_opendata.sh - archive open data published by the City of London.
#
# Pages through the ArcGIS Hub search API (PAGE_SIZE items per request) for
# the groups named in SEARCH_FILTER. Every item that carries a file name is
# downloaded into DOCDIR. When DOWNLOAD_MAPS is non-zero, items served from
# maps.london.ca are additionally exported as CSV, Shapefile, GeoJSON and KML
# into STAGEDIR and packed into "<title>.7z" under MAPDIR.
#
# Requires: curl, jq, wget (plus 7z when DOWNLOAD_MAPS is non-zero).

# pipefail lets a failed curl surface through the "curl | jq" pipeline;
# nounset catches typos in variable names.
set -uo pipefail

cat <<'BANNER'

-========================================================================-
-=- -=-
-=- SCRAPE_OPENDATA.SH: Scrape Open Data from the City of London -=-
-=- -=-
-=- https://gist.github.com/rvtr/******************************** -=-
-=- Lillian Skinner -=-
-=- -=-
-========================================================================-
BANNER

WORKDIR="./tmp"
STAGEDIR="./staging"
DOCDIR="./LondonArchive/OpenData"
MAPDIR="./LondonArchive/OpenData/Maps"
WGET_UA="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Viewer/99.9.8782.87"

SEARCH_URL="https://hub.arcgis.com/api/search/v1/collections/all/items"
SEARCH_FILTER="filter=((group IN (de724381536540a5bf2d005fb32ec92a, d17e3e9bfd274e88aeed15fa165bf1e3, b7ab05d332c24dd2ba485acd2ac92837, b15cf62bc0a14990a75e348930b0cb4e)))"
PAGE_SIZE=100
LIST_JSON="$WORKDIR/arcgis_list.json"

# Map export is off by default; run with DOWNLOAD_MAPS=1 to enable it.
DOWNLOAD_MAPS="${DOWNLOAD_MAPS:-0}"

# Print the requested field of item $1 on the current result page.
# $2 is a jq path relative to the item, e.g. ".properties.title".
# A missing field prints the literal string "null".
item_field() {
  jq -r --argjson n "$1" ".features[\$n]$2" "$LIST_JSON"
}

# Turn an API-supplied title/name into a single path component: a "/" in the
# value would otherwise be treated as a directory separator by wget and 7z.
safe_name() {
  printf '%s' "${1//\//_}"
}

# Download URL $1 to file $2 using the browser user agent and report the
# outcome. "-c" resumes a partial file left behind by an earlier run.
fetch() {
  local url=$1 dest=$2
  if wget --user-agent="$WGET_UA" "$url" -O "$dest" -c -q; then
    echo " Downloaded."
  else
    echo " Download failed: $url" >&2
    # wget -O creates the file before connecting; drop an empty stub.
    [[ -s $dest ]] || rm -f -- "$dest"
    return 1
  fi
}

# Export one format of a map dataset into STAGEDIR.
#   $1 log label   $2 dataset download URL (without query string)
#   $3 format (also used as file extension)   $4 spatial reference id
#   $5 file name stem
export_map() {
  local label=$1 base=$2 format=$3 srid=$4 stem=$5
  local url="${base}?format=${format}&spatialRefId=${srid}&where=1=1"
  echo "$label $url"
  fetch "$url" "$STAGEDIR/$stem.$format"
}

required_tools=(curl jq wget)
if (( DOWNLOAD_MAPS )); then
  required_tools+=(7z)
fi
for tool in "${required_tools[@]}"; do
  if ! command -v "$tool" >/dev/null; then
    echo "Required tool not found: $tool" >&2
    exit 1
  fi
done

mkdir -p -- "$WORKDIR" "$DOCDIR" "$MAPDIR" || exit 1

page=0
while :; do
  query=(--data-urlencode "$SEARCH_FILTER" --data-urlencode "limit=$PAGE_SIZE")
  if (( page > 0 )); then
    # NOTE(review): offset kept as page * PAGE_SIZE. If the API's startindex
    # is 1-based this re-fetches the last item of the previous page - confirm
    # against the Hub search API documentation.
    query+=(--data-urlencode "startindex=$((page * PAGE_SIZE))")
  fi

  echo "Start index download..."
  if ! curl --get --fail "${query[@]}" "$SEARCH_URL" | jq . > "$LIST_JSON"; then
    echo "Index download failed (page $page)." >&2
    exit 1
  fi

  TOTAL_ITEMS=$(jq -r '.numberMatched' "$LIST_JSON")
  RETURNED_ITEMS=$(jq -r '.numberReturned' "$LIST_JSON")
  echo "Total items in JSON : $TOTAL_ITEMS"
  echo "Returned items : $RETURNED_ITEMS"

  # Without numeric counts the end-of-results test below cannot work and the
  # loop would never terminate.
  if ! [[ $TOTAL_ITEMS =~ ^[0-9]+$ && $RETURNED_ITEMS =~ ^[0-9]+$ ]]; then
    echo "Unexpected search response; see $LIST_JSON" >&2
    exit 1
  fi

  for (( idx = 0; idx < RETURNED_ITEMS; idx++ )); do
    ITEM_ID=$(item_field "$idx" '.id')
    ITEM_TITLE=$(item_field "$idx" '.properties.title')
    ITEM_URL=$(item_field "$idx" '.properties.url')
    ITEM_NAME=$(item_field "$idx" '.properties.name')
    echo "Cur. article: $page.$idx, ID : $ITEM_ID"
    echo " Cur. article: $page.$idx, Title: $ITEM_TITLE"
    echo " Cur. article: $page.$idx, URL : $ITEM_URL"
    echo " Cur. article: $page.$idx, Name : $ITEM_NAME"

    # Start every item with an empty staging directory. ":?" aborts instead
    # of running "rm -rf" on an empty path.
    rm -rf -- "${STAGEDIR:?}"
    mkdir -p -- "$STAGEDIR"

    # Items with a file name are plain documents: store them as-is.
    if [[ -n $ITEM_NAME && $ITEM_NAME != "null" ]]; then
      fetch "https://www.arcgis.com/sharing/rest/content/items/$ITEM_ID/data" \
        "$DOCDIR/$(safe_name "$ITEM_NAME")"
      # No need to compress non-map data.
      #7z a "$DOCDIR/$ITEM_NAME.7z" "$STAGEDIR"
    fi

    if [[ $ITEM_URL == *"maps.london.ca/server/rest/services"* ]] && (( DOWNLOAD_MAPS )); then
      # Everything after the last "/MapServer/" in the service URL.
      MAP_ID=${ITEM_URL##*/MapServer/}
      echo " ^^^ Item is map. ($MAP_ID) "

      # https://hub.arcgis.com/api/v3/datasets/$ITEM_ID/downloads/data?format=[csv/shp/geojson/kml]&spatialRefId=$SPATIAL_ID&where=1=1
      # KML and GeoJSON use the spatial ID of 4326, all others use 26917
      dataset_url="https://hub.arcgis.com/api/v3/datasets/${ITEM_ID}_${MAP_ID}/downloads/data"
      file_stem=$(safe_name "$ITEM_TITLE")

      export_map " Map URL (CSV) :" "$dataset_url" csv 26917 "$file_stem"
      export_map " Map URL (Shapefile):" "$dataset_url" shp 26917 "$file_stem"
      export_map " Map URL (GeoJSON) :" "$dataset_url" geojson 4326 "$file_stem"
      export_map " Map URL (KML) :" "$dataset_url" kml 4326 "$file_stem"
      echo " Source URL is $ITEM_URL."

      echo "Compressing."
      7z a "$MAPDIR/$file_stem.7z" "$STAGEDIR" \
        || echo " Compression failed: $MAPDIR/$file_stem.7z" >&2
    fi
  done

  # Stop once this page reaches the reported total, or when the server
  # returns an empty page (guards against looping forever).
  if (( RETURNED_ITEMS == 0 || page * PAGE_SIZE + RETURNED_ITEMS >= TOTAL_ITEMS )); then
    echo "No more items!"
    break
  fi
  page=$((page + 1))
done