mirror of
https://github.com/ezwelty/opentrees-harvester.git
synced 2025-06-18 10:45:42 -04:00
Geocode addresses with Google Geocoding API
And cache the results.
This commit is contained in:
parent
ddd75864e4
commit
3105ecfbc2
4
.env.example
Normal file
4
.env.example
Normal file
@ -0,0 +1,4 @@
|
||||
GOOGLE_MAPS_API_KEY=
|
||||
DATA_ARCHIVE=archive
|
||||
DATA_ARCHIVE_LOG=archive.jsonl
|
||||
GEOCODE_ARCHIVE=geocode
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -16,3 +16,5 @@
|
||||
/taxa*/
|
||||
archive
|
||||
archive.jsonl
|
||||
geocode
|
||||
.env
|
||||
|
@ -1,3 +1,4 @@
|
||||
require('dotenv').config()
|
||||
const fs = require('fs')
|
||||
const mime = require('mime-types')
|
||||
const puppeteer = require('puppeteer')
|
||||
@ -9,10 +10,6 @@ const streamPipeline = util.promisify(stream.pipeline)
|
||||
|
||||
const helpers = require('./helpers')
|
||||
|
||||
// NOTE: Requires execution from project root
|
||||
ARCHIVE_PATH = 'archive'
|
||||
LOG_PATH = 'archive.jsonl'
|
||||
|
||||
/**
|
||||
* Download file and compute MD5 hash of the stream.
|
||||
*
|
||||
@ -147,7 +144,7 @@ async function hashFile(file, options = {encoding: 'base64'}) {
|
||||
function buildPath({url, checksum, date = new Date()} = {}) {
|
||||
const hash = url ? md5(url) : checksum
|
||||
date = date.toISOString().replace(/:/g, '')
|
||||
return path.join(ARCHIVE_PATH, hash, date)
|
||||
return path.join(process.env.DATA_ARCHIVE, hash, date)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -172,7 +169,7 @@ async function log({ date = new Date(), ...props } = {}) {
|
||||
entry.path = existingEntry.path
|
||||
}
|
||||
}
|
||||
fs.appendFileSync(LOG_PATH, JSON.stringify(entry) + '\n')
|
||||
fs.appendFileSync(process.env.DATA_ARCHIVE_LOG, JSON.stringify(entry) + '\n')
|
||||
return entry
|
||||
}
|
||||
|
||||
@ -258,7 +255,7 @@ async function search(params, {limit, maxDays} = {}) {
|
||||
}
|
||||
const criterias = Object.entries(params || {})
|
||||
const entries = []
|
||||
for await (const log of helpers.iterateJSONL(LOG_PATH)) {
|
||||
for await (const log of helpers.iterateJSONL(process.env.DATA_ARCHIVE_LOG)) {
|
||||
if (limit && entries.length === limit) {
|
||||
return entries
|
||||
}
|
||||
@ -274,8 +271,6 @@ async function search(params, {limit, maxDays} = {}) {
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
ARCHIVE_PATH,
|
||||
LOG_PATH,
|
||||
md5,
|
||||
loadPage,
|
||||
readPageHtml,
|
||||
|
59
lib/geocode.js
Normal file
59
lib/geocode.js
Normal file
@ -0,0 +1,59 @@
|
||||
require('dotenv').config()
const fs = require('fs')
const path = require('path')
const {Client} = require('@googlemaps/google-maps-services-js')
const client = new Client()
const archive = require('./archive')
|
||||
|
||||
/**
 * Geocode address.
 *
 * Sends one request through the Google Maps client, authenticated with the
 * `GOOGLE_MAPS_API_KEY` environment variable, with a 1000 ms timeout.
 *
 * @param {string} address - Address to geocode.
 * @returns {Promise<object>} Geocode results (the `results` field of the
 * response data).
 * @throws {Error} If the request fails. The message is the `error_message`
 * of the response body when there is one, otherwise the message of the
 * underlying error (e.g. a timeout or network failure). The underlying error
 * is passed along as `cause`.
 */
async function geocode(address) {
  let response
  try {
    response = await client.geocode({
      params: {address, key: process.env.GOOGLE_MAPS_API_KEY},
      timeout: 1000, // milliseconds
    })
  } catch (error) {
    // Timeouts and network failures carry no `response`, and a failed
    // response need not include `error_message`. Fall back to the original
    // message instead of crashing with a TypeError or throwing a blank error.
    const data = error && error.response && error.response.data
    const message =
      (data && data.error_message) || (error && error.message) || String(error)
    throw new Error(message, {cause: error})
  }
  return response.data.results
}
|
||||
|
||||
/**
 * Build the path of the cache file for an address.
 *
 * @param {string} address - Address to geocode.
 * @returns {string} `<GEOCODE_ARCHIVE>/<hash>.json`, where `<hash>` is
 * `archive.md5(address)`.
 * @throws {Error} If the `GEOCODE_ARCHIVE` environment variable is unset or
 * empty.
 */
function buildGeocodePath(address) {
  const directory = process.env.GEOCODE_ARCHIVE
  if (!directory) {
    // Otherwise the path would silently become `undefined/<hash>.json`
    // (or `/<hash>.json` for an empty value).
    throw new Error('GEOCODE_ARCHIVE environment variable is not set')
  }
  const hash = archive.md5(address)
  return `${directory}/${hash}.json`
}
|
||||
|
||||
/**
 * Geocode address with caching.
 *
 * Results are stored as JSON in the file returned by `buildGeocodePath`. If
 * that file already exists it is read and returned without calling
 * `geocode`; otherwise the address is geocoded and the result is written to
 * the file (creating its parent directory if needed).
 *
 * @param {string} address - Address to geocode.
 * @returns {Promise<object>} Cache entry `{address, date, result}`, where
 * `date` is an ISO 8601 string (the same value whether freshly geocoded or
 * read back from the cache) and `result` is the value returned by `geocode`.
 * @throws {Error} If the cached file was written for a different address, or
 * if geocoding fails.
 */
async function geocodeCached(address) {
  const file = buildGeocodePath(address)
  if (fs.existsSync(file)) {
    const cached = JSON.parse(fs.readFileSync(file, 'utf8'))
    // Guards against two addresses mapping to the same cache file.
    if (cached.address !== address) {
      throw new Error(
        `Address mismatch in ${file}: ${cached.address} !== ${address}`
      )
    }
    return cached
  }
  const result = await geocode(address)
  // Store the date as the string JSON would serialize it to, so the returned
  // entry has the same shape on a cache miss as on a later cache hit.
  const data = {address, date: new Date().toISOString(), result}
  // The cache directory may not exist yet on first use.
  fs.mkdirSync(path.dirname(file), {recursive: true})
  fs.writeFileSync(file, JSON.stringify(data))
  return data
}
|
||||
|
||||
// Public API: the direct geocoder and its file-cached wrapper.
module.exports = {geocode, geocodeCached}
|
@ -22,6 +22,7 @@
|
||||
"@esri/arcgis-rest-form-data": "^4.0.0",
|
||||
"@esri/arcgis-rest-portal": "^4.0.3",
|
||||
"@esri/arcgis-rest-request": "^4.0.3",
|
||||
"@googlemaps/google-maps-services-js": "^3.3.42",
|
||||
"adm-zip": "^0.5.10",
|
||||
"axios": "^1.4.0",
|
||||
"colors": "^1.1.2",
|
||||
@ -29,6 +30,7 @@
|
||||
"command-line-usage": "^6.1.0",
|
||||
"csv-parser": "^2.3.2",
|
||||
"decompress": "^4.2.1",
|
||||
"dotenv": "^16.3.1",
|
||||
"fuzzyset.js": "^1.0.5",
|
||||
"gdal-async": "^3.7.0",
|
||||
"glob": "^7.1.2",
|
||||
|
79
yarn.lock
79
yarn.lock
@ -337,6 +337,24 @@
|
||||
resolved "https://registry.yarnpkg.com/@gar/promisify/-/promisify-1.1.3.tgz#555193ab2e3bb3b6adc3d551c9c030d9e860daf6"
|
||||
integrity sha512-k2Ty1JcVojjJFwrg/ThKi2ujJ7XNLYaFGNB/bWT9wGR+oSMJHMa5w+CUq6p/pVrKeNNgA7pCqEcjSnHVoqJQFw==
|
||||
|
||||
"@googlemaps/google-maps-services-js@^3.3.42":
|
||||
version "3.3.42"
|
||||
resolved "https://registry.yarnpkg.com/@googlemaps/google-maps-services-js/-/google-maps-services-js-3.3.42.tgz#61b3ba9045c84a29c1ffeca1d571ce56b316eca3"
|
||||
integrity sha512-DfqM28z0jSMr0BSw+CUcUPJLwwMhMf1f+IWfFYuPs6o/AqyYUN+jLjBQKfaUh69e8MShYM4LzcNBYjyttYtsmA==
|
||||
dependencies:
|
||||
"@googlemaps/url-signature" "^1.0.4"
|
||||
agentkeepalive "^4.1.0"
|
||||
axios "^1.5.1"
|
||||
query-string "<8.x"
|
||||
retry-axios "<3.x"
|
||||
|
||||
"@googlemaps/url-signature@^1.0.4":
|
||||
version "1.0.30"
|
||||
resolved "https://registry.yarnpkg.com/@googlemaps/url-signature/-/url-signature-1.0.30.tgz#6f82bd504c39a691628bfa66fc568e6937af29ed"
|
||||
integrity sha512-iT2Ju1t+DiAloAZa3wsRPRNIxxtCPN0v9iRCzlMM/7U3NQlmiIcTWoY6+OY3ZFZUYE1a7Z0kC6AmtbYVtSILCQ==
|
||||
dependencies:
|
||||
crypto-js "^4.2.0"
|
||||
|
||||
"@hapi/address@^2.1.2":
|
||||
version "2.1.4"
|
||||
resolved "https://registry.npmjs.org/@hapi/address/-/address-2.1.4.tgz"
|
||||
@ -837,6 +855,13 @@ agent-base@6, agent-base@^6.0.2:
|
||||
dependencies:
|
||||
debug "4"
|
||||
|
||||
agentkeepalive@^4.1.0:
|
||||
version "4.5.0"
|
||||
resolved "https://registry.yarnpkg.com/agentkeepalive/-/agentkeepalive-4.5.0.tgz#2673ad1389b3c418c5a20c5d7364f93ca04be923"
|
||||
integrity sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==
|
||||
dependencies:
|
||||
humanize-ms "^1.2.1"
|
||||
|
||||
agentkeepalive@^4.1.3:
|
||||
version "4.3.0"
|
||||
resolved "https://registry.yarnpkg.com/agentkeepalive/-/agentkeepalive-4.3.0.tgz#bb999ff07412653c1803b3ced35e50729830a255"
|
||||
@ -991,6 +1016,15 @@ axios@^1.4.0:
|
||||
form-data "^4.0.0"
|
||||
proxy-from-env "^1.1.0"
|
||||
|
||||
axios@^1.5.1:
|
||||
version "1.6.2"
|
||||
resolved "https://registry.yarnpkg.com/axios/-/axios-1.6.2.tgz#de67d42c755b571d3e698df1b6504cde9b0ee9f2"
|
||||
integrity sha512-7i24Ri4pmDRfJTR7LDBhsOTtcm+9kjX5WiY1X3wIisx6G9So3pfMkEiU7emUBe46oceVImccTEM3k6C5dbVW8A==
|
||||
dependencies:
|
||||
follow-redirects "^1.15.0"
|
||||
form-data "^4.0.0"
|
||||
proxy-from-env "^1.1.0"
|
||||
|
||||
babel-jest@^28.1.3:
|
||||
version "28.1.3"
|
||||
resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-28.1.3.tgz#c1187258197c099072156a0a121c11ee1e3917d5"
|
||||
@ -1486,6 +1520,11 @@ cross-spawn@^7.0.3:
|
||||
shebang-command "^2.0.0"
|
||||
which "^2.0.1"
|
||||
|
||||
crypto-js@^4.2.0:
|
||||
version "4.2.0"
|
||||
resolved "https://registry.yarnpkg.com/crypto-js/-/crypto-js-4.2.0.tgz#4d931639ecdfd12ff80e8186dba6af2c2e856631"
|
||||
integrity sha512-KALDyEYgpY+Rlob/iriUtjV6d5Eq+Y191A5g4UqLAi8CyGP9N1+FdVbkc1SxKc2r4YAYqG8JzO2KGL+AizD70Q==
|
||||
|
||||
csv-parser@^2.3.2:
|
||||
version "2.3.2"
|
||||
resolved "https://registry.npmjs.org/csv-parser/-/csv-parser-2.3.2.tgz"
|
||||
@ -1510,6 +1549,11 @@ debug@4, debug@4.3.4, debug@^4.1.0, debug@^4.1.1, debug@^4.3.3:
|
||||
dependencies:
|
||||
ms "2.1.2"
|
||||
|
||||
decode-uri-component@^0.2.2:
|
||||
version "0.2.2"
|
||||
resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.2.tgz#e69dbe25d37941171dd540e024c444cd5188e1e9"
|
||||
integrity sha512-FqUYQ+8o158GyGTrMFJms9qh3CqTKvAqgqsTnkLI8sKu0028orqBhxNMFkFen0zGyg6epACD32pjVk58ngIErQ==
|
||||
|
||||
decompress-tar@^4.0.0, decompress-tar@^4.1.0, decompress-tar@^4.1.1:
|
||||
version "4.1.1"
|
||||
resolved "https://registry.npmjs.org/decompress-tar/-/decompress-tar-4.1.1.tgz"
|
||||
@ -1631,6 +1675,11 @@ dmd@^4.0.5:
|
||||
test-value "^3.0.0"
|
||||
walk-back "^4.0.0"
|
||||
|
||||
dotenv@^16.3.1:
|
||||
version "16.3.1"
|
||||
resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-16.3.1.tgz#369034de7d7e5b120972693352a3bf112172cc3e"
|
||||
integrity sha512-IPzF4w4/Rd94bA9imS68tZBaYyBWSCE47V1RGuMrB94iyTOIEwRmVL2x/4An+6mETpLrKJ5hQkB8W4kFAadeIQ==
|
||||
|
||||
electron-to-chromium@^1.4.202:
|
||||
version "1.4.226"
|
||||
resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.4.226.tgz#837ea1d19b8305a913cd5f31d135681c4b6d63b1"
|
||||
@ -1819,6 +1868,11 @@ fill-range@^7.0.1:
|
||||
dependencies:
|
||||
to-regex-range "^5.0.1"
|
||||
|
||||
filter-obj@^1.1.0:
|
||||
version "1.1.0"
|
||||
resolved "https://registry.yarnpkg.com/filter-obj/-/filter-obj-1.1.0.tgz#9b311112bc6c6127a16e016c6c5d7f19e0805c5b"
|
||||
integrity sha512-8rXg1ZnX7xzy2NGDVkBVaAy+lSlPNwad13BtgSlLuxfIslyt5Vg64U7tFcCt4WS1R0hvtnQybT/IyCkGZ3DpXQ==
|
||||
|
||||
find-replace@^3.0.0:
|
||||
version "3.0.0"
|
||||
resolved "https://registry.npmjs.org/find-replace/-/find-replace-3.0.0.tgz"
|
||||
@ -3428,6 +3482,16 @@ puppeteer@^19.8.5:
|
||||
proxy-from-env "1.1.0"
|
||||
puppeteer-core "19.8.5"
|
||||
|
||||
query-string@<8.x:
|
||||
version "7.1.3"
|
||||
resolved "https://registry.yarnpkg.com/query-string/-/query-string-7.1.3.tgz#a1cf90e994abb113a325804a972d98276fe02328"
|
||||
integrity sha512-hh2WYhq4fi8+b+/2Kg9CEge4fDPvHS534aOOvOZeQ3+Vf2mCFsaFBYj0i+iXcAq6I9Vzp5fjMFBlONvayDC1qg==
|
||||
dependencies:
|
||||
decode-uri-component "^0.2.2"
|
||||
filter-obj "^1.1.0"
|
||||
split-on-first "^1.0.0"
|
||||
strict-uri-encode "^2.0.0"
|
||||
|
||||
react-is@^18.0.0:
|
||||
version "18.2.0"
|
||||
resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.2.0.tgz#199431eeaaa2e09f86427efbb4f1473edb47609b"
|
||||
@ -3532,6 +3596,11 @@ resolve@^1.20.0:
|
||||
path-parse "^1.0.7"
|
||||
supports-preserve-symlinks-flag "^1.0.0"
|
||||
|
||||
retry-axios@<3.x:
|
||||
version "2.6.0"
|
||||
resolved "https://registry.yarnpkg.com/retry-axios/-/retry-axios-2.6.0.tgz#d4dc5c8a8e73982e26a705e46a33df99a28723e0"
|
||||
integrity sha512-pOLi+Gdll3JekwuFjXO3fTq+L9lzMQGcSq7M5gIjExcl3Gu1hd4XXuf5o3+LuSBsaULQH7DiNbsqPd1chVpQGQ==
|
||||
|
||||
retry@^0.12.0:
|
||||
version "0.12.0"
|
||||
resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b"
|
||||
@ -3673,6 +3742,11 @@ source-map@^0.6.0, source-map@^0.6.1:
|
||||
resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
|
||||
integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==
|
||||
|
||||
split-on-first@^1.0.0:
|
||||
version "1.1.0"
|
||||
resolved "https://registry.yarnpkg.com/split-on-first/-/split-on-first-1.1.0.tgz#f610afeee3b12bce1d0c30425e76398b78249a5f"
|
||||
integrity sha512-43ZssAJaMusuKWL8sKUBQXHWOpq8d6CfN/u1p4gUzfJkM05C8rxTmYrkIPTXapZpORA6LkkzcUulJ8FqA7Uudw==
|
||||
|
||||
split2@^2.1.0:
|
||||
version "2.2.0"
|
||||
resolved "https://registry.npmjs.org/split2/-/split2-2.2.0.tgz"
|
||||
@ -3718,6 +3792,11 @@ stream-via@^1.0.4:
|
||||
resolved "https://registry.npmjs.org/stream-via/-/stream-via-1.0.4.tgz"
|
||||
integrity sha512-DBp0lSvX5G9KGRDTkR/R+a29H+Wk2xItOF+MpZLLNDWbEV9tGPnqLPxHEYjmiz8xGtJHRIqmI+hCjmNzqoA4nQ==
|
||||
|
||||
strict-uri-encode@^2.0.0:
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/strict-uri-encode/-/strict-uri-encode-2.0.0.tgz#b9c7330c7042862f6b142dc274bbcc5866ce3546"
|
||||
integrity sha512-QwiXZgpRcKkhTj2Scnn++4PKtWsH0kpzZ62L2R6c/LUVYv7hVnZqcg2+sMuT6R7Jusu1vviK/MFsu6kNJfWlEQ==
|
||||
|
||||
string-length@^4.0.1:
|
||||
version "4.0.2"
|
||||
resolved "https://registry.yarnpkg.com/string-length/-/string-length-4.0.2.tgz#a8a8dc7bd5c1a82b9b3c8b87e125f66871b6e57a"
|
||||
|
Loading…
Reference in New Issue
Block a user