/**
* Helper functions.
*
* @module
* @private
*/
const gdal = require('gdal-async')
const fs = require('fs')
const readline = require('readline')
const { DownloaderHelper } = require('node-downloader-helper')
const csv = require('csv-parser')
const arcgis = require('@esri/arcgis-rest-feature-service')
const path = require('path')
const crypto = require('crypto')
const XLSX = require('xlsx')
const AdmZip = require('adm-zip')
const xpath = require('xpath')
const xmldom = require('xmldom')
/**
* Common geometry field names.
*
* @property {string[]} wkt - Field names for well-known-text (WKT)
* @property {string[]} lon - Field names for longitude
* @property {string[]} lat - Field names for latitude
* @property {string[]} x - Field names for x (longitude, easting)
* @property {string[]} y - Field names for y (latitude, northing)
*/
const GEOMETRY_FIELDS = {
wkt: [
'geom', 'the_geom', 'wkb_geometry', 'shape', 'geo_shape', 'geometrie',
'geometry'
],
lon: [
'longitude', 'lon', 'lng', 'long', 'x_long', 'coord long'
],
lat: [
'latitude', 'lat', 'y_lat', 'coord lat'
],
x: [
'x', 'x_koordina', 'x-koordinate', 'x_coord', 'coordenada x', 'xcoord',
'easting', 'east', 'e', 'point_x'
],
y: [
'y', 'y_koordina', 'y-koordinate', 'y_coord', 'coordenada y', 'ycoord',
'northing', 'north', 'n', 'point_y'
]
}
/**
* Common file extensions for vector datasets.
*
* @property {string[]} 1 - Primary file extensions (take precedence)
* @property {string[]} 2 - Secondary file extensions
*/
const FILE_EXTENSIONS = {
1: ['geojson', 'geojsonl', 'topojson', 'shp', 'vrt', 'gml', 'kml'],
2: ['csv', 'json']
}
/**
* Regular expressions matching vector file extensions supported by GDAL.
*
* @property {RegExp} any - Matches any supported file extension
* @property {RegExp} primary - Matches primary file extensions
* @property {RegExp} secondary - Matches secondary file extensions
*/
const gdalFilePatterns = {
any: new RegExp(`\\.(${getGdalExtensions().join('|')})$`, 'i'),
primary: new RegExp(`\\.(${FILE_EXTENSIONS[1].join('|')})$`, 'i'),
secondary: new RegExp(`\\.(${FILE_EXTENSIONS[2].join('|')})$`, 'i')
}
/**
* Convert GDAL list to string.
*
* @param {Array} list - Array
* @returns {string} List as a pipe-delimited string
*/
function listToString(list) {
return list.join('|')
}
/**
* String formatters by GDAL field type.
*/
const gdalStringFormatters = {
[gdal.OFTDate]: dateToString,
[gdal.OFTTime]: timeToString,
[gdal.OFTDateTime]: datetimeToString,
[gdal.OFTRealList]: listToString,
[gdal.OFTIntegerList]: listToString,
[gdal.OFTInteger64List]: listToString,
[gdal.OFTStringList]: listToString,
[gdal.OFTWideStringList]: listToString
}
/**
* Format a date object as a string.
*
* Formats a date as `YYYY-MM-DD`, `YYYY-MM`, or `YYYY`.
* See https://en.wikipedia.org/wiki/ISO_8601.
*
* @param {object} obj - Date
* @param {integer} obj.year - Year (e.g. `2020`)
* @param {integer} [obj.month] - Month (`1`-`12`)
* @param {integer} [obj.day] - Day of the month (`1`-`31`)
* @returns {string} ISO 8601 date
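* @example
* dateToString({ year: 2020 }) // '2020'
* dateToString({ year: 2020, month: 3 }) // '2020-03'
* dateToString({ year: 2020, month: 3, day: 7 }) // '2020-03-07'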
*/
function dateToString(obj) {
if (!obj) return ''
const yyyy = obj.year.toString().padStart(4, '0')
if (!obj.month) return `${yyyy}`
const mm = obj.month.toString().padStart(2, '0')
if (!obj.day) return `${yyyy}-${mm}`
const dd = obj.day.toString().padStart(2, '0')
return `${yyyy}-${mm}-${dd}`
}
/**
* Format a time object as string.
*
* Formats a time as `hh:mm:ss.*`.
* See https://en.wikipedia.org/wiki/ISO_8601.
*
* @param {object} obj - Time
* @param {integer} [obj.hour=0] - Hour (`0` - `23`)
* @param {integer} [obj.minute=0] - Minute (`0` - `59`)
* @param {number} [obj.second=0] - Second (`0` - `59.*`)
* @returns {string} ISO 8601 time
*/
function timeToString(obj) {
if (!obj) return ''
const hh = (obj.hour || 0).toString().padStart(2, '0')
const mm = (obj.minute || 0).toString().padStart(2, '0')
const ssms = (obj.second || 0).toString().split('.')
const ss = ssms[0].padStart(2, '0')
const ms = ssms[1] ? `.${ssms[1]}` : ''
return `${hh}:${mm}:${ss}${ms}`
}
/**
* Format a GDAL timezone flag as a string.
*
* Formats a GDAL timezone flag as `''` (unknown), `Z` (UTC), or `[+-]hh:mm`.
* See https://en.wikipedia.org/wiki/ISO_8601.
*
* @param {integer} [timezone=0] - GDAL timezone flag
* @returns {string} ISO 8601 timezone
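* @example
* gdalTimezoneToString(0) // '' (unknown)
* gdalTimezoneToString(100) // 'Z'
* gdalTimezoneToString(104) // '+01:00'
* gdalTimezoneToString(80) // '-05:00'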
*/
function gdalTimezoneToString(timezone = 0) {
// TZFlag: 0=unknown, 1=ambiguous, 100=GMT, 104=GMT+1, 80=GMT-5
// See https://gdal.org/development/rfc/rfc56_millisecond_precision.html
const min = timezone > 1 ? (timezone - 100) * 15 : null
const hh = Math.floor(Math.abs(min) / 60).toString().padStart(2, '0')
const mm = (Math.abs(min) % 60).toString().padStart(2, '0')
return min ? `${min > 0 ? '+' : '-'}${hh}:${mm}` : (min == 0 ? 'Z' : '')
}
/**
* Format a datetime object as a string.
*
* Formats a datetime as `YYYY-MM-DDThh:mm:ss.*` and the appropriate timezone.
* See https://en.wikipedia.org/wiki/ISO_8601.
*
* @param {object} obj - Datetime
* @param {integer} obj.year - Year (e.g. `2020`)
* @param {integer} obj.month - Month (`1`-`12`)
* @param {integer} obj.day - Day of the month (`1`-`31`)
* @param {integer} [obj.hour=0] - Hour (`0` - `23`)
* @param {integer} [obj.minute=0] - Minute (`0` - `59`)
* @param {number} [obj.second=0] - Second (`0` - `59.*`)
* @param {integer} [obj.timezone=0] - Timezone flag
* @param {boolean} [truncate=false] - Whether to return only date when hour,
* minute, and second are all zero.
* @param {boolean} [gdalTimezone=true] - Whether `obj.timezone` is a GDAL
* timezone flag (see {@link gdalTimezoneToString}) or already formatted as
* an ISO 8601 timezone.
* @returns {string} ISO 8601 datetime
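* @example
* datetimeToString({ year: 2020, month: 3, day: 7, hour: 12, minute: 30, timezone: 100 })
* // '2020-03-07T12:30:00Z'
* datetimeToString({ year: 2020, month: 3, day: 7 }, true) // '2020-03-07'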
*/
function datetimeToString(obj, truncate = false, gdalTimezone = true) {
if (!obj) return ''
const date = dateToString(obj)
if (truncate && !(obj.hour || obj.minute || obj.second)) {
return date
}
const time = timeToString(obj)
let timezone
if (gdalTimezone) {
timezone = gdalTimezoneToString(obj.timezone)
} else {
timezone = obj.timezone || ''
}
return `${date}T${time}${timezone}`
}
/**
* Reformat a datetime string as an ISO 8601 string.
*
* Reformats a datetime string as `YYYY-MM-DDThh:mm:ss.*` and the appropriate
* timezone. See https://en.wikipedia.org/wiki/ISO_8601.
*
* @param {string} x - Datetime string
* @param {RegExp[]} patterns - Regular expressions with capture groups
* with names from the following list: `year`, `month`, `day`, `hour`, `minute`,
* `second`, `timezone`. Returns result from first matching pattern.
* @returns {string} ISO 8601 datetime
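* @example
* // Illustrative pattern for 'DD/MM/YYYY' strings (an assumption, not built in)
* reformatDatetime(
* '24/09/2020',
* [/^(?<day>[0-9]{2})\/(?<month>[0-9]{2})\/(?<year>[0-9]{4})$/]
* ) // '2020-09-24'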
*/
function reformatDatetime(x, patterns) {
// Skip if already parsed to object by gdal
if (!x || typeof x === 'object') return x
x = x.trim()
for (const pattern of patterns) {
const matches = x.match(pattern)
if (matches) {
return datetimeToString(matches.groups, true, false)
}
}
console.warn('Failed to parse datetime:', x)
return x
}
/**
* Map object properties to a schema iteratively.
*
* @param {object} obj - Object
* @param {object|object[]} mapping - Either a single mapping or an array
* of mappings applied iteratively.
* @param {object} [mapping.crosswalk={}] - Schema crosswalk. Each key is a new
* property name and each value is either the original property name (string) or
* a function called as f(obj).
* @param {boolean} [mapping.keep=false] - Whether to keep original object
* properties
* @param {string} [mapping.prefix=''] - String to append to original property
* names
* @returns {object} Mapped object
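* @example
* // Hypothetical source properties and crosswalk
* mapObject(
* { NAME: 'Oak', HEIGHT_FT: 33 },
* { crosswalk: { common: 'NAME', height: x => x.HEIGHT_FT * 0.3048 } }
* ) // { common: 'Oak', height: 10.0584 }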
*/
function mapObject(obj, mapping) {
let final = obj
if (!Array.isArray(mapping)) mapping = [mapping]
for (const { crosswalk = {}, keep, prefix = '' } of mapping) {
const current = {}
if (keep) {
for (const key in final) {
current[`${prefix}${key}`] = final[key]
}
}
for (const key in crosswalk) {
current[key] = (typeof crosswalk[key] === 'function') ?
crosswalk[key](final) : final[crosswalk[key]]
}
final = current
}
return final
}
/**
* Guess feature layer geometry fields based on their name.
*
* @param {gdal.Layer} layer - Feature layer
* @returns {object[]} Names of fields with potential WKT geometry (wkt),
* geographic coordinates (lon, lat), or geographic or projected coordinates (x,
* y).
*/
function guessGeometryFields(layer) {
const geometry = {}
const names = layer.fields.getNames()
Object.keys(GEOMETRY_FIELDS).forEach(key => {
geometry[key] = names.filter(x =>
GEOMETRY_FIELDS[key].includes(x.toLowerCase()))
})
return geometry
}
/**
* Get the list of vector file extensions supported by GDAL.
*
* @returns {string[]} File extensions
*/
function getGdalExtensions() {
const extensions = []
gdal.drivers.forEach(driver => {
const meta = driver.getMetadata()
if (meta.DCAP_VECTOR === 'YES') {
if (meta.DMD_EXTENSION) extensions.push(meta.DMD_EXTENSION)
if (meta.DMD_EXTENSIONS) extensions.push(...meta.DMD_EXTENSIONS.split(' '))
}
})
return [...new Set(extensions.sort())]
}
/**
* Get the list of GDAL driver names assigned to each file extension.
*
* @returns {object} GDAL driver names by file extension
*/
function getGdalDrivers() {
const drivers = {}
getGdalExtensions().forEach(extension => drivers[extension] = [])
gdal.drivers.forEach(driver => {
const meta = driver.getMetadata()
if (meta.DCAP_VECTOR === 'YES') {
const extensions = []
if (meta.DMD_EXTENSION) {
extensions.push(meta.DMD_EXTENSION)
}
if (meta.DMD_EXTENSIONS) {
extensions.push(...meta.DMD_EXTENSIONS.split(' '))
}
[...new Set(extensions)].forEach(extension =>
drivers[extension].push(driver.description.toLowerCase()))
}
})
return drivers
}
/**
* Get the file extension from a local or remote file path.
*
* @param {string} file - Local or remote file path
* @returns {string} File extension
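* @example
* getFileExtension('data/trees.geojson') // 'geojson'
* getFileExtension('https://example.com/trees.csv?format=download') // 'csv'
* getFileExtension('data/trees') // ''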
*/
function getFileExtension(file) {
const matches = file.match(/\.([^\.\/\?\#]+)(?:$|\?|\#)/)
return matches ? matches[1] : ''
}
/**
* Get the transformation between two spatial reference systems (SRS).
*
* @param {string|gdal.SpatialReference} source - Source SRS
* @param {string|gdal.SpatialReference} target - Target SRS
* @returns {gdal.CoordinateTransformation|undefined} Coordinate transformation,
* or undefined if the two SRS are equal.
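* @example
* // Usage sketch: reproject a point from WGS 84 to Web Mercator
* const transform = getTransform('EPSG:4326', 'EPSG:3857')
* const point = new gdal.Point(145.0, -37.8)
* if (transform) point.transform(transform)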
*/
function getTransform(source, target) {
if (typeof source === 'string') {
source = gdal.SpatialReference.fromUserInput(source)
}
if (typeof target === 'string') {
target = gdal.SpatialReference.fromUserInput(target)
}
// NOTE: Only sure way to test for equality is to test the transformation
const same = source.isSame(target) ||
(source.isSameGeogCS(target) && !source.isProjected() && !target.isProjected())
const sameAxes = isAxesXY(source) === isAxesXY(target)
if (!(same && sameAxes)) {
return new gdal.CoordinateTransformation(source, target)
}
}
/**
* Build the GDAL polygon corresponding to a bounding box.
*
* @param {number[]} bounds - Bounding box [xmin, ymin, xmax, ymax]
* @param {string|gdal.SpatialReference} [srs] - Spatial reference system
* @returns {gdal.Polygon}
*/
function boundsToPolygon(bounds, srs) {
if (typeof srs === 'string') {
srs = gdal.SpatialReference.fromUserInput(srs)
}
const polygon = new gdal.Polygon()
const ring = new gdal.LinearRing()
ring.points.add(new gdal.Point(bounds[0], bounds[1]))
ring.points.add(new gdal.Point(bounds[0], bounds[3]))
ring.points.add(new gdal.Point(bounds[2], bounds[3]))
ring.points.add(new gdal.Point(bounds[2], bounds[1]))
ring.points.add(new gdal.Point(bounds[0], bounds[1]))
polygon.rings.add(ring)
polygon.srs = srs
return polygon
}
/**
* Check whether spatial reference system has x (east), y (north) axis order.
*
* @param {string|gdal.SpatialReference} srs - Spatial reference system
* @returns {boolean} Whether axis order is x, y
*/
function isAxesXY(srs) {
if (typeof srs === 'string') {
srs = gdal.SpatialReference.fromUserInput(srs)
}
const wkt = srs.toWKT()
const xi = wkt.match(/axis\[[^\]]*(east|west|longitude)[^\]]*\]/i)
const yi = wkt.match(/axis\[[^\]]*(north|south|latitude)[^\]]*\]/i)
if (xi && yi) {
return xi.index < yi.index
} else {
// NOTE: Assumes x, y axis order if axes not defined
return true
}
}
/**
* Determine whether geometry x, y has swapped axes relative to a target.
*
* @param {number[]} xy - Coordinates x, y.
* @param {number[]} targetXY - Target coordinates x, y.
* @returns {boolean} Whether x, y has swapped axes relative to target x, y.
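* @example
* // Latitude and longitude swapped relative to the target
* hasSwappedAxes([-37.8, 145.0], [145.0, -37.8]) // true
* hasSwappedAxes([145.0, -37.8], [145.0, -37.8]) // false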
*/
function hasSwappedAxes(xy, targetXY) {
return (
(Math.abs(xy[0] - targetXY[0]) > Math.abs(xy[0] - targetXY[1])) ||
(Math.abs(xy[1] - targetXY[1]) > Math.abs(xy[1] - targetXY[0]))
)
}
/**
* Download a remote file to a directory.
*
* @param {string} url - Path to the remote file.
* @param {string} dir - Local directory to download the file to.
* @param {object} options - Downloader Helper constructor options (see
* https://github.com/hgouveia/node-downloader-helper#options).
* @param {object} listeners - Downloader Helper event listeners (see
* https://github.com/hgouveia/node-downloader-helper#events).
* @returns {Promise<string>} Resolves to the path of the downloaded file, or
* `undefined` if the download failed.
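* @example
* // Usage sketch (hypothetical URL), retrying up to 2 times
* const file = await downloadFile(
* 'https://example.com/trees.csv', 'data/downloads',
* { retry: { maxRetries: 2, delay: 3000 } }
* )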
*/
async function downloadFile(url, dir, options = {}, listeners = {}) {
fs.mkdirSync(dir, { recursive: true })
const downloader = new DownloaderHelper(url, dir, options)
let failure
listeners = {
download: info => console.log(`Downloading ${info.fileName}`),
end: info => console.log(
`Downloaded ${info.fileName} (${(info.downloadedSize / 1e6).toFixed()} MB)`
),
error: error => console.error(`Download failed for ${url}: ${error.message}`),
...listeners
}
for (const key in listeners) {
if (key != 'error') {
downloader.on(key, listeners[key])
}
}
downloader.on('error', error => {
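// NOTE: __retryCount is a private DownloaderHelper property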
const attempt = downloader.__retryCount + 1
const attempts = options.retry ? options.retry.maxRetries + 1 : 1
if (attempt === attempts) {
failure = error
}
})
try {
await downloader.start()
} catch (error) {
failure = error
}
if (failure) {
listeners.error(failure)
} else {
return downloader.getDownloadPath()
}
}
/**
* Read a CSV file.
*
* Uses `csv-parser`, which is faster than GDAL.
*
* @param {string} file - Path to file.
* @param {object} options - Parse options
* (https://www.npmjs.com/package/csv-parser#options).
* @returns {Promise<object[]>}
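* @example
* // Usage sketch, assuming a semicolon-delimited file
* const rows = await readCSV('data/trees.csv', { separator: ';' })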
*/
function readCSV(file, options) {
return new Promise((resolve, reject) => {
const rows = []
fs.createReadStream(file).
pipe(csv(options)).
on('data', (row) => {
rows.push(row)
}).
on('end', () => {
console.log(`${file} successfully parsed`)
resolve(rows)
}).
on('error', reject)
})
}
/**
* Write a CSV file.
*
* Converts values to string and wraps strings containing commas in quotes.
* Objects need not contain the same properties, nor list them in the same
* order.
*
* @param {object[]} objects Objects to write to a CSV file.
* @param {string} [file=null] Path to file.
* @returns {string|null} CSV as a string (if `file` is `null`).
* @example
* objects = [ { x: 1 }, { y: 2 }, { x: 3, y: 3, z: 'foo,bar' } ]
* writeCSV(objects)
* // 'x,y,z\n1,,\n,2,\n3,3,"foo,bar"'
*/
function writeCSV(objects, file = null) {
const columnSet = new Set()
// Load unique column names
objects.forEach(obj => {
Object.keys(obj).forEach(columnSet.add, columnSet)
})
const columns = [...columnSet]
// Generate header
const header = columns.map(x => {
return (typeof (x) === 'string' && x.includes(',')) ? `"${x}"` : String(x)
}).join(',')
// Generate rows
const rows = objects.map(obj => {
return columns.map(key => {
const x = obj[key]
const type = typeof x
if (x == null || (type === 'number' && isNaN(x))) {
return ''
}
return (type === 'string' && x.includes(',')) ? `"${x}"` : String(x)
}).join(',')
})
// Combine
const txt = [header, ...rows].join('\n')
if (file) {
fs.mkdirSync(path.dirname(file), { recursive: true })
fs.writeFileSync(file, txt)
} else {
return txt
}
}
/**
* Convert string to sentence case.
*
* Capitalizes the first character and converts the rest to lower case.
*
* @param {string} s
* @returns {string}
* @example
* toSentenceCase('') // ''
* toSentenceCase('m') // 'M'
* toSentenceCase('malus') // 'Malus'
* toSentenceCase('malus pumila') // 'Malus pumila'
*/
function toSentenceCase(s) {
return s.slice(0, 1).toUpperCase() + s.slice(1).toLowerCase()
}
/**
* Convert string to title case.
*
* Capitalizes the first character of every word and converts the rest to lower case.
*
* @param {string} s
* @returns {string}
* @example
* toTitleCase('') // ''
* toTitleCase('m') // 'M'
* toTitleCase('malus') // 'Malus'
* toTitleCase('GOLDEN DELICIOUS') // 'Golden Delicious'
*/
function toTitleCase(s) {
return s.replace(
/(\w)(\w*)/g,
(_, firstChar, remainder) => firstChar.toUpperCase() + remainder.toLowerCase()
)
}
const ASCII = {
A: 'AⒶÀÁÂẦẤẪẨÃĀĂẰẮẴẲȦǠÄǞẢÅǺǍȀȂẠẬẶḀĄȺⱯ',
AA: 'Ꜳ',
AE: 'ÆǼǢ',
AO: 'Ꜵ',
AU: 'Ꜷ',
AV: 'ꜸꜺ',
AY: 'Ꜽ',
B: 'BⒷḂḄḆɃƂƁ',
C: 'CⒸĆĈĊČÇḈƇȻ',
D: 'DⒹḊĎḌḐḒḎĐƋƊƉ',
DZ: 'DZDŽ',
Dz: 'DzDž',
E: 'EⒺÈÉÊỀẾỄỂẼĒḔḖĔĖËẺĚȄȆẸỆȨḜĘḘḚƐƎ',
F: 'FⒻḞƑ',
G: 'GⒼǴĜḠĞĠǦĢǤƓꞠ',
H: 'HⒽĤḢḦȞḤḨḪĦⱧ',
I: 'IⒾÌÍÎĨĪĬİÏḮỈǏȈȊỊĮḬƗ',
J: 'JⒿĴ',
K: 'KⓀḰǨḲĶḴƘⱩꝀꝂꝄꞢ',
L: 'LⓁĿĹĽḶḸĻḼḺŁȽⱢⱠꝈ',
LJ: 'LJ',
Lj: 'Lj',
M: 'MⓂḾṀṂƜ',
N: 'NⓃǸŃÑṄŇṆŅṊṈȠƝꞐꞤ',
NJ: 'NJ',
Nj: 'Nj',
O: 'OⓄÒÓÔỒỐỖỔÕṌȬṎŌṐṒŎȮȰÖȪỎŐǑȌȎƠỜỚỠỞỢỌỘǪǬØǾƆƟꝊꝌ',
OE: 'Œ',
OI: 'Ƣ',
OO: 'Ꝏ',
OU: 'Ȣ',
P: 'PⓅṔṖƤⱣꝐꝒꝔ',
Q: 'QⓆꝖꝘ',
R: 'RⓇŔṘŘȐȒṚṜŖṞɌⱤꞦ',
S: 'SⓈẞŚṤŜṠŠṦṢṨȘŞⱾꞨ',
T: 'TⓉṪŤṬȚŢṰṮŦƬƮȾ',
TZ: 'Ꜩ',
U: 'UⓊÙÚÛŨṸŪṺŬÜǛǗǕǙỦŮŰǓȔȖƯỪỨỮỬỰỤṲŲṶṴɄ',
V: 'VⓋṼṾƲꝞɅ',
VY: 'Ꝡ',
W: 'WⓌẀẂŴẆẄẈⱲ',
X: 'XⓍẊẌ',
Y: 'YⓎỲÝŶỸȲẎŸỶỴƳɎỾ',
Z: 'ZⓏŹẐŻŽẒẔƵȤⱿⱫꝢ',
a: 'aⓐẚàáâầấẫẩãāăằắẵẳȧǡäǟảåǻǎȁȃạậặḁąⱥɐꜳ',
ae: 'æǽǣ',
ao: 'ꜵ',
au: 'ꜷ',
av: 'ꜹꜻ',
ay: 'ꜽ',
b: 'bⓑḃḅḇƀƃɓ',
c: 'cⓒćĉċčçḉƈȼ',
d: 'dⓓḋďḍḑḓḏđƌɖɗꝺð',
dz: 'dzdž',
e: 'eⓔèéêềếễểẽēḕḗĕėëẻěȅȇẹệȩḝęḙḛɇɛǝ',
f: 'fⓕḟƒ',
g: 'gⓖǵĝḡğġǧģǥɠꞡᵹ',
h: 'hⓗĥḣḧȟḥḩḫẖħⱨɥ',
hv: 'ƕ',
i: 'iⓘìíîĩīĭïḯỉǐȉȋịįḭɨı',
j: 'jⓙĵǰɉ',
k: 'kⓚḱǩḳķḵƙⱪꝁꝃꝅꞣ',
l: 'lⓛŀĺľḷḹļḽḻłƚɫ',
lj: 'lj',
m: 'mⓜḿṁṃɱɯᶆ',
n: 'nⓝǹńñṅňṇņṋṉƞɲʼnꞑꞥ',
nj: 'nj',
o: 'oⓞòóôồốỗổõṍȭṏōṑṓŏȯȱöȫỏőǒȍȏơờớỡởợọộǫǭøǿɔꝋꝍɵ',
oe: 'œ',
oi: 'ƣ',
ou: 'ȣ',
oo: 'ꝏ',
p: 'pⓟṕṗƥᵽꝑꝓꝕ',
q: 'qⓠɋꝗꝙ',
r: 'rⓡŕṙřȑȓṛṝŗṟɍɽ',
s: 'sⓢśṥŝṡšṧṣṩșşȿꞩẛſß',
t: 'tⓣṫẗťṭțţṱṯŧƭʈⱦ',
tz: 'ꜩ',
u: 'uⓤùúûũṹūṻŭüǜǘǖǚủůűǔȕȗưừứữửựụṳųṷṵʉ',
v: 'vⓥṽṿʋꝟʌ',
w: 'wⓦẁẃŵẇẅẘẉⱳ',
x: 'xⓧẋẍxᶍ',
y: 'yⓨỳýŷỹȳẏÿỷẙỵƴɏỿ',
z: 'zⓩźẑżžẓẕƶȥɀⱬ'
}
const ASCII_MAP = {}
for (const key in ASCII) {
for (const letter of ASCII[key]) {
ASCII_MAP[letter] = key
}
}
/**
* Latinize a string.
*
* Converts accents (diacritics, etc) to latin characters.
*
* @param {string} s - String.
* @param {string} keep - Characters to preserve.
* @returns {string}
* @example
* latinize('Leœptura') // 'Leoeptura'
* latinize('Ærenea') // 'AErenea'
* latinize('Fallén') // 'Fallen'
* latinize('Choriozopella') // 'Choriozopella'
* latinize('trägårdhi') // 'tragardhi'
* latinize('rosa-sinensis') // 'rosa-sinensis'
* latinize('ỆᶍǍᶆṔƚÉ áéíóúýčďěňřšťžů') // 'ExAmPlE aeiouycdenrstzu'
*/
function latinize(s, keep = '') {
return s.replace(/[^\u0000-\u007E]/g, x => keep.includes(x) ? x : ASCII_MAP[x] || x)
}
/**
* Interpolate string as template with values from object.
*
* @param {string} x - String to interpolate
* @param {object} obj - Object with properties to use for interpolation
* @returns {string} Interpolated string
* @example
* interpolateString('data/${id}/${id}/input', {id: 'melbourne'})
* // 'data/melbourne/melbourne/input'
* interpolateString('data/${ids}/input', {id: 'melbourne'})
* // 'data/undefined/input' (key 'ids' not in object)
*/
function interpolateString(x, obj) {
let ix = x
let match
while ((match = ix.match(/\$\{([^\}]*)\}/))) {
ix = ix.replace(match[0], obj[match[1]])
}
return ix
}
/**
* Convert string to lowercase and remove whitespace.
*
* @param {string} x - String to reduce
* @returns {string} Reduced string
* @example
* reduceString('United Kingdom') // 'unitedkingdom'
*/
function reduceString(x) {
return x.toLowerCase().replace(/\s/g, '')
}
/**
* Pause code execution.
*
* @param {number} ms - Number of milliseconds to pause.
* @returns {Promise}
*/
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms))
}
/**
* Get features from ArcGIS Feature Server layer.
*
* Features are queried as EPSG:4326 EsriJSON sorted by object ID.
*
* @see https://developers.arcgis.com/rest/services-reference/query-feature-service-layer-.htm
*
* @param {string} url - URL of ArcGIS Feature Server layer.
* @returns {Promise<object>} Layer features.
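* @example
* // Usage sketch (hypothetical layer URL)
* const result = await getLayerFeatures(
* 'https://services.arcgis.com/example/arcgis/rest/services/Trees/FeatureServer/0'
* )
* console.log(result.features.length)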
*/
async function getLayerFeatures(url) {
// Get layer metadata
const layer = await arcgis.getLayer({url: url, f: 'json'})
// Verify that layer can be queried
const capabilities = layer.capabilities.toLowerCase().split(/,\s?/)
if (!capabilities.includes('query')) {
throw new Error('Layer does not support queries')
}
const queryFormats = layer.supportedQueryFormats.toLowerCase().split(/,\s?/)
if (!queryFormats.includes('json')) {
throw new Error('Layer does not support JSON queries')
}
// Whether layer supports pagination
const pagination = layer.advancedQueryCapabilities?.supportsPagination === true
// Whether layer supports 'standard' result type
const standard = (
layer.advancedQueryCapabilities?.supportsQueryWithResultType === true &&
layer.standardMaxRecordCount > layer.maxRecordCount
)
// Cap record count to a reasonable maximum
const count = Math.min(
(standard ? layer.standardMaxRecordCount : layer.maxRecordCount), 20000
)
// Order by object ID field
let orderByField = layer.objectIdField
if (!orderByField) {
for (const field of layer.fields) {
if (field.type.toLowerCase() === 'esrifieldtypeoid') {
orderByField = field.name
break
}
}
}
if (!orderByField) {
throw new Error('Layer does not have an object ID field')
}
// Get layer features
let exceededTransferLimit = true
let offset = 0
let result
while (exceededTransferLimit) {
console.log(`Downloading features (${url}): ${offset} ...`)
const response = await arcgis.queryFeatures({
// HACK: For servers requiring a where clause
where: '1=1',
url: url,
f: 'json',
outFields: '*',
orderByFields: orderByField,
outSR: { wkid: 4326 },
resultType: standard ? 'standard' : 'none',
...(pagination ? {resultOffset: offset, resultRecordCount: count} : {}),
})
if (result) {
result.features.push(...response.features)
} else {
result = response
}
exceededTransferLimit = (
response.exceededTransferLimit === true ||
response.properties?.exceededTransferLimit === true
)
if (exceededTransferLimit) {
if (pagination) {
offset += response.features.length
await sleep(2000)
} else {
throw new Error('Layer transfer limit exceeded')
}
}
}
return result
}
/**
* Write values as newline-delimited JSON.
*
* References:
* - https://www.interline.io/blog/geojsonl-extracts
* - https://jsonlines.org
* - https://stevage.github.io/ndgeojson
*
* @param {Array} values Values to independently write as JSON.
* @param {string} [file=null] Path to file.
* @returns {string|null} JSON Lines as a string (if `file` is `null`).
* @example
* values = [ 1, ['b'], { x: null } ]
* writeJSONL(values) // '1\n["b"]\n{"x":null}'
*/
function writeJSONL(values, file = null) {
const txt = values.map(JSON.stringify).join('\n')
if (file) {
fs.mkdirSync(path.dirname(file), { recursive: true })
fs.writeFileSync(file, txt)
} else {
return txt
}
}
/**
* Read newline-delimited JSON.
*
* @param {string} file - Path to file.
* @returns {object[]}
*/
function readJSONL(file) {
return fs.readFileSync(file, 'utf8').split('\n').filter(x => x).map(JSON.parse)
}
/**
* Read newline-delimited JSON line by line.
*
* @param {string} file - Path to file.
* @yields {object}
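* @example
* // Usage sketch (inside an async function)
* for await (const obj of iterateJSONL('data/trees.jsonl')) console.log(obj)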
*/
async function* iterateJSONL(file) {
const readInterface = readline.createInterface({
input: fs.createReadStream(file),
})
for await (const line of readInterface) {
if (line.trim() === '') {
continue
}
try {
yield JSON.parse(line)
} catch (error) {
console.error(`Error parsing as JSON: '${line}'\n${error.message}`)
}
}
}
/**
* Read file from ZIP archive.
*
* @param {string} zipfile - Path to zip file.
* @param {string} file - Path to file inside zip file.
* @returns {Buffer}
*/
function readFileInZip(zipfile, file) {
const zip = new AdmZip(zipfile)
return zip.readFile(file)
}
/**
* Open Excel-compatible file with GDAL.
*
* Uses `xlsx` because the GDAL bundled with gdal-async does not support XLS,
* XLSX, or CSV with a custom separator.
*
* @param {Buffer|string} data File data.
* @param {object} options
* @param {string} [options.type] File type (buffer, file).
* @param {integer|string} [options.sheet=0] Sheet index or name.
* @returns {gdal.Dataset}
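* @example
* // Usage sketch: open the first sheet of an XLSX file on disk
* const dataset = openExcelWithGdal('data/trees.xlsx', { type: 'file' })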
*/
function openExcelWithGdal(data, {type, sheet = 0} = {}) {
const book = XLSX.read(data, {type, sheets: sheet})
const sheetName = typeof sheet === 'number' ? book.SheetNames[sheet] : sheet
const txt = XLSX.utils.sheet_to_csv(book.Sheets[sheetName])
const buffer = Buffer.from(txt)
const name = crypto.randomBytes(6).toString('hex')
gdal.vsimem.set(buffer, `/vsimem/${name}.csv`)
return gdal.open(`/vsimem/${name}.csv`)
}
/**
* Read all files in a RAR archive to GDAL VSIMEM.
*
* @param {string} file - Path to RAR file.
* @returns {string[]} Paths to files in GDAL VSIMEM.
*/
async function readFilesInRarToVsimem(file) {
// HACK: Import unrar here to prevent it from making program exit on error
const unrar = require('node-unrar-js')
const buffer = Uint8Array.from(fs.readFileSync(file)).buffer
const extractor = await unrar.createExtractorFromData({data: buffer})
const extracted = extractor.extract()
const rarFiles = [...extracted.files]
const vsiFiles = []
for (const rarFile of rarFiles) {
if (rarFile.extraction) {
const vsiFile = path.join('/vsimem/', file, rarFile.fileHeader.name)
gdal.vsimem.set(Buffer.from(rarFile.extraction), vsiFile)
vsiFiles.push(vsiFile)
}
}
return vsiFiles
}
/**
* Open files in GDAL as a single layer (using VRT union).
*
* @param {string[]} files - Paths to files.
* @returns {gdal.Dataset}
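* @example
* // Usage sketch: open two files as one 'union' layer
* const dataset = openFileUnionWithGdal(['data/a.geojson', 'data/b.geojson'])
* const layer = dataset.layers.get(0)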
*/
function openFileUnionWithGdal(files) {
let vrt = '<OGRVRTDataSource><OGRVRTUnionLayer name="union">'
for (const file of files) {
vrt += `<OGRVRTLayer name="${path.parse(file).name}">
<SrcDataSource>${file}</SrcDataSource>
</OGRVRTLayer>`
}
vrt += '</OGRVRTUnionLayer></OGRVRTDataSource>'
const name = crypto.randomBytes(6).toString('hex')
gdal.vsimem.set(Buffer.from(vrt, 'utf8'), `/vsimem/${name}.vrt`)
return gdal.open(`/vsimem/${name}.vrt`)
}
/**
* Open a KISSB XML file with GDAL.
*
* See https://www.carneios.de/kissb/pdf/Spezifikation_Katasteraustauschformat.pdf
* Fields with objects as values are flattened using dot notation.
*
* @param {string|Buffer} file - Path to file or buffer.
* @param {object} params
* @param {string} [params.type=file] - File type (file, buffer).
* @returns {gdal.Dataset}
*/
function openKissbXmlWithGdal(file, {type = 'file'} = {}) {
// Read from file or buffer
let xml
if (type === 'buffer') {
xml = file.toString('utf8')
} else {
xml = fs.readFileSync(file, 'utf8')
}
const parser = new xmldom.DOMParser()
const tree = parser.parseFromString(xml)
// Extract field (property) names by id
const fieldNames = {}
const properties = xpath.select(
"//*[name()='configuration']//*[name()='property']",
tree
)
properties.forEach(property => {
const id = xpath.select("*[name()='id']", property)[0]?.firstChild?.data
const name = xpath.select("*[name()='name']", property)[0]?.firstChild?.data
if (id && name) {
fieldNames[id] = name
}
})
// Extract features (protocols)
const protocols = xpath.select("//*[name()='data']/*[name()='protocol']", tree)
const features = protocols.map(protocol => {
const fields = xpath.select("*/*", protocol)
const feature = {}
for (const field of fields) {
const fieldName = fieldNames[field.nodeName]
const children = field.childNodes
if (children.length === 0) {
// Empty: <field/>
continue
} else if (children.length === 1) {
// Text value: <field>Berlin</field>
const value = children[0].data
if (value) {
feature[fieldName] = value
}
} else {
// Children: <field><genus>Salix</genus><type>alba</type></field>
for (const child of Object.values(children)) {
const name = child.nodeName
if (name !== '#text') {
const value = child.firstChild?.data
if (value) {
feature[`${fieldName}.${name}`] = value
}
}
}
}
}
return feature
})
// HACK: Write features as CSV and open with GDAL
const txt = writeCSV(features)
const name = crypto.randomBytes(6).toString('hex')
gdal.vsimem.set(Buffer.from(txt, 'utf8'), `/vsimem/${name}.csv`)
return gdal.open(`/vsimem/${name}.csv`)
}
/**
* Read ESRI Feature Collection to GDAL VSIMEM as ESRIJSON.
*
* @param {string} file - Path to file.
* @param {object} params
* @param {string} [params.layerName] - Layer name.
* @returns {string} Path to VSIMEM file.
*/
function readEsriFeatureCollectionToVsimem(file, {layerName} = {}) {
const data = JSON.parse(fs.readFileSync(file, 'utf8'))
let layerNames
let layers
// Find layers
if (data.operationalLayers) {
layers = data.operationalLayers
layerNames = data.operationalLayers.map(x => x.id)
} else if (data.layers) {
layers = data.layers
layerNames = layers.map(x => x.layerDefinition?.name)
} else {
throw new Error('No layers or operationalLayers property')
}
// Select layer
let layerIndex
if (layers.length == 0) {
throw new Error('Empty (operational)layers property')
}
if (layers.length == 1) {
layerIndex = 0
} else {
if (!layerName) {
throw new Error(`Multiple layers found: ${layerNames}`)
}
if (!layerNames.includes(layerName)) {
throw new Error(`Layer '${layerName}' not found in ${layerNames}`)
}
layerIndex = layerNames.indexOf(layerName)
}
// Load layer
let layer
if (data.operationalLayers) {
layers = data.operationalLayers[layerIndex].featureCollection?.layers
if (!layers) {
throw new Error('No layers found inside operational layer')
}
if (layers.length > 1) {
layerNames = layers.map(x => x.layerDefinition?.name)
throw new Error(`Multiple layers inside operational layer: ${layerNames}`)
}
layer = layers[0]
} else {
layer = data.layers[layerIndex]
}
// Verify layer
if (!layer.featureSet?.features) {
throw new Error('No features found')
}
if (!layer.layerDefinition) {
throw new Error('No layer definition found')
}
const esrijson = {
objectIdFieldName: layer.layerDefinition.objectIdField,
geometryType: layer.layerDefinition.geometryType || layer.featureSet.geometryType,
spatialReference: layer.layerDefinition.spatialReference,
fields: layer.layerDefinition.fields,
features: layer.featureSet.features,
}
// HACK: Write features as ESRIJSON
const txt = JSON.stringify(esrijson)
const name = crypto.randomBytes(6).toString('hex')
const vsimem = `/vsimem/${name}.json`
gdal.vsimem.set(Buffer.from(txt, 'utf8'), vsimem)
return vsimem
}
/**
* Open ESRI Feature Collection with GDAL.
*
* @param {string} file - Path to file.
* @param {object} params
* @param {string} [params.layerName] - Layer name.
* @returns {gdal.Dataset}
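* @example
* // Usage sketch (hypothetical file and layer name)
* const dataset = openEsriFeatureCollectionWithGdal('data/map.json', { layerName: 'Trees' })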
*/
function openEsriFeatureCollectionWithGdal(file, {layerName} = {}) {
const vsimem = readEsriFeatureCollectionToVsimem(file, {layerName})
return gdal.open(`ESRIJSON:${vsimem}`)
}
/**
* Parse decimal degrees from degrees, minutes, seconds.
*
* @param {string} dms - Degrees, minutes, seconds.
* @param {RegExp} pattern - Regular expression with named capture groups deg,
* min, sec.
* @returns {number|null} Decimal degrees, or `null` if parsing failed.
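* @example
* // Illustrative pattern for DD°MM'SS" strings (an assumption)
* parseDecimalFromDMS(
* `48°51'24"`,
* /(?<deg>[0-9]+)°(?<min>[0-9]+)'(?<sec>[0-9.]+)"/
* ) // 48 + 51/60 + 24/3600 ≈ 48.8567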
*/
function parseDecimalFromDMS(dms, pattern) {
const match = dms.match(pattern)
if (!match) {
console.warn(`Failed to parse DMS: ${dms}`)
return null
}
return (
Number(match.groups.deg) +
Number(match.groups.min) / 60 +
Number(match.groups.sec) / 3600
)
}
module.exports = {
gdalFilePatterns,
gdalStringFormatters,
reformatDatetime,
mapObject,
guessGeometryFields,
getGdalDrivers,
getFileExtension,
getTransform,
boundsToPolygon,
isAxesXY,
hasSwappedAxes,
downloadFile,
readCSV,
writeCSV,
toSentenceCase,
toTitleCase,
latinize,
interpolateString,
reduceString,
getLayerFeatures,
writeJSONL,
readJSONL,
iterateJSONL,
readFileInZip,
openExcelWithGdal,
readFilesInRarToVsimem,
openFileUnionWithGdal,
openKissbXmlWithGdal,
readEsriFeatureCollectionToVsimem,
openEsriFeatureCollectionWithGdal,
parseDecimalFromDMS
}