/** * Helper functions. * * @module * @private */ const gdal = require('gdal-async') const fs = require('fs') const { DownloaderHelper } = require('node-downloader-helper') const csv = require('csv-parser') const arcgis = require('@esri/arcgis-rest-feature-service') const path = require('path') const crypto = require('crypto') const XLSX = require('xlsx') const AdmZip = require('adm-zip') const xpath = require('xpath') const xmldom = require('xmldom') /** * Common geometry field names. * * @property {string[]} wkt - Field names for well-known-text (WKT) * @property {string[]} lon - Field names for longitude * @property {string[]} lat - Field names for latitude * @property {string[]} x - Field names for x (longitude, easting) * @property {string[]} y - Field names for y (latitude, northing) */ const GEOMETRY_FIELDS = { wkt: [ 'geom', 'the_geom', 'wkb_geometry', 'shape', 'geo_shape', 'geometrie', 'geometry' ], lon: [ 'longitude', 'lon', 'lng', 'long', 'x_long', 'coord long' ], lat: [ 'latitude', 'lat', 'y_lat', 'coord lat' ], x: [ 'x', 'x_koordina', 'x-koordinate', 'x_coord', 'coordenada x', 'xcoord', 'easting', 'east', 'e', 'point_x' ], y: [ 'y', 'y_koordina', 'y-koordinate', 'y_coord', 'coordenada y', 'ycoord', 'northing', 'north', 'n', 'point_y' ] } /** * Common file extensions for vector datasets. * * @property {string[]} 1 - Primary file extensions (take precedence) * @property {string[]} 2 - Secondary file extensions */ const FILE_EXTENSIONS = { 1: ['geojson', 'geojsonl', 'topojson', 'shp', 'vrt', 'gml', 'kml'], 2: ['csv', 'json'] } /** * Regular expressions matching vector file extensions supported by GDAL. * * @property {RegExp} any - Matches any supported file extension * @property {RegExp} primary - Matches primary file extensions * @property {RegExp} secondary - Matches secondary file extensions */ const gdalFilePatterns = { any: new RegExp(`\\.(${getGdalExtensions().join('|')})$`, 'i'), primary: new RegExp(`\\.(${FILE_EXTENSIONS[1].join('|')})$`, 'i'), secondary: new RegExp(`\\.(${FILE_EXTENSIONS[2].join('|')})$`, 'i') } /** * Convert GDAL list to string. * * @param {Array} list - Array * @returns {string} List as a pipe-delimited string */ function listToString(list) { return list.join('|') } /** * String formatters by GDAL field type. */ const gdalStringFormatters = { [gdal.OFTDate]: dateToString, [gdal.OFTTime]: timeToString, [gdal.OFTDateTime]: datetimeToString, [gdal.OFTRealList]: listToString, [gdal.OFTIntegerList]: listToString, [gdal.OFTInteger64List]: listToString, [gdal.OFTStringList]: listToString, [gdal.OFTWideStringList]: listToString } /** * Format a date object as a string. * * Formats a date as `YYYY-MM-DD`, `YYYY-MM`, or `YYYY`. * See https://en.wikipedia.org/wiki/ISO_8601. * * @param {object} obj - Date * @param {integer} obj.year - Year (e.g. `2020`) * @param {integer} [obj.month] - Month (`1`-`12`) * @param {integer} [obj.day] - Day of the month (`1`-`31`) * @returns {string} ISO 8601 date */ function dateToString(obj) { if (!obj) return '' const yyyy = obj.year.toString().padStart(4, '0') if (!obj.month) return `${yyyy}` const mm = obj.month.toString().padStart(2, '0') if (!obj.day) return `${yyyy}-${mm}` const dd = obj.day.toString().padStart(2, '0') return `${yyyy}-${mm}-${dd}` } /** * Format a time object as string. * * Formats a time as `hh:mm:ss.*`. * See https://en.wikipedia.org/wiki/ISO_8601. * * @param {object} obj - Time * @param {integer} [obj.hour=0] - Hour (`0` - `23`) * @param {integer} [obj.minute=0] - Minute (`0` - `59`) * @param {number} [obj.second=0] - Second (`0` - `59.*`) * @returns {string} ISO 8601 time */ function timeToString(obj) { if (!obj) return '' const hh = (obj.hour || 0).toString().padStart(2, '0') const mm = (obj.minute || 0).toString().padStart(2, '0') const ssms = (obj.second || 0).toString().split('.') const ss = ssms[0].padStart(2, '0') const ms = ssms[1] ? `.${ssms[1]}` : '' return `${hh}:${mm}:${ss}${ms}` } /** * Format a GDAL timezone flag as a string. * * Formats a GDAL timezone flag as `''` (unknown), `Z` (UTC), or `[+-]hh:mm`. * See https://en.wikipedia.org/wiki/ISO_8601. * * @param {integer} [timezone=0] - GDAL timezone flag * @returns {string} ISO 8601 timezone */ function gdalTimezoneToString(timezone = 0) { // TZFlag: 0=unknown, 1=ambiguous, 100=GMT, 104=GMT+1, 80=GMT-5 // See https://gdal.org/development/rfc/rfc56_millisecond_precision.html const min = timezone > 1 ? (timezone - 100) * 15 : null const hh = Math.floor(Math.abs(min) / 60).toString().padStart(2, '0') const mm = (Math.abs(min) % 60).toString().padStart(2, '0') return min ? `${min > 0 ? '+' : '-'}${hh}:${mm}` : (min == 0 ? 'Z' : '') } /** * Format a datetime object as a string. * * Formats a datetime as `YYYY-MM-DDThh:mm:ss.*` and the appropriate timezone. * See https://en.wikipedia.org/wiki/ISO_8601. * * @param {object} obj - Datetime * @param {integer} obj.year - Year (e.g. `2020`) * @param {integer} obj.month - Month (`1`-`12`) * @param {integer} obj.day - Day of the month (`1`-`31`) * @param {integer} [obj.hour=0] - Hour (`0` - `23`) * @param {integer} [obj.minute=0] - Minute (`0` - `59`) * @param {number} [obj.second=0] - Second (`0` - `59.*`) * @param {integer} [obj.timezone=0] - Timezone flag * @param {boolean} [truncate=false] - Whether to return only date when hour, * minute, and second are all zero. * @param {boolean} [gdalTimezone=true] - Whether `obj.timezone` is a GDAL * timezone flag (see {@link gdalTimezoneToString}) or already formatted as * an ISO 8601 timezone. * @returns {string} ISO 8601 datetime */ function datetimeToString(obj, truncate = false, gdalTimezone = true) { if (!obj) return '' const date = dateToString(obj) if (truncate && !(obj.hour || obj.minute || obj.second)) { return date } const time = timeToString(obj) let timezone if (gdalTimezone) { timezone = gdalTimezoneToString(obj.timezone) } else { timezone = obj.timezone || '' } return `${date}T${time}${timezone}` } /** * Reformat a datetime string as an ISO 8601 string. * * Reformats a datetime string as `YYYY-MM-DDThh:mm:ss.*` and the appropriate * timezone. See https://en.wikipedia.org/wiki/ISO_8601. * * @param {string} x - Datetime string * @param {RegExp[]} patterns - Regular expressions with capture groups * with names from the following list: `year`, `month`, `day`, `hour`, `minute`, * `second`, `timezone`. Returns result from first matching pattern. * @returns {string} ISO 8601 datetime */ function reformatDatetime(x, patterns) { // Skip if already parsed to object by gdal if (!x || typeof x === 'object') return x x = x.trim() for (const pattern of patterns) { const matches = x.match(pattern) if (matches) { return datetimeToString( matches.groups, truncate = true, gdalTimezone = false) } } console.warn('Failed to parse datetime:', x) return x } /** * Map object properties to a schema iteratively. * * @param {object} obj - Object * @param {object|object[]} mapping - Either a single mapping or an array * of mappings applied iteratively. * @param {object} [mapping.crosswalk={}] - Schema crosswalk. Each key is a new * property name and each value is either the original property name (string) or * a function called as f(obj). * @param {boolean} [mapping.keep=false] - Whether to keep original object * properties * @param {string} [mapping.prefix=''] - String to append to original property * names * @returns {object} Mapped object */ function mapObject(obj, mapping) { let final = obj if (!Array.isArray(mapping)) mapping = [mapping] for (const { crosswalk = {}, keep, prefix = '' } of mapping) { const current = {} if (keep) { for (const key in final) { current[`${prefix}${key}`] = final[key] } } for (const key in crosswalk) { current[key] = (typeof crosswalk[key] === 'function') ? crosswalk[key](final) : final[crosswalk[key]] } final = current } return final } /** * Guess feature layer geometry fields based on their name. * * @param {gdal.Layer} layer - Feature layer * @returns {object[]} Names of fields with potential WKT geometry (wkt), * geographic coordinates (lon, lat), or geographic or projected coordinates (x, * y). */ function guessGeometryFields(layer) { const geometry = {} const names = layer.fields.getNames() Object.keys(GEOMETRY_FIELDS).forEach(key => { geometry[key] = names.filter(x => GEOMETRY_FIELDS[key].includes(x.toLowerCase())) }) return geometry } /** * Get the list of vector file extensions supported by GDAL. * * @returns {string[]} File extensions */ function getGdalExtensions() { const extensions = [] gdal.drivers.forEach(driver => { const meta = driver.getMetadata() if (meta.DCAP_VECTOR === 'YES') { if (meta.DMD_EXTENSION) extensions.push(meta.DMD_EXTENSION) if (meta.DMD_EXTENSIONS) extensions.push(...meta.DMD_EXTENSIONS.split(' ')) } }) return [...new Set(extensions.sort())] } /** * Get the list of GDAL driver names assigned to each file extension. * * @returns {object} GDAL driver names by file extension */ function getGdalDrivers() { const drivers = {} getGdalExtensions().forEach(extension => drivers[extension] = []) gdal.drivers.forEach(driver => { const meta = driver.getMetadata() if (meta.DCAP_VECTOR === 'YES') { const extensions = [] if (meta.DMD_EXTENSION) { extensions.push(meta.DMD_EXTENSION) } if (meta.DMD_EXTENSIONS) { extensions.push(...meta.DMD_EXTENSIONS.split(' ')) } [...new Set(extensions)].forEach(extension => drivers[extension].push(driver.description.toLowerCase())) } }) return drivers } /** * Get the file extension from a local or remote file path. * * @param {string} file - Local or remote file path * @returns {string} File extension */ function getFileExtension(file) { const matches = file.match(/\.([^\.\/\?\#]+)(?:$|\?|\#)/) return matches ? matches[1] : '' } /** * Get the transformation between two spatial reference systems (SRS). * * @param {string|gdal.SpatialReference} source - Source SRS * @param {string|gdal.SpatialReference} target - Target SRS * @returns {gdal.CoordinateTransformation|undefined} Coordinate transformation, * or undefined if the two SRS are equal. */ function getTransform(source, target) { if (typeof source === 'string') { source = gdal.SpatialReference.fromUserInput(source) } if (typeof target === 'string') { target = gdal.SpatialReference.fromUserInput(target) } // NOTE: Only sure way to test for equality is to test the transformation const same = source.isSame(target) || (source.isSameGeogCS(target) && !source.isProjected() && !target.isProjected()) const sameAxes = isAxesXY(source) === isAxesXY(target) if (!(same && sameAxes)) { return new gdal.CoordinateTransformation(source, target) } } /** * Build the GDAL polygon corresponding to a bounding box. * * @param {number[]} bounds - Bounding box [xmin, ymin, xmax, ymax] * @param {string|gdal.SpatialReference} [srs] - Spatial reference system * @returns {gdal.Polygon} */ function boundsToPolygon(bounds, srs) { if (typeof srs === 'string') { srs = gdal.SpatialReference.fromUserInput(srs) } const polygon = new gdal.Polygon() const ring = new gdal.LinearRing() ring.points.add(new gdal.Point(bounds[0], bounds[1])) ring.points.add(new gdal.Point(bounds[0], bounds[3])) ring.points.add(new gdal.Point(bounds[2], bounds[3])) ring.points.add(new gdal.Point(bounds[2], bounds[1])) ring.points.add(new gdal.Point(bounds[0], bounds[1])) polygon.rings.add(ring) polygon.srs = srs return polygon } /** * Check whether spatial reference system has x (east), y (north) axis order. * * @param {string|gdal.SpatialReference} srs - Spatial reference system */ function isAxesXY(srs) { if (typeof srs === 'string') { srs = gdal.SpatialReference.fromUserInput(srs) } wkt = srs.toWKT() xi = wkt.match(/axis\[[^\]]*(east|west|longitude)[^\]]*\]/i) yi = wkt.match(/axis\[[^\]]*(north|south|latitude)[^\]]*\]/i) if (xi && xi) { return xi.index < yi.index } else { // NOTE: Assumes x, y axis order if axes not defined return true } } /** * Determine whether geometry x, y has swapped axes relative to a target. * * @param {number[]} xy - Coordinates x, y. * @param {number[]} targetXY - Target coordinates x, y. * @returns {boolean} Whether x, y has swapped axes relative to target x, y. */ function hasSwappedAxes(xy, targetXY) { return ( (Math.abs(xy[0] - targetXY[0]) > Math.abs(xy[0] - targetXY[1])) || (Math.abs(xy[1] - targetXY[1]) > Math.abs(xy[1] - targetXY[0])) ) } /** * Download a remote file to a directory. * * @param {string} url - Path to the remote file. * @param {string} dir - Local directory to download the file to. * @param {object} options - Downloader Helper constructor options (see * https://github.com/hgouveia/node-downloader-helper#options. * @param {object} listeners - Downloader Helper event listeners (see * https://github.com/hgouveia/node-downloader-helper#events). * @returns {Promise} Resolves to the path of the downloaded file. */ async function downloadFile(url, dir, options = {}, listeners = {}) { fs.mkdirSync(dir, { recursive: true }) const downloader = new DownloaderHelper(url, dir, options) let failure listeners = { download: info => console.log(`Downloading ${info.fileName}`), end: info => console.log( `Downloaded ${info.fileName} (${(info.downloadedSize / 1e6).toFixed()} MB)` ), error: error => console.error(`Download failed for ${url}: ${error.message}`), ...listeners } for (const key in listeners) { if (key != 'error') { downloader.on(key, listeners[key]) } } downloader.on('error', error => { const attempt = downloader.__retryCount + 1 const attempts = options.retry ? options.retry.maxRetries + 1 : 1 if (attempt === attempts) { failure = error } }) try { await downloader.start() } catch (error) { failure = error } if (failure) { listeners.error(failure) } else { return downloader.getDownloadPath() } } /** * Read a CSV file. * * Uses `csv-parser`, which is faster than GDAL. * * @param {string} file - Path to file. * @param {object} options - Parse options * (https://www.npmjs.com/package/csv-parser#options). * @returns {Promise} */ function readCSV(file, options) { return new Promise((resolve, reject) => { const rows = [] fs.createReadStream(file). pipe(csv(options)). on('data', (row) => { rows.push(row) }). on('end', () => { console.log(`${file} successfully parsed`) resolve(rows) }). on('error', reject) }) } /** * Write a CSV file. * * Converts values to string and wraps strings containing commas in quotes. * Objects need not contain the same properties, nor in the same order. * * @param {object[]} objects – Objects to write to a CSV file. * @param {string} [file=null] – Path to file. * @returns {string|null} CSV as a string (if `file` is `null`). * @example * objects = [ { x: 1 }, { y: 2 }, { x: 3, y: 3, z: 'foo,bar' } ] * writeCSV(objects) * // 'x,y,z\n1,,\n,2,\n3,3,"foo,bar"' */ function writeCSV(objects, file = null) { const columnSet = new Set() // Load unique column names objects.forEach(obj => { Object.keys(obj).forEach(columnSet.add, columnSet) }) const columns = [...columnSet] // Generate header const header = columns.map(x => { return (typeof (x) === 'string' && x.includes(',')) ? `"${x}"` : String(x) }).join(',') // Generate rows const rows = objects.map(obj => { return columns.map(key => { const x = obj[key] const type = typeof x if (x == null || (type === 'number' && isNaN(x))) { return '' } return (type === 'string' && x.includes(',')) ? `"${x}"` : String(x) }).join(',') }) // Combine const txt = [header, ...rows].join('\n') if (file) { fs.mkdirSync(path.dirname(file), { recursive: true }) fs.writeFileSync(file, [header, ...rows].join('\n')) } else { return txt } } /** * Convert string to sentence case. * * Capitalizes the first character and converts the rest to lower case. * * @param {string} s * @returns {string} * @example * toSentenceCase('') // '' * toSentenceCase('m') // 'M' * toSentenceCase('malus') // 'Malus' * toSentenceCase('malus pumila') // 'Malus pumila' */ function toSentenceCase(s) { return s.slice(0, 1).toUpperCase() + s.slice(1).toLowerCase() } /** * Convert string to title case. * * Capitalizes the first character of every word and converts the rest to lower case. * * @param {*} s * @example * toTitleCase('') // '' * toTitleCase('m') // 'M' * toTitleCase('malus') // 'Malus' * toTitleCase('GOLDEN DELICIOUS') // 'Golden Delicious' */ function toTitleCase(s) { return s.replace( /(\w)(\w*)/g, (_, firstChar, remainder) => firstChar.toUpperCase() + remainder.toLowerCase() ) } const ASCII = { A: 'AⒶAÀÁÂẦẤẪẨÃĀĂẰẮẴẲȦǠÄǞẢÅǺǍȀȂẠẬẶḀĄȺⱯ', AA: 'Ꜳ', AE: 'ÆǼǢ', AO: 'Ꜵ', AU: 'Ꜷ', AV: 'ꜸꜺ', AY: 'Ꜽ', B: 'BⒷBḂḄḆɃƂƁ', C: 'CⒸCĆĈĊČÇḈƇȻ', D: 'DⒹDḊĎḌḐḒḎĐƋƊƉ', DZ: 'DZDŽ', Dz: 'DzDž', E: 'EⒺEÈÉÊỀẾỄỂẼĒḔḖĔĖËẺĚȄȆẸỆȨḜĘḘḚƐƎ', F: 'FⒻFḞƑ', G: 'GⒼGǴĜḠĞĠǦĢǤƓꞠ', H: 'HⒽHĤḢḦȞḤḨḪĦⱧ', I: 'IⒾIÌÍÎĨĪĬİÏḮỈǏȈȊỊĮḬƗ', J: 'JⒿJĴ', K: 'KⓀKḰǨḲĶḴƘⱩꝀꝂꝄꞢ', L: 'LⓁLĿĹĽḶḸĻḼḺŁȽⱢⱠꝈ', LJ: 'LJ', Lj: 'Lj', M: 'MⓂMḾṀṂƜ', N: 'NⓃNǸŃÑṄŇṆŅṊṈȠƝꞐꞤ', NJ: 'NJ', Nj: 'Nj', O: 'OⓄOÒÓÔỒỐỖỔÕṌȬṎŌṐṒŎȮȰÖȪỎŐǑȌȎƠỜỚỠỞỢỌỘǪǬØǾƆƟꝊꝌ', OE: 'Œ', OI: 'Ƣ', OO: 'Ꝏ', OU: 'Ȣ', P: 'PⓅPṔṖƤⱣꝐꝒꝔ', Q: 'QⓆQꝖꝘ', R: 'RⓇRŔṘŘȐȒṚṜŖṞɌⱤꝚꞦ', S: 'SⓈSẞŚṤŜṠŠṦṢṨȘŞⱾꞨ', T: 'TⓉTṪŤṬȚŢṰṮŦƬƮȾ', TZ: 'Ꜩ', U: 'UⓊUÙÚÛŨṸŪṺŬÜǛǗǕǙỦŮŰǓȔȖƯỪỨỮỬỰỤṲŲṶṴɄ', V: 'VⓋVṼṾƲꝞɅ', VY: 'Ꝡ', W: 'WⓌWẀẂŴẆẄẈⱲ', X: 'XⓍXẊẌ', Y: 'YⓎYỲÝŶỸȲẎŸỶỴƳɎỾ', Z: 'ZⓏZŹẐŻŽẒẔƵȤⱿⱫꝢ', a: 'aⓐaẚàáâầấẫẩãāăằắẵẳȧǡäǟảåǻǎȁȃạậặḁąⱥɐꜳ', ae: 'æǽǣ', ao: 'ꜵ', au: 'ꜷ', av: 'ꜹꜻ', ay: 'ꜽ', b: 'bⓑbḃḅḇƀƃɓ', c: 'cⓒcćĉċčçḉƈȼ', d: 'dⓓdḋďḍḑḓḏđƌɖɗꝺð', dz: 'dzdž', e: 'eⓔeèéêềếễểẽēḕḗĕėëẻěȅȇẹệȩḝęḙḛɇɛǝ', f: 'fⓕfḟƒ', g: 'gⓖgǵĝḡğġǧģǥɠꞡᵹ', h: 'hⓗhĥḣḧȟḥḩḫẖħⱨɥ', hv: 'ƕ', i: 'iⓘiìíîĩīĭïḯỉǐȉȋịįḭɨı', j: 'jⓙjĵǰɉ', k: 'kⓚkḱǩḳķḵƙⱪꝁꝃꝅꞣ', l: 'lⓛlŀĺľḷḹļḽḻłƚɫ', lj: 'lj', m: 'mⓜmḿṁṃɱɯᶆ', n: 'nⓝnǹńñṅňṇņṋṉƞɲʼnꞑꞥ', nj: 'nj', o: 'oⓞoòóôồốỗổõṍȭṏōṑṓŏȯȱöȫỏőǒȍȏơờớỡởợọộǫǭøǿɔꝋꝍɵ', oe: 'œ', oi: 'ƣ', ou: 'ȣ', oo: 'ꝏ', p: 'pⓟpṕṗƥᵽꝑꝓꝕ', q: 'qⓠqɋꝗꝙ', r: 'rⓡrŕṙřȑȓṛṝŗṟɍɽ', s: 'sⓢsśṥŝṡšṧṣṩșşȿꞩẛſß', t: 'tⓣtṫẗťṭțţṱṯŧƭʈⱦ', tz: 'ꜩ', u: 'uⓤuùúûũṹūṻŭüǜǘǖǚủůűǔȕȗưừứữửựụṳųṷṵʉ', v: 'vⓥvṽṿʋꝟʌ', w: 'wⓦwẁẃŵẇẅẘẉⱳ', x: 'xⓧẋẍxᶍx', y: 'yⓨyỳýŷỹȳẏÿỷẙỵƴɏỿ', z: 'zⓩzźẑżžẓẕƶȥɀⱬ' } const ASCII_MAP = {} for (const key in ASCII) { for (const letter of ASCII[key]) { ASCII_MAP[letter] = key } } /** * Latinize a string. * * Converts accents (diacritics, etc) to latin characters. * * @param {string} s - String. * @param {string} keep - Characters to preserve. * @returns {string} * @example * latinize('Leœptura') // 'Leoeptura' * latinize('Ærenea') // 'AErenea' * latinize('Fallén') // 'Fallen' * latinize('Choriozopella') // 'Choriozopella' * latinize('trägårdhi') // 'tragardhi' * latinize('rosa-sinensis') // 'rosa-sinensis' * latinize('ỆᶍǍᶆṔƚÉ áéíóúýčďěňřšťžů') // 'ExAmPlE aeiouycdenrstzu' */ function latinize(s, keep = 'x') { return s.replace(/[^\u0000-\u007E]/g, x => keep.includes(x) ? x : ASCII_MAP[x] || x) } /** * Interpolate string as template with values from object. * * @param {string} x - String to interpolate * @param {object} obj - Object with properties to use for interpolation * @example * interpolateString('data/${id}/${id}/input', {id: 'melbourne'}) * interpolateString('data/${ids}/input', {id: 'melbourne'}) */ function interpolateString(x, obj) { let ix = x let match while ((match = ix.match(/\$\{([^\}]*)\}/))) { ix = ix.replace(match[0], obj[match[1]]) } return ix } /** * Convert string to lowercase and remove whitespace. * * @param {string} x - String to reduce * @example * reduceString('United Kingdom') */ function reduceString(x) { return x.toLowerCase().replace(/\s/, '') } /** * Pause code execution. * * @param {number} ms - Number of milliseconds to pause. * @returns {Promise} */ function sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)) } /** * Get features from ArcGIS Feature Server layer. * * Features are queried as EPSG:4326 EsriJSON sorted by object ID. * * @see https://developers.arcgis.com/rest/services-reference/query-feature-service-layer-.htm * * @param {string} url - URL of ArcGIS Feature Server layer. * @returns {Promise} Layer features. */ async function getLayerFeatures(url) { // Get layer metadata const layer = await arcgis.getLayer({url: url, f: 'json'}) // Verify that layer can be queried const capabilities = layer.capabilities.toLowerCase().split(/,\s?/) if (!capabilities.includes('query')) { throw new Error('Layer does not support queries') } const queryFormats = layer.supportedQueryFormats.toLowerCase().split(/,\s?/) if (!queryFormats.includes('json')) { throw new Error('Layer does not support JSON queries') } // Whether layer supports pagination const pagination = layer.advancedQueryCapabilities?.supportsPagination === true // Whether layer supports 'standard' result type const standard = ( layer.advancedQueryCapabilities?.supportsQueryWithResultType === true && layer.standardMaxRecordCount > layer.maxRecordCount ) // Cap record count to a reasonable maximum const count = Math.min( (standard ? layer.standardMaxRecordCount : layer.maxRecordCount), 20000 ) // Order by object ID field let orderByField = layer.objectIdField if (!orderByField) { for (const field of layer.fields) { if (field.type.toLowerCase() === 'esrifieldtypeoid') { orderByField = field.name break } } } if (!orderByField) { throw new Error('Layer does not have an object ID field') } // Get layer features let exceededTransferLimit = true let offset = 0 let result while (exceededTransferLimit) { console.log(`Downloading features (${url}): ${offset} – ...`) const response = await arcgis.queryFeatures({ // HACK: For servers requring a where clause where: '1=1', url: url, f: 'json', outFields: '*', orderByFields: orderByField, outSR: { wkid: 4326 }, resultType: standard ? 'standard' : 'none', ...(pagination ? {resultOffset: offset, resultRecordCount: count} : {}), }) if (result) { result.features.push(...response.features) } else { result = response } exceededTransferLimit = ( response.exceededTransferLimit === true || response.properties?.exceededTransferLimit === true ) if (exceededTransferLimit) { if (pagination) { offset += response.features.length await sleep(2000) } else { throw new Error('Layer transfer limit exceeded') } } } return result } /** * Write values as newline-delimited JSON. * * References: * - https://www.interline.io/blog/geojsonl-extracts * - https://jsonlines.org * - https://stevage.github.io/ndgeojson * * @param {Array} values Values to independently write as JSON. * @param {string} [file=null] Path to file. * @returns {string|null} JSON Lines as a string (if `file` is `null`). * @example * values = [ 1, ['b'], { x: null } ] * writeJSONL(values) // '1\n["b"]\n{"x":null}' */ function writeJSONL(values, file = null) { const txt = values.map(JSON.stringify).join('\n') if (file) { fs.mkdirSync(path.dirname(file), { recursive: true }) fs.writeFileSync(file, txt) } else { return txt } } /** * Read newline-delimited JSON. * * @param {string} file - Path to file. * @returns {object} */ function readJSONL(file) { return fs.readFileSync(file, 'utf8').split('\n').filter(x => x).map(JSON.parse) } /** * Read newline-delimited JSON line by line. * * @param {string} file - Path to file. * @yields {object} */ async function* iterateJSONL(file) { const readInterface = readline.createInterface({ input: fs.createReadStream(file), }) for await (const line of readInterface) { if (line.trim() === '') { continue } try { yield JSON.parse(line) } catch (error) { console.error(`Error parsing as JSON: '${line}'\n${error.message}`) } } } /** * Read file from ZIP archive. * * @param {string} zipfile - Path to zip file. * @param {string} file - Path to file inside zip file. * @returns {Buffer} */ function readFileInZip(zipfile, file) { const zip = new AdmZip(zipfile) return zip.readFile(file) } /** * Open Excel-compatible file with GDAL. * * Uses `xlsx` because bundled GDAL (gdal-async) does not support * XLS, XLSX, and CSV with custom separator. * * @param {Buffer|string} data – File data. * @param {object} options * @param {string} [options.type] – File type (buffer, file). * @param {integer} [options.sheet=0] – Sheet index or name. * @returns {gdal.Dataset} */ function openExcelWithGdal(data, {type, sheet = 0} = {}) { const book = XLSX.read(data, {type, sheets: sheet}) const txt = XLSX.utils.sheet_to_csv(book.Sheets[book.SheetNames[sheet]]) const buffer = Buffer.from(txt) const name = crypto.randomBytes(6).toString('hex') gdal.vsimem.set(buffer, `/vsimem/${name}.csv`) return gdal.open(`/vsimem/${name}.csv`) } /** * Read all files in a RAR archive to GDAL VSIMEM. * * @param {string} rarfile - Path to RAR file. * @returns {string[]} Paths to files in GDAL VSIMEM. */ async function readFilesInRarToVsimem(file) { // HACK: Import unrar here to prevent it from making program exit on error const unrar = require('node-unrar-js') const buffer = Uint8Array.from(fs.readFileSync(file)).buffer const extractor = await unrar.createExtractorFromData({data: buffer}) const extracted = extractor.extract() const rarFiles = [...extracted.files] const vsiFiles = [] for (const rarFile of rarFiles) { if (rarFile.extraction) { const vsiFile = path.join('/vsimem/', file, rarFile.fileHeader.name) gdal.vsimem.set(Buffer.from(rarFile.extraction), vsiFile) vsiFiles.push(vsiFile) } } return vsiFiles } /** * Open files in GDAL as a single layer (using VRT union). * * @param {string[]} files - Paths to files. * @returns {gdal.Dataset} */ function openFileUnionWithGdal(files) { let vrt = '' for (const file of files) { vrt += ` ${file} ` } vrt += '' const name = crypto.randomBytes(6).toString('hex') gdal.vsimem.set(Buffer.from(vrt, 'utf8'), `/vsimem/${name}.vrt`) return gdal.open(`/vsimem/${name}.vrt`) } /** * Open a KISSB XML file with GDAL. * * See https://www.carneios.de/kissb/pdf/Spezifikation_Katasteraustauschformat.pdf * Fields with objects as values are flattened using dot notation. * * @param {string|Buffer} file - Path to file or buffer. * @param {object} params * @param {string} [params.type=file] - File type (file, buffer). * @returns {gdal.Dataset} */ function openKissbXmlWithGdal(file, {type = 'file'} = {}) { // Read from file or buffer let xml if (type === 'buffer') { xml = file.toString('utf8') } else { xml = fs.readFileSync(file, 'utf8') } const parser = new xmldom.DOMParser() const tree = parser.parseFromString(xml) // Extract field (property) names by id const fieldNames = {} const properties = xpath.select( "//*[name()='configuration']//*[name()='property']", tree ) properties.forEach(property => { const id = xpath.select("*[name()='id']", property)?.[0].firstChild.data const name = xpath.select("*[name()='name']", property)?.[0].firstChild.data if (id && name) { fieldNames[id] = name } }) // Extract features (protocols) const protocols = xpath.select("//*[name()='data']/*[name()='protocol']", tree) const features = protocols.map(protocol => { const fields = xpath.select("*/*", protocol) const feature = {} for (const field of fields) { const fieldName = fieldNames[field.nodeName] const children = field.childNodes if (children.length === 0) { // Empty: continue } else if (children.length === 1) { // Text value: Berlin const value = children[0].data if (value) { feature[fieldName] = value } } else { // Children: Salixalba for (const child of Object.values(children)) { const name = child.nodeName if (name !== '#text') { const value = child.firstChild?.data if (value) { feature[`${fieldName}.${name}`] = value } } } } } return feature }) // HACK: Write features as CSV and open with GDAL const txt = writeCSV(features) const name = crypto.randomBytes(6).toString('hex') gdal.vsimem.set(Buffer.from(txt, 'utf8'), `/vsimem/${name}.csv`) return gdal.open(`/vsimem/${name}.csv`) } /** * Read ESRI Feature Collection to GDAL VSIMEM as ESRIJSON. * * @param {string} file - Path to file. * @param {object} params * @param {string} [params.layerName] - Layer name. * @returns {string} Path to VSIMEM file. */ function readEsriFeatureCollectionToVsimem(file, {layerName} = {}) { const data = JSON.parse(fs.readFileSync(file, 'utf8')) let layerNames let layers // Find layers if (data.operationalLayers) { layers = data.operationalLayers layerNames = data.operationalLayers.map(x => x.id) } else if (data.layers) { layers = data.layers layerNames = layers.map(x => x.layerDefinition?.name) } else { throw new Error('No layers or operationalLayers property') } // Select layer let layerIndex if (layers.length == 0) { throw new Error('Empty (operational)layers property') } if (layers.length == 1) { layerIndex = 0 } else { if (!layerName) { throw new Error(`Multiple layers found: ${layerNames}`) } if (!layerNames.includes(layerName)) { throw new Error(`Layer '${layerName}' not found in ${layerNames}`) } layerIndex = layerNames.indexOf(layerName) } // Load layer let layer if (data.operationalLayers) { layers = data.operationalLayers[layerIndex].featureCollection?.layers if (!layers) { throw new Error('No layers found inside operational layer') } if (layers.length > 1) { layerNames = layers.map(x => x.layerDefinition?.name) throw new Error(`Multiple layers inside operational layer: ${layerNames}`) } layer = layers[0] } else { layer = data.layers[layerIndex] } // Verify layer if (!layer.featureSet?.features) { throw new Error('No features found') } if (!layer.layerDefinition) { throw new Error('No layer definition found') } const esrijson = { objectIdFieldName: layer.layerDefinition.objectIdField, geometryType: layer.layerDefinition.geometryType || layer.featureSet.geometryType, spatialReference: layer.layerDefinition.spatialReference, fields: layer.layerDefinition.fields, features: layer.featureSet.features, } // HACK: Write features as ESRIJSON const txt = JSON.stringify(esrijson) const name = crypto.randomBytes(6).toString('hex') const vsimem = `/vsimem/${name}.json` gdal.vsimem.set(Buffer.from(txt, 'utf8'), vsimem) return vsimem } /** * Open ESRI Feature Collection with GDAL. * * @param {string} file - Path to file. * @param {object} params * @param {string} [params.layerName] - Layer name. * @returns {gdal.Dataset} */ function openEsriFeatureCollectionWithGdal(file, {layerName} = {}) { vsimem = readEsriFeatureCollectionToVsimem(file, {layerName}) return gdal.open(`ESRIJSON:${vsimem}`) } /** * Parse decimal degrees from degrees, minutes, seconds. * * @param {string} dms - Degrees, minutes, seconds. * @param {RegExp} pattern - Regular expression with named capture groups deg, * min, sec. * @returns {number} Decimal degrees. */ function parseDecimalFromDMS(dms, pattern) { const match = dms.match(pattern) if (!match) { console.warn(`Failed to parse DMS: ${dms}`) return null } return ( Number(match.groups.deg) + Number(match.groups.min) / 60 + Number(match.groups.sec) / 3600 ) } module.exports = { gdalFilePatterns, gdalStringFormatters, reformatDatetime, mapObject, guessGeometryFields, getGdalDrivers, getFileExtension, getTransform, boundsToPolygon, isAxesXY, hasSwappedAxes, downloadFile, readCSV, writeCSV, toSentenceCase, toTitleCase, latinize, interpolateString, reduceString, getLayerFeatures, writeJSONL, readJSONL, iterateJSONL, readFileInZip, openExcelWithGdal, readFilesInRarToVsimem, openFileUnionWithGdal, openKissbXmlWithGdal, readEsriFeatureCollectionToVsimem, openEsriFeatureCollectionWithGdal, parseDecimalFromDMS }