Move type definitions to separate module

This commit is contained in:
ezwelty 2024-02-23 15:41:21 +01:00 committed by Ethan Welty
parent d571657b26
commit 18b47ab7b1
7 changed files with 238 additions and 222 deletions

View File

@ -9,6 +9,7 @@ const stream = require('stream')
const streamPipeline = util.promisify(stream.pipeline)
const helpers = require('./helpers')
const {ArchiveEntry} = require('./types')
/**
* Download file and compute MD5 hash of the stream.
@ -152,7 +153,7 @@ function buildPath({url, checksum, date = new Date()} = {}) {
*
* @param {object} params
* @param {Date} params.date
* @returns {Promise<object>}
* @returns {Promise<ArchiveEntry>}
*/
async function log({ date = new Date(), ...props } = {}) {
let entry = { date, ...props }
@ -238,7 +239,7 @@ function guessFilename({headers = {}, defaultBasename = 'response', url = null}
* @param {string} params.filename
* @param {string} params.url
* @param {Date} params.date
* @returns {object}
* @returns {ArchiveEntry}
*/
function logData({ data, filename, url, date = new Date(), ...props } = {}) {
const dir = buildPath(url, date)
@ -257,7 +258,7 @@ function logData({ data, filename, url, date = new Date(), ...props } = {}) {
* @param {object} options
* @param {number} [options.limit] - Maximum number of results to return
* @param {number} [options.maxDays] - Maximum age of result in days
* @returns {object[]} Entries that match search criteria, sorted by date
* @returns {ArchiveEntry[]} Entries that match search criteria, sorted by date
* descending.
*/
function search(params, {limit, maxDays} = {}) {

View File

@ -8,11 +8,11 @@
/**
* Build regular expression.
*
* @param {object} obj
* @param {string[]} obj.values - Values (joined with '|')
* @param {string} obj.prefix - Prefix for each value (e.g. '^')
* @param {string} obj.suffix - Suffix for each value (e.g. '$')
* @param {string} flags - Search flags (e.g. 'i')
* @param {object} params
* @param {string[]} params.values - Values (joined with '|')
* @param {string} params.prefix - Prefix for each value (e.g. '^')
* @param {string} params.suffix - Suffix for each value (e.g. '$')
* @param {string} params.flags - Search flags (e.g. 'i')
* @returns {RegExp}
*/
function buildPattern({ values, prefix = '', suffix = '', flags = '' }) {
@ -80,7 +80,7 @@ DELETE.pattern = buildPattern(DELETE)
* Determine whether feature should be deleted.
*
* @param {object} obj
* @returns {str} Name of first field name triggering deletion (or `undefined`)
* @returns {string|undefined} Name of the first field triggering deletion (`undefined` if none)
* @example
* deleteFeature({ scientific: 'vacant planting', common: 'Apple' })
* deleteFeature({ scientific: 'Malus pumila', common: 'apple' })

View File

@ -5,6 +5,7 @@
*/
const helpers = require('./helpers')
const {Infraspecies, ParsedScientificName} = require('./types')
// ---- Components ----
@ -181,45 +182,6 @@ const HYBRID = {
}
}
// ---- Object definitions ----
/**
* Infraspecies.
*
* @typedef {object} Infraspecies
* @property {string} rank - Rank (`subsp.`, `var.`, `f.`, `subvar.`, `subf.`).
* @property {string} epithet - Epithet (lowercase: e.g. `pontica`).
*/
/**
* Hybrid.
*
* Represents a secondary scientific name in a hybrid formula.
*
* @property {string} genus - Genus (capitalized: e.g. `Malus`).
* @property {string} subgenus - Subgenus (capitalized: e.g. `Malus`).
* @property {string} species - Specific epithet (lowercase: e.g. `pumila`).
* @property {Infraspecies[]} infraspecies - Infraspecific epithets.
* @property {string} cultivar - Cultivar (title case: e.g. `Golden Delicious`).
*/
/**
* Scientific name.
*
* @typedef {object} ParsedScientificName
* @property {string} head - Unparsed head.
* @property {string} uninomial Uninomial name (maybe `genus`).
* @property {string} genus - Genus (capitalized: e.g. `Malus`).
* @property {string} subgenus - Subgenus (capitalized: e.g. `Malus`).
* @property {string} species - Specific epithet (lowercase: e.g. `pumila`).
* @property {Infraspecies[]} infraspecies - Infraspecific epithets.
* @property {string} cultivar - Cultivar (title case: e.g. `Golden Delicious`).
* @property {boolean} hybrid - Whether this is a hybrid.
* @property {boolean} hybridGenus Whether `genus` is a nothogenus (e.g. `× Sorbopyrus`).
* @property {Hybrid[]} hybrids Secondary names in a hybrid formula.
* @property {string} tail - Unparsed tail.
*/
// ---- Helper functions ----
/**

View File

@ -14,173 +14,12 @@ const helpers = require('./helpers')
const archive = require('./archive')
const workflow = require('./workflow')
const LICENSES = require('./licenses')
const {ArchiveFile, ArchiveEntry, SourceProperties, BrowserFormat, FileType, SourceFile} = require('./types')
const LINK_TYPES = ['checksum', 'manual', 'file', 'arcgis', 'browser']
const VSI_FORMATS = ['/vsizip/', '/vsigzip/', '/vsitar/']
const SOURCE_ARCHIVE_PROPS = ['id', 'country', 'state', 'city', 'designation', 'notes']
/**
* File download method.
*
* - manual: Download manually (e.g. clicking on a button in the browser)
* - file: Download directly
* - arcgis: Download with the ArcGIS Feature Layer API with paginated requests
* - browser: Render in a browser and save the resulting webpage
*
* @typedef {'manual'|'file'|'arcgis'|'browser'} DownloadMethod
*/
/**
* Browser webpage export format.
*
* - mhtml: Webpage with resources saved in a single file
* - html: Webpage HTML only
* - png: Screenshot as PNG image
* - pdf: Webpage as PDF document
*
* @typedef {'mhtml'|'html'|'png'|'pdf'} BrowserFormat
*/
/**
* File type.
*
* - data: Data file (Source.props.data)
* - metadata: Metadata file (Source.props.metadata)
* - license: License file (Source.props.license)
*
* @typedef {'data'|'metadata'|'license'} FileType
*/
/**
* Checksum file descriptor.
*
* @typedef {object} ChecksumFile
* @property {string} checksum - File checksum (base-64 md5 hash).
*/
/**
* URL file descriptor.
*
* @typedef {object} UrlFile
* @property {string} url - File URL.
* @property {DownloadMethod} method
* @property {BrowserFormat} [format]
*/
/**
* Archive file descriptor.
*
* @typedef {ChecksumFile|UrlFile} ArchiveFile
*/
/**
* Source file descriptor.
*
* @typedef {string|{manual: string}|{file: string}|{browser: string}|{arcgis: string}} SourceFile
*/
/**
* Archive entry.
*
* @typedef {object} ArchiveEntry
* @property {string} path - File path.
* @property {string} checksum - File checksum (base-64 md5 hash).
* @property {Date} date - Date of acquisition.
* @property {boolean} [maxDate] - Whether `date` is a maximum-possible date.
* @property {Date} [dateAdded] - Date added to archive, if not `date`
* (if file was registered manually rather than downloaded).
* @property {string} [url] - File URL.
* @property {DownloadMethod} [method] - File download method.
* @property {BrowserFormat} [format] - Browser webpage export format
* (if `method` is `browser`).
* @property {boolean} [existed] - Whether file already existed in archive, in
* which case `path` is the path of the existing file.
* @property {object} [props] - Additional properties.
*/
/**
* Properties used by {@link Source} for data processing.
*
* @typedef {object} SourceProperties
* @property {string} id - Identifier prepended to console output.
* @property {SourceFile|SourceFile[]} data - Data file(s).
* @property {string} vfs - GDAL virtual file system type (`/vsizip/`).
* @property {string} filename - Relative path to the file to open with GDAL
* within an archive file.
* @property {string} layer - Layer name to open with GDAL within a file. Only
* relevant for files with multiple layers.
* @property {function} openFunc - Function that takes a file path (or array)
* and returns a GDAL dataset. If provided, takes precedence over `vfs` and
* `filename`.
* @property {object} geometry - Geometry field names for formats without
* explicit geometries (e.g. CSV). If not provided, will
* attempt to guess from field names.
* @property {string} geometry.wkt - Name of field with well-known-text (wkt)
* geometry. If provided, takes precedence over `x` and `y`.
* @property {string} geometry.x - Name of field with x coordinate (longitude,
* easting).
* @property {string} geometry.y - Name of field with y coordinate (latitude,
* northing).
* @property {string} srs - Spatial reference system in any format supported by
* [OGRSpatialReference.SetFromUserInput()](https://gdal.org/api/ogrspatialref.html#classOGRSpatialReference_1aec3c6a49533fe457ddc763d699ff8796).
* @property {Object.<string, string|function>} crosswalk - Crosswalk mapping to
* a target schema. For each `key: value` pair, `key` is the new field name and
* `value` is either the old field name (e.g. `height: 'HEIGHT'`) or a function
* that takes an object (of feature field values) and returns a value (e.g.
* `height: x => x['HEIGHT'] / 100`).
* @property {function} coordsFunc - Function that takes an object (of feature
* field values before the crosswalk) and returns a number array of point
* coordinates `[x, y]`. This is a useful alternative to `geometry` if the
* coordinates need to be extracted from field values (e.g. `obj =>
* obj.XY.split(';').map(Number)`).
* @property {function} addressFunc - Function that takes an object (of feature
* field values before the crosswalk) and returns an address string for
* geocoding.
* @property {function} deleteFunc - Function that takes an object (of feature
* field values before the crosswalk) and returns a value (e.g. `x =>
* x['HEALTH'] === 'dead'`). The feature is excluded from the output if the
* returned value evaluates to `true`.
* @property {SourceFile|SourceFile[]} metadata - Metadata webpage(s) or
* file(s).
* @property {object} license - Data license.
* @property {string} license.id - License identifier (see `./lib/licenses.js`).
* @property {string} license.name - License name. Only provide if `id` is not.
* @property {string} license.url - License URL. Only provide if `id` is not.
*/
/**
* Additional properties not used by {@link Source} but used elsewhere.
*
* @typedef {SourceProperties} SourcePropertiesExtended
* @property {string} pending - Pending issues preventing processing.
* @property {string} omit Reason for omitting from processing.
* @property {string} country - Country name in English (e.g. `Spain`).
* @property {string} state - Local name of first-level administrative division
* (see https://en.wikipedia.org/wiki/List_of_administrative_divisions_by_country)
* with the exception of:
* - Ireland: NUTS 3 Region (https://en.wikipedia.org/wiki/NUTS_statistical_regions_of_Ireland)
* - Japan: Region (https://en.wikipedia.org/wiki/List_of_regions_of_Japan)
* - Netherlands: Province (https://en.wikipedia.org/wiki/Provinces_of_the_Netherlands)
* - New Zealand: Region (https://en.wikipedia.org/wiki/Regions_of_New_Zealand)
* - United Kingdom (England): Region (https://en.wikipedia.org/wiki/Regions_of_England)
* - United Kingdom (other): Country
* @property {string} city - Local name of city or municipality.
* @property {string} designation - Local name of `city` subset, administrative
* unit, university, or other institution if not `country`, `state`, or `city`.
* @property {string} language - Language of contents as an [ISO
* 639-1](https://en.wikipedia.org/wiki/ISO_639-1) code (e.g. `en`) and an
* optional [ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)
* region code (e.g. `en-AU`).
* @property {string} primary - `id` of the primary source (for grouping sources
* together).
* @property {string} long - Full name of the government body, university, or
* other institution (e.g. `City of Melbourne`).
* @property {string} short - Short name (e.g. `Melbourne`).
* @property {object} centre - Centre point (for map label placement).
* @property {number} centre.lon - Longitude in decimal degrees (EPSG:4326).
* @property {number} centre.lat - Latitude in decimal degrees (EPSG:4326).
*/
/**
* Class representing a source dataset.
*

View File

@ -3,12 +3,17 @@
*
* @module
*/
const path = require('path')
const util = require('util')
const fs = require('fs')
const {SourceProperties} = require('./types')
/**
* Read source properties from a file.
*
* @param {string} file - Path to source properties file.
* @returns {object[]} Source properties.
* @returns {SourceProperties[]} Source properties.
*/
function readSourceProperties(file) {
const absolutePath = require.resolve(path.resolve(file))
@ -19,7 +24,7 @@ function readSourceProperties(file) {
/**
* Write source properties to a file.
*
* @param {object[]} sources - Source properties.
* @param {SourceProperties[]} sources - Source properties.
* @param {string} file - Path to new source properties file.
* @param {string} currentFile - Path to current source properties file (
* defaults to `file`). Used to replicate the header
@ -30,7 +35,7 @@ function writeSourceProperties(sources, file, currentFile) {
currentFile = file
}
// const copies = structuredClone(sources)
copies = sources
const copies = sources
// Define custom inspection for functions
function inspectFunction() {
const txt = this.toString()
@ -65,8 +70,11 @@ function writeSourceProperties(sources, file, currentFile) {
)
// Extract imports from current file
const sourceCode = fs.readFileSync(currentFile, 'utf8')
const header = sourceCode.match(/^([\s\S]*)?module\.exports/m)[1]
fs.writeFileSync(file, `${header}module.exports = ${txt}\n`)
const headerMatch = sourceCode.match(/^([\s\S]*)?module\.exports/m)
if (!headerMatch) {
throw new Error('Could not parse header')
}
fs.writeFileSync(file, `${headerMatch[1]}module.exports = ${txt}\n`)
}
module.exports = {

205
lib/types.js Normal file
View File

@ -0,0 +1,205 @@
/**
* @module types
*/
/**
* File download method.
*
* - manual: Download manually (e.g. clicking on a button in the browser)
* - file: Download directly
* - arcgis: Download with the ArcGIS Feature Layer API with paginated requests
* - browser: Render in a browser and save the resulting webpage
*
* @typedef {'manual'|'file'|'arcgis'|'browser'} DownloadMethod
*/
/**
* Browser webpage export format.
*
* - mhtml: Webpage with resources saved in a single file
* - html: Webpage HTML only
* - png: Screenshot as PNG image
* - pdf: Webpage as PDF document
*
* @typedef {'mhtml'|'html'|'png'|'pdf'} BrowserFormat
*/
/**
* File type.
*
* - data: Data file (Source.props.data)
* - metadata: Metadata file (Source.props.metadata)
* - license: License file (Source.props.license)
*
* @typedef {'data'|'metadata'|'license'} FileType
*/
/**
* Checksum file descriptor.
*
* @typedef {object} ChecksumFile
* @property {string} checksum - File checksum (base-64 md5 hash).
*/
/**
* URL file descriptor.
*
* @typedef {object} UrlFile
* @property {string} url - File URL.
* @property {DownloadMethod} method
* @property {BrowserFormat} [format]
*/
/**
* Archive file descriptor.
*
* @typedef {ChecksumFile|UrlFile} ArchiveFile
*/
/**
* Source file descriptor.
*
* @typedef {string|{manual: string}|{file: string}|{browser: string}|{arcgis: string}} SourceFile
*/
/**
* Archive entry.
*
* @typedef {object} ArchiveEntry
* @property {string} path - File path.
* @property {string} checksum - File checksum (base-64 md5 hash).
* @property {Date} date - Date of acquisition.
* @property {boolean} [maxDate] - Whether `date` is a maximum-possible date.
* @property {Date} [dateAdded] - Date added to archive, if not `date`
* (if file was registered manually rather than downloaded).
* @property {string} [url] - File URL.
* @property {DownloadMethod} [method] - File download method.
* @property {BrowserFormat} [format] - Browser webpage export format
* (if `method` is `browser`).
* @property {boolean} [existed] - Whether file already existed in archive, in
* which case `path` is the path of the existing file.
* @property {object} [props] - Additional properties.
*/
/**
* Properties used by {@link Source} for data processing.
*
* @typedef {object} SourceProperties
* @property {string} id - Identifier prepended to console output.
* @property {SourceFile|SourceFile[]} data - Data file(s).
* @property {string} vfs - GDAL virtual file system type (e.g. `/vsizip/`).
* @property {string} filename - Relative path to the file to open with GDAL
* within an archive file.
* @property {string} layer - Layer name to open with GDAL within a file. Only
* relevant for files with multiple layers.
* @property {function} openFunc - Function that takes a file path (or array)
* and returns a GDAL dataset. If provided, takes precedence over `vfs` and
* `filename`.
* @property {object} geometry - Geometry field names for formats without
* explicit geometries (e.g. CSV). If not provided, will
* attempt to guess from field names.
* @property {string} geometry.wkt - Name of field with well-known-text (wkt)
* geometry. If provided, takes precedence over `x` and `y`.
* @property {string} geometry.x - Name of field with x coordinate (longitude,
* easting).
* @property {string} geometry.y - Name of field with y coordinate (latitude,
* northing).
* @property {string} srs - Spatial reference system in any format supported by
* [OGRSpatialReference.SetFromUserInput()](https://gdal.org/api/ogrspatialref.html#classOGRSpatialReference_1aec3c6a49533fe457ddc763d699ff8796).
* @property {Object.<string, string|function>} crosswalk - Crosswalk mapping to
* a target schema. For each `key: value` pair, `key` is the new field name and
* `value` is either the old field name (e.g. `height: 'HEIGHT'`) or a function
* that takes an object (of feature field values) and returns a value (e.g.
* `height: x => x['HEIGHT'] / 100`).
* @property {function} coordsFunc - Function that takes an object (of feature
* field values before the crosswalk) and returns a number array of point
* coordinates `[x, y]`. This is a useful alternative to `geometry` if the
* coordinates need to be extracted from field values (e.g. `obj =>
* obj.XY.split(';').map(Number)`).
* @property {function} addressFunc - Function that takes an object (of feature
* field values before the crosswalk) and returns an address string for
* geocoding.
* @property {function} deleteFunc - Function that takes an object (of feature
* field values before the crosswalk) and returns a value (e.g. `x =>
* x['HEALTH'] === 'dead'`). The feature is excluded from the output if the
* returned value evaluates to `true`.
* @property {SourceFile|SourceFile[]} metadata - Metadata webpage(s) or
* file(s).
* @property {object} license - Data license.
* @property {string} license.id - License identifier (see `./lib/licenses.js`).
* @property {string} license.name - License name. Only provide if `id` is not.
* @property {string} license.url - License URL. Only provide if `id` is not.
*/
/**
* Additional properties not used by {@link Source} but used elsewhere.
*
* @typedef {SourceProperties} SourcePropertiesExtended
* @property {string} pending - Pending issues preventing processing.
* @property {string} omit - Reason for omitting from processing.
* @property {string} country - Country name in English (e.g. `Spain`).
* @property {string} state - Local name of first-level administrative division
* (see https://en.wikipedia.org/wiki/List_of_administrative_divisions_by_country)
* with the exception of:
* - Ireland: NUTS 3 Region (https://en.wikipedia.org/wiki/NUTS_statistical_regions_of_Ireland)
* - Japan: Region (https://en.wikipedia.org/wiki/List_of_regions_of_Japan)
* - Netherlands: Province (https://en.wikipedia.org/wiki/Provinces_of_the_Netherlands)
* - New Zealand: Region (https://en.wikipedia.org/wiki/Regions_of_New_Zealand)
* - United Kingdom (England): Region (https://en.wikipedia.org/wiki/Regions_of_England)
* - United Kingdom (other): Country
* @property {string} city - Local name of city or municipality.
* @property {string} designation - Local name of `city` subset, administrative
* unit, university, or other institution if not `country`, `state`, or `city`.
* @property {string} language - Language of contents as an [ISO
* 639-1](https://en.wikipedia.org/wiki/ISO_639-1) code (e.g. `en`) and an
* optional [ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)
* region code (e.g. `en-AU`).
* @property {string} primary - `id` of the primary source (for grouping sources
* together).
* @property {string} long - Full name of the government body, university, or
* other institution (e.g. `City of Melbourne`).
* @property {string} short - Short name (e.g. `Melbourne`).
* @property {object} centre - Centre point (for map label placement).
* @property {number} centre.lon - Longitude in decimal degrees (EPSG:4326).
* @property {number} centre.lat - Latitude in decimal degrees (EPSG:4326).
*/
/**
* Infraspecies epithet.
*
* @typedef {object} Infraspecies
* @property {string} rank - Rank (`subsp.`, `var.`, `f.`, `subvar.`, `subf.`).
* @property {string} epithet - Epithet (lowercase: e.g. `pontica`).
*/
/**
* Hybrid name.
*
* Represents a secondary scientific name in a hybrid formula.
*
* @typedef {object} Hybrid
* @property {string} genus - Genus (capitalized: e.g. `Malus`).
* @property {string} subgenus - Subgenus (capitalized: e.g. `Malus`).
* @property {string} species - Specific epithet (lowercase: e.g. `pumila`).
* @property {Infraspecies[]} infraspecies - Infraspecific epithets.
* @property {string} cultivar - Cultivar (title case: e.g. `Golden Delicious`).
*/
/**
* Scientific name.
*
* @typedef {object} ParsedScientificName
* @property {string} head - Unparsed head.
* @property {string} uninomial - Uninomial name (maybe `genus`).
* @property {string} genus - Genus (capitalized: e.g. `Malus`).
* @property {string} subgenus - Subgenus (capitalized: e.g. `Malus`).
* @property {string} species - Specific epithet (lowercase: e.g. `pumila`).
* @property {Infraspecies[]} infraspecies - Infraspecific epithets.
* @property {string} cultivar - Cultivar (title case: e.g. `Golden Delicious`).
* @property {boolean} hybrid - Whether this is a hybrid.
* @property {boolean} hybridGenus - Whether `genus` is a nothogenus (e.g. `× Sorbopyrus`).
* @property {Hybrid[]} hybrids - Secondary names in a hybrid formula.
* @property {string} tail - Unparsed tail.
*/
// The definitions above are JSDoc typedefs only; `unused` is the sole runtime
// export of this file. Names that other modules destructure from
// `require('./types')` (e.g. `ArchiveEntry`) are therefore `undefined` at
// runtime and are meaningful only as JSDoc type references.
// NOTE(review): presumably this placeholder export exists so that tooling
// treats the file as a module whose typedefs can be referenced — confirm.
exports.unused = {}

View File

@ -5,6 +5,7 @@ const wfs = require('./wfs')
const fs = require('fs')
const os = require('os')
const puppeteer = require('puppeteer')
const {ArchiveEntry, BrowserFormat} = require('./types')
let BROWSER = null
const PAGE_FORMAT_FUNCTIONS = {
@ -36,8 +37,8 @@ async function getBrowser() {
* @param {string} params.url - URL to download
* @param {number} params.maxDays - Maximum age of existing result in days that
* would prevent downloading again
* @param {string} [params.props] - Additional properties to log
* @returns {Promise<object>} Log entry
* @param {object} [params.props] - Additional properties to log
* @returns {Promise<ArchiveEntry>} Log entry
*/
async function downloadFile({url, maxDays, props } = {}) {
// Check that we have not recently downloaded this URL
@ -89,8 +90,8 @@ async function downloadFile({url, maxDays, props } = {}) {
* @param {string} params.url - Feature layer URL
* @param {number} params.maxDays - Maximum age of existing result in days that
* would prevent downloading again
* @param {string} [params.props] - Additional properties to log
* @returns {Promise<object>} Log entry
* @param {object} [params.props] - Additional properties to log
* @returns {Promise<ArchiveEntry>} Log entry
*/
async function downloadArcgisFeatureLayer({url, maxDays, props} = {}) {
// Check that we have not recently downloaded this URL
@ -143,7 +144,7 @@ async function downloadArcgisFeatureLayer({url, maxDays, props} = {}) {
* @param {number} params.maxDays - Maximum age of existing result in days that
* would prevent downloading again
* @param {object} [params.props] - Additional properties to log
* @returns {Promise<object>} Log entry
* @returns {Promise<ArchiveEntry>} Log entry
*/
async function registerFile({file, date, url, method, maxDays, props} = {}) {
// Check that we have not recently downloaded this URL
@ -210,11 +211,11 @@ async function buildWfsDownloadUrl(url) {
*
* @param {object} params - Parameters
* @param {string} params.url - URL to download
* @param {'mhtml'|'html'|'png'|'pdf'} params.format - Format to save page as
* @param {BrowserFormat} params.format - Format to save page as
* @param {number} params.maxDays - Maximum age of existing result in days that
* would prevent downloading again
* @param {object} [params.props] - Additional properties to log
* @returns {Promise<object>} Log entry
* @returns {Promise<ArchiveEntry>} Log entry
*/
async function downloadPage({url, format, maxDays, props} = {}) {
const readFunction = PAGE_FORMAT_FUNCTIONS[format]