/** * Parse scientific names. * * @module */ const helpers = require('./helpers') const {Infraspecies, ParsedScientificName} = require('./types') // ---- Components ---- /** * Generic epithet. * * Minimum two letters. * Dash can be within three letters of end (e.g. 'Uva-ursi', 'Filix-mas'). */ const GENERIC = `(?:[A-Z][a-z]+|[A-z][a-z]{2,}-[a-z]{3,})` /** * Specifc epithet. * * Minimum two letters. * Dashes can be within one letter of end (e.g. 's-stylata', 'laurel-y'). */ const SPECIFIC = `[a-z]+[a-z-]*[a-z]+` /** * Subgenus rank. * * subg: subg(.) | subgen(.) | subgenus */ const SUBG = `(?:subg(?:en)?\\.?|subgenus)` /** * Species rank. * * sp: sp(.), spp(.), species */ const SP = `(?:spp?\\.?|species)` /** * Infraspecific ranks. * * subsp: subsp(.) | subspp(.) | ssp(.) | sspp(.) | subspecies * var: var(.) | variety | varietas * subvar: subvar(.), subvariety, subvarietas * f: f(.) | form | forma * subf: subf(.) | subform | subforma */ const RANKS = { subsp: `subspp?\\.?|sspp?\\.?|subspecies`, var: `var\\.?|variety|varietas`, subvar: `subvar\\.?|subvariety|subvarietas`, f: `f\\.?|forma?`, subf: `subf\\.?|subforma?` } /** * Any infraspecific rank. */ const RANK = Object.values(RANKS).join('|') // ---- Named capture groups ---- /** * Everything before the first (latin) letter or hybrid symbol. */ const HEAD = `^(?[^A-z×]+)` /** * Uninomial. */ const UNINOMIAL = `(?${GENERIC})` /** * Genus. * * Identical to uninomial, but inferred to be a genus based on context. */ const GENUS = `(?${GENERIC})` /** * Secondary genus in hybrid formula. * * May be abbreviated down to a single letter. */ const HYBRID_GENUS = `(?[A-Z](?:${SPECIFIC})?)\\.?` /** * Subgenus. */ const SUBGENUS = `${SUBG} (?${GENERIC})` /** * Species. */ const SPECIES = `(?${SPECIFIC})` /** * One or more infraspecific epithets, each preceded by an optional rank. */ const INFRASPECIES = `(?(?:(?:(?:${RANK}) )?${SPECIFIC}(?=$| ) ?)+)` /** * Single infraspecific epithet preceded by an optional rank. */ const RANK_EPITHET = `(?:(?:(?${RANK}) )?(?${SPECIFIC}))(?:$| )` /** * Cultivar. * * Must be wrapped in quotes and not include certain characters. */ const CULTIVAR = `(?:(?['"]+) ?(?(?:[A-z0-9'-\\. ](?! [xX] ))+[^ ]) ?\\k)` // ---- Decision trees ---- /** * Parse a scientific name (or the first name in a hybrid formula). * * Each key is a regular expression with named capture groups. Try each in * order. As soon as a match is found, proceed to the children keys and repeat * until `null` or no more children are found. Any `tags`, if encountered, are * added to the result. */ const FIRST = { [`(?:[xX] |× ?)${GENUS}`]: { tags: { hybrid: true, hybridGenus: true }, [SP]: CULTIVAR, [CULTIVAR]: null, [SPECIES]: { [CULTIVAR]: null, [INFRASPECIES]: CULTIVAR } }, [`${GENUS} (?:[xX] |× ?)${SPECIES}`]: { tags: { hybrid: true }, [CULTIVAR]: null, [INFRASPECIES]: CULTIVAR }, [`${GENUS} (?:[xX×]|hybrid)`]: { tags: { hybrid: true }, [CULTIVAR]: null }, [`${GENUS} ${SUBGENUS}`]: { tags: { hybrid: false } }, [`${GENUS} ${SP}`]: { [CULTIVAR]: null }, [`${GENUS} ${CULTIVAR}`]: { }, [`${GENUS} ${SPECIES}`]: { [CULTIVAR]: null, [INFRASPECIES]: { [CULTIVAR]: null } }, [UNINOMIAL]: { tags: { hybrid: false } } } /** * Parse a secondary name in a hybrid formula. */ const HYBRID = { [SPECIES]: { [CULTIVAR]: null, [INFRASPECIES]: { [CULTIVAR]: null } }, [`${HYBRID_GENUS} ${CULTIVAR}`]: { }, [`${HYBRID_GENUS} ${SPECIES}`]: { [CULTIVAR]: null, [INFRASPECIES]: { [CULTIVAR]: null } } } // ---- Helper functions ---- /** * Clean name string. * * - Latinizes characters. * - Replaces whitespace sequences with a single space. * - Removes leading and trailing whitespace. * * @param {string} s * @returns {string} * @example * cleanName(' Acer platanoïdes ') // 'Acer platanoides' */ function cleanName(s) { return helpers.latinize(s).replace(/\s+/g, ' ').trim() } /** * Parse infraspecific ranks and epithets. * * @param {string} s * @returns {Infraspecies[]} * @example * parseInfraspecies('foo f bar') // [{epithet: 'foo'}, {rank: 'f.', epithet: 'bar'}] */ function parseInfraspecies(s) { const infraspecies = [] let subs = s while (match = subs.match(RANK_EPITHET)) { if (match.groups.rank) { for (const key in RANKS) { if (match.groups.rank.match(`^(${RANKS[key]})$`)) { match.groups.rank = `${key}.` break } } } else { delete match.groups.rank } infraspecies.push(match.groups) subs = subs.slice(match.index + match[0].length) } return infraspecies } /** * Print infraspecific ranks and epithets. * * @param {Infraspecies[]} infraspecies * @param {object} options * @param {object} [options.n=Infinity] – Number of infraspecies. * @param {object} [options.rank=true] – Print infraspecies rank. * @returns {string} * @example * printInfraspecies([ { rank: 'f.', epithet: 'mora' } ]) * // 'f. mora' * printInfraspecies([ { rank: 'f.', epithet: 'mora' } ], { rank: false }) * // 'mora' */ function printInfraspecies(infraspecies, { n = Infinity, rank = true } = {}) { const s = [] if (n && infraspecies) { infraspecies.slice(0, n).forEach(i => { if (i.epithet) { if (rank && i.rank) { s.push(`${i.rank} ${i.epithet}`) } else { s.push(i.epithet) } } }) } return s.join(' ') } // ---- Main functions ---- /** * Print scientific name. * * @param {ParsedScientificName} name - Scientific name. * @param {object} [options] - Printing options. * @param {number} [options.infraspecies=Infinity] - Number of infraspecies. * @param {boolean} [options.hybrid=true] - Print hybrid symbol and formulas. * @param {boolean} [options.rank=true] - Print infraspecies rank. * @param {boolean} [options.cultivar=true] - Print cultivar. * @returns {string} * @example * name = { * genus: 'Genus', * species: 'speciosa', * infraspecies: [{ rank: 'f.', epithet: 'formosa' }], * cultivar: 'Gala', * hybrid: true, * hybrids: [{ genus: 'Genus', species: 'pendula' }] * } * printScientificName(name) * // "Genus speciosa f. formosa 'Gala' × Genus pendula'" * printScientificName(name, {cultivar: false}) * // "Genus speciosa f. formosa × Genus pendula'" * printScientificName(name, {infraspecies: 0, cultivar: false}) * // 'Genus speciosa × Genus pendula' * printScientificName(name, {hybrid: false, infraspecies: 0, cultivar: false}) * // 'Genus speciosa' */ function printScientificName( name, { infraspecies = Infinity, hybrid = true, rank = true, cultivar = true } = {} ) { if (name.uninomial) { return name.uninomial } const s = [] if (hybrid && name.hybridGenus) { s.push(`×${name.genus}`) } else { s.push(name.genus) } if (name.subgenus) { s.push(`subg. ${name.subgenus}`) } if (hybrid && name.hybrid && !name.hybridGenus && !name.hybrids) { if (name.species) { s.push(`×${name.species}`) } else { s.push('×') } } else if (name.species) { s.push(name.species) } if (infraspecies && name.infraspecies) { s.push( printInfraspecies(name.infraspecies, { n: infraspecies, rank: rank }) ) } if (cultivar && name.cultivar) { s.push(`'${name.cultivar}'`) } if (hybrid && name.hybrids) { name.hybrids.forEach(h => { const printedHybrid = printScientificName( h, { infraspecies: infraspecies, rank: rank, cultivar: cultivar } ) s.push(`× ${printedHybrid}`) }) } return s.join(' ') } /** * Format scientific name. * * @param {ParsedScientificName} name – Scientific name. * @param {string|boolean} defaultGenus – Genus to assume if hybrid genus is * blank or an abbreviation of `defaultGenus`. Defaults to `genus` if `null`, or * skipped if `false`. * @returns {ParsedScientificName} * @example * name = { * genus: ' GENUS', * species: 'SPECIOSA ', * infraspecies: [ { rank: 'VAR', epithet: 'FORMOSA' } ], * cultivar: 'CULTI VAR', * hybrids: [ {genus: 'G', species: 'spéciosa' } ], * hybrid: true * } * formatScientificName(name) * // { * // genus: 'Genus', * // species: 'speciosa', * // infraspecies: [ { rank: 'var.', epithet: 'formosa' } ], * // cultivar: 'Culti Var', * // hybrids: [ { genus: 'Genus', species: 'speciosa' } ], * // hybrid: true * // } */ function formatScientificName(name, defaultGenus = null) { const CASE = { uninomial: helpers.toSentenceCase, genus: helpers.toSentenceCase, subgenus: helpers.toSentenceCase, species: s => s.toLowerCase(), cultivar: helpers.toTitleCase } // Deep copy const result = JSON.parse(JSON.stringify(name)) // Top-level strings for (const key in CASE) { if (name[key]) { let s = name[key] if (s) { s = CASE[key](cleanName(s)) } if (s) { result[key] = s } else { delete result[key] } } } // Default genus if (typeof defaultGenus === 'string') { if ( (!result.genus && !result.uninomial) || (result.genus && defaultGenus.match(`^${result.genus.replace(/\.$/, '')}`)) ) { result.genus = defaultGenus } } // Infraspecies if (name.infraspecies) { const infraspecies = result.infraspecies.map(i => { for (const key of ['rank', 'epithet']) { if (key in i) { let s = i[key] if (s) { s = cleanName(s).toLowerCase() } if (s) { if (key === 'rank') { // Normalize rank for (const key in RANKS) { if (s.match(`^(${RANKS[key]})$`)) { s = `${key}.` break } } } i[key] = s } else { delete i[key] } } } return i }).filter(obj => Object.keys(obj).length > 0) if (infraspecies.length > 0) { result.infraspecies = infraspecies } else { delete result.infraspecies } } // Hybrids if (name.hybrids) { if (defaultGenus === null && result.genus) { defaultGenus = result.genus } const hybrids = ( result.hybrids .map(h => formatScientificName(h, defaultGenus)) .filter(obj => Object.keys(obj).length > 0) ) if (hybrids.length > 0) { result.hybrids = hybrids } else { delete result.hybrids } } // Remove empty optional properties for (const key of ['hybrid', 'hybridGenus', 'head', 'tail']) { if (!result[key]) { delete result[key] } } return result } /** * Parse scientific name. * * @param {string} name - Name to parse as a scientific name. * @returns {ParsedScientificName} * @example * parseScientificName(`Genus`) * // { uninomial: 'Genus' } * parseScientificName(`Genus speciosa var. segunda 'Cultivar' x Genus hybrida`) * // { * // genus: 'Genus', * // species: 'speciosa', * // infraspecies: [ { rank: 'var.', epithet: 'segunda' } ], * // cultivar: 'Cultivar', * // hybrids: [ { genus: 'Genus', species: 'hybrida' } ], * // hybrid: true * // } */ function parseScientificName(s) { // Ignore case if string all upper or lower case let ignoreCase = false if (s === s.toLowerCase() || s === s.toUpperCase()) { ignoreCase = true } s = cleanName(s) const parsed = {} function recurse(node, results) { for (const key in node) { const value = node[key] if (key === 'tags') { Object.assign(results, value) continue } let pattern = `^${key}(?:$| )` if (ignoreCase) { pattern = new RegExp(pattern, 'i') } if (parse(pattern, results)) { if (typeof value === 'string') { let pattern = `^${value}(?:$| )` if (ignoreCase) { pattern = new RegExp(pattern, 'i') } parse(pattern, results) } else if (value) { recurse(value, results) } break } } } function parse(pattern, results) { match = s.match(pattern) if (match) { if (match.groups) { Object.assign(results, match.groups) } s = s.slice(match.index + match[0].length) return true } return false } // Unparsed head parse(HEAD, parsed) // Parse // HACK: Modify string to not require space in case-sensitive scenarios if (!ignoreCase) { // xGenus s = s.replace(/^x([A-Z])/, 'x $1') // Xspecies s = s.replace(new RegExp(`^${GENUS} X([a-z])`), '$ X $2') } recurse(FIRST, parsed) // Parse infraspecies if ('infraspecies' in parsed) { parsed.infraspecies = parseInfraspecies(parsed.infraspecies) } delete parsed['cultivarQuote'] if (Object.keys(parsed).length === 0) { return { tail: s } } // Hybrid formulas if (!('hybrid' in parsed)) { const hybrids = [] while (parse(`^(?:[xX×])(?:$| )`, parsed)) { const temp = {} recurse(HYBRID, temp) if ('infraspecies' in temp) { temp['infraspecies'] = parseInfraspecies(temp['infraspecies']) } delete temp['cultivarQuote'] if (Object.keys(temp).length > 0) { hybrids.push(temp) } } if (hybrids.length > 0) { parsed.hybrids = hybrids parsed.hybrid = true } } // Unparsed tail if (s) { parsed.tail = s } return formatScientificName(parsed) } // ---- Class definition ---- /** * Class representing a scientific name. * * @param {ParsedScientificName} obj - Parsed scientific name. * * @property {ParsedScientificName} parsed - Parsed scientific name. * @property {string|object} [input] - Input from which `parsed` was derived. * @property {object[]} [matches] - Matches from a taxonomic database. */ class ScientificName { constructor(obj) { this.parsed = formatScientificName(obj) } /** * Build scientific name from string. * * @param {string} str * @returns {ScientificName} * @example * ScientificName.fromString('Malus pumila') * // ScientificName { * // parsed: { genus: 'Malus', species: 'pumila' }, * // input: 'Malus pumila' * // } */ static fromString(str) { const parsed = parseScientificName(str) const sciname = new ScientificName(parsed) sciname.input = str return sciname } /** * Scientific name fields. * @private */ static _allFields = ['scientific', 'genus', 'species', 'cultivar', 'infraspecies'] /** * Scientific name fields secondary to `scientific`. * @private */ static _secondaryFields = ['genus', 'species', 'cultivar'] /** * Build scientific name from feature fields. * * @param {object} fields * @returns {ScientificName} * @example * ScientificName.fromFields({ scientific: 'Malus pumila', other: 'Bloop' }) * // ScientificName { * // parsed: { genus: 'Malus', species: 'pumila' }, * // input: { scientific: 'Malus pumila' } * // } * ScientificName.fromFields({ genus: 'malus', species: 'PLATANOÏDES' }) * // ScientificName { * // parsed: { genus: 'Malus', species: 'platanoides' }, * // input: { genus: 'malus', species: 'PLATANOÏDES'} * // } */ static fromFields(obj) { const inputs = {} ScientificName._allFields.forEach(key => { if (obj[key]) { inputs[key] = obj[key] } }) let parsed = { ...inputs } if (parsed.scientific) { Object.assign(parsed, parseScientificName(parsed.scientific)) delete parsed.scientific } const sciname = new ScientificName(parsed) sciname.input = inputs if (sciname.parsed.species && sciname.parsed.species.match(/^([xX×]|hybrid)$/)) { sciname.parsed.hybrid = true delete sciname.parsed.species } return sciname } /** * Print scientific name to string. * * @param {object} options - Print options (see {@link printScientificName}). * @returns {string} * @example * ScientificName.fromString(`Malus pumila var. asiatica 'Gala'`).toString() * // "Malus pumila var. asiatica 'Gala'" */ toString(options) { return printScientificName(this.parsed, options) } /** * Generate compare function for sorting by string representation. * * @param {object} options - Print options (see {@link printScientificName}). * @returns {function} Compare function (a, b). * @example * l = [new ScientificName({genus: 'Prunus'}), new ScientificName({genus: 'Malus'})] * l.sort(ScientificName.compareStrings()) * // [ * // ScientificName { parsed: { genus: 'Malus' } }, * // ScientificName { parsed: { genus: 'Prunus' } } * // ] */ static compareStrings(options) { return function (a, b) { const s = [a.toString(options), b.toString(options)] if (s[0] < s[1]) return -1 if (s[0] > s[1]) return 1 return 0 } } /** * Get warnings. * * @returns {string[]} * @example * ScientificName.fromString('... Malus x pumila ...').warnings() * // [ 'Unparsed head', 'Unparsed tail', 'Hybrid' ] * ScientificName.fromFields({genus: 'Malus', species: 'pumila', scientific: 'Pyrus communis'}).warnings() * // [ 'Inconsistent secondary fields: genus, species' ] */ warnings() { const warnings = [] if (this.parsed.head) { warnings.push('Unparsed head') } if (this.parsed.tail) { warnings.push('Unparsed tail') } if (this.parsed.hybrid) { if (this.parsed.hybridGenus) { warnings.push('Hybrid genus') } else if (this.parsed.hybrids) { warnings.push('Hybrid formula') } else { warnings.push('Hybrid') } } if (typeof this.input === 'object' && this.input.scientific) { const bad = [] ScientificName._secondaryFields.forEach(key => { if (this.parsed[key] && this.input[key] && this.parsed[key] !== this.input[key]) { bad.push(key) } }) if (bad.length) { warnings.push(`Inconsistent secondary fields: ${bad.join(', ')}`) } } if (this.matches && this.matches[0]) { if (this.matches[0].phonetic) { warnings.push('Phonetic match') } if (this.matches[0].fuzzy) { warnings.push('Fuzzy match') } } return warnings.length ? warnings : undefined } /** * Get errors. * * @returns {string[]} * @example * (new ScientificName({species: 'pumila'})).errors() * // [ 'Missing genus' ] */ errors() { const errors = [] if (!this.parsed.genus) { errors.push('Missing genus') } if (this.matches) { if (!this.matches.length) { errors.push('Missing matches') } else { if (this.matches[0].incomplete) { errors.push('Incomplete match') } } } return errors.length ? errors : undefined } /** * Get full report. * * @returns {object} * @example * ScientificName.fromString('... Malus x pumila ...').report() * // { * // input: '... Malus x pumila ...', * // parsed: { * // head: '... ', * // genus: 'Malus', * // species: 'pumila', * // hybrid: true, * // tail: '...' * // }, * // warnings: [ 'Unparsed head', 'Unparsed tail', 'Hybrid' ] * // } */ report() { const errors = this.errors() const warnings = this.warnings() return { ...this.input && { input: this.input }, parsed: this.parsed, ...this.matches && this.matches.length && { matches: this.matches }, ...errors && { errors: errors }, ...warnings && { warnings: warnings } } } } module.exports = { parseScientificName, printScientificName, ScientificName }