mirror of
https://github.com/ezwelty/opentrees-harvester.git
synced 2025-06-18 10:45:42 -04:00
Log data archive paths as relative to archive path
This commit is contained in:
parent
97c78f287f
commit
0b5dc54e90
@ -1,4 +1,4 @@
|
||||
GOOGLE_MAPS_API_KEY=
|
||||
DATA_ARCHIVE=archive
|
||||
DATA_ARCHIVE_LOG=archive.jsonl
|
||||
GEOCODE_ARCHIVE=geocode
|
||||
DATA_ARCHIVE=archive/data
|
||||
DATA_ARCHIVE_LOG=archive/data.jsonl
|
||||
GEOCODE_ARCHIVE=archive/geocode
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -15,6 +15,4 @@
|
||||
/out/
|
||||
/taxa*/
|
||||
archive
|
||||
archive.jsonl
|
||||
geocode
|
||||
.env
|
||||
|
@ -156,9 +156,20 @@ function buildPath({url, checksum, date = new Date()} = {}) {
|
||||
*/
|
||||
async function log({ date = new Date(), ...props } = {}) {
|
||||
let entry = { date, ...props }
|
||||
if (entry.path) {
|
||||
// Check that the path is in the archive
|
||||
const parent = path.resolve(process.env.DATA_ARCHIVE)
|
||||
if (!path.resolve(entry.path).startsWith(parent)) {
|
||||
throw new Error(`Path ${entry.path} is not in ${process.env.DATA_ARCHIVE}`)
|
||||
}
|
||||
}
|
||||
if (entry.path && !entry.checksum) {
|
||||
entry.checksum = await hashFile(entry.path)
|
||||
}
|
||||
// Make path relative to archive
|
||||
if (entry.path) {
|
||||
entry.path = path.relative(process.env.DATA_ARCHIVE, entry.path)
|
||||
}
|
||||
if (entry.checksum) {
|
||||
const entries = await search({ checksum: entry.checksum })
|
||||
if (entries.length > 0) {
|
||||
@ -231,7 +242,7 @@ function logData({ data, filename, url, date = new Date(), ...props } = {}) {
|
||||
const dir = buildPath(url, date)
|
||||
const file = path.join(dir, filename)
|
||||
// Write file
|
||||
fs.mkdirSync(file, { recursive: true })
|
||||
fs.mkdirSync(dir, { recursive: true })
|
||||
fs.writeFileSync(file, data)
|
||||
// Log file
|
||||
return log({ url, file, date, ...props })
|
||||
@ -257,7 +268,7 @@ async function search(params, {limit, maxDays} = {}) {
|
||||
const entries = []
|
||||
for await (const log of helpers.iterateJSONL(process.env.DATA_ARCHIVE_LOG)) {
|
||||
if (limit && entries.length === limit) {
|
||||
return entries
|
||||
break
|
||||
}
|
||||
if (criterias.map(([key, value]) => log[key] === value).every(Boolean)) {
|
||||
if (!maxDate || new Date(log.date) > maxDate) {
|
||||
@ -267,6 +278,10 @@ async function search(params, {limit, maxDays} = {}) {
|
||||
}
|
||||
// Sort entries by date descending
|
||||
entries.sort((a, b) => new Date(b.date) - new Date(a.date))
|
||||
// Convert paths to full paths
|
||||
entries.forEach(entry => {
|
||||
entry.path = path.join(process.env.DATA_ARCHIVE, entry.path)
|
||||
})
|
||||
return entries
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user