diff --git a/backend/package-lock.json b/backend/package-lock.json index d46e19c9d..5e8b3d394 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -26,6 +26,7 @@ "express-rate-limit": "^7.5.0", "fast-csv": "^5.0.2", "jsonwebtoken": "^9.0.2", + "jszip": "^3.10.1", "mariadb": "^3.3.0", "md5": "^2.3.0", "nodemailer": "^6.9.14", @@ -5155,6 +5156,11 @@ "url": "https://opencollective.com/core-js" } }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" + }, "node_modules/cors": { "version": "2.8.5", "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz", @@ -7088,6 +7094,11 @@ "node": ">= 4" } }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==" + }, "node_modules/import-fresh": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", @@ -9433,6 +9444,49 @@ "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + "node_modules/jszip/node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": 
"sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" + }, + "node_modules/jszip/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/jszip/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "node_modules/jszip/node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, "node_modules/jwa": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/jwa/-/jwa-1.4.1.tgz", @@ -9496,6 +9550,14 @@ "node": ">= 0.8.0" } }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/lines-and-columns": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", @@ -10148,6 +10210,11 @@ "node": ">=6" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": 
"sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" + }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -10412,6 +10479,11 @@ "fsevents": "2.3.3" } }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" + }, "node_modules/prompts": { "version": "2.4.2", "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", @@ -10907,6 +10979,11 @@ "node": ">= 0.4" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" + }, "node_modules/setprototypeof": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", diff --git a/backend/package.json b/backend/package.json index 961df819a..a0e576d37 100644 --- a/backend/package.json +++ b/backend/package.json @@ -55,6 +55,7 @@ "express-rate-limit": "^7.5.0", "fast-csv": "^5.0.2", "jsonwebtoken": "^9.0.2", + "jszip": "^3.10.1", "mariadb": "^3.3.0", "md5": "^2.3.0", "nodemailer": "^6.9.14", diff --git a/backend/src/api-tests/locality/dwcArchiveExportLocalities.test.ts b/backend/src/api-tests/locality/dwcArchiveExportLocalities.test.ts new file mode 100644 index 000000000..b030f7340 --- /dev/null +++ b/backend/src/api-tests/locality/dwcArchiveExportLocalities.test.ts @@ -0,0 +1,65 @@ +import { afterAll, beforeAll, describe, expect, it } from '@jest/globals' +import request from 'supertest' +import JSZip from 'jszip' +import type { Response } from 'superagent' +import app from '../../app' +import { pool } from '../../utils/db' 
+import { noPermError, resetDatabase, resetDatabaseTimeout, send } from '../utils' + +type ResponseStream = { + on: (event: 'data', handler: (chunk: Buffer) => void) => void +} & { + on: (event: 'end', handler: () => void) => void +} + +const parseBinary = (res: Response, callback: (err: Error | null, body: Buffer) => void) => { + const data: Buffer[] = [] + const stream = res as unknown as ResponseStream + stream.on('data', chunk => data.push(chunk)) + stream.on('end', () => { + callback(null, Buffer.concat(data)) + }) +} + +describe('DwC-A locality export (admin-only)', () => { + beforeAll(async () => { + await resetDatabase() + }, resetDatabaseTimeout) + + afterAll(async () => { + await pool.end() + }) + + it('returns a ZIP archive for admins', async () => { + const loginResult = await send<{ token: string }>('user/login', 'POST', { username: 'testSu', password: 'test' }) + expect(loginResult.status).toEqual(200) + + const result = await request(app) + .get('/locality/export/dwc-archive') + .set('authorization', `bearer ${loginResult.body.token}`) + .buffer(true) + .parse(parseBinary) + + expect(result.status).toEqual(200) + expect(result.headers['content-type']).toMatch(/application\/zip/i) + expect(result.headers['content-disposition']).toMatch(/attachment;\s*filename="now_dwc_localities_test_export_/i) + + const zip = await JSZip.loadAsync(result.body as unknown as Buffer) + expect(zip.file('location.csv')).toBeTruthy() + expect(zip.file('geologicalcontext.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + + const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') + expect(measurementCsv).toContain('"measurementID"') + expect(measurementCsv).toContain('"parentMeasurementID"') + expect(measurementCsv).toContain('"verbatimMeasurementType"') + }) + + it('rejects non-admin requests', async () => { + const result = await 
request(app).get('/locality/export/dwc-archive') + expect(result.status).toEqual(403) + expect(result.body).toEqual(noPermError) + }) +}) diff --git a/backend/src/api-tests/occurrence/dwcArchiveExportOccurrences.test.ts b/backend/src/api-tests/occurrence/dwcArchiveExportOccurrences.test.ts new file mode 100644 index 000000000..4566ff0af --- /dev/null +++ b/backend/src/api-tests/occurrence/dwcArchiveExportOccurrences.test.ts @@ -0,0 +1,70 @@ +import { afterAll, beforeAll, describe, expect, it } from '@jest/globals' +import request from 'supertest' +import JSZip from 'jszip' +import type { Response } from 'superagent' +import app from '../../app' +import { pool } from '../../utils/db' +import { noPermError, resetDatabase, resetDatabaseTimeout, send } from '../utils' + +type ResponseStream = { + on: (event: 'data', handler: (chunk: Buffer) => void) => void +} & { + on: (event: 'end', handler: () => void) => void +} + +const parseBinary = (res: Response, callback: (err: Error | null, body: Buffer) => void) => { + const data: Buffer[] = [] + const stream = res as unknown as ResponseStream + stream.on('data', chunk => data.push(chunk)) + stream.on('end', () => { + callback(null, Buffer.concat(data)) + }) +} + +describe('DwC-A occurrence export (admin-only)', () => { + beforeAll(async () => { + await resetDatabase() + }, resetDatabaseTimeout) + + afterAll(async () => { + await pool.end() + }) + + it('returns a ZIP archive for admins', async () => { + const loginResult = await send<{ token: string }>('user/login', 'POST', { username: 'testSu', password: 'test' }) + expect(loginResult.status).toEqual(200) + + const result = await request(app) + .get('/occurrence/export/dwc-archive') + .set('authorization', `bearer ${loginResult.body.token}`) + .buffer(true) + .parse(parseBinary) + + expect(result.status).toEqual(200) + expect(result.headers['content-type']).toMatch(/application\/zip/i) + 
expect(result.headers['content-disposition']).toMatch(/attachment;\s*filename="now_dwc_occurrences_test_export_/i) + + const zip = await JSZip.loadAsync(result.body as unknown as Buffer) + expect(zip.file('location.csv')).toBeTruthy() + expect(zip.file('geologicalcontext.csv')).toBeTruthy() + expect(zip.file('taxon.csv')).toBeTruthy() + expect(zip.file('occurrence.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + + const occurrenceCsv = await zip.file('occurrence.csv')!.async('string') + expect(occurrenceCsv).toContain('"occurrenceID"') + expect(occurrenceCsv).toContain('"locationID"') + expect(occurrenceCsv).toContain('"taxonID"') + + const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') + expect(measurementCsv).toContain('"verbatimMeasurementType"') + }) + + it('rejects non-admin requests', async () => { + const result = await request(app).get('/occurrence/export/dwc-archive') + expect(result.status).toEqual(403) + expect(result.body).toEqual(noPermError) + }) +}) diff --git a/backend/src/api-tests/species/dwcArchiveExport.test.ts b/backend/src/api-tests/species/dwcArchiveExport.test.ts new file mode 100644 index 000000000..271a03718 --- /dev/null +++ b/backend/src/api-tests/species/dwcArchiveExport.test.ts @@ -0,0 +1,74 @@ +import { afterAll, beforeAll, describe, expect, it } from '@jest/globals' +import request from 'supertest' +import JSZip from 'jszip' +import type { Response } from 'superagent' +import app from '../../app' +import { pool } from '../../utils/db' +import { noPermError, resetDatabase, resetDatabaseTimeout, send } from '../utils' + +type ResponseStream = { + on: (event: 'data', handler: (chunk: Buffer) => void) => void +} & { + on: (event: 'end', handler: () => void) => void +} + +const parseBinary = (res: Response, callback: (err: Error | null, body: Buffer) => void) => { + const data: Buffer[] = [] + 
const stream = res as unknown as ResponseStream + stream.on('data', chunk => data.push(chunk)) + stream.on('end', () => { + callback(null, Buffer.concat(data)) + }) +} + +describe('DwC-A species export (admin-only)', () => { + beforeAll(async () => { + await resetDatabase() + }, resetDatabaseTimeout) + + afterAll(async () => { + await pool.end() + }) + + it('returns a ZIP archive for admins', async () => { + const loginResult = await send<{ token: string }>('user/login', 'POST', { username: 'testSu', password: 'test' }) + expect(loginResult.status).toEqual(200) + + const result = await request(app) + .get('/species/export/dwc-archive') + .set('authorization', `bearer ${loginResult.body.token}`) + .buffer(true) + .parse(parseBinary) + + expect(result.status).toEqual(200) + expect(result.headers['content-type']).toMatch(/application\/zip/i) + expect(result.headers['content-disposition']).toMatch(/attachment;\s*filename="now_dwc_test_export_/i) + + const zip = await JSZip.loadAsync(result.body as unknown as Buffer) + expect(zip.file('taxon.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + + const taxonCsv = await zip.file('taxon.csv')!.async('string') + expect(taxonCsv).toContain('"taxonID"') + expect(taxonCsv).toContain('"nomenclaturalCode"') + expect(taxonCsv).toContain('"genericName"') + + const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') + expect(measurementCsv).toContain('"measurementID"') + expect(measurementCsv).toContain('"parentMeasurementID"') + expect(measurementCsv).toContain('"verbatimMeasurementType"') + expect(measurementCsv).not.toContain('"measurementRemarks"') + + const metaXml = await zip.file('meta.xml')!.async('string') + expect(metaXml).toContain(' { + const result = await request(app).get('/species/export/dwc-archive') + expect(result.status).toEqual(403) + expect(result.body).toEqual(noPermError) + }) 
+}) diff --git a/backend/src/routes/locality.ts b/backend/src/routes/locality.ts index 55582977f..04460f1b2 100644 --- a/backend/src/routes/locality.ts +++ b/backend/src/routes/locality.ts @@ -10,6 +10,8 @@ import { fixBigInt } from '../utils/common' import { EditDataType, EditMetaData, LocalityDetailsType, Role } from '../../../frontend/src/shared/types' import { AccessError, requireOneOf } from '../middlewares/authorizer' import { deleteLocality, writeLocality } from '../services/write/locality' +import { buildDwcLocalityArchiveZipBuffer } from '../services/dwcArchiveExportLocalities' +import { currentDateAsString } from '../../../frontend/src/shared/currentDateAsString' const router = Router() @@ -18,6 +20,16 @@ router.get('/all', async (req, res) => { return res.status(200).send(fixBigInt(localities)) }) +router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res) => { + const zipBuffer = await buildDwcLocalityArchiveZipBuffer() + res.setHeader('Content-Type', 'application/zip') + res.setHeader( + 'Content-Disposition', + `attachment; filename="now_dwc_localities_test_export_${currentDateAsString()}.zip"` + ) + return res.status(200).send(zipBuffer) +}) + router.get('/:id', async (req, res) => { const id = parseInt(req.params.id) const locality = await getLocalityDetails(id, req.user) diff --git a/backend/src/routes/occurrence.ts b/backend/src/routes/occurrence.ts index b1663d683..2d2e83286 100644 --- a/backend/src/routes/occurrence.ts +++ b/backend/src/routes/occurrence.ts @@ -1,10 +1,68 @@ import { Router } from 'express' +import { pipeline } from 'stream' import { getOccurrenceDetail, updateOccurrenceDetail } from '../controllers/occurrenceController' import { requireOneOf } from '../middlewares/authorizer' import { Role } from '../../../frontend/src/shared/types' +import { + buildDwcOccurrenceArchiveZipStream, + type DwcOccurrenceExportProgress, +} from '../services/dwcArchiveExportOccurrences' +import { currentDateAsString } from 
'../../../frontend/src/shared/currentDateAsString' +import { logger } from '../utils/logger' const router = Router() +const occurrenceExportProgress = new Map() + +const scheduleProgressCleanup = (exportId: string) => { + setTimeout( + () => { + occurrenceExportProgress.delete(exportId) + }, + 5 * 60 * 1000 + ) +} + +router.get('/export/dwc-archive/progress/:exportId', requireOneOf([Role.Admin]), (req, res) => { + const progress = occurrenceExportProgress.get(req.params.exportId) + if (!progress) return res.status(404).send({ message: 'Occurrence export progress not found.' }) + return res.status(200).send(progress) +}) + +router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (req, res, next) => { + const exportId = typeof req.query.exportId === 'string' ? req.query.exportId : undefined + const reportProgress = exportId + ? (progress: DwcOccurrenceExportProgress) => { + occurrenceExportProgress.set(exportId, progress) + } + : undefined + + const archive = await buildDwcOccurrenceArchiveZipStream({ reportProgress }).catch(error => { + if (exportId) scheduleProgressCleanup(exportId) + throw error + }) + res.setHeader('Content-Type', 'application/zip') + res.setHeader( + 'Content-Disposition', + `attachment; filename="now_dwc_occurrences_test_export_${currentDateAsString()}.zip"` + ) + pipeline(archive.stream, res, error => { + archive.cleanup().catch(cleanupError => { + logger.error(`Failed to clean up occurrence DwC export temp files: ${String(cleanupError)}`) + }) + if (exportId) { + occurrenceExportProgress.set(exportId, { + stage: 'complete', + generated: 1, + total: 1, + message: 'DwC-A ZIP export ready.', + }) + scheduleProgressCleanup(exportId) + } + if (error) next(error) + }) +}) + router.get('/:lid/:speciesId', getOccurrenceDetail) router.put( '/:lid/:speciesId', diff --git a/backend/src/routes/species.ts b/backend/src/routes/species.ts index 8311c6089..d59558173 100644 --- a/backend/src/routes/species.ts +++ b/backend/src/routes/species.ts @@ 
-4,6 +4,8 @@ import { fixBigInt } from '../utils/common' import { EditMetaData, SpeciesDetailsType, Role } from '../../../frontend/src/shared/types' import { deleteSpecies, writeSpecies } from '../services/write/species' import { requireOneOf } from '../middlewares/authorizer' +import { buildDwcArchiveZipBuffer } from '../services/dwcArchiveExport' +import { currentDateAsString } from '../../../frontend/src/shared/currentDateAsString' const router = Router() @@ -17,6 +19,13 @@ router.get('/synonyms', async (_req, res) => { return res.status(200).send(fixBigInt(synonyms)) }) +router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res) => { + const zipBuffer = await buildDwcArchiveZipBuffer() + res.setHeader('Content-Type', 'application/zip') + res.setHeader('Content-Disposition', `attachment; filename="now_dwc_test_export_${currentDateAsString()}.zip"`) + return res.status(200).send(zipBuffer) +}) + router.get('/:id', async (req, res) => { const id = parseInt(req.params.id) const species = await getSpeciesDetails(id, req.user) diff --git a/backend/src/services/dwcArchiveExport.ts b/backend/src/services/dwcArchiveExport.ts new file mode 100644 index 000000000..ea5226319 --- /dev/null +++ b/backend/src/services/dwcArchiveExport.ts @@ -0,0 +1,1066 @@ +import Prisma from '../../prisma/generated/now_test_client' +import { format } from 'fast-csv' +import { Writable } from 'stream' +import JSZip from 'jszip' + +const isMeaningfulString = (value: unknown): value is string => { + if (typeof value !== 'string') return false + const trimmed = value.trim() + if (!trimmed) return false + if (trimmed === '-') return false + return true +} + +const toDwcString = (value: unknown): string => { + if (value === null || value === undefined) return '' + if (typeof value === 'bigint') return value.toString() + if (typeof value === 'number') return Number.isFinite(value) ? value.toString() : '' + if (typeof value === 'boolean') return value ? 
'true' : 'false' + if (typeof value === 'string') return value + if (typeof value === 'object') { + try { + return JSON.stringify(value) ?? '' + } catch { + return '' + } + } + return '' +} + +const writeCsvString = async (headers: string[], rows: Array>): Promise => { + return await new Promise((resolve, reject) => { + let output = '' + const csvStream = format({ + delimiter: ',', + headers, + quoteColumns: true, + quoteHeaders: true, + includeEndRowDelimiter: true, + }) + + const sink = new Writable({ + write(chunk: Buffer, _encoding, callback) { + output += chunk.toString('utf8') + callback() + }, + }) + + sink.on('finish', () => resolve(output)) + sink.on('error', reject) + csvStream.on('error', reject) + + csvStream.pipe(sink) + for (const row of rows) { + csvStream.write(row) + } + csvStream.end() + }) +} + +export const TAXON_HEADERS = [ + 'taxonID', + 'nomenclaturalCode', + 'scientificName', + 'genericName', + 'scientificNameAuthorship', + 'vernacularName', + 'taxonRank', + 'taxonomicStatus', + 'kingdom', + 'phylum', + 'class', + 'order', + 'superfamily', + 'family', + 'subfamily', + 'tribe', + 'subtribe', + 'genus', + 'specificEpithet', + 'infraspecificEpithet', + 'higherClassification', + 'taxonRemarks', +] as const + +export type TaxonCsvHeader = (typeof TAXON_HEADERS)[number] +export type TaxonCsvRow = Record + +type SpeciesForTaxonExport = Pick< + Prisma.com_species, + | 'species_id' + | 'class_name' + | 'subclass_or_superorder_name' + | 'order_name' + | 'suborder_or_superfamily_name' + | 'family_name' + | 'subfamily_name' + | 'genus_name' + | 'species_name' + | 'unique_identifier' + | 'taxonomic_status' + | 'common_name' + | 'sp_author' + | 'sp_comment' +> + +const endsWithSuffix = (value: string | null, suffix: string): boolean => { + if (!isMeaningfulString(value)) return false + return value.trim().toLowerCase().endsWith(suffix.toLowerCase()) +} + +const isMeaningfulTaxonName = (value: string | null): boolean => { + if (!isMeaningfulString(value)) 
return false + const trimmed = value.trim() + if (trimmed.includes(' ')) return false + if (trimmed.includes('.')) return false + return true +} + +const isSingleLowercaseWord = (value: string | null): boolean => { + if (!isMeaningfulString(value)) return false + const trimmed = value.trim() + return /^[a-z]+$/.test(trimmed) +} + +const isSinglePropercaseWord = (value: string | null): boolean => { + if (!isMeaningfulString(value)) return false + const trimmed = value.trim() + return /^[A-Z][a-z]+$/.test(trimmed) +} + +const isSpeciesSp = (value: string): boolean => /^sp\.?$/i.test(value.trim()) + +const includesIndet = (value: string): boolean => value.toLowerCase().includes('indet.') + +const resolveTaxonRank = ({ + family, + genus, + specificEpithet, + uniqueIdentifier, + subclassOrSuperorderName, + subfamily, + tribe, + subtribe, +}: { + family: string + genus: string + specificEpithet: string + uniqueIdentifier: string | null + subclassOrSuperorderName: string | null + subfamily: string + tribe: string + subtribe: string +}): string => { + const genusIsPropercaseWord = isSinglePropercaseWord(genus) + const specificEpithetIsLowercaseWord = isSingleLowercaseWord(specificEpithet) + const uniqueIdentifierIsSingleLowercaseWord = isSingleLowercaseWord(uniqueIdentifier) + + // IMPORTANT: Rule order matters; implement in the exact execution order requested. 
+ if (uniqueIdentifierIsSingleLowercaseWord && specificEpithetIsLowercaseWord && genusIsPropercaseWord) { + return 'subspecies' + } + + if (isSpeciesSp(specificEpithet) && genusIsPropercaseWord) return 'species' + + if (uniqueIdentifier === '-' && specificEpithetIsLowercaseWord && genusIsPropercaseWord) return 'species' + + if (uniqueIdentifier !== null && specificEpithetIsLowercaseWord && genusIsPropercaseWord) return 'species' + + if (includesIndet(specificEpithet) && genusIsPropercaseWord) return 'genus' + + if (includesIndet(genus)) { + if (subtribe) return 'subtribe' + if (tribe) return 'tribe' + if (subfamily) return 'subfamily' + + const familyTrimmed = family.trim() + const isIncertaeSedis = familyTrimmed.toLowerCase() === 'incertae sedis' + const endsWithIdae = endsWithSuffix(familyTrimmed, 'idae') + if (!isMeaningfulString(subclassOrSuperorderName) && (endsWithIdae || isIncertaeSedis)) return 'family' + } + + if (includesIndet(family)) return 'order' + + return 'species' +} + +export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRow => { + const genusName = isMeaningfulString(species.genus_name) ? species.genus_name.trim() : '' + const speciesName = isMeaningfulString(species.species_name) ? species.species_name.trim() : '' + const authorship = isMeaningfulString(species.sp_author) ? species.sp_author.trim() : '' + + const higherClassification = [ + species.class_name, + species.subclass_or_superorder_name, + species.order_name, + species.suborder_or_superfamily_name, + species.family_name, + species.subfamily_name, + ] + .map(value => (isMeaningfulString(value) ? value.trim() : null)) + .filter((value): value is string => Boolean(value)) + .join('|') + + const infraspecificEpithet = isMeaningfulString(species.unique_identifier) ? species.unique_identifier.trim() : '' + + const taxonomicStatus = isMeaningfulString(species.taxonomic_status) ? 
species.taxonomic_status.trim() : 'accepted' + + const superfamily = endsWithSuffix(species.subclass_or_superorder_name, 'oidea') + ? species.subclass_or_superorder_name!.trim() + : '' + + const subfamilyRaw = isMeaningfulString(species.subfamily_name) ? species.subfamily_name.trim() : '' + const subfamily = subfamilyRaw && subfamilyRaw.toLowerCase().endsWith('inae') ? subfamilyRaw : '' + const tribe = subfamilyRaw && subfamilyRaw.toLowerCase().endsWith('ini') ? subfamilyRaw : '' + const subtribe = subfamilyRaw && subfamilyRaw.toLowerCase().endsWith('ina') ? subfamilyRaw : '' + + const genericName = isMeaningfulTaxonName(speciesName) ? genusName : '' + + const taxonRank = resolveTaxonRank({ + family: isMeaningfulString(species.family_name) ? species.family_name.trim() : '', + genus: genusName, + specificEpithet: speciesName, + uniqueIdentifier: isMeaningfulString(species.unique_identifier) ? species.unique_identifier.trim() : null, + subfamily, + tribe, + subtribe, + subclassOrSuperorderName: species.subclass_or_superorder_name, + }) + + const scientificName = (() => { + const familyName = isMeaningfulString(species.family_name) ? species.family_name.trim() : '' + const orderName = isMeaningfulString(species.order_name) ? species.order_name.trim() : '' + const className = isMeaningfulString(species.class_name) ? 
species.class_name.trim() : '' + + switch (taxonRank) { + case 'subspecies': + return [genusName, speciesName, infraspecificEpithet, authorship].filter(Boolean).join(' ').trim() + case 'species': + return [genusName, speciesName, authorship].filter(Boolean).join(' ').trim() + case 'genus': + return [genusName, authorship].filter(Boolean).join(' ').trim() + case 'family': + return familyName + case 'superfamily': + return superfamily + case 'subfamily': + return subfamily + case 'tribe': + return tribe + case 'subtribe': + return subtribe + case 'order': + return orderName + case 'class': + return className + default: + return [genusName, speciesName, authorship].filter(Boolean).join(' ').trim() + } + })() + + return { + taxonID: `NOW:${species.species_id}`, + nomenclaturalCode: 'ICZN', + scientificName, + genericName, + scientificNameAuthorship: authorship, + vernacularName: isMeaningfulString(species.common_name) ? species.common_name.trim() : '', + taxonRank, + taxonomicStatus, + kingdom: 'Animalia', + phylum: 'Chordata', + class: isMeaningfulString(species.class_name) ? species.class_name.trim() : '', + order: isMeaningfulString(species.order_name) ? species.order_name.trim() : '', + superfamily, + family: isMeaningfulString(species.family_name) ? species.family_name.trim() : '', + subfamily, + tribe, + subtribe, + genus: genusName, + specificEpithet: speciesName, + infraspecificEpithet, + higherClassification, + taxonRemarks: isMeaningfulString(species.sp_comment) ? 
species.sp_comment.trim() : '', + } +} + +export const MEASUREMENT_HEADERS = [ + 'taxonID', + 'measurementID', + 'parentMeasurementID', + 'measurementType', + 'verbatimMeasurementType', + 'measurementValue', + 'measurementUnit', + 'measurementMethod', +] as const + +export type MeasurementCsvHeader = (typeof MEASUREMENT_HEADERS)[number] +export type MeasurementCsvRow = Record + +type SpeciesForMeasurementExport = Pick< + Prisma.com_species, + | 'species_id' + | 'strain' + | 'gene' + | 'taxon_status' + | 'body_mass' + | 'brain_mass' + | 'sv_length' + | 'sd_size' + | 'sd_display' + | 'tshm' + | 'symph_mob' + | 'relative_blade_length' + | 'tht' + | 'diet1' + | 'diet2' + | 'diet3' + | 'diet_description' + | 'rel_fib' + | 'selectivity' + | 'digestion' + | 'feedinghab1' + | 'feedinghab2' + | 'shelterhab1' + | 'shelterhab2' + | 'locomo1' + | 'locomo2' + | 'locomo3' + | 'hunt_forage' + | 'activity' + | 'crowntype' + | 'microwear' + | 'horizodonty' + | 'cusp_shape' + | 'cusp_count_buccal' + | 'cusp_count_lingual' + | 'loph_count_lon' + | 'loph_count_trs' + | 'fct_al' + | 'fct_ol' + | 'fct_sf' + | 'fct_ot' + | 'fct_cm' + | 'mesowear' + | 'mw_or_high' + | 'mw_or_low' + | 'mw_cs_sharp' + | 'mw_cs_round' + | 'mw_cs_blunt' + | 'mw_scale_min' + | 'mw_scale_max' + | 'mw_value' + | 'pop_struc' + | 'sp_status' +> + +const isMeaningfulMeasurementValue = (value: unknown): boolean => { + if (value === null || value === undefined) return false + if (typeof value === 'string') { + return isMeaningfulString(value) + } + return true +} + +const buildCrownTypeMeasurementId = (speciesId: number, kind: 'developmental_crown_type' | 'functional_crown_type') => + `NOW:${speciesId}:${kind}` + +type CrownSegment = string | number | null | undefined + +const mapCrownSegment = (segment: CrownSegment): string => { + if (segment === null || segment === undefined || segment === '') { + return '-' + } + + return String(segment) +} + +const formatDevelopmentalCrownType = (source: 
SpeciesForMeasurementExport): string => { + return [ + source.cusp_shape, + source.cusp_count_buccal, + source.cusp_count_lingual, + source.loph_count_lon, + source.loph_count_trs, + ] + .map(mapCrownSegment) + .join('') +} + +const formatFunctionalCrownType = (source: SpeciesForMeasurementExport): string => { + return [source.fct_al, source.fct_ol, source.fct_sf, source.fct_ot, source.fct_cm].map(mapCrownSegment).join('') +} + +const MEASUREMENT_FIELD_MAPPINGS: Array<{ + field: keyof SpeciesForMeasurementExport + measurementType: string + measurementUnit: string + measurementMethod: string + parentKind?: 'developmental_crown_type' | 'functional_crown_type' +}> = [ + // NOTE: In v1, measurementMethod is populated from the Pantheria VSP manual where available: + // https://www.pantherion.com/dbmanual97/VSP.html + { + field: 'strain', + measurementType: 'strain', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'gene', + measurementType: 'gene', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'taxon_status', + measurementType: 'taxon status', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'body_mass', + measurementType: 'body mass', + measurementUnit: 'g', + measurementMethod: + 'The average adult body mass estimated for the species, in grams. Where there is sexual dimorphism in size, put the mean of the two sexes here and record the masses per sex, if known, in the Comment field. Confidence intervals, if known, can also be put there.', + }, + { + field: 'brain_mass', + measurementType: 'brain mass', + measurementUnit: 'g', + measurementMethod: + 'The average adult brain mass estimated for the species, in grams. 
Where there is sexual dimorphism in size, put the mean of the two sexes here and record the masses per sex, if known, in the Comment field. Confidence intervals, if known, can also be put there.', + }, + { + field: 'sv_length', + measurementType: 'snout-vent length', + measurementUnit: '', + measurementMethod: + 'For many species body-mass values will be unavailable or cannot be estimated with any confidence. However, every species should be classifiable into one of the gross size ranges listed below. This field will allow at least a crude characterization of body sizes for any fossil locality.', + }, + { + field: 'sd_size', + measurementType: 'sexual dimorphism - size', + measurementUnit: '', + measurementMethod: 'Whether there is sexual dimorphism in overall body size.', + }, + { + field: 'sd_display', + measurementType: 'sexual dimorphism - display', + measurementUnit: '', + measurementMethod: + 'Whether there is evidence of sexual dimorphism in display (or sexual combat) structures. (e. g., horns, antlers, dome-heads, canines). If the presence of these features is unknown, leave the field blank rather than enter "n."', + }, + { + field: 'tshm', + measurementType: 'tooth shape -- multicuspid', + measurementUnit: '', + measurementMethod: + 'A description of the morphology of the tooth crown, for multicusped teeth (if present). In concert with the other tooth morphology fields, this may allow functional interpretations to be made independently of whatever has been entered in the diet fields. Terminology for tooth-crown morphology is most highly developed for extant and fossil mammals, but no system has gained universal acceptance. The following reflects a compromise among many competing traditional systems, and is based partly on Fortelius (1985) and Janis and Fortelius (1988). This field is currently subject to further development. Improved nomenclature for some mammal groups, such as rodents and insectivores, might be more functionally indicative. 
Also, an expanded list of terms would be useful to characterize more fully the variation found among nonmammalian terrestrial vertebrates -- dinosaurs and therapsids in particular. The similar Molar Crown Type field is based on an alternative descriptive classification scheme, and currently applies only to mammals.', + }, + { + field: 'symph_mob', + measurementType: 'symphyseal mobility', + measurementUnit: '', + measurementMethod: 'Whether or not the mandibular symphysis is mobile.', + }, + { + field: 'relative_blade_length', + measurementType: 'relative blade length', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'tht', + measurementType: 'tooth height', + measurementUnit: '', + measurementMethod: + 'An indication of hypsodonty (tooth crown height) or the nature of other adaptations to deal with the problem of lifetime tooth wear. Tooth replacement, Tooth plates, and Hypselodont (ever-growing teeth) are absolute descriptors. The terms Brachydont, Mesodont and Hypsodont refer to different degrees of crown height of (mammalian) cheek teeth, and are subject to a variety of interpretations. Hypsodont (high-crowned) teeth may be defined objectively as those where the antero-posterior length is exceeded by the dorso-ventral height (Janis & Fortelius, 1988). "Somewhat hypsodont" teeth, intermediate between brachydont and hypsodont, are referred to as "mesodont," but there is no corresponding objective definition of this term. Quantitative indices of hypsodonty have been used (Janis, 1988), and might prove superior to the classification scheme presented here. Thus, this field is currently subject to further development.', + }, + { + field: 'diet1', + measurementType: 'diet category 1', + measurementUnit: '', + measurementMethod: + 'The predominant food type in the diet of the species, at the coarsest level of resolution: Animal, Plant, Omnivore. 
See also Diet 3, Diet 2, Relative Fiber Content, Selectivity, Food Processing Mode, Digestion.', + }, + { + field: 'diet2', + measurementType: 'diet category 2', + measurementUnit: '', + measurementMethod: + 'The predominant food type in the diet of the species, at an intermediate level of resolution. See also Diet 1, Diet 3, Relative Fiber Content, Selectivity, Food Processing Mode, Digestion.', + }, + { + field: 'diet3', + measurementType: 'diet category 3', + measurementUnit: '', + measurementMethod: + 'The predominant, or most important or most characteristic, food type in the diet of the species, at a detailed level of resolution. At this scale, the diets of many species will not be clearly distinguishable from one another using only a single term for the most common dietary component. Nevertheless, highly variable food-type categories often delineate distinct ecological/adaptive/functional types (as in the case of mixed browsing/grazing ungulates). That is, calling something a "frugivore" may not explicitly describe other components of its diet, some of which may be of adaptive importance to the species; it does not allow one to distinguish among species within the frugivore category, either. But it does allow one to place the species between omnivores or insectivores, on the one hand, and browsers, on the other.', + }, + { + field: 'diet_description', + measurementType: 'diet description', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'rel_fib', + measurementType: 'relative fiber content', + measurementUnit: '', + measurementMethod: + 'The relative amount of plant fiber in the food of the species. Plant food can be divided into cell contents such as sugars, proteins and storage carbohydrates, which are directly digestible by vertebrates. 
Plant cell-walls, however, are composed of material ("fiber") partially digestible only by microbial fermentation. Thus, the higher the fiber content, relative to the amount of energy contained in the easily-digested portion, the harder it is to obtain energy from the forage and the poorer the "quality" of the food on a per-unit basis. In addition, the proportion of the fiber digestible by fermentation also varies among plant species, plant parts, and growth stages. This field describes the food as having high, medium, and low levels of fiber. It is intended as a rough indication of the nutritional quality of a species\' diet. It refers only to herbivorous diets, or the plant portions of omnivorous diets. (The field basically functions to group various Diet 3 categories by relative fiber content.)', + }, + { + field: 'selectivity', + measurementType: 'selectivity', + measurementUnit: '', + measurementMethod: + 'Within its food-type category (Diet 1-3) a species may feed selectively or unselectively. Thus this field applies to any dietary category. Some food types impose selectivity restrictions on the species that feed on them. For example, most large grazers are less selective than mixed feeders or browsers. This is not what this field is meant to indicate! Rather, it applies within dietary categories. It could, for example, be used to distinguish between relatively selective and relatively unselective grazers.', + }, + { + field: 'digestion', + measurementType: 'digestion', + measurementUnit: '', + measurementMethod: + 'There are different broad strategies for breaking down plant material by means of microbial activity in the gut. Hindgut fermenters (hg) and foregut fermenters (fg) are found in a variety of living taxa. 
True ruminants (ru) are confined to the ruminant artiodactyls; they are separated here from other foregut fermenters, of which they form a special derived subclass.', + }, + { + field: 'feedinghab1', + measurementType: 'feeding habitat 1', + measurementUnit: '', + measurementMethod: + 'The general habitat from which the species obtains the major part of its trophic resources, and in which it ordinarily spends time feeding. The allowed values are identical to those for Shelter Habitat 1. See also Feeding Habitat 2.', + }, + { + field: 'feedinghab2', + measurementType: 'feeding habitat 2', + measurementUnit: '', + measurementMethod: + 'For the Terrestrial (te) entry in Feeding Habitat 1 only, a further breakdown into more specific feeding habitats. They are described more fully below.', + }, + { + field: 'shelterhab1', + measurementType: 'shelter habitat 1', + measurementUnit: '', + measurementMethod: + 'The general habitat in which the animal sleeps, shelters, or avoids predation when not feeding. The allowed values are identical to those for Feeding Habitat 1. See also Shelter Habitat 2.', + }, + { + field: 'shelterhab2', + measurementType: 'shelter habitat 2', + measurementUnit: '', + measurementMethod: + 'For the Terrestrial (te) entry in Shelter Habitat 1 only, a further breakdown into more specific shelter habitats. They are described more fully below, and are mostly identical to the fields for Feeding Habitat 2.', + }, + { + field: 'locomo1', + measurementType: 'locomotion 1', + measurementUnit: '', + measurementMethod: + 'The general substrate upon which locomotion characteristically takes place. These categories are the same as those in Feeding Habitat 1 and Shelter Habitat 1.', + }, + { + field: 'locomo2', + measurementType: 'locomotion 2', + measurementUnit: '', + measurementMethod: + 'For non-aquatic, non-aerial species the terrestrial substrate upon which locomotion characteristically takes place. 
"Arboreal" describes species that almost never come to the ground, or, if they do, it is almost always for the purpose of dispersing to another tree or trees. "Scansorial" is a broad category including those species that habitually use both trees and the ground in their movements. At the non-arboreal extreme, it includes species that rarely in practice use the trees, but are not morphologically prevented from doing so. [This category may eventually have to be split to distinguish species that exhibit some arboreal adaptations (e.g., squirrels), from those that could climb in a limited way if they had to (e.g., lions).] "Surficial" refers to those creatures who use only the ground surface in locomotion (e.g., sauropods, wildebeeste).', + }, + { + field: 'locomo3', + measurementType: 'locomotion 3', + measurementUnit: '', + measurementMethod: + 'The predominant mode of locomotor activity. [These categories are not necessarily complete at this time.] The categorization of flight locomotion in Locomotion 2 and Locomotion 3 is based on Norberg (1985).', + }, + { + field: 'hunt_forage', + measurementType: 'hunt/forage', + measurementUnit: '', + measurementMethod: + 'The predominant hunting or foraging mode for carnivores. These categories are based upon those of Van Valkenburgh (1985) and are described more fully there. This field might also be of eventual use in describing foraging modes of non-carnivores, but at present these cannot be determined directly upon morphological criteria (such inferences as can be made are already taken care of in Feeding Habitat, Diet and Locomotion.)', + }, + { + field: 'activity', + measurementType: 'activity', + measurementUnit: '', + measurementMethod: + 'The primary time of day during which the species was active. 
Choices are Diurnal, Crepuscular, or Nocturnal.', + }, + { + field: 'crowntype', + measurementType: 'crown type', + measurementUnit: '', + measurementMethod: + 'This field describes the morphology of mammalian molar crowns, and is complimentary to the Tooth Shape - Multicuspid field. The latter presents a traditional classification of molar crown types (and other multicusped teeth) for vertebrates. Molar Crown Type, in contrast, uses a more recently developed classification scheme that is currently restricted to mammals. The scheme is phylogenetically neutral and descriptive, allowing functional interpretations and interpretations of underlying developmental mechanisms (see Jernvall, 1995). Currently, the values for the field consist of five-letter alphanumeric codes, described in Jernvall, et al. (1996), and the reader is referred to that paper for further explanation.', + }, + { + field: 'microwear', + measurementType: 'microwear', + measurementUnit: '', + measurementMethod: + "This field describes the kind of microwear (in terms of striations or pits) revealed by microscopic examination of the wear facets of the tooth crowns of the species. A considerable literature exists concerning the ways to infer aspects of a species' diet from patterns of microwear.", + }, + { + field: 'horizodonty', + measurementType: 'horizodonty', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'cusp_shape', + measurementType: 'cusp shape', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + parentKind: 'developmental_crown_type', + }, + { + field: 'cusp_count_buccal', + measurementType: 'cusp count (buccal)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
+ measurementMethod: '', + parentKind: 'developmental_crown_type', + }, + { + field: 'cusp_count_lingual', + measurementType: 'cusp count (lingual)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + parentKind: 'developmental_crown_type', + }, + { + field: 'loph_count_lon', + measurementType: 'loph count (longitudinal)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + parentKind: 'developmental_crown_type', + }, + { + field: 'loph_count_trs', + measurementType: 'loph count (transverse)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + parentKind: 'developmental_crown_type', + }, + { + field: 'fct_al', + measurementType: 'functional crown type (AL)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + parentKind: 'functional_crown_type', + }, + { + field: 'fct_ol', + measurementType: 'functional crown type (OL)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + parentKind: 'functional_crown_type', + }, + { + field: 'fct_sf', + measurementType: 'functional crown type (SF)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + parentKind: 'functional_crown_type', + }, + { + field: 'fct_ot', + measurementType: 'functional crown type (OT)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
+ measurementMethod: '', + parentKind: 'functional_crown_type', + }, + { + field: 'fct_cm', + measurementType: 'functional crown type (CM)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + parentKind: 'functional_crown_type', + }, + { + field: 'mesowear', + measurementType: 'mesowear', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_or_high', + measurementType: 'cusp relief high (OR%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_or_low', + measurementType: 'cusp relief low (OR%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_cs_sharp', + measurementType: 'cusp shape sharp (CS%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_cs_round', + measurementType: 'cusp shape round (CS%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_cs_blunt', + measurementType: 'cusp shape blunt (CS%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_scale_min', + measurementType: 'mesowear scale min', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
+ measurementMethod: '', + }, + { + field: 'mw_scale_max', + measurementType: 'mesowear scale max', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_value', + measurementType: 'mesowear value', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'pop_struc', + measurementType: 'population structure', + measurementUnit: '', + measurementMethod: + 'Occasionally there will be evidence of herding or other gregarious behavior for a species. This could include evidence from mass deaths, well-preserved trace fossils (e.g., trackways), nesting-site or burrow aggregations, or association of individuals in burrows. It could also be based, less directly, on other aspects of the organism\'s biology -- for example, sexual dimorphism in sexual display or combat features. If so, indicate "soc" here and give details briefly in the Comment field. The choice "sol" (solitary) is allowed for completeness, but ordinarily there will be no positive evidence for solitary behavior, so the alternative to "soc" is usually a blank.', + }, + { + field: 'sp_status', + measurementType: 'species status', + measurementUnit: '', + // TODO(#1150): Add field description / meaning for NOW database usage. 
+ measurementMethod: '', + }, +] + +export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport): MeasurementCsvRow[] => { + const taxonID = `NOW:${species.species_id}` + const speciesId = species.species_id + + const developmentalSegments = [ + species.cusp_shape, + species.cusp_count_buccal, + species.cusp_count_lingual, + species.loph_count_lon, + species.loph_count_trs, + ] + const functionalSegments = [species.fct_al, species.fct_ol, species.fct_sf, species.fct_ot, species.fct_cm] + + const hasDevelopmentalCrownType = developmentalSegments.some(isMeaningfulMeasurementValue) + const hasFunctionalCrownType = functionalSegments.some(isMeaningfulMeasurementValue) + + const parentIds = { + developmental_crown_type: hasDevelopmentalCrownType + ? buildCrownTypeMeasurementId(speciesId, 'developmental_crown_type') + : '', + functional_crown_type: hasFunctionalCrownType + ? buildCrownTypeMeasurementId(speciesId, 'functional_crown_type') + : '', + } as const + + const calculatedRows: MeasurementCsvRow[] = [] + + if (hasDevelopmentalCrownType) { + calculatedRows.push({ + taxonID, + measurementID: parentIds.developmental_crown_type, + parentMeasurementID: '', + measurementType: 'developmental crown type', + verbatimMeasurementType: 'developmental_crown_type', + measurementValue: formatDevelopmentalCrownType(species), + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }) + } + + if (hasFunctionalCrownType) { + calculatedRows.push({ + taxonID, + measurementID: parentIds.functional_crown_type, + parentMeasurementID: '', + measurementType: 'functional crown type', + verbatimMeasurementType: 'functional_crown_type', + measurementValue: formatFunctionalCrownType(species), + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }) + } + + const fieldRows = MEASUREMENT_FIELD_MAPPINGS.flatMap(mapping => { + if (mapping.field === 'species_id') return [] + const rawValue = species[mapping.field] + if (rawValue === null || rawValue === undefined) return [] + + if (typeof rawValue === 'string' && !isMeaningfulString(rawValue)) return [] + + const measurementValue = toDwcString(rawValue).trim() + if (!measurementValue) return [] + + return [ + { + taxonID, + measurementID: `NOW:${species.species_id}:${mapping.field.toString()}`, + parentMeasurementID: mapping.parentKind ? parentIds[mapping.parentKind] : '', + measurementType: mapping.measurementType, + verbatimMeasurementType: mapping.field.toString(), + measurementValue, + measurementUnit: mapping.measurementUnit, + measurementMethod: mapping.measurementMethod, + }, + ] + }) + + return [...calculatedRows, ...fieldRows] +} + +const DWC_TERMS = { + taxon: { + rowType: 'http://rs.tdwg.org/dwc/terms/Taxon', + taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID', + nomenclaturalCode: 'http://rs.tdwg.org/dwc/terms/nomenclaturalCode', + scientificName: 'http://rs.tdwg.org/dwc/terms/scientificName', + genericName: 'http://rs.tdwg.org/dwc/terms/genericName', + scientificNameAuthorship: 'http://rs.tdwg.org/dwc/terms/scientificNameAuthorship', + vernacularName: 'http://rs.tdwg.org/dwc/terms/vernacularName', + taxonRank: 'http://rs.tdwg.org/dwc/terms/taxonRank', + taxonomicStatus: 'http://rs.tdwg.org/dwc/terms/taxonomicStatus', + kingdom: 'http://rs.tdwg.org/dwc/terms/kingdom', + phylum: 'http://rs.tdwg.org/dwc/terms/phylum', + class: 'http://rs.tdwg.org/dwc/terms/class', + order: 'http://rs.tdwg.org/dwc/terms/order', + superfamily: 'http://rs.tdwg.org/dwc/terms/superfamily', + family: 'http://rs.tdwg.org/dwc/terms/family', + subfamily: 'http://rs.tdwg.org/dwc/terms/subfamily', + tribe: 'http://rs.tdwg.org/dwc/terms/tribe', + subtribe: 'http://rs.tdwg.org/dwc/terms/subtribe', + genus: 'http://rs.tdwg.org/dwc/terms/genus', + 
specificEpithet: 'http://rs.tdwg.org/dwc/terms/specificEpithet', + infraspecificEpithet: 'http://rs.tdwg.org/dwc/terms/infraspecificEpithet', + higherClassification: 'http://rs.tdwg.org/dwc/terms/higherClassification', + taxonRemarks: 'http://rs.tdwg.org/dwc/terms/taxonRemarks', + }, + measurement: { + rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', + taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID', + measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', + parentMeasurementID: 'http://rs.tdwg.org/dwc/terms/parentMeasurementID', + measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', + verbatimMeasurementType: 'http://rs.tdwg.org/dwc/terms/verbatimMeasurementType', + measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', + measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', + measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', + }, +} as const + +export const buildMetaXml = (): string => { + const taxonFields = TAXON_HEADERS.map((header, index) => { + const term = (DWC_TERMS.taxon as Record)[header] + return ` ` + }).join('\n') + + const measurementFields = MEASUREMENT_HEADERS.map((header, index) => { + const term = (DWC_TERMS.measurement as Record)[header] + return ` ` + }).join('\n') + + return ` + + + + taxon.csv + + +${taxonFields} + + + + measurementorfact.csv + + +${measurementFields} + + +` +} + +export const buildEmlXml = (publicationDateIso: string): string => { + return ` + + + + NOW database Darwin Core test export + + + NOW database + + + + + NOW database + + + ${publicationDateIso} + + Admin-only test Darwin Core Archive export from NOW database. Field mappings are intentionally limited for v1. + + + TODO(#1150): Add rights / license information. 
+ + + +` +} + +export const buildDwcArchiveZipBufferFromSpecies = async ( + speciesRows: Array +): Promise => { + const taxonRows = speciesRows.map(mapSpeciesToTaxonRow) + const measurementRows = speciesRows.flatMap(mapSpeciesToMeasurementRows) + + const taxonCsv = await writeCsvString([...TAXON_HEADERS], taxonRows) + const measurementCsv = await writeCsvString([...MEASUREMENT_HEADERS], measurementRows) + const metaXml = buildMetaXml() + const publicationDateIso = new Date().toISOString().slice(0, 10) + const emlXml = buildEmlXml(publicationDateIso) + + const zip = new JSZip() + zip.file('taxon.csv', taxonCsv) + zip.file('measurementorfact.csv', measurementCsv) + zip.file('meta.xml', metaXml) + zip.file('eml.xml', emlXml) + + return await zip.generateAsync({ type: 'nodebuffer', compression: 'DEFLATE', compressionOptions: { level: 6 } }) +} + +export const fetchSpeciesForDwcExport = async (): Promise< + Array +> => { + const { nowDb } = await import('../utils/db') + // NOTE: v1 intentionally exports only com_species rows as taxa. + // TODO(#1150): Add synonym export from com_taxa_synonym. 
+ return await nowDb.com_species.findMany({ + select: { + species_id: true, + class_name: true, + subclass_or_superorder_name: true, + order_name: true, + suborder_or_superfamily_name: true, + family_name: true, + subfamily_name: true, + genus_name: true, + species_name: true, + unique_identifier: true, + taxonomic_status: true, + common_name: true, + sp_author: true, + sp_comment: true, + strain: true, + gene: true, + taxon_status: true, + body_mass: true, + brain_mass: true, + sv_length: true, + sd_size: true, + sd_display: true, + tshm: true, + symph_mob: true, + relative_blade_length: true, + tht: true, + diet1: true, + diet2: true, + diet3: true, + diet_description: true, + rel_fib: true, + selectivity: true, + digestion: true, + feedinghab1: true, + feedinghab2: true, + shelterhab1: true, + shelterhab2: true, + locomo1: true, + locomo2: true, + locomo3: true, + hunt_forage: true, + activity: true, + crowntype: true, + microwear: true, + mesowear: true, + horizodonty: true, + cusp_shape: true, + cusp_count_buccal: true, + cusp_count_lingual: true, + loph_count_lon: true, + loph_count_trs: true, + fct_al: true, + fct_ol: true, + fct_sf: true, + fct_ot: true, + fct_cm: true, + mw_or_high: true, + mw_or_low: true, + mw_cs_sharp: true, + mw_cs_round: true, + mw_cs_blunt: true, + mw_scale_min: true, + mw_scale_max: true, + mw_value: true, + pop_struc: true, + sp_status: true, + }, + }) +} + +export const buildDwcArchiveZipBuffer = async (): Promise => { + const speciesRows = await fetchSpeciesForDwcExport() + return await buildDwcArchiveZipBufferFromSpecies(speciesRows) +} diff --git a/backend/src/services/dwcArchiveExportLocalities.ts b/backend/src/services/dwcArchiveExportLocalities.ts new file mode 100644 index 000000000..5c257a4c6 --- /dev/null +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -0,0 +1,1536 @@ +import type { now_loc, now_time_unit } from '../../prisma/generated/now_test_client' +import { format } from 'fast-csv' +import { Writable } 
from 'stream' +import JSZip from 'jszip' +import { getContinentByCountry } from '../../../frontend/src/shared/validators/countryContinents' + +const isMeaningfulString = (value: unknown): value is string => { + if (typeof value !== 'string') return false + const trimmed = value.trim() + if (!trimmed) return false + if (trimmed === '-') return false + return true +} + +const toDwcString = (value: unknown): string => { + if (value === null || value === undefined) return '' + if (typeof value === 'bigint') return value.toString() + if (typeof value === 'number') return Number.isFinite(value) ? value.toString() : '' + if (typeof value === 'boolean') return value ? 'true' : 'false' + if (typeof value === 'string') return value + if (typeof value === 'object') { + if (value instanceof Date) return value.toISOString() + if (typeof (value as { toString?: unknown }).toString === 'function') { + const asString = (value as { toString: () => string }).toString() + if (asString && asString !== '[object Object]') return asString + } + try { + return JSON.stringify(value) ?? 
'' + } catch { + return '' + } + } + return '' +} + +const writeCsvString = async (headers: string[], rows: Array>): Promise => { + return await new Promise((resolve, reject) => { + let output = '' + const csvStream = format({ + delimiter: ',', + headers, + quoteColumns: true, + quoteHeaders: true, + includeEndRowDelimiter: true, + }) + + const sink = new Writable({ + write(chunk: Buffer | string, _encoding: BufferEncoding, callback: (error?: Error | null) => void) { + if (typeof chunk === 'string') { + output += chunk + } else { + output += chunk.toString('utf8') + } + callback() + }, + }) + + sink.on('finish', () => resolve(output)) + sink.on('error', reject) + csvStream.on('error', reject) + + csvStream.pipe(sink) + for (const row of rows) { + csvStream.write(row) + } + csvStream.end() + }) +} + +export const LOCATION_HEADERS = [ + 'locationID', + 'locality', + 'continent', + 'country', + 'stateProvince', + 'county', + 'higherGeography', + 'decimalLatitude', + 'decimalLongitude', + 'verbatimLatitude', + 'verbatimLongitude', + 'verbatimElevation', + 'locationRemarks', +] as const + +export type LocationCsvHeader = (typeof LOCATION_HEADERS)[number] +export type LocationCsvRow = Record + +export const GEOLOGICAL_CONTEXT_HEADERS = [ + 'locationID', + 'geologicalContextID', + 'lithostratigraphicTerms', + 'group', + 'formation', + 'member', + 'bed', + 'earliestAgeOrLowestStage', + 'latestAgeOrHighestStage', +] as const + +export type GeologicalContextCsvHeader = (typeof GEOLOGICAL_CONTEXT_HEADERS)[number] +export type GeologicalContextCsvRow = Record + +export const LOCALITY_MEASUREMENT_HEADERS = [ + 'taxonID', + 'measurementID', + 'parentMeasurementID', + 'measurementType', + 'verbatimMeasurementType', + 'measurementValue', + 'measurementUnit', + 'measurementMethod', +] as const + +export type LocalityMeasurementCsvHeader = (typeof LOCALITY_MEASUREMENT_HEADERS)[number] +export type LocalityMeasurementCsvRow = Record + +type TimeUnitForLocalityExport = Pick + +type 
LocalityForExport = Pick< + now_loc, + | 'lid' + | 'loc_name' + | 'basin' + | 'subbasin' + | 'country' + | 'state' + | 'county' + | 'dec_lat' + | 'dec_long' + | 'dms_lat' + | 'dms_long' + | 'approx_coord' + | 'altitude' + | 'loc_detail' + | 'chron' + | 'lgroup' + | 'formation' + | 'member' + | 'bed' + | 'bfa_max' + | 'bfa_min' + | 'bfa_max_abs' + | 'bfa_min_abs' + | 'frac_max' + | 'frac_min' + | 'max_age' + | 'min_age' + | 'date_meth' + | 'age_comm' + | 'site_area' + | 'gen_loc' + | 'plate' + | 'appr_num_spm' + | 'num_spm' + | 'true_quant' + | 'complete' + | 'num_quad' + | 'rock_type' + | 'rt_adj' + | 'lith_comm' + | 'depo_context1' + | 'depo_context2' + | 'depo_context3' + | 'depo_context4' + | 'depo_comm' + | 'sed_env_1' + | 'sed_env_2' + | 'event_circum' + | 'se_comm' + | 'assem_fm' + | 'transport' + | 'trans_mod' + | 'weath_trmp' + | 'pt_conc' + | 'size_type' + | 'vert_pres' + | 'plant_pres' + | 'invert_pres' + | 'time_rep' + | 'taph_comm' + | 'tax_comm' + | 'datum_plane' + | 'tos' + | 'bos' + | 'loc_status' + | 'hominin_skeletal_remains' + | 'climate_type' + | 'biome' + | 'v_ht' + | 'v_struct' + | 'v_envi_det' + | 'disturb' + | 'nutrients' + | 'water' + | 'seasonality' + | 'seas_intens' + | 'pri_prod' + | 'moisture' + | 'temperature' + | 'estimate_precip' + | 'estimate_temp' + | 'estimate_npp' + | 'pers_woody_cover' + | 'pers_pollen_ap' + | 'pers_pollen_nap' + | 'pers_pollen_other' + | 'stone_tool_cut_marks_on_bones' + | 'bipedal_footprints' + | 'stone_tool_technology' + | 'technological_mode_1' + | 'technological_mode_2' + | 'technological_mode_3' + | 'cultural_stage_1' + | 'cultural_stage_2' + | 'cultural_stage_3' + | 'regional_culture_1' + | 'regional_culture_2' + | 'regional_culture_3' +> & { + now_time_unit_now_loc_bfa_maxTonow_time_unit: TimeUnitForLocalityExport | null + now_time_unit_now_loc_bfa_minTonow_time_unit: TimeUnitForLocalityExport | null + now_syn_loc: ReadonlyArray<{ synonym: string | null }> + now_ss: ReadonlyArray<{ sed_struct: string }> + 
now_coll_meth: ReadonlyArray<{ coll_meth: string }> + now_mus: ReadonlyArray<{ + museum: string + com_mlist: { + institution: string + alt_int_name: string | null + city: string | null + state: string | null + country: string | null + } + }> + now_ls: ReadonlyArray<{ + com_species: { + order_name: string + tht: string | null + genus_name: string | null + } + }> +} + +const locationIdForLocality = (lid: number): string => `NOW:LOC:${lid}` + +const toMaybeMeaningful = (value: string | null | undefined): string => (isMeaningfulString(value) ? value.trim() : '') + +const toMaybeMeaningfulNumber = (value: number | null | undefined): string => { + if (value === null || value === undefined) return '' + if (!Number.isFinite(value)) return '' + // In NOW, many numeric fields default to 0 for "unknown". Treat 0 as empty for export. + if (value === 0) return '' + return value.toString() +} + +export const mapLocalityToLocationRow = (locality: LocalityForExport): LocationCsvRow => { + const locationID = locationIdForLocality(locality.lid) + const continent = getContinentByCountry(locality.country) ?? '' + + const higherGeography = [ + continent, + toMaybeMeaningful(locality.country), + toMaybeMeaningful(locality.state), + toMaybeMeaningful(locality.county), + toMaybeMeaningful(locality.basin), + toMaybeMeaningful(locality.subbasin), + ] + .filter(Boolean) + .join('|') + + return { + locationID, + locality: toMaybeMeaningful(locality.loc_name), + continent, + country: toMaybeMeaningful(locality.country), + stateProvince: toMaybeMeaningful(locality.state), + county: toMaybeMeaningful(locality.county), + higherGeography, + decimalLatitude: toMaybeMeaningfulNumber(locality.dec_lat), + decimalLongitude: toMaybeMeaningfulNumber(locality.dec_long), + verbatimLatitude: toMaybeMeaningful(locality.dms_lat), + verbatimLongitude: toMaybeMeaningful(locality.dms_long), + verbatimElevation: locality.altitude === null || locality.altitude === undefined ? 
'' : String(locality.altitude), + locationRemarks: [ + toMaybeMeaningful(locality.loc_detail), + toMaybeMeaningful(locality.age_comm), + toMaybeMeaningful(locality.tax_comm), + ] + .filter(Boolean) + .join(' | '), + } +} + +const timeUnitDisplayOrName = (timeUnit: TimeUnitForLocalityExport | null, fallbackName: string | null): string => { + if (timeUnit) { + return isMeaningfulString(timeUnit.tu_display_name) ? timeUnit.tu_display_name.trim() : timeUnit.tu_name.trim() + } + return isMeaningfulString(fallbackName) ? fallbackName.trim() : '' +} + +export const mapLocalityToGeologicalContextRow = (locality: LocalityForExport): GeologicalContextCsvRow => { + const locationID = locationIdForLocality(locality.lid) + + const lithostratigraphicTerms = [ + toMaybeMeaningful(locality.chron), + toMaybeMeaningful(locality.lgroup), + toMaybeMeaningful(locality.formation), + toMaybeMeaningful(locality.member), + toMaybeMeaningful(locality.bed), + ] + .filter(Boolean) + .join(' | ') + + return { + locationID, + geologicalContextID: `NOW:LOC:${locality.lid}:geology`, + lithostratigraphicTerms, + group: toMaybeMeaningful(locality.lgroup), + formation: toMaybeMeaningful(locality.formation), + member: toMaybeMeaningful(locality.member), + bed: toMaybeMeaningful(locality.bed), + earliestAgeOrLowestStage: timeUnitDisplayOrName( + locality.now_time_unit_now_loc_bfa_maxTonow_time_unit, + locality.bfa_max + ), + latestAgeOrHighestStage: timeUnitDisplayOrName( + locality.now_time_unit_now_loc_bfa_minTonow_time_unit, + locality.bfa_min + ), + } +} + +const isMeaningfulMeasurementValue = (value: unknown): boolean => { + if (value === null || value === undefined) return false + if (typeof value === 'string') return isMeaningfulString(value) + if (typeof value === 'number') return Number.isFinite(value) && value !== 0 + if (typeof value === 'boolean') return value + return true +} + +const buildLocalityMeasurementId = (lid: number, kind: string): string => `NOW:LOC:${lid}:${kind}` + +const 
formatAgeRange = (locality: LocalityForExport): string => { + const minAge = toMaybeMeaningfulNumber(locality.min_age) + const maxAge = toMaybeMeaningfulNumber(locality.max_age) + if (minAge && maxAge) return `${minAge}-${maxAge}` + if (minAge) return minAge + if (maxAge) return maxAge + return '' +} + +const calculateMeanHypsodontyForExport = (locality: Pick): number => { + const relevantOrderNames = [ + 'Perissodactyla', + 'Artiodactyla', + 'Primates', + 'Proboscidea', + 'Hyracoidea', + 'Dinocerata', + 'Embrithopoda', + 'Notoungulata', + 'Astrapotheria', + 'Pyrotheria', + 'Litopterna', + 'Condylarthra', + 'Pantodonta', + ] + + const thtToValue = { + bra: 1.0, + mes: 2.0, + hyp: 3.0, + hys: 3.0, + none: 0.0, + } as Record + + const values = locality.now_ls + .map(row => row.com_species) + .filter(species => relevantOrderNames.includes(species.order_name)) + .map(species => thtToValue[species.tht ?? 'none']) + + const sum = values.reduce((acc, cur) => acc + cur, 0.0) + const mean = values.length > 0 ? 
sum / values.length : 0.0 + return parseFloat((Math.floor(mean * 100) / 100).toFixed(2)) +} + +const hasHomininSkeletalRemainsForExport = (locality: Pick): boolean => { + const hominins = [ + 'sahelanthropus', + 'orrorin', + 'ardipithecus', + 'kenyanthropus', + 'australopithecus', + 'paranthropus', + 'homo', + ] + + return locality.now_ls.some(({ com_species }) => { + const genusName = com_species.genus_name + if (!genusName) return false + return hominins.includes(genusName.toLowerCase()) + }) +} + +const isNumberMeaningful = (value: number | null | undefined, { allowZero }: { allowZero: boolean }): boolean => { + if (value === null || value === undefined) return false + if (!Number.isFinite(value)) return false + if (!allowZero && value === 0) return false + return true +} + +const toMaybeMeaningfulNumberWithZeroOption = ( + value: number | null | undefined, + { allowZero }: { allowZero: boolean } +): string => { + if (!isNumberMeaningful(value, { allowZero })) return '' + return value!.toString() +} + +const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ + field: keyof LocalityForExport + measurementType: string + measurementUnit: string + measurementMethod: string + allowZero?: boolean +}> = [ + { + field: 'bfa_max', + measurementType: 'Basis for age (Time Unit)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. + measurementMethod: '', + }, + { + field: 'bfa_min', + measurementType: 'Basis for age (Time Unit)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. + measurementMethod: '', + }, + { + field: 'bfa_max_abs', + measurementType: 'Basis for age (Absolute)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. 
+ measurementMethod: '', + }, + { + field: 'bfa_min_abs', + measurementType: 'Basis for age (Absolute)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. + measurementMethod: '', + }, + { + field: 'frac_max', + measurementType: 'Basis for age (Fraction)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. + measurementMethod: '', + }, + { + field: 'frac_min', + measurementType: 'Basis for age (Fraction)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. + measurementMethod: '', + }, + { + field: 'site_area', + measurementType: 'site area', + measurementUnit: '', + // TODO(#1150): Add unit and definition for NOW locality site_area. + measurementMethod: '', + }, + { + field: 'approx_coord', + measurementType: 'approximate coordinates', + measurementUnit: '', + // TODO(#1150): Add definition (what qualifies as approximate). + measurementMethod: '', + }, + { + field: 'gen_loc', + measurementType: 'general locality', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary / definition. + measurementMethod: '', + }, + { + field: 'plate', + measurementType: 'tectonic plate', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary / definition. + measurementMethod: '', + }, + { + field: 'appr_num_spm', + measurementType: 'approximate number of specimens', + measurementUnit: '', + // TODO(#1150): Confirm whether this is a sample-unit or locality-level count. + measurementMethod: '', + }, + { + field: 'num_spm', + measurementType: 'number of specimens', + measurementUnit: '', + // TODO(#1150): Confirm whether this is a sample-unit or locality-level count. + measurementMethod: '', + }, + { + field: 'true_quant', + measurementType: 'true quantification', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary for NOW locality true_quant. 
+ measurementMethod: '', + }, + { + field: 'complete', + measurementType: 'complete sampling', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary for NOW locality complete. + measurementMethod: '', + }, + { + field: 'num_quad', + measurementType: 'number of quadrats', + measurementUnit: '', + // TODO(#1150): Add definition for NOW locality num_quad. + measurementMethod: '', + }, + { + field: 'date_meth', + measurementType: 'dating method', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary / description. + measurementMethod: '', + }, + { + field: 'basin', + measurementType: 'basin', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'subbasin', + measurementType: 'subbasin', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'rock_type', + measurementType: 'rock type', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'rt_adj', + measurementType: 'rock type adjective', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'lith_comm', + measurementType: 'lithology comment', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'sed_env_1', + measurementType: 'sedimentary environment 1', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'sed_env_2', + measurementType: 'sedimentary environment 2', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'event_circum', + measurementType: 'event circumstances', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }, + { + field: 'se_comm', + measurementType: 'sedimentary environment comment', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_context1', + measurementType: 'depositional context 1', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_context2', + measurementType: 'depositional context 2', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_context3', + measurementType: 'depositional context 3', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_context4', + measurementType: 'depositional context 4', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_comm', + measurementType: 'depositional context comment', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'chron', + measurementType: 'chron', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'loc_status', + measurementType: 'locality status', + measurementUnit: '', + // TODO(#1150): Add definition. + measurementMethod: '', + }, + { + field: 'hominin_skeletal_remains', + measurementType: 'Hominin skeletal remains (field)', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'assem_fm', + measurementType: 'Assemblage Formation', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }, + { + field: 'transport', + measurementType: 'Transport', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'trans_mod', + measurementType: 'Abrasion', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'weath_trmp', + measurementType: 'Weathering / Trampling', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'pt_conc', + measurementType: 'Part Concentration', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'size_type', + measurementType: 'Assemblage Component Size', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'time_rep', + measurementType: 'Time Represented (years)', + measurementUnit: '', + // TODO(#1150): Add unit and definition (years bins). + measurementMethod: '', + }, + { + field: 'vert_pres', + measurementType: 'Vertebrate Preservation', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary. + measurementMethod: '', + }, + { + field: 'plant_pres', + measurementType: 'Plant Preservation', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary. + measurementMethod: '', + }, + { + field: 'invert_pres', + measurementType: 'Invertebrate Preservation', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary. + measurementMethod: '', + }, + { + field: 'taph_comm', + measurementType: 'Taphonomy comment', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'climate_type', + measurementType: 'Climate Type', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }, + { + field: 'temperature', + measurementType: 'Temperature', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'moisture', + measurementType: 'Moisture', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'disturb', + measurementType: 'Agent(s) of Disturbance', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'v_envi_det', + measurementType: 'Environment & Vegetation Detail', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'seasonality', + measurementType: 'Seasonality', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'seas_intens', + measurementType: 'Seasonality Intensity', + measurementUnit: '', + // TODO(#1150): Add unit and definition. + measurementMethod: '', + }, + { + field: 'biome', + measurementType: 'Biome', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'v_ht', + measurementType: 'Vegetation Height', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'v_struct', + measurementType: 'Vegetation Structure', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'pri_prod', + measurementType: 'Primary Productivity Level', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'nutrients', + measurementType: 'Nutrient Availability', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }, + { + field: 'water', + measurementType: 'Water Availability', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'pers_pollen_ap', + measurementType: 'Arboreal pollen (AP%)', + measurementUnit: '%', + measurementMethod: '', + allowZero: true, + }, + { + field: 'pers_pollen_nap', + measurementType: 'Non-arboreal pollen (NAP%)', + measurementUnit: '%', + measurementMethod: '', + allowZero: true, + }, + { + field: 'pers_pollen_other', + measurementType: 'Other pollen (OP%)', + measurementUnit: '%', + measurementMethod: '', + allowZero: true, + }, + { + field: 'estimate_precip', + measurementType: 'Estimate of annual precipitation (mm)', + measurementUnit: 'mm', + measurementMethod: '', + allowZero: true, + }, + { + field: 'estimate_temp', + measurementType: 'Estimate of mean annual temperature (°C)', + measurementUnit: '°C', + measurementMethod: '', + allowZero: true, + }, + { + field: 'estimate_npp', + measurementType: 'Estimate of net primary productivity (g/m2/yr)', + measurementUnit: 'g/m2/yr', + measurementMethod: '', + allowZero: true, + }, + { + field: 'pers_woody_cover', + measurementType: 'Woody cover percentage', + measurementUnit: '%', + measurementMethod: '', + allowZero: true, + }, + { + field: 'stone_tool_cut_marks_on_bones', + measurementType: 'Stone tool cut marks on bones', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'bipedal_footprints', + measurementType: 'Bipedal footprints', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'stone_tool_technology', + measurementType: 'Stone tool technology', + measurementUnit: '', + // TODO(#1150): Add field description. 
+ measurementMethod: '', + }, + { + field: 'technological_mode_1', + measurementType: 'Technological mode 1', + measurementUnit: '', + measurementMethod: '', + allowZero: true, + }, + { + field: 'technological_mode_2', + measurementType: 'Technological mode 2', + measurementUnit: '', + measurementMethod: '', + allowZero: true, + }, + { + field: 'technological_mode_3', + measurementType: 'Technological mode 3', + measurementUnit: '', + measurementMethod: '', + allowZero: true, + }, + { + field: 'cultural_stage_1', + measurementType: 'Cultural stage 1', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'cultural_stage_2', + measurementType: 'Cultural stage 2', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'cultural_stage_3', + measurementType: 'Cultural stage 3', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'regional_culture_1', + measurementType: 'Regional culture 1', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'regional_culture_2', + measurementType: 'Regional culture 2', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'regional_culture_3', + measurementType: 'Regional culture 3', + measurementUnit: '', + measurementMethod: '', + }, +] + +export const mapLocalityToMeasurementRows = (locality: LocalityForExport): LocalityMeasurementCsvRow[] => { + const taxonID = locationIdForLocality(locality.lid) + const lid = locality.lid + + const ageParentId = buildLocalityMeasurementId(lid, 'age') + const maxAgeId = buildLocalityMeasurementId(lid, 'max_age') + const minAgeId = buildLocalityMeasurementId(lid, 'min_age') + + const stratigraphyParentId = buildLocalityMeasurementId(lid, 'stratigraphy') + const tosId = buildLocalityMeasurementId(lid, 'tos') + const bosId = buildLocalityMeasurementId(lid, 'bos') + const datumPlaneId = buildLocalityMeasurementId(lid, 'datum_plane') + + const hasMaxAgeGroup = + isMeaningfulMeasurementValue(locality.max_age) || + 
isMeaningfulMeasurementValue(locality.bfa_max) || + isMeaningfulMeasurementValue(locality.bfa_max_abs) || + isMeaningfulMeasurementValue(locality.frac_max) + + const hasMinAgeGroup = + isMeaningfulMeasurementValue(locality.min_age) || + isMeaningfulMeasurementValue(locality.bfa_min) || + isMeaningfulMeasurementValue(locality.bfa_min_abs) || + isMeaningfulMeasurementValue(locality.frac_min) + + const hasAnyAgeBasis = hasMaxAgeGroup || hasMinAgeGroup + + const rows: LocalityMeasurementCsvRow[] = [] + + const hasAnyStratigraphy = + isMeaningfulMeasurementValue(locality.datum_plane) || + isMeaningfulMeasurementValue(locality.tos) || + isMeaningfulMeasurementValue(locality.bos) + + if (hasAnyStratigraphy) { + rows.push({ + taxonID, + measurementID: stratigraphyParentId, + parentMeasurementID: '', + measurementType: 'stratigraphic section', + verbatimMeasurementType: 'stratigraphy', + measurementValue: '', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + if (isMeaningfulMeasurementValue(locality.datum_plane)) { + rows.push({ + taxonID, + measurementID: datumPlaneId, + parentMeasurementID: hasAnyStratigraphy ? stratigraphyParentId : '', + measurementType: 'datum plane', + verbatimMeasurementType: 'datum_plane', + measurementValue: toMaybeMeaningful(locality.datum_plane), + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + if (typeof locality.tos === 'number') { + const value = toMaybeMeaningfulNumberWithZeroOption(locality.tos, { allowZero: true }) + if (value) { + rows.push({ + taxonID, + measurementID: tosId, + parentMeasurementID: hasAnyStratigraphy ? stratigraphyParentId : '', + measurementType: 'top of section', + verbatimMeasurementType: 'tos', + measurementValue: value, + measurementUnit: '', + // TODO(#1150): Add unit and definition. 
+ measurementMethod: '', + }) + } + } + + if (typeof locality.bos === 'number') { + const value = toMaybeMeaningfulNumberWithZeroOption(locality.bos, { allowZero: true }) + if (value) { + rows.push({ + taxonID, + measurementID: bosId, + parentMeasurementID: hasAnyStratigraphy ? stratigraphyParentId : '', + measurementType: 'bottom of section', + verbatimMeasurementType: 'bos', + measurementValue: value, + measurementUnit: '', + // TODO(#1150): Add unit and definition. + measurementMethod: '', + }) + } + } + + if (hasAnyAgeBasis) { + rows.push({ + taxonID, + measurementID: ageParentId, + parentMeasurementID: '', + measurementType: 'age range', + verbatimMeasurementType: 'age_range', + measurementValue: formatAgeRange(locality), + measurementUnit: 'Ma', + // TODO(#1150): Add authoritative definition for NOW locality age range semantics. + measurementMethod: '', + }) + } + + if (hasMaxAgeGroup) { + rows.push({ + taxonID, + measurementID: maxAgeId, + parentMeasurementID: hasAnyAgeBasis ? ageParentId : '', + measurementType: 'maximum age', + verbatimMeasurementType: 'max_age', + measurementValue: toMaybeMeaningfulNumber(locality.max_age), + measurementUnit: 'Ma', + // TODO(#1150): Add authoritative definition for NOW locality max_age semantics. + measurementMethod: '', + }) + } + + if (hasMinAgeGroup) { + rows.push({ + taxonID, + measurementID: minAgeId, + parentMeasurementID: hasAnyAgeBasis ? ageParentId : '', + measurementType: 'minimum age', + verbatimMeasurementType: 'min_age', + measurementValue: toMaybeMeaningfulNumber(locality.min_age), + measurementUnit: 'Ma', + // TODO(#1150): Add authoritative definition for NOW locality min_age semantics. + measurementMethod: '', + }) + } + + const coreRows = LOCALITY_MEASUREMENT_MAPPINGS.flatMap(mapping => { + const rawValue = locality[mapping.field] + + const measurementValue = (() => { + if (typeof rawValue === 'number') { + return toMaybeMeaningfulNumberWithZeroOption(rawValue, { allowZero: mapping.allowZero ?? 
false }) + } + if (!isMeaningfulMeasurementValue(rawValue)) return '' + return toDwcString(rawValue).trim() + })() + + if (!measurementValue) return [] + + const verbatimMeasurementType = mapping.field.toString() + + const parentMeasurementID = (() => { + if (verbatimMeasurementType === 'bfa_max') return hasMaxAgeGroup ? maxAgeId : '' + if (verbatimMeasurementType === 'bfa_max_abs') return hasMaxAgeGroup ? maxAgeId : '' + if (verbatimMeasurementType === 'frac_max') return hasMaxAgeGroup ? maxAgeId : '' + + if (verbatimMeasurementType === 'bfa_min') return hasMinAgeGroup ? minAgeId : '' + if (verbatimMeasurementType === 'bfa_min_abs') return hasMinAgeGroup ? minAgeId : '' + if (verbatimMeasurementType === 'frac_min') return hasMinAgeGroup ? minAgeId : '' + + return '' + })() + + const measurementID = buildLocalityMeasurementId(lid, verbatimMeasurementType) + + return [ + { + taxonID, + measurementID, + parentMeasurementID, + measurementType: mapping.measurementType, + verbatimMeasurementType, + measurementValue, + measurementUnit: mapping.measurementUnit, + measurementMethod: mapping.measurementMethod, + }, + ] + }) + + rows.push(...coreRows) + + const museums = locality.now_mus + .map(row => { + const institution = row.com_mlist.alt_int_name ?? row.com_mlist.institution + const locationBits = [ + toMaybeMeaningful(row.com_mlist.city), + toMaybeMeaningful(row.com_mlist.state), + toMaybeMeaningful(row.com_mlist.country), + ] + .filter(Boolean) + .join(', ') + return [row.museum, institution, locationBits ? `(${locationBits})` : ''].filter(Boolean).join(' ') + }) + .filter(isMeaningfulString) + .map(value => value.trim()) + + if (museums.length) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'museums'), + parentMeasurementID: '', + measurementType: 'Museums', + verbatimMeasurementType: 'now_mus.museum', + measurementValue: museums.join('|'), + measurementUnit: '', + // TODO(#1150): Add field description. 
+ measurementMethod: '', + }) + } + + const collectingMethods = locality.now_coll_meth + .map(method => method.coll_meth) + .filter(value => isMeaningfulString(value)) + .map(value => value.trim()) + + if (collectingMethods.length) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'collecting_methods'), + parentMeasurementID: '', + measurementType: 'Collecting Methods', + verbatimMeasurementType: 'now_coll_meth.coll_meth', + measurementValue: collectingMethods.join('|'), + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + const meanHypsodonty = calculateMeanHypsodontyForExport(locality) + if (isNumberMeaningful(meanHypsodonty, { allowZero: true })) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'mean_hypsodonty'), + parentMeasurementID: '', + measurementType: 'Mean hypsodonty', + verbatimMeasurementType: 'calculated_mean_hypsodonty', + measurementValue: meanHypsodonty.toString(), + measurementUnit: '', + // TODO(#1150): Document calculation provenance (see frontend shared calculations). + measurementMethod: '', + }) + } + + const homininSkeletalRemains = hasHomininSkeletalRemainsForExport(locality) + if (homininSkeletalRemains) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'hominin_skeletal_remains'), + parentMeasurementID: '', + measurementType: 'Hominin skeletal remains', + verbatimMeasurementType: 'calculated_hominin_skeletal_remains', + measurementValue: 'true', + measurementUnit: '', + // TODO(#1150): Document calculation provenance (see frontend shared calculations). 
+ measurementMethod: '', + }) + } + + const localitySynonyms = locality.now_syn_loc + .map(row => row.synonym) + .filter((syn): syn is string => isMeaningfulString(syn)) + .map(syn => syn.trim()) + + if (localitySynonyms.length) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'synonyms'), + parentMeasurementID: '', + measurementType: 'synonyms', + verbatimMeasurementType: 'synonym', + measurementValue: localitySynonyms.join('|'), + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + const sedimentaryStructures = locality.now_ss + .map(row => row.sed_struct) + .filter(value => isMeaningfulString(value)) + .map(value => value.trim()) + + if (sedimentaryStructures.length) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'sedimentary_structures'), + parentMeasurementID: '', + measurementType: 'Sedimentary structures', + verbatimMeasurementType: 'now_ss.sed_struct', + measurementValue: sedimentaryStructures.join('|'), + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }) + } + + return rows +} + +const DWC_TERMS = { + location: { + rowType: 'http://rs.tdwg.org/dwc/terms/Location', + locationID: 'http://rs.tdwg.org/dwc/terms/locationID', + locality: 'http://rs.tdwg.org/dwc/terms/locality', + continent: 'http://rs.tdwg.org/dwc/terms/continent', + country: 'http://rs.tdwg.org/dwc/terms/country', + stateProvince: 'http://rs.tdwg.org/dwc/terms/stateProvince', + county: 'http://rs.tdwg.org/dwc/terms/county', + higherGeography: 'http://rs.tdwg.org/dwc/terms/higherGeography', + decimalLatitude: 'http://rs.tdwg.org/dwc/terms/decimalLatitude', + decimalLongitude: 'http://rs.tdwg.org/dwc/terms/decimalLongitude', + verbatimLatitude: 'http://rs.tdwg.org/dwc/terms/verbatimLatitude', + verbatimLongitude: 'http://rs.tdwg.org/dwc/terms/verbatimLongitude', + verbatimElevation: 'http://rs.tdwg.org/dwc/terms/verbatimElevation', + locationRemarks: 'http://rs.tdwg.org/dwc/terms/locationRemarks', + }, + geologicalContext: { + rowType: 'http://rs.tdwg.org/dwc/terms/GeologicalContext', + locationID: 'http://rs.tdwg.org/dwc/terms/locationID', + geologicalContextID: 'http://rs.tdwg.org/dwc/terms/geologicalContextID', + lithostratigraphicTerms: 'http://rs.tdwg.org/dwc/terms/lithostratigraphicTerms', + group: 'http://rs.tdwg.org/dwc/terms/group', + formation: 'http://rs.tdwg.org/dwc/terms/formation', + member: 'http://rs.tdwg.org/dwc/terms/member', + bed: 'http://rs.tdwg.org/dwc/terms/bed', + earliestAgeOrLowestStage: 'http://rs.tdwg.org/dwc/terms/earliestAgeOrLowestStage', + latestAgeOrHighestStage: 'http://rs.tdwg.org/dwc/terms/latestAgeOrHighestStage', + }, + measurement: { + rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', + taxonID: 'http://rs.tdwg.org/dwc/terms/locationID', + measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', + parentMeasurementID: 'http://rs.tdwg.org/dwc/terms/parentMeasurementID', + measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', + verbatimMeasurementType: 
'http://rs.tdwg.org/dwc/terms/verbatimMeasurementType', + measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', + measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', + measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', + }, +} as const + +export const buildLocalityMetaXml = (): string => { + const locationFields = LOCATION_HEADERS.map((header, index) => { + const term = (DWC_TERMS.location as Record)[header] + return ` ` + }).join('\n') + + const geologyFields = GEOLOGICAL_CONTEXT_HEADERS.map((header, index) => { + const term = (DWC_TERMS.geologicalContext as Record)[header] + return ` ` + }).join('\n') + + const measurementFields = LOCALITY_MEASUREMENT_HEADERS.map((header, index) => { + const term = (DWC_TERMS.measurement as Record)[header] + return ` ` + }).join('\n') + + return ` + + + + location.csv + + +${locationFields} + + + + geologicalcontext.csv + + +${geologyFields} + + + + measurementorfact.csv + + +${measurementFields} + + +` +} + +export const buildLocalityEmlXml = (publicationDateIso: string): string => { + return ` + + + + NOW database Darwin Core test export (localities) + + + NOW database + + + + + NOW database + + + ${publicationDateIso} + + Admin-only test Darwin Core Archive export for localities, mapping Location + GeologicalContext + MeasurementOrFact terms. Field mappings are intentionally limited for v1. + + + TODO(#1150): Add rights / license information. 
+ + + +` +} + +export const buildDwcLocalityArchiveZipBufferFromLocalities = async ( + localities: LocalityForExport[] +): Promise => { + const locationRows = localities.map(mapLocalityToLocationRow) + const geologicalContextRows = localities.map(mapLocalityToGeologicalContextRow) + const measurementRows = localities.flatMap(mapLocalityToMeasurementRows) + + const locationCsv = await writeCsvString([...LOCATION_HEADERS], locationRows) + const geologyCsv = await writeCsvString([...GEOLOGICAL_CONTEXT_HEADERS], geologicalContextRows) + const measurementCsv = await writeCsvString([...LOCALITY_MEASUREMENT_HEADERS], measurementRows) + const metaXml = buildLocalityMetaXml() + + const publicationDateIso = new Date().toISOString().slice(0, 10) + const emlXml = buildLocalityEmlXml(publicationDateIso) + + const zip = new JSZip() + zip.file('location.csv', locationCsv) + zip.file('geologicalcontext.csv', geologyCsv) + zip.file('measurementorfact.csv', measurementCsv) + zip.file('meta.xml', metaXml) + zip.file('eml.xml', emlXml) + + return await zip.generateAsync({ type: 'nodebuffer' }) +} + +export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { + const { nowDb } = await import('../utils/db') + const localities = await nowDb.now_loc.findMany({ + select: { + lid: true, + loc_name: true, + basin: true, + subbasin: true, + country: true, + state: true, + county: true, + dec_lat: true, + dec_long: true, + dms_lat: true, + dms_long: true, + approx_coord: true, + altitude: true, + loc_detail: true, + chron: true, + lgroup: true, + formation: true, + member: true, + bed: true, + bfa_max: true, + bfa_min: true, + bfa_max_abs: true, + bfa_min_abs: true, + frac_max: true, + frac_min: true, + max_age: true, + min_age: true, + date_meth: true, + age_comm: true, + site_area: true, + gen_loc: true, + plate: true, + appr_num_spm: true, + num_spm: true, + true_quant: true, + complete: true, + num_quad: true, + rock_type: true, + rt_adj: true, + lith_comm: true, + 
depo_context1: true, + depo_context2: true, + depo_context3: true, + depo_context4: true, + depo_comm: true, + sed_env_1: true, + sed_env_2: true, + event_circum: true, + se_comm: true, + assem_fm: true, + transport: true, + trans_mod: true, + weath_trmp: true, + pt_conc: true, + size_type: true, + vert_pres: true, + plant_pres: true, + invert_pres: true, + time_rep: true, + taph_comm: true, + tax_comm: true, + datum_plane: true, + tos: true, + bos: true, + loc_status: true, + hominin_skeletal_remains: true, + climate_type: true, + biome: true, + v_ht: true, + v_struct: true, + v_envi_det: true, + disturb: true, + nutrients: true, + water: true, + seasonality: true, + seas_intens: true, + pri_prod: true, + moisture: true, + temperature: true, + estimate_precip: true, + estimate_temp: true, + estimate_npp: true, + pers_woody_cover: true, + pers_pollen_ap: true, + pers_pollen_nap: true, + pers_pollen_other: true, + stone_tool_cut_marks_on_bones: true, + bipedal_footprints: true, + stone_tool_technology: true, + technological_mode_1: true, + technological_mode_2: true, + technological_mode_3: true, + cultural_stage_1: true, + cultural_stage_2: true, + cultural_stage_3: true, + regional_culture_1: true, + regional_culture_2: true, + regional_culture_3: true, + now_time_unit_now_loc_bfa_maxTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + now_time_unit_now_loc_bfa_minTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + now_syn_loc: { + select: { synonym: true }, + }, + now_ss: { + select: { sed_struct: true }, + }, + now_coll_meth: { + select: { coll_meth: true }, + }, + now_mus: { + select: { + museum: true, + com_mlist: { select: { institution: true, alt_int_name: true, city: true, state: true, country: true } }, + }, + }, + now_ls: { + select: { + com_species: { + select: { order_name: true, tht: true, genus_name: true }, + }, + }, + }, + }, + }) + + return 
await buildDwcLocalityArchiveZipBufferFromLocalities(localities as unknown as LocalityForExport[]) +} diff --git a/backend/src/services/dwcArchiveExportOccurrences.ts b/backend/src/services/dwcArchiveExportOccurrences.ts new file mode 100644 index 000000000..d17213596 --- /dev/null +++ b/backend/src/services/dwcArchiveExportOccurrences.ts @@ -0,0 +1,833 @@ +import Prisma from '../../prisma/generated/now_test_client' +import { format } from 'fast-csv' +import { createReadStream, createWriteStream } from 'fs' +import { mkdtemp, rm } from 'fs/promises' +import { tmpdir } from 'os' +import path from 'path' +import { once } from 'events' +import { Writable } from 'stream' +import JSZip from 'jszip' +import { + GEOLOGICAL_CONTEXT_HEADERS, + LOCATION_HEADERS, + mapLocalityToGeologicalContextRow, + mapLocalityToLocationRow, +} from './dwcArchiveExportLocalities' +import { MEASUREMENT_HEADERS, TAXON_HEADERS, mapSpeciesToTaxonRow, type MeasurementCsvRow } from './dwcArchiveExport' + +const writeCsvString = async (headers: string[], rows: Array>): Promise => { + if (rows.length === 0) { + return `${headers.map(header => `"${header.replace(/"/g, '""')}"`).join(',')}\n` + } + + return await new Promise((resolve, reject) => { + let output = '' + const csvStream = format({ + delimiter: ',', + headers, + quoteColumns: true, + quoteHeaders: true, + includeEndRowDelimiter: true, + }) + + const sink = new Writable({ + write(chunk: Buffer | string, _encoding: BufferEncoding, callback: (error?: Error | null) => void) { + output += typeof chunk === 'string' ? 
chunk : chunk.toString('utf8') + callback() + }, + }) + + sink.on('finish', () => resolve(output)) + sink.on('error', reject) + csvStream.on('error', reject) + + csvStream.pipe(sink) + for (const row of rows) { + csvStream.write(row) + } + csvStream.end() + }) +} + +const isMeaningfulString = (value: unknown): value is string => { + if (typeof value !== 'string') return false + const trimmed = value.trim() + if (!trimmed) return false + if (trimmed === '-') return false + return true +} + +const toDwcString = (value: unknown): string => { + if (value === null || value === undefined) return '' + if (typeof value === 'bigint') return value.toString() + if (typeof value === 'number') return Number.isFinite(value) ? value.toString() : '' + if (typeof value === 'boolean') return value ? 'true' : 'false' + if (typeof value === 'string') return value + if (typeof value === 'object' && typeof (value as { toString?: unknown }).toString === 'function') { + const asString = (value as { toString: () => string }).toString() + return asString === '[object Object]' ? '' : asString + } + return '' +} + +const toMaybeMeaningful = (value: string | null | undefined): string => (isMeaningfulString(value) ? 
value.trim() : '') + +const occurrenceIdForRow = (lid: number, speciesId: number): string => `NOW:OCC:${lid}:${speciesId}` + +const taxonIdForSpecies = (speciesId: number): string => `NOW:${speciesId}` + +export const OCCURRENCE_HEADERS = [ + 'occurrenceID', + 'locationID', + 'taxonID', + 'scientificName', + 'occurrenceStatus', + 'organismQuantity', + 'organismQuantityType', + 'identificationQualifier', + 'occurrenceRemarks', +] as const + +export type OccurrenceCsvHeader = (typeof OCCURRENCE_HEADERS)[number] +export type OccurrenceCsvRow = Record + +type LocalityForOccurrenceExport = Parameters[0] +type SpeciesForOccurrenceExport = Parameters[0] + +type OccurrenceForExport = Pick< + Prisma.now_ls, + | 'lid' + | 'species_id' + | 'nis' + | 'pct' + | 'quad' + | 'mni' + | 'qua' + | 'id_status' + | 'orig_entry' + | 'source_name' + | 'body_mass' + | 'mesowear' + | 'mw_or_high' + | 'mw_or_low' + | 'mw_cs_sharp' + | 'mw_cs_round' + | 'mw_cs_blunt' + | 'mw_scale_min' + | 'mw_scale_max' + | 'mw_value' + | 'microwear' + | 'dc13_mean' + | 'dc13_n' + | 'dc13_max' + | 'dc13_min' + | 'dc13_stdev' + | 'do18_mean' + | 'do18_n' + | 'do18_max' + | 'do18_min' + | 'do18_stdev' +> & { + com_species: SpeciesForOccurrenceExport +} + +type OccurrenceWithLocalityForExport = OccurrenceForExport & { + now_loc: LocalityForOccurrenceExport +} + +type DwcOccurrenceArchiveStream = { + stream: NodeJS.ReadableStream + cleanup: () => Promise +} + +export type DwcOccurrenceExportProgress = { + stage: 'occurrences' | 'localities' | 'taxa' | 'zipping' | 'complete' + generated: number + total: number | null + message: string +} + +type DwcOccurrenceExportProgressReporter = (progress: DwcOccurrenceExportProgress) => void + +const scientificNameForOccurrence = (species: SpeciesForOccurrenceExport): string => { + const nameParts = [ + toMaybeMeaningful(species.genus_name), + toMaybeMeaningful(species.species_name), + toMaybeMeaningful(species.unique_identifier), + ].filter(Boolean) + const authorship = 
toMaybeMeaningful(species.sp_author) + return [nameParts.join(' '), authorship].filter(Boolean).join(' ').trim() +} + +const occurrenceQuantity = ( + occurrence: OccurrenceForExport +): Pick => { + if (occurrence.mni !== null) { + return { organismQuantity: occurrence.mni.toString(), organismQuantityType: 'minimum number of individuals' } + } + if (occurrence.nis !== null) { + return { organismQuantity: occurrence.nis.toString(), organismQuantityType: 'number of identified specimens' } + } + if (occurrence.pct !== null) { + return { organismQuantity: occurrence.pct.toString(), organismQuantityType: 'percentage' } + } + if (occurrence.quad !== null) { + return { organismQuantity: occurrence.quad.toString(), organismQuantityType: 'quadrat count' } + } + return { organismQuantity: '', organismQuantityType: '' } +} + +export const mapOccurrenceToOccurrenceRow = (occurrence: OccurrenceForExport): OccurrenceCsvRow => { + const quantity = occurrenceQuantity(occurrence) + const occurrenceRemarks = [ + toMaybeMeaningful(occurrence.orig_entry), + toMaybeMeaningful(occurrence.source_name), + toMaybeMeaningful(occurrence.qua), + ] + .filter(Boolean) + .join(' | ') + + return { + occurrenceID: occurrenceIdForRow(occurrence.lid, occurrence.species_id), + locationID: `NOW:LOC:${occurrence.lid}`, + taxonID: taxonIdForSpecies(occurrence.species_id), + scientificName: scientificNameForOccurrence(occurrence.com_species), + occurrenceStatus: 'present', + organismQuantity: quantity.organismQuantity, + organismQuantityType: quantity.organismQuantityType, + identificationQualifier: toMaybeMeaningful(occurrence.id_status), + occurrenceRemarks, + } +} + +const NOW_LS_MEASUREMENT_MAPPINGS: Array<{ + field: keyof OccurrenceForExport + measurementType: string + measurementUnit: string + measurementMethod: string +}> = [ + { field: 'nis', measurementType: 'number of identified specimens', measurementUnit: '', measurementMethod: '' }, + { field: 'pct', measurementType: 'percentage', 
measurementUnit: '%', measurementMethod: '' }, + { field: 'quad', measurementType: 'quadrat count', measurementUnit: '', measurementMethod: '' }, + { field: 'mni', measurementType: 'minimum number of individuals', measurementUnit: '', measurementMethod: '' }, + { field: 'body_mass', measurementType: 'occurrence body mass', measurementUnit: 'g', measurementMethod: '' }, + { field: 'mesowear', measurementType: 'occurrence mesowear', measurementUnit: '', measurementMethod: '' }, + { + field: 'mw_or_high', + measurementType: 'occurrence mesowear high occlusal relief', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_or_low', + measurementType: 'occurrence mesowear low occlusal relief', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_cs_sharp', + measurementType: 'occurrence mesowear sharp cusp shape', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_cs_round', + measurementType: 'occurrence mesowear round cusp shape', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_cs_blunt', + measurementType: 'occurrence mesowear blunt cusp shape', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_scale_min', + measurementType: 'occurrence mesowear scale minimum', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_scale_max', + measurementType: 'occurrence mesowear scale maximum', + measurementUnit: '', + measurementMethod: '', + }, + { field: 'mw_value', measurementType: 'occurrence mesowear value', measurementUnit: '', measurementMethod: '' }, + { field: 'microwear', measurementType: 'occurrence microwear', measurementUnit: '', measurementMethod: '' }, + { + field: 'dc13_mean', + measurementType: 'occurrence delta C13 mean', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { field: 'dc13_n', measurementType: 'occurrence delta C13 sample count', measurementUnit: '', measurementMethod: '' }, + { + field: 'dc13_max', + measurementType: 'occurrence 
delta C13 maximum', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { + field: 'dc13_min', + measurementType: 'occurrence delta C13 minimum', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { + field: 'dc13_stdev', + measurementType: 'occurrence delta C13 standard deviation', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'do18_mean', + measurementType: 'occurrence delta O18 mean', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { field: 'do18_n', measurementType: 'occurrence delta O18 sample count', measurementUnit: '', measurementMethod: '' }, + { + field: 'do18_max', + measurementType: 'occurrence delta O18 maximum', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { + field: 'do18_min', + measurementType: 'occurrence delta O18 minimum', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { + field: 'do18_stdev', + measurementType: 'occurrence delta O18 standard deviation', + measurementUnit: '', + measurementMethod: '', + }, +] + +export const mapOccurrenceToMeasurementRows = (occurrence: OccurrenceForExport): MeasurementCsvRow[] => { + const occurrenceID = occurrenceIdForRow(occurrence.lid, occurrence.species_id) + + return NOW_LS_MEASUREMENT_MAPPINGS.flatMap(mapping => { + const rawValue = occurrence[mapping.field] + if (rawValue === null || rawValue === undefined) return [] + if (typeof rawValue === 'string' && !isMeaningfulString(rawValue)) return [] + + const measurementValue = toDwcString(rawValue).trim() + if (!measurementValue) return [] + + const verbatimMeasurementType = `now_ls.${mapping.field.toString()}` + return [ + { + taxonID: occurrenceID, + measurementID: `${occurrenceID}:${verbatimMeasurementType}`, + parentMeasurementID: '', + measurementType: mapping.measurementType, + verbatimMeasurementType, + measurementValue, + measurementUnit: mapping.measurementUnit, + measurementMethod: mapping.measurementMethod, + }, + ] + }) +} + +const uniqueBy = (rows: 
T[], keyFn: (row: T) => string): T[] => { + const byKey = new Map() + for (const row of rows) { + const key = keyFn(row) + if (!byKey.has(key)) byKey.set(key, row) + } + return [...byKey.values()] +} + +const OCCURRENCE_EXPORT_PAGE_SIZE = 1000 +const LOOKUP_EXPORT_CHUNK_SIZE = 1000 + +const occurrenceSelect = { + lid: true, + species_id: true, + nis: true, + pct: true, + quad: true, + mni: true, + qua: true, + id_status: true, + orig_entry: true, + source_name: true, + body_mass: true, + mesowear: true, + mw_or_high: true, + mw_or_low: true, + mw_cs_sharp: true, + mw_cs_round: true, + mw_cs_blunt: true, + mw_scale_min: true, + mw_scale_max: true, + mw_value: true, + microwear: true, + dc13_mean: true, + dc13_n: true, + dc13_max: true, + dc13_min: true, + dc13_stdev: true, + do18_mean: true, + do18_n: true, + do18_max: true, + do18_min: true, + do18_stdev: true, + com_species: { + select: { + species_id: true, + class_name: true, + subclass_or_superorder_name: true, + order_name: true, + suborder_or_superfamily_name: true, + family_name: true, + subfamily_name: true, + genus_name: true, + species_name: true, + unique_identifier: true, + taxonomic_status: true, + common_name: true, + sp_author: true, + sp_comment: true, + }, + }, +} as const + +const localityLookupSelect = { + lid: true, + loc_name: true, + basin: true, + subbasin: true, + country: true, + state: true, + county: true, + dec_lat: true, + dec_long: true, + dms_lat: true, + dms_long: true, + altitude: true, + loc_detail: true, + age_comm: true, + tax_comm: true, + chron: true, + lgroup: true, + formation: true, + member: true, + bed: true, + bfa_max: true, + bfa_min: true, + now_time_unit_now_loc_bfa_maxTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + now_time_unit_now_loc_bfa_minTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, +} as const + +const speciesLookupSelect = 
occurrenceSelect.com_species.select + +const csvCell = (value: unknown): string => `"${toDwcString(value).replace(/"/g, '""')}"` + +const csvLine = (headers: readonly string[], row: Record): string => + `${headers.map(header => csvCell(row[header])).join(',')}\n` + +const createCsvFileWriter = async (filePath: string, headers: readonly string[]) => { + const stream = createWriteStream(filePath, { encoding: 'utf8' }) + await new Promise((resolve, reject) => { + stream.once('open', () => resolve()) + stream.once('error', reject) + }) + + const write = async (line: string): Promise => { + if (!stream.write(line)) await once(stream, 'drain') + } + + await write(`${headers.map(csvCell).join(',')}\n`) + + return { + writeRow: async (row: Record): Promise => { + await write(csvLine(headers, row)) + }, + close: async (): Promise => { + stream.end() + await once(stream, 'finish') + }, + } +} + +async function* iterateOccurrenceRows(): AsyncGenerator { + const { nowDb } = await import('../utils/db') + let cursor: { lid: number; species_id: number } | undefined + + while (true) { + const page = await nowDb.now_ls.findMany({ + take: OCCURRENCE_EXPORT_PAGE_SIZE, + ...(cursor ? 
{ cursor: { lid_species_id: cursor }, skip: 1 } : {}), + orderBy: [{ lid: 'asc' }, { species_id: 'asc' }], + select: occurrenceSelect, + }) + + if (page.length === 0) return + + for (const occurrence of page) { + yield occurrence as unknown as OccurrenceForExport + } + + const last = page[page.length - 1] + cursor = { lid: last.lid, species_id: last.species_id } + } +} + +const countOccurrenceRows = async (): Promise => { + const { nowDb } = await import('../utils/db') + return await nowDb.now_ls.count() +} + +const chunk = (values: T[], size: number): T[][] => { + const chunks: T[][] = [] + for (let index = 0; index < values.length; index += size) { + chunks.push(values.slice(index, index + size)) + } + return chunks +} + +const DWC_TERMS = { + occurrence: { + rowType: 'http://rs.tdwg.org/dwc/terms/Occurrence', + occurrenceID: 'http://rs.tdwg.org/dwc/terms/occurrenceID', + locationID: 'http://rs.tdwg.org/dwc/terms/locationID', + taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID', + scientificName: 'http://rs.tdwg.org/dwc/terms/scientificName', + occurrenceStatus: 'http://rs.tdwg.org/dwc/terms/occurrenceStatus', + organismQuantity: 'http://rs.tdwg.org/dwc/terms/organismQuantity', + organismQuantityType: 'http://rs.tdwg.org/dwc/terms/organismQuantityType', + identificationQualifier: 'http://rs.tdwg.org/dwc/terms/identificationQualifier', + occurrenceRemarks: 'http://rs.tdwg.org/dwc/terms/occurrenceRemarks', + }, + measurement: { + rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', + taxonID: 'http://rs.tdwg.org/dwc/terms/occurrenceID', + measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', + parentMeasurementID: 'http://rs.tdwg.org/dwc/terms/parentMeasurementID', + measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', + verbatimMeasurementType: 'http://rs.tdwg.org/dwc/terms/verbatimMeasurementType', + measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', + measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', + 
measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', + }, +} as const + +export const buildOccurrenceMetaXml = (): string => { + const occurrenceFields = OCCURRENCE_HEADERS.map((header, index) => { + const term = (DWC_TERMS.occurrence as Record)[header] + return ` ` + }).join('\n') + + const measurementFields = MEASUREMENT_HEADERS.map((header, index) => { + const term = (DWC_TERMS.measurement as Record)[header] + return ` ` + }).join('\n') + + return ` + + + + occurrence.csv + + +${occurrenceFields} + + + + measurementorfact.csv + + +${measurementFields} + + +` +} + +export const buildOccurrenceEmlXml = (publicationDateIso: string): string => { + return ` + + + + NOW database Darwin Core test export (occurrences) + + + NOW database + + + + + NOW database + + + ${publicationDateIso} + + Admin-only test Darwin Core Archive export for occurrence records from now_ls. Location and taxon lookup files are included with the same structures as the locality and taxon exports. + + + TODO(#1150): Add rights / license information. 
+ + + +` +} + +export const buildDwcOccurrenceArchiveZipBufferFromOccurrences = async ( + occurrences: OccurrenceWithLocalityForExport[] +): Promise => { + const localities = uniqueBy( + occurrences.map(occurrence => occurrence.now_loc), + locality => locality.lid.toString() + ) + const speciesRows = uniqueBy( + occurrences.map(occurrence => occurrence.com_species), + species => species.species_id.toString() + ) + + const locationCsv = await writeCsvString([...LOCATION_HEADERS], localities.map(mapLocalityToLocationRow)) + const geologicalContextCsv = await writeCsvString( + [...GEOLOGICAL_CONTEXT_HEADERS], + localities.map(mapLocalityToGeologicalContextRow) + ) + const taxonCsv = await writeCsvString([...TAXON_HEADERS], speciesRows.map(mapSpeciesToTaxonRow)) + const occurrenceCsv = await writeCsvString([...OCCURRENCE_HEADERS], occurrences.map(mapOccurrenceToOccurrenceRow)) + const measurementCsv = await writeCsvString( + [...MEASUREMENT_HEADERS], + occurrences.flatMap(mapOccurrenceToMeasurementRows) + ) + const metaXml = buildOccurrenceMetaXml() + const emlXml = buildOccurrenceEmlXml(new Date().toISOString().slice(0, 10)) + + const zip = new JSZip() + zip.file('location.csv', locationCsv) + zip.file('geologicalcontext.csv', geologicalContextCsv) + zip.file('taxon.csv', taxonCsv) + zip.file('occurrence.csv', occurrenceCsv) + zip.file('measurementorfact.csv', measurementCsv) + zip.file('meta.xml', metaXml) + zip.file('eml.xml', emlXml) + + return await zip.generateAsync({ type: 'nodebuffer', compression: 'DEFLATE', compressionOptions: { level: 6 } }) +} + +const writeOccurrenceAndMeasurementFiles = async ({ + occurrenceFilePath, + measurementFilePath, + reportProgress, +}: { + occurrenceFilePath: string + measurementFilePath: string + reportProgress?: DwcOccurrenceExportProgressReporter +}): Promise<{ localityIds: number[]; speciesIds: number[] }> => { + const occurrenceWriter = await createCsvFileWriter(occurrenceFilePath, OCCURRENCE_HEADERS) + const 
measurementWriter = await createCsvFileWriter(measurementFilePath, MEASUREMENT_HEADERS) + const localityIds = new Set() + const speciesIds = new Set() + const totalOccurrences = await countOccurrenceRows() + let generatedOccurrences = 0 + + reportProgress?.({ + stage: 'occurrences', + generated: generatedOccurrences, + total: totalOccurrences, + message: `Generating occurrence rows: ${generatedOccurrences}/${totalOccurrences} generated`, + }) + + try { + for await (const occurrence of iterateOccurrenceRows()) { + localityIds.add(occurrence.lid) + speciesIds.add(occurrence.species_id) + await occurrenceWriter.writeRow(mapOccurrenceToOccurrenceRow(occurrence)) + + for (const measurementRow of mapOccurrenceToMeasurementRows(occurrence)) { + await measurementWriter.writeRow(measurementRow) + } + + generatedOccurrences += 1 + if (generatedOccurrences === totalOccurrences || generatedOccurrences % OCCURRENCE_EXPORT_PAGE_SIZE === 0) { + reportProgress?.({ + stage: 'occurrences', + generated: generatedOccurrences, + total: totalOccurrences, + message: `Generating occurrence rows: ${generatedOccurrences}/${totalOccurrences} generated`, + }) + } + } + } finally { + await occurrenceWriter.close() + await measurementWriter.close() + } + + return { + localityIds: [...localityIds].sort((a, b) => a - b), + speciesIds: [...speciesIds].sort((a, b) => a - b), + } +} + +const writeLocalityLookupFiles = async ({ + localityIds, + locationFilePath, + geologicalContextFilePath, + reportProgress, +}: { + localityIds: number[] + locationFilePath: string + geologicalContextFilePath: string + reportProgress?: DwcOccurrenceExportProgressReporter +}): Promise => { + const { nowDb } = await import('../utils/db') + const locationWriter = await createCsvFileWriter(locationFilePath, LOCATION_HEADERS) + const geologicalContextWriter = await createCsvFileWriter(geologicalContextFilePath, GEOLOGICAL_CONTEXT_HEADERS) + let generatedLocalities = 0 + + reportProgress?.({ + stage: 'localities', + 
generated: generatedLocalities, + total: localityIds.length, + message: `Generating location lookup rows: ${generatedLocalities}/${localityIds.length} generated`, + }) + + try { + for (const ids of chunk(localityIds, LOOKUP_EXPORT_CHUNK_SIZE)) { + const localities = await nowDb.now_loc.findMany({ + where: { lid: { in: ids } }, + orderBy: { lid: 'asc' }, + select: localityLookupSelect, + }) + + for (const locality of localities) { + const localityForExport = locality as unknown as LocalityForOccurrenceExport + await locationWriter.writeRow(mapLocalityToLocationRow(localityForExport)) + await geologicalContextWriter.writeRow(mapLocalityToGeologicalContextRow(localityForExport)) + generatedLocalities += 1 + } + + reportProgress?.({ + stage: 'localities', + generated: generatedLocalities, + total: localityIds.length, + message: `Generating location lookup rows: ${generatedLocalities}/${localityIds.length} generated`, + }) + } + } finally { + await locationWriter.close() + await geologicalContextWriter.close() + } +} + +const writeTaxonLookupFile = async ({ + speciesIds, + taxonFilePath, + reportProgress, +}: { + speciesIds: number[] + taxonFilePath: string + reportProgress?: DwcOccurrenceExportProgressReporter +}): Promise => { + const { nowDb } = await import('../utils/db') + const taxonWriter = await createCsvFileWriter(taxonFilePath, TAXON_HEADERS) + let generatedTaxa = 0 + + reportProgress?.({ + stage: 'taxa', + generated: generatedTaxa, + total: speciesIds.length, + message: `Generating taxon lookup rows: ${generatedTaxa}/${speciesIds.length} generated`, + }) + + try { + for (const ids of chunk(speciesIds, LOOKUP_EXPORT_CHUNK_SIZE)) { + const speciesRows = await nowDb.com_species.findMany({ + where: { species_id: { in: ids } }, + orderBy: { species_id: 'asc' }, + select: speciesLookupSelect, + }) + + for (const species of speciesRows) { + await taxonWriter.writeRow(mapSpeciesToTaxonRow(species)) + generatedTaxa += 1 + } + + reportProgress?.({ + stage: 'taxa', + 
generated: generatedTaxa, + total: speciesIds.length, + message: `Generating taxon lookup rows: ${generatedTaxa}/${speciesIds.length} generated`, + }) + } + } finally { + await taxonWriter.close() + } +} + +export const buildDwcOccurrenceArchiveZipStream = async ({ + reportProgress, +}: { + reportProgress?: DwcOccurrenceExportProgressReporter +} = {}): Promise => { + const tempDirectory = await mkdtemp(path.join(tmpdir(), 'now-dwc-occurrences-')) + const files = { + location: path.join(tempDirectory, 'location.csv'), + geologicalContext: path.join(tempDirectory, 'geologicalcontext.csv'), + taxon: path.join(tempDirectory, 'taxon.csv'), + occurrence: path.join(tempDirectory, 'occurrence.csv'), + measurement: path.join(tempDirectory, 'measurementorfact.csv'), + } + + try { + const { localityIds, speciesIds } = await writeOccurrenceAndMeasurementFiles({ + occurrenceFilePath: files.occurrence, + measurementFilePath: files.measurement, + reportProgress, + }) + await writeLocalityLookupFiles({ + localityIds, + locationFilePath: files.location, + geologicalContextFilePath: files.geologicalContext, + reportProgress, + }) + await writeTaxonLookupFile({ speciesIds, taxonFilePath: files.taxon, reportProgress }) + + reportProgress?.({ + stage: 'zipping', + generated: 0, + total: null, + message: 'Compressing DwC-A ZIP...', + }) + + const zip = new JSZip() + zip.file('location.csv', createReadStream(files.location)) + zip.file('geologicalcontext.csv', createReadStream(files.geologicalContext)) + zip.file('taxon.csv', createReadStream(files.taxon)) + zip.file('occurrence.csv', createReadStream(files.occurrence)) + zip.file('measurementorfact.csv', createReadStream(files.measurement)) + zip.file('meta.xml', buildOccurrenceMetaXml()) + zip.file('eml.xml', buildOccurrenceEmlXml(new Date().toISOString().slice(0, 10))) + + return { + stream: zip.generateNodeStream({ type: 'nodebuffer', streamFiles: true, compression: 'DEFLATE' }), + cleanup: async () => { + await rm(tempDirectory, { 
recursive: true, force: true }) + }, + } + } catch (error) { + await rm(tempDirectory, { recursive: true, force: true }) + throw error + } +} diff --git a/backend/src/services/locality.ts b/backend/src/services/locality.ts index f27c8cded..88f51343d 100644 --- a/backend/src/services/locality.ts +++ b/backend/src/services/locality.ts @@ -20,6 +20,7 @@ import { logDb, nowDb } from '../utils/db' import { validateCollectingMethodValues } from '../utils/validation/collectingMethodValues' import { buildPersonLookupByInitials, getPersonDisplayName, getPersonFromLookup } from './utils/person' import { getReferenceDetails } from './reference' +import { addNullExactDateToReferenceJoins, referenceWithoutExactDateSelect } from './utils/referenceDate' const normalizeNumberField = (value: unknown) => { if (typeof value === 'string') { @@ -259,10 +260,7 @@ export const getLocalityDetails = async (id: number, user: User | undefined) => now_lr: { include: { ref_ref: { - include: { - ref_authors: true, - ref_journal: true, - }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -291,6 +289,7 @@ export const getLocalityDetails = async (id: number, user: User | undefined) => ...lau, lau_coordinator: getPersonDisplayName(coordinatorPerson, lau.lau_coordinator), lau_authorizer: getPersonDisplayName(authorizerPerson, lau.lau_authorizer), + now_lr: addNullExactDateToReferenceJoins(lau.now_lr), updates, } }) diff --git a/backend/src/services/occurrenceService.ts b/backend/src/services/occurrenceService.ts index 459894f2b..914c5ef5d 100644 --- a/backend/src/services/occurrenceService.ts +++ b/backend/src/services/occurrenceService.ts @@ -4,6 +4,7 @@ import { AccessError } from '../middlewares/authorizer' import { logDb, nowDb } from '../utils/db' import { buildPersonLookupByInitials, getPersonDisplayName, getPersonFromLookup } from './utils/person' import { generateOccurrenceDetailSql } from './queries/crossSearchQuery' +import { addNullExactDateToReferenceJoins, 
referenceWithoutExactDateSelect } from './utils/referenceDate' const getAllowedLocalities = async (user: User) => { const usersProjects = await nowDb.now_proj_people.findMany({ @@ -193,7 +194,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { now_lr: { include: { ref_ref: { - include: { ref_authors: true, ref_journal: true }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -207,7 +208,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { now_sr: { include: { ref_ref: { - include: { ref_authors: true, ref_journal: true }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -233,7 +234,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { update.lau_coordinator ), occ_comment: update.lau_comment ?? '', - references: update.now_lr as unknown as AnyReference[], + references: addNullExactDateToReferenceJoins(update.now_lr) as unknown as AnyReference[], updates: nowLsLogs.filter(logRow => logRow.luid === update.luid), })), ...speciesUpdates.map(update => ({ @@ -247,7 +248,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { update.sau_coordinator ), occ_comment: update.sau_comment ?? 
'', - references: update.now_sr as unknown as AnyReference[], + references: addNullExactDateToReferenceJoins(update.now_sr) as unknown as AnyReference[], updates: nowLsLogs.filter(logRow => logRow.suid === update.suid), })), ]) diff --git a/backend/src/services/reference.ts b/backend/src/services/reference.ts index 50d91a1ad..f1c9f51c4 100644 --- a/backend/src/services/reference.ts +++ b/backend/src/services/reference.ts @@ -7,6 +7,20 @@ import { } from './referenceValidation' import { Role, User } from '../../../frontend/src/shared/types' import { getIdsOfUsersProjects } from './locality' +import { referenceWithoutExactDateSelect } from './utils/referenceDate' + +type RawReferenceExactDate = { + exact_date: string | null +} + +const normalizeRawExactDate = (value: string | null | undefined): string | null => { + if (!value) return null + const date = value.slice(0, 10) + const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(date) + if (!match) return null + if (match[2] === '00' || match[3] === '00') return null + return date +} export const getAllReferences = async () => { const result = await nowDb.ref_ref.findMany({ @@ -41,22 +55,20 @@ export const getAllReferences = async () => { export const getReferenceDetails = async (id: number, _user?: User) => { const result = await nowDb.ref_ref.findUnique({ where: { rid: id }, - include: { ref_authors: true, ref_journal: true }, + select: referenceWithoutExactDateSelect, }) if (!result) { return null } - //changing exact_date to yyyy-mm-dd string since frontend uses that + we don't want to display ISO string in frontend - if (result && result.exact_date) { - const date = new Date(result.exact_date) - const formattedDate = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')}` + const exactDateRows = await nowDb.$queryRaw` + SELECT CAST(exact_date AS CHAR) AS exact_date + FROM ref_ref + WHERE rid = ${id} + ` - return { ...result, exact_date: formattedDate } - } - - 
return result + return { ...result, exact_date: normalizeRawExactDate(exactDateRows[0]?.exact_date) } } // Fetch localities that have been updated by the given reference id diff --git a/backend/src/services/timeBound.ts b/backend/src/services/timeBound.ts index 5457ee7f1..5d8b2025c 100644 --- a/backend/src/services/timeBound.ts +++ b/backend/src/services/timeBound.ts @@ -6,6 +6,7 @@ import { ValidationObject, referenceValidator } from '../../../frontend/src/shar import { getReferenceDetails } from './reference' import { buildPersonLookupByInitials, getPersonDisplayName, getPersonFromLookup } from './utils/person' import { TabListQueryOptions } from './tabularQuery' +import { addNullExactDateToReferenceJoins, referenceWithoutExactDateSelect } from './utils/referenceDate' export const getAllTimeBounds = async () => { const result = await nowDb.now_tu_bound.findMany({ @@ -30,10 +31,7 @@ export const getTimeBoundDetails = async (id: number) => { now_br: { include: { ref_ref: { - include: { - ref_authors: true, - ref_journal: true, - }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -61,6 +59,7 @@ export const getTimeBoundDetails = async (id: number) => { ...bau, bau_coordinator: getPersonDisplayName(coordinatorPerson, bau.bau_coordinator), bau_authorizer: getPersonDisplayName(authorizerPerson, bau.bau_authorizer), + now_br: addNullExactDateToReferenceJoins(bau.now_br), updates, } }) diff --git a/backend/src/services/timeUnit.ts b/backend/src/services/timeUnit.ts index dca55afaf..f627e1323 100644 --- a/backend/src/services/timeUnit.ts +++ b/backend/src/services/timeUnit.ts @@ -5,6 +5,7 @@ import { validateTimeUnit } from '../../../frontend/src/shared/validators/timeUn import { getReferenceDetails } from './reference' import { buildPersonLookupByInitials, getPersonDisplayName, getPersonFromLookup } from './utils/person' import { TabListQueryOptions } from './tabularQuery' +import { addNullExactDateToReferenceJoins, referenceWithoutExactDateSelect } from 
'./utils/referenceDate' export const getAllTimeUnits = async () => { const result = await nowDb.now_time_unit.findMany({ @@ -53,10 +54,7 @@ export const getTimeUnitDetails = async (id: string) => { now_tr: { include: { ref_ref: { - include: { - ref_authors: true, - ref_journal: true, - }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -85,6 +83,7 @@ export const getTimeUnitDetails = async (id: string) => { ...tau, tau_coordinator: getPersonDisplayName(coordinatorPerson, tau.tau_coordinator), tau_authorizer: getPersonDisplayName(authorizerPerson, tau.tau_authorizer), + now_tr: addNullExactDateToReferenceJoins(tau.now_tr), updates, } }) diff --git a/backend/src/services/utils/referenceDate.ts b/backend/src/services/utils/referenceDate.ts new file mode 100644 index 000000000..2fbe4bd8d --- /dev/null +++ b/backend/src/services/utils/referenceDate.ts @@ -0,0 +1,39 @@ +export const referenceWithoutExactDateSelect = { + rid: true, + ref_type_id: true, + journal_id: true, + title_primary: true, + date_primary: true, + volume: true, + issue: true, + start_page: true, + end_page: true, + publisher: true, + pub_place: true, + title_secondary: true, + date_secondary: true, + title_series: true, + issn_isbn: true, + ref_abstract: true, + web_url: true, + misc_1: true, + misc_2: true, + gen_notes: true, + printed_language: true, + used_morph: true, + used_now: true, + used_gene: true, + ref_authors: true, + ref_journal: true, +} as const + +export const addNullExactDateToReference = (reference: T) => ({ + ...reference, + exact_date: null, +}) + +export const addNullExactDateToReferenceJoins = (references: T[]) => + references.map(reference => ({ + ...reference, + ref_ref: addNullExactDateToReference(reference.ref_ref), + })) diff --git a/backend/src/unit-tests/dwcArchiveExport.test.ts b/backend/src/unit-tests/dwcArchiveExport.test.ts new file mode 100644 index 000000000..2453a3156 --- /dev/null +++ b/backend/src/unit-tests/dwcArchiveExport.test.ts @@ -0,0 +1,204 @@ 
+import { describe, expect, it } from '@jest/globals' +import JSZip from 'jszip' +import { + buildDwcArchiveZipBufferFromSpecies, + buildMetaXml, + mapSpeciesToMeasurementRows, + mapSpeciesToTaxonRow, +} from '../services/dwcArchiveExport' + +describe('DwC-A export mapping', () => { + it('maps com_species row to a DwC Taxon row', () => { + const row = mapSpeciesToTaxonRow({ + species_id: 123, + class_name: 'Mammalia', + subclass_or_superorder_name: null, + order_name: 'Carnivora', + suborder_or_superfamily_name: null, + family_name: 'Felidae', + subfamily_name: 'Felinae', + genus_name: 'Felis', + species_name: 'catus', + unique_identifier: '-', + taxonomic_status: null, + common_name: 'Cat', + sp_author: 'Linnaeus, 1758', + sp_comment: 'Test comment', + }) + + expect(row.taxonID).toEqual('NOW:123') + expect(row.nomenclaturalCode).toEqual('ICZN') + expect(row.scientificName).toEqual('Felis catus Linnaeus, 1758') + expect(row.genericName).toEqual('Felis') + expect(row.scientificNameAuthorship).toEqual('Linnaeus, 1758') + expect(row.vernacularName).toEqual('Cat') + expect(row.taxonRank).toEqual('species') + expect(row.taxonomicStatus).toEqual('accepted') + expect(row.kingdom).toEqual('Animalia') + expect(row.phylum).toEqual('Chordata') + expect(row.superfamily).toEqual('') + expect(row.subfamily).toEqual('Felinae') + expect(row.tribe).toEqual('') + expect(row.subtribe).toEqual('') + expect(row.higherClassification).toEqual('Mammalia|Carnivora|Felidae|Felinae') + expect(row.taxonRemarks).toEqual('Test comment') + }) + + it('generates measurement rows only for meaningful values', () => { + const rows = mapSpeciesToMeasurementRows({ + species_id: 123, + strain: null, + gene: null, + taxon_status: null, + body_mass: BigInt(2500), + brain_mass: null, + sv_length: null, + sd_size: null, + sd_display: null, + tshm: null, + symph_mob: null, + relative_blade_length: null, + tht: null, + diet1: '-', + diet2: 'Herbivore', + diet3: '', + diet_description: 'Leaves', + rel_fib: 
null, + selectivity: null, + digestion: null, + feedinghab1: null, + feedinghab2: null, + shelterhab1: null, + shelterhab2: null, + locomo1: null, + locomo2: 'Arboreal', + locomo3: null, + hunt_forage: null, + activity: 'Diurnal', + crowntype: null, + microwear: 'High', + horizodonty: null, + cusp_shape: null, + cusp_count_buccal: null, + cusp_count_lingual: null, + loph_count_lon: null, + loph_count_trs: null, + fct_al: null, + fct_ol: null, + fct_sf: null, + fct_ot: null, + fct_cm: null, + mesowear: null, + mw_or_high: null, + mw_or_low: null, + mw_cs_sharp: null, + mw_cs_round: null, + mw_cs_blunt: null, + mw_scale_min: null, + mw_scale_max: null, + mw_value: 1.5, + pop_struc: null, + sp_status: null, + }) + + expect(rows.some(row => row.taxonID === 'NOW:123')).toEqual(true) + + const ids = rows.map(row => row.measurementID) + expect(ids).toContain('NOW:123:body_mass') + expect(ids).toContain('NOW:123:diet2') + expect(ids).toContain('NOW:123:diet_description') + expect(ids).toContain('NOW:123:locomo2') + expect(ids).toContain('NOW:123:activity') + expect(ids).toContain('NOW:123:microwear') + expect(ids).toContain('NOW:123:mw_value') + expect(ids).not.toContain('NOW:123:brain_mass') + expect(ids).not.toContain('NOW:123:diet1') + expect(ids).not.toContain('NOW:123:diet3') + }) + + it('produces a ZIP containing the expected DwC-A files', async () => { + const zipBuffer = await buildDwcArchiveZipBufferFromSpecies([ + { + species_id: 1, + class_name: 'Mammalia', + subclass_or_superorder_name: null, + order_name: 'Primates', + suborder_or_superfamily_name: null, + family_name: 'Hominidae', + subfamily_name: null, + genus_name: 'Homo', + species_name: 'sapiens', + unique_identifier: '-', + taxonomic_status: 'accepted', + common_name: 'Human', + sp_author: null, + sp_comment: null, + strain: null, + gene: null, + taxon_status: null, + body_mass: BigInt(70000), + brain_mass: 1350, + sv_length: null, + sd_size: null, + sd_display: null, + tshm: null, + symph_mob: null, + 
relative_blade_length: null, + tht: null, + diet1: null, + diet2: null, + diet3: null, + diet_description: null, + rel_fib: null, + selectivity: null, + digestion: null, + feedinghab1: null, + feedinghab2: null, + shelterhab1: null, + shelterhab2: null, + locomo1: null, + locomo2: null, + locomo3: null, + hunt_forage: null, + activity: null, + crowntype: null, + microwear: null, + mesowear: null, + horizodonty: null, + cusp_shape: null, + cusp_count_buccal: null, + cusp_count_lingual: null, + loph_count_lon: null, + loph_count_trs: null, + fct_al: null, + fct_ol: null, + fct_sf: null, + fct_ot: null, + fct_cm: null, + mw_or_high: null, + mw_or_low: null, + mw_cs_sharp: null, + mw_cs_round: null, + mw_cs_blunt: null, + mw_scale_min: null, + mw_scale_max: null, + mw_value: null, + pop_struc: null, + sp_status: null, + }, + ]) + + const zip = await JSZip.loadAsync(zipBuffer) + expect(zip.file('taxon.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + }) + + it('generates valid meta.xml attributes for enclosed fields', () => { + const metaXml = buildMetaXml() + expect(metaXml).toContain("fieldsEnclosedBy='\"'") + expect(metaXml).not.toContain('fieldsEnclosedBy="""') + expect(metaXml).not.toContain('fieldsEnclosedBy="\\""') + }) +}) diff --git a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts new file mode 100644 index 000000000..5489229db --- /dev/null +++ b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts @@ -0,0 +1,230 @@ +import { describe, expect, it } from '@jest/globals' +import JSZip from 'jszip' +import { + buildDwcLocalityArchiveZipBufferFromLocalities, + buildLocalityMetaXml, + mapLocalityToGeologicalContextRow, + mapLocalityToLocationRow, + mapLocalityToMeasurementRows, +} from '../services/dwcArchiveExportLocalities' + +describe('DwC-A locality export mapping', () => 
{ + const baseLocality = { + lid: 42, + loc_name: 'Test locality', + basin: 'Test basin', + subbasin: 'Test subbasin', + country: 'Finland', + state: 'Uusimaa', + county: 'Helsinki', + dec_lat: 60.1699, + dec_long: 24.9384, + dms_lat: null, + dms_long: null, + approx_coord: null, + altitude: 123, + loc_detail: 'Some notes', + chron: 'Test chron', + lgroup: 'Test group', + formation: 'Test formation', + member: 'Test member', + bed: 'Test bed', + bfa_max: 'BFA_MAX', + bfa_min: 'BFA_MIN', + bfa_max_abs: null, + bfa_min_abs: null, + frac_max: null, + frac_min: null, + max_age: 12.3, + min_age: 4.5, + date_meth: 'radioisotope', + age_comm: 'Age comment', + site_area: null, + gen_loc: null, + plate: null, + appr_num_spm: null, + num_spm: null, + true_quant: null, + complete: null, + num_quad: null, + rock_type: null, + rt_adj: null, + lith_comm: null, + depo_context1: null, + depo_context2: null, + depo_context3: null, + depo_context4: null, + depo_comm: null, + sed_env_1: null, + sed_env_2: null, + event_circum: null, + se_comm: null, + assem_fm: null, + transport: null, + trans_mod: null, + weath_trmp: null, + pt_conc: null, + size_type: null, + vert_pres: null, + plant_pres: null, + invert_pres: null, + time_rep: null, + taph_comm: null, + tax_comm: null, + datum_plane: null, + tos: null, + bos: null, + loc_status: null, + hominin_skeletal_remains: false, + climate_type: null, + biome: null, + v_ht: null, + v_struct: null, + v_envi_det: null, + disturb: null, + nutrients: null, + water: null, + seasonality: null, + seas_intens: null, + pri_prod: null, + moisture: null, + temperature: null, + estimate_precip: null, + estimate_temp: null, + estimate_npp: null, + pers_woody_cover: null, + pers_pollen_ap: null, + pers_pollen_nap: null, + pers_pollen_other: null, + stone_tool_cut_marks_on_bones: false, + bipedal_footprints: false, + stone_tool_technology: false, + technological_mode_1: null, + technological_mode_2: null, + technological_mode_3: null, + cultural_stage_1: 
null, + cultural_stage_2: null, + cultural_stage_3: null, + regional_culture_1: null, + regional_culture_2: null, + regional_culture_3: null, + now_syn_loc: [], + now_ss: [], + now_coll_meth: [], + now_mus: [], + now_ls: [], + now_time_unit_now_loc_bfa_maxTonow_time_unit: null, + now_time_unit_now_loc_bfa_minTonow_time_unit: null, + } as const + + it('maps now_loc row to a DwC Location row', () => { + const row = mapLocalityToLocationRow(baseLocality) + expect(row.locationID).toEqual('NOW:LOC:42') + expect(row.locality).toEqual('Test locality') + expect(row.continent).toEqual('Europe') + expect(row.country).toEqual('Finland') + expect(row.stateProvince).toEqual('Uusimaa') + expect(row.county).toEqual('Helsinki') + expect(row.higherGeography).toEqual('Europe|Finland|Uusimaa|Helsinki|Test basin|Test subbasin') + expect(row.decimalLatitude).toEqual('60.1699') + expect(row.decimalLongitude).toEqual('24.9384') + expect(row.verbatimElevation).toEqual('123') + expect(row.locationRemarks).toContain('Some notes') + }) + + it('maps now_loc row to a DwC GeologicalContext row', () => { + const row = mapLocalityToGeologicalContextRow(baseLocality) + expect(row.locationID).toEqual('NOW:LOC:42') + expect(row.geologicalContextID).toEqual('NOW:LOC:42:geology') + expect(row.group).toEqual('Test group') + expect(row.formation).toEqual('Test formation') + expect(row.member).toEqual('Test member') + expect(row.bed).toEqual('Test bed') + expect(row.earliestAgeOrLowestStage).toEqual('BFA_MAX') + expect(row.latestAgeOrHighestStage).toEqual('BFA_MIN') + }) + + it('emits locality measurements only for meaningful values', () => { + const rows = mapLocalityToMeasurementRows({ + ...baseLocality, + chron: '-', + bfa_min: null, + }) + expect(rows.some(r => r.verbatimMeasurementType === 'max_age')).toEqual(true) + expect(rows.some(r => r.verbatimMeasurementType === 'min_age')).toEqual(true) + expect(rows.some(r => r.verbatimMeasurementType === 'chron')).toEqual(false) + expect(rows.some(r => 
r.verbatimMeasurementType === 'bfa_min')).toEqual(false) + }) + + it('exports requested locality fields and omits project and last-update rows', () => { + const rows = mapLocalityToMeasurementRows({ + ...baseLocality, + basin: 'Basin value', + subbasin: 'Subbasin value', + bipedal_footprints: true, + invert_pres: 'Invert preservation', + nutrients: 'High', + pers_pollen_ap: 10, + pers_pollen_nap: 20, + pers_pollen_other: 0, + plant_pres: 'Plant preservation', + }) + + expect(rows).toEqual( + expect.arrayContaining([ + expect.objectContaining({ verbatimMeasurementType: 'basin', measurementValue: 'Basin value' }), + expect.objectContaining({ verbatimMeasurementType: 'subbasin', measurementValue: 'Subbasin value' }), + expect.objectContaining({ verbatimMeasurementType: 'bipedal_footprints', measurementValue: 'true' }), + expect.objectContaining({ verbatimMeasurementType: 'invert_pres', measurementValue: 'Invert preservation' }), + expect.objectContaining({ verbatimMeasurementType: 'nutrients', measurementValue: 'High' }), + expect.objectContaining({ verbatimMeasurementType: 'pers_pollen_ap', measurementValue: '10' }), + expect.objectContaining({ verbatimMeasurementType: 'pers_pollen_nap', measurementValue: '20' }), + expect.objectContaining({ verbatimMeasurementType: 'pers_pollen_other', measurementValue: '0' }), + expect.objectContaining({ verbatimMeasurementType: 'plant_pres', measurementValue: 'Plant preservation' }), + ]) + ) + expect(rows.some(r => r.verbatimMeasurementType.startsWith('now_plr'))).toEqual(false) + expect(rows.some(r => r.verbatimMeasurementType.startsWith('now_lau'))).toEqual(false) + }) + + it('concatenates collecting methods with |', () => { + const rows = mapLocalityToMeasurementRows({ + ...baseLocality, + now_coll_meth: [{ coll_meth: 'screenwash' }, { coll_meth: 'quarry' }], + }) + const collectingMethodsRow = rows.find(r => r.verbatimMeasurementType === 'now_coll_meth.coll_meth') + 
expect(collectingMethodsRow?.measurementValue).toEqual('screenwash|quarry') + }) + + it('uses parentMeasurementID for stratigraphy fields', () => { + const rows = mapLocalityToMeasurementRows({ + ...baseLocality, + datum_plane: 'Datum', + tos: 0, + bos: 12.5, + }) + const parent = rows.find(r => r.verbatimMeasurementType === 'stratigraphy') + expect(parent).toBeTruthy() + const tosRow = rows.find(r => r.verbatimMeasurementType === 'tos') + const bosRow = rows.find(r => r.verbatimMeasurementType === 'bos') + expect(tosRow?.parentMeasurementID).toEqual(parent?.measurementID) + expect(bosRow?.parentMeasurementID).toEqual(parent?.measurementID) + }) + + it('generates a ZIP archive with expected files', async () => { + const zipBuffer = await buildDwcLocalityArchiveZipBufferFromLocalities([baseLocality]) + const zip = await JSZip.loadAsync(zipBuffer) + expect(zip.file('location.csv')).toBeTruthy() + expect(zip.file('geologicalcontext.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + }) + + it('generates valid meta.xml attributes for enclosed fields', () => { + const metaXml = buildLocalityMetaXml() + expect(metaXml).toContain("fieldsEnclosedBy='\"'") + expect(metaXml).not.toContain('fieldsEnclosedBy="""') + expect(metaXml).not.toContain('fieldsEnclosedBy="\\""') + }) +}) diff --git a/backend/src/unit-tests/dwcArchiveExportOccurrences.test.ts b/backend/src/unit-tests/dwcArchiveExportOccurrences.test.ts new file mode 100644 index 000000000..122cab4e6 --- /dev/null +++ b/backend/src/unit-tests/dwcArchiveExportOccurrences.test.ts @@ -0,0 +1,215 @@ +import { describe, expect, it } from '@jest/globals' +import JSZip from 'jszip' +import { + buildDwcOccurrenceArchiveZipBufferFromOccurrences, + mapOccurrenceToMeasurementRows, + mapOccurrenceToOccurrenceRow, +} from '../services/dwcArchiveExportOccurrences' + +describe('DwC-A occurrence export mapping', () => { + 
const baseOccurrence = { + lid: 42, + species_id: 21052, + nis: 7, + pct: null, + quad: null, + mni: 2, + qua: 'A', + id_status: 'confirmed', + orig_entry: 'Original occurrence note', + source_name: 'Source collection', + body_mass: BigInt(1234), + mesowear: 'mix', + mw_or_high: 1, + mw_or_low: null, + mw_cs_sharp: null, + mw_cs_round: 2, + mw_cs_blunt: null, + mw_scale_min: 0, + mw_scale_max: 3, + mw_value: 2, + microwear: 'scratch', + dc13_mean: -11.2, + dc13_n: 4, + dc13_max: -10.1, + dc13_min: -12.3, + dc13_stdev: 0.4, + do18_mean: 1.2, + do18_n: 3, + do18_max: 2.4, + do18_min: 0.4, + do18_stdev: 0.5, + now_loc: { + lid: 42, + loc_name: 'Test locality', + basin: 'Test basin', + subbasin: 'Test subbasin', + country: 'Finland', + state: 'Uusimaa', + county: 'Helsinki', + dec_lat: 60.1699, + dec_long: 24.9384, + dms_lat: null, + dms_long: null, + approx_coord: null, + altitude: 123, + loc_detail: 'Some notes', + chron: 'Test chron', + lgroup: 'Test group', + formation: 'Test formation', + member: 'Test member', + bed: 'Test bed', + bfa_max: 'BFA_MAX', + bfa_min: 'BFA_MIN', + bfa_max_abs: null, + bfa_min_abs: null, + frac_max: null, + frac_min: null, + max_age: 12.3, + min_age: 4.5, + date_meth: 'radioisotope', + age_comm: 'Age comment', + site_area: null, + gen_loc: null, + plate: null, + appr_num_spm: null, + num_spm: null, + true_quant: null, + complete: null, + num_quad: null, + rock_type: null, + rt_adj: null, + lith_comm: null, + depo_context1: null, + depo_context2: null, + depo_context3: null, + depo_context4: null, + depo_comm: null, + sed_env_1: null, + sed_env_2: null, + event_circum: null, + se_comm: null, + assem_fm: null, + transport: null, + trans_mod: null, + weath_trmp: null, + pt_conc: null, + size_type: null, + vert_pres: null, + plant_pres: null, + invert_pres: null, + time_rep: null, + taph_comm: null, + tax_comm: null, + datum_plane: null, + tos: null, + bos: null, + loc_status: null, + hominin_skeletal_remains: false, + climate_type: null, + 
biome: null, + v_ht: null, + v_struct: null, + v_envi_det: null, + disturb: null, + nutrients: null, + water: null, + seasonality: null, + seas_intens: null, + pri_prod: null, + moisture: null, + temperature: null, + estimate_precip: null, + estimate_temp: null, + estimate_npp: null, + pers_woody_cover: null, + pers_pollen_ap: null, + pers_pollen_nap: null, + pers_pollen_other: null, + stone_tool_cut_marks_on_bones: false, + bipedal_footprints: false, + stone_tool_technology: false, + technological_mode_1: null, + technological_mode_2: null, + technological_mode_3: null, + cultural_stage_1: null, + cultural_stage_2: null, + cultural_stage_3: null, + regional_culture_1: null, + regional_culture_2: null, + regional_culture_3: null, + now_syn_loc: [], + now_ss: [], + now_coll_meth: [], + now_mus: [], + now_ls: [], + now_time_unit_now_loc_bfa_maxTonow_time_unit: null, + now_time_unit_now_loc_bfa_minTonow_time_unit: null, + }, + com_species: { + species_id: 21052, + class_name: 'Mammalia', + subclass_or_superorder_name: null, + order_name: 'Rodentia', + suborder_or_superfamily_name: null, + family_name: 'Testidae', + subfamily_name: null, + genus_name: 'Simplomys', + species_name: 'simplicidens', + unique_identifier: '-', + taxonomic_status: null, + common_name: null, + sp_author: 'Test Author', + sp_comment: null, + }, + } as const + + it('maps now_ls row to a DwC Occurrence row', () => { + const row = mapOccurrenceToOccurrenceRow(baseOccurrence) + expect(row).toEqual( + expect.objectContaining({ + occurrenceID: 'NOW:OCC:42:21052', + locationID: 'NOW:LOC:42', + taxonID: 'NOW:21052', + scientificName: 'Simplomys simplicidens Test Author', + occurrenceStatus: 'present', + organismQuantity: '2', + organismQuantityType: 'minimum number of individuals', + identificationQualifier: 'confirmed', + occurrenceRemarks: 'Original occurrence note | Source collection | A', + }) + ) + }) + + it('prefixes now_ls measurement verbatim names to avoid com_species collisions', () => { + 
const rows = mapOccurrenceToMeasurementRows(baseOccurrence) + expect(rows).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + taxonID: 'NOW:OCC:42:21052', + measurementID: 'NOW:OCC:42:21052:now_ls.body_mass', + verbatimMeasurementType: 'now_ls.body_mass', + measurementValue: '1234', + measurementUnit: 'g', + }), + expect.objectContaining({ + measurementID: 'NOW:OCC:42:21052:now_ls.mesowear', + verbatimMeasurementType: 'now_ls.mesowear', + measurementValue: 'mix', + }), + ]) + ) + }) + + it('generates a ZIP archive with occurrence export files', async () => { + const zipBuffer = await buildDwcOccurrenceArchiveZipBufferFromOccurrences([baseOccurrence]) + const zip = await JSZip.loadAsync(zipBuffer) + expect(zip.file('location.csv')).toBeTruthy() + expect(zip.file('geologicalcontext.csv')).toBeTruthy() + expect(zip.file('taxon.csv')).toBeTruthy() + expect(zip.file('occurrence.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + }) +}) diff --git a/documentation/functionality/dwc_export.md b/documentation/functionality/dwc_export.md new file mode 100644 index 000000000..e1462a817 --- /dev/null +++ b/documentation/functionality/dwc_export.md @@ -0,0 +1,139 @@ +# Darwin Core Archive export (v1, admin-only) + +Issue: `nowcommunity/nowdatabase#1150` + +This repository includes an **admin-only** Darwin Core Archive (DwC-A) export intended for initial testing. + +## Access + +- Backend route: `GET /species/export/dwc-archive` (**Role.Admin only**) +- The frontend exposes this as an export option on the `/species` page for administrators. + +## Output + +The downloaded ZIP contains: + +- `taxon.csv` (DwC Taxon core) +- `measurementorfact.csv` (DwC MeasurementOrFact extension) +- `meta.xml` (DwC-A descriptor) +- `eml.xml` (minimal placeholder metadata; TODOs included) + +## v1 field mappings + +### `taxon.csv` + +One row per `com_species` record. 
+ +Columns: + +- `taxonID` = `com_species.species_id` +- `taxonID` = `NOW:` +- `nomenclaturalCode` = `ICZN` +- `scientificName` = `${genus_name} ${species_name} ${sp_author}` (trimmed; authorship appended when present) +- `genericName` = `genus_name` (only when `species_name` is a simple epithet; no spaces or dots) +- `scientificNameAuthorship` = `sp_author` +- `vernacularName` = `common_name` +- `taxonRank`: + - `order` if `family_name` contains `.` + - `family` if `genus_name` contains `.` + - `genus` if `species_name` contains a space or `.` + - `species` if `unique_identifier` is `-` + - `subspecies` if `unique_identifier` is a single lowercase word +- `taxonomicStatus` = `taxonomic_status` (fallback: `accepted`) +- `kingdom` = `Animalia` +- `phylum` = `Chordata` +- `class` = `class_name` +- `order` = `order_name` +- `superfamily` = `subclass_or_superorder_name` (only when it ends with `-oidea`) +- `family` = `family_name` +- `subfamily` = `subfamily_name` (only when it ends with `-inae`) +- `tribe` = `subfamily_name` (only when it ends with `-ini`) +- `subtribe` = `subfamily_name` (only when it ends with `-ina`) +- `genus` = `genus_name` +- `specificEpithet` = `species_name` +- `infraspecificEpithet` = `unique_identifier` (only when meaningful and not `-`) +- `higherClassification` = `class_name|subclass_or_superorder_name|order_name|suborder_or_superfamily_name|family_name|subfamily_name` (skip empty / `-`) +- `taxonRemarks` = `sp_comment` + +Note: + +- v1 intentionally exports only `com_species` rows as taxa (no synonyms yet). + +### `measurementorfact.csv` + +Long-format measurements linked by `taxonID`. + +Columns: + +- `taxonID` = `NOW:` +- `measurementID` = `NOW::` +- `parentMeasurementID` = empty by default; for crown-type segments points to the calculated parent row +- `measurementType` / `measurementUnit` / `measurementValue` per field mapping +- `verbatimMeasurementType` = original DB field name (e.g. 
`diet1`, `body_mass`) +- `measurementMethod` = Pantheria VSP field description where available (`https://www.pantherion.com/dbmanual97/VSP.html`) + +Calculated tooth rows (emitted only when at least one segment field is present): + +- `NOW::developmental_crown_type` (value is a 5-char concatenation of `cusp_shape`, `cusp_count_buccal`, `cusp_count_lingual`, `loph_count_lon`, `loph_count_trs`, using `-` for missing) +- `NOW::functional_crown_type` (value is a 5-char concatenation of `fct_al`, `fct_ol`, `fct_sf`, `fct_ot`, `fct_cm`, using `-` for missing) + +v1 includes these `com_species` fields (rows emitted only when source value is non-null and non-empty; `-` is treated as empty): + +- `strain` +- `gene` +- `taxon_status` +- `body_mass` → type: `body mass`, unit: `g` +- `brain_mass` → type: `brain mass`, unit: `g` +- `sv_length` +- `sd_size` +- `sd_display` +- `tshm` +- `symph_mob` +- `relative_blade_length` +- `tht` +- `diet1` → type: `diet category 1` +- `diet2` → type: `diet category 2` +- `diet3` → type: `diet category 3` +- `diet_description` → type: `diet description` +- `rel_fib` +- `selectivity` +- `digestion` +- `feedinghab1` +- `feedinghab2` +- `shelterhab1` +- `shelterhab2` +- `locomo1` → type: `locomotion 1` +- `locomo2` → type: `locomotion 2` +- `locomo3` → type: `locomotion 3` +- `hunt_forage` +- `activity` → type: `activity` +- `crowntype` → type: `crown type` +- `microwear` → type: `microwear` +- `mesowear` → type: `mesowear` +- `horizodonty` +- `cusp_shape` +- `cusp_count_buccal` +- `cusp_count_lingual` +- `loph_count_lon` +- `loph_count_trs` +- `fct_al` +- `fct_ol` +- `fct_sf` +- `fct_ot` +- `fct_cm` +- `mw_or_high` +- `mw_or_low` +- `mw_cs_sharp` +- `mw_cs_round` +- `mw_cs_blunt` +- `mw_scale_min` +- `mw_scale_max` +- `mw_value` → type: `mesowear value` +- `pop_struc` +- `sp_status` + +## Extension points (TODOs) + +- Add synonym export from `com_taxa_synonym` (either separate Taxon rows or a dedicated extension). 
+- Add additional traits/measurements from `com_species`. +- Replace the placeholder `eml.xml` generator with a real dataset-level EML implementation. diff --git a/documentation/functionality/dwc_export_localities.md b/documentation/functionality/dwc_export_localities.md new file mode 100644 index 000000000..4606eadbb --- /dev/null +++ b/documentation/functionality/dwc_export_localities.md @@ -0,0 +1,75 @@ +# DwC-A export: localities (v1) + +This document describes the admin-only Darwin Core Archive (DwC-A) test export for localities. + +## Files + +The export ZIP contains: + +- `location.csv` (DwC `Location` core) +- `geologicalcontext.csv` (DwC `GeologicalContext` extension) +- `measurementorfact.csv` (DwC `MeasurementOrFact` extension) +- `meta.xml` (DwC-A metadata) +- `eml.xml` (minimal placeholder EML metadata) + +## Core: `location.csv` + +Core rowType: `http://rs.tdwg.org/dwc/terms/Location` + +v1 columns: + +- `locationID` = `NOW:LOC:` +- `locality` = `loc_name` +- `continent` = derived from `country` (via shared country→continent map) +- `country` = `country` +- `stateProvince` = `state` +- `county` = `county` +- `higherGeography` = `continent|country|state|county|basin|subbasin` (skip empty) +- `decimalLatitude` / `decimalLongitude` = `dec_lat` / `dec_long` (0 treated as empty for v1) +- `verbatimLatitude` / `verbatimLongitude` = `dms_lat` / `dms_long` +- `verbatimElevation` = `altitude` +- `locationRemarks` = `loc_detail` and `age_comm` (joined with ` | `) + +## Extension: `geologicalcontext.csv` + +Extension rowType: `http://rs.tdwg.org/dwc/terms/GeologicalContext` + +v1 columns: + +- `locationID` = `NOW:LOC:` (core id) +- `geologicalContextID` = `NOW:LOC::geology` +- `lithostratigraphicTerms` = `chron`, `lgroup`, `formation`, `member`, `bed` (joined with ` | `) +- `group` / `formation` / `member` / `bed` mapped from locality columns +- `earliestAgeOrLowestStage` = `bfa_max` (uses related `now_time_unit.tu_display_name` when available) +- 
`latestAgeOrHighestStage` = `bfa_min` (uses related `now_time_unit.tu_display_name` when available) + +## Extension: `measurementorfact.csv` + +Extension rowType: `http://rs.tdwg.org/dwc/terms/MeasurementOrFact` + +`measurementorfact.csv` uses the same column headings as the taxa export (for consistency), but the `taxonID` column contains the locality `locationID` value (`NOW:LOC:`). + +Each emitted row has: + +- `taxonID` = `NOW:LOC:` (core id for Location) +- `measurementID` = `NOW:LOC::` (or calculated group id) +- `parentMeasurementID` is used for the age hierarchy: + - `NOW:LOC::age` (parent) → `max_age` / `min_age` + - `bfa_*`, `*_abs`, `frac_*` link to the relevant `max_age` / `min_age` +- `verbatimMeasurementType` = original DB field name + +Concatenation rules (v1): + +- Locality synonyms are concatenated with `|` into a single `synonyms` measurement row. +- Collecting methods (`now_coll_meth`) are concatenated with `|` into a single measurement row. +- Sedimentary structures (`now_ss`) are concatenated with `|` into a single measurement row. + +Field coverage (v1): + +- Fossil Assemblage, Taphonomy, Climate, Ecometrics, and Archaeology tab fields are exported as `MeasurementOrFact` rows when populated (plus selected calculated values such as mean hypsodonty). +- `basin` and `subbasin` are exported both as part of `higherGeography` and as explicit `MeasurementOrFact` rows. +- `now_plr` project links and `now_lau` last-update rows are intentionally excluded. + +## Admin-only + +The backend route is restricted to `Role.Admin`. diff --git a/documentation/functionality/dwc_export_occurrences.md b/documentation/functionality/dwc_export_occurrences.md new file mode 100644 index 000000000..fe38347ce --- /dev/null +++ b/documentation/functionality/dwc_export_occurrences.md @@ -0,0 +1,51 @@ +# DwC-A export: occurrences (v1) + +This document describes the admin-only Darwin Core Archive (DwC-A) test export for occurrence records (`now_ls`). 
+ +## Files + +The export ZIP contains: + +- `occurrence.csv` (DwC `Occurrence` core) +- `measurementorfact.csv` (DwC `MeasurementOrFact` extension for `now_ls` facts) +- `location.csv` (companion file using the same structure as the locality export) +- `geologicalcontext.csv` (companion file using the same structure as the locality export) +- `taxon.csv` (companion file using the same structure as the taxon export) +- `meta.xml` +- `eml.xml` (minimal placeholder EML metadata) + +## Core: `occurrence.csv` + +Core rowType: `http://rs.tdwg.org/dwc/terms/Occurrence` + +v1 columns: + +- `occurrenceID` = `NOW:OCC::` +- `locationID` = `NOW:LOC:` +- `taxonID` = `NOW:` +- `scientificName` = genus, species, optional unique identifier, and authorship from `com_species` +- `occurrenceStatus` = `present` +- `organismQuantity` / `organismQuantityType` = first available quantity from `mni`, `nis`, `pct`, then `quad` +- `identificationQualifier` = `id_status` +- `occurrenceRemarks` = `orig_entry`, `source_name`, and `qua` (joined with `|`) + +## Extension: `measurementorfact.csv` + +The occurrence export uses the same `measurementorfact.csv` column structure as the taxon and locality exports. + +For occurrence-level measurements, the `taxonID` column contains the occurrence core id (`NOW:OCC::`). `verbatimMeasurementType` values from `now_ls` are prefixed with `now_ls.` so they do not collide with same-named `com_species` fields such as `body_mass`, `mesowear`, `mw_value`, or `microwear`. 
Included `now_ls` fields for v1: + +- count / abundance fields: `nis`, `pct`, `quad`, `mni` +- body mass: `body_mass` +- wear fields: `mesowear`, `mw_or_high`, `mw_or_low`, `mw_cs_sharp`, `mw_cs_round`, `mw_cs_blunt`, `mw_scale_min`, `mw_scale_max`, `mw_value`, `microwear` +- isotope fields: `dc13_mean`, `dc13_n`, `dc13_max`, `dc13_min`, `dc13_stdev`, `do18_mean`, `do18_n`, `do18_max`, `do18_min`, `do18_stdev` + +## Companion Files + +`location.csv`, `geologicalcontext.csv`, and `taxon.csv` are included as lookup/context files for the occurrence rows and intentionally reuse the existing locality and taxon export structures. + +## Admin-only + +The backend route is restricted to `Role.Admin`. diff --git a/frontend/src/components/CrossSearch/CrossSearchTable.tsx b/frontend/src/components/CrossSearch/CrossSearchTable.tsx index a3cf690cf..28ec70266 100755 --- a/frontend/src/components/CrossSearch/CrossSearchTable.tsx +++ b/frontend/src/components/CrossSearch/CrossSearchTable.tsx @@ -8,6 +8,7 @@ import { usePageContext } from '../Page' import { LocalitiesMap } from '../Map/LocalitiesMap' import { formatWithMaxThreeDecimals } from '@/util/numberFormatting' import { occurrenceLabels } from '@/constants/occurrenceLabels' +import { OccurrenceDwcExportMenuItem } from '@/components/Occurrence/OccurrenceDwcExportMenuItem' import { matchesCountryOrContinent } from '@/shared/validators/countryContinents' export const CrossSearchTable = ({ selectorFn }: { selectorFn?: (newObject: CrossSearch) => void }) => { @@ -1089,6 +1090,7 @@ export const CrossSearchTable = ({ selectorFn }: { selectorFn?: (newObject: Cros isCrossSearchTable={true} isError={isError} error={error} + renderExtraExportMenuItems={handleClose => <OccurrenceDwcExportMenuItem handleClose={handleClose} />} /> ) diff --git a/frontend/src/components/Locality/LocalityDwcExportMenuItem.tsx b/frontend/src/components/Locality/LocalityDwcExportMenuItem.tsx new file mode 100644 index 000000000..25f601356 --- /dev/null +++ 
b/frontend/src/components/Locality/LocalityDwcExportMenuItem.tsx @@ -0,0 +1,88 @@ +import { useState } from 'react' +import { MenuItem } from '@mui/material' +import { useNotify } from '@/hooks/notification' +import { BACKEND_URL } from '@/util/config' +import { useUser } from '@/hooks/user' +import { Role } from '@/shared/types' +import { currentDateAsString } from '@/shared/currentDateAsString' + +export const LocalityDwcExportMenuItem = ({ handleClose }: { handleClose: () => void }) => { + const [loading, setLoading] = useState(false) + const { notify, setMessage: setNotificationMessage } = useNotify() + const user = useUser() + + if (user.role !== Role.Admin) { + return null + } + + const fetchOptions = user.token ? { headers: { Authorization: `Bearer ${user.token}` } } : {} + const filename = `now_dwc_localities_test_export_${currentDateAsString()}.zip` + + const fetchZipFile = async () => { + setLoading(true) + notify('Generating DwC-A ZIP export, please wait...', 'info', null) + + try { + const response = await fetch(`${BACKEND_URL}/locality/export/dwc-archive`, fetchOptions) + if (!response.ok) { + throw new Error('Server response was not OK.') + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Missing response stream.') + } + + const file: Uint8Array[] = [] + let bytes = 0 + let closed = false + + const showDownloadProgress = () => { + if (!closed) { + setTimeout(() => { + setNotificationMessage(`Downloading DwC-A ZIP, ${Math.round((bytes / 1000000) * 10) / 10} MB`) + showDownloadProgress() + }, 500) + } + } + + notify('Downloading DwC-A ZIP...', 'info', null) + showDownloadProgress() + + while (true) { + const { done, value } = await reader.read() + if (done) break + bytes = bytes + value.length + file.push(value) + } + closed = true + + const blobUrl = window.URL.createObjectURL(new Blob(file, { type: 'application/zip' })) + const downloadLink = document.createElement('a') + downloadLink.href = blobUrl + 
downloadLink.download = filename + document.body.appendChild(downloadLink) + downloadLink.click() + downloadLink.remove() + window.URL.revokeObjectURL(blobUrl) + + notify('Download finished.') + } catch { + notify('Downloading DwC-A export failed.', 'error') + } finally { + setLoading(false) + } + } + + return ( + <MenuItem + onClick={() => { + void fetchZipFile() + handleClose() + }} + disabled={loading} + > + Export DwC-A (localities) + </MenuItem> + ) +} diff --git a/frontend/src/components/Locality/LocalityTable.tsx b/frontend/src/components/Locality/LocalityTable.tsx index e76ff9798..efc158c97 100755 --- a/frontend/src/components/Locality/LocalityTable.tsx +++ b/frontend/src/components/Locality/LocalityTable.tsx @@ -10,6 +10,7 @@ import { usePageContext } from '../Page' import { LocalitySynonymsModal } from './LocalitySynonymsModal' import { currentDateAsString } from '@/shared/currentDateAsString' import { matchesCountryOrContinent } from '@/shared/validators/countryContinents' +import { LocalityDwcExportMenuItem } from './LocalityDwcExportMenuItem' const LocalitiesMap = lazy(async () => { const module = await import('../Map/LocalitiesMap') @@ -523,6 +524,7 @@ export const LocalityTable = ({ selectorFn }: { selectorFn?: (newObject: Localit url="locality" kmlExport={kmlExport} svgExport={svgExport} + renderExtraExportMenuItems={handleClose => <LocalityDwcExportMenuItem handleClose={handleClose} />} enableColumnFilterModes={true} tableRowAction={handleLocalityRowActionClick} /> diff --git a/frontend/src/components/Occurrence/OccurrenceDwcExportMenuItem.tsx b/frontend/src/components/Occurrence/OccurrenceDwcExportMenuItem.tsx new file mode 100644 index 000000000..92b5f434c --- /dev/null +++ b/frontend/src/components/Occurrence/OccurrenceDwcExportMenuItem.tsx @@ -0,0 +1,128 @@ +import { useState } from 'react' +import { MenuItem } from '@mui/material' +import { useNotify } from '@/hooks/notification' +import { BACKEND_URL } from '@/util/config' +import { useUser } from '@/hooks/user' +import { Role } from '@/shared/types' +import { currentDateAsString } 
from '@/shared/currentDateAsString' + +type OccurrenceExportProgress = { + message: string +} + +const createExportId = () => { + if (window.crypto?.randomUUID) return window.crypto.randomUUID() + return `${Date.now()}-${Math.random().toString(36).slice(2)}` +} + +export const OccurrenceDwcExportMenuItem = ({ handleClose }: { handleClose: () => void }) => { + const [loading, setLoading] = useState(false) + const { notify, setMessage: setNotificationMessage } = useNotify() + const user = useUser() + + if (user.role !== Role.Admin) { + return null + } + + const fetchOptions = user.token ? { headers: { Authorization: `Bearer ${user.token}` } } : {} + const filename = `now_dwc_occurrences_test_export_${currentDateAsString()}.zip` + + const fetchZipFile = async () => { + setLoading(true) + const exportId = createExportId() + let generationProgressTimer: number | undefined + notify('Generating DwC-A ZIP export...', 'info', null) + + const updateGenerationProgress = async () => { + try { + const response = await fetch(`${BACKEND_URL}/occurrence/export/dwc-archive/progress/${exportId}`, fetchOptions) + if (!response.ok) return + + const progress = (await response.json()) as OccurrenceExportProgress + setNotificationMessage(progress.message) + } catch { + // The download request owns the final success/failure notification. 
+ } + } + + const stopGenerationProgress = () => { + if (generationProgressTimer !== undefined) { + window.clearInterval(generationProgressTimer) + generationProgressTimer = undefined + } + } + + try { + generationProgressTimer = window.setInterval(() => { + void updateGenerationProgress() + }, 1000) + + const response = await fetch( + `${BACKEND_URL}/occurrence/export/dwc-archive?${new URLSearchParams({ exportId })}`, + fetchOptions + ) + stopGenerationProgress() + + if (!response.ok) { + throw new Error('Server response was not OK.') + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Missing response stream.') + } + + const file: Uint8Array[] = [] + let bytes = 0 + let closed = false + + const showDownloadProgress = () => { + if (!closed) { + setTimeout(() => { + setNotificationMessage(`Downloading DwC-A ZIP, ${Math.round((bytes / 1000000) * 10) / 10} MB`) + showDownloadProgress() + }, 500) + } + } + + notify('Downloading DwC-A ZIP...', 'info', null) + showDownloadProgress() + + while (true) { + const { done, value } = await reader.read() + if (done) break + bytes = bytes + value.length + file.push(value) + } + closed = true + + const blobUrl = window.URL.createObjectURL(new Blob(file, { type: 'application/zip' })) + const downloadLink = document.createElement('a') + downloadLink.href = blobUrl + downloadLink.download = filename + document.body.appendChild(downloadLink) + downloadLink.click() + downloadLink.remove() + window.URL.revokeObjectURL(blobUrl) + + notify('Download finished.') + } catch { + stopGenerationProgress() + notify('Downloading DwC-A export failed.', 'error') + } finally { + setLoading(false) + } + } + + return ( + <MenuItem + onClick={() => { + void fetchZipFile() + handleClose() + }} + disabled={loading} + > + Export DwC-A (occurrences) + </MenuItem> + ) +} diff --git a/frontend/src/components/Species/SpeciesDwcExportMenuItem.tsx b/frontend/src/components/Species/SpeciesDwcExportMenuItem.tsx new file mode 100644 index 000000000..6707f5e5c --- 
/dev/null +++ b/frontend/src/components/Species/SpeciesDwcExportMenuItem.tsx @@ -0,0 +1,88 @@ +import { useState } from 'react' +import { MenuItem } from '@mui/material' +import { useNotify } from '@/hooks/notification' +import { BACKEND_URL } from '@/util/config' +import { useUser } from '@/hooks/user' +import { Role } from '@/shared/types' +import { currentDateAsString } from '@/shared/currentDateAsString' + +export const SpeciesDwcExportMenuItem = ({ handleClose }: { handleClose: () => void }) => { + const [loading, setLoading] = useState(false) + const { notify, setMessage: setNotificationMessage } = useNotify() + const user = useUser() + + if (user.role !== Role.Admin) { + return null + } + + const fetchOptions = user.token ? { headers: { Authorization: `Bearer ${user.token}` } } : {} + const filename = `now_dwc_test_export_${currentDateAsString()}.zip` + + const fetchZipFile = async () => { + setLoading(true) + notify('Generating DwC-A ZIP export, please wait...', 'info', null) + + try { + const response = await fetch(`${BACKEND_URL}/species/export/dwc-archive`, fetchOptions) + if (!response.ok) { + throw new Error('Server response was not OK.') + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Missing response stream.') + } + + const file: Uint8Array[] = [] + let bytes = 0 + let closed = false + + const showDownloadProgress = () => { + if (!closed) { + setTimeout(() => { + setNotificationMessage(`Downloading DwC-A ZIP, ${Math.round((bytes / 1000000) * 10) / 10} MB`) + showDownloadProgress() + }, 500) + } + } + + notify('Downloading DwC-A ZIP...', 'info', null) + showDownloadProgress() + + while (true) { + const { done, value } = await reader.read() + if (done) break + bytes = bytes + value.length + file.push(value) + } + closed = true + + const blobUrl = window.URL.createObjectURL(new Blob(file, { type: 'application/zip' })) + const downloadLink = document.createElement('a') + downloadLink.href = blobUrl + 
downloadLink.download = filename + document.body.appendChild(downloadLink) + downloadLink.click() + downloadLink.remove() + window.URL.revokeObjectURL(blobUrl) + + notify('Download finished.') + } catch { + notify('Downloading DwC-A export failed.', 'error') + } finally { + setLoading(false) + } + } + + return ( + <MenuItem + onClick={() => { + void fetchZipFile() + handleClose() + }} + disabled={loading} + > + Export DwC-A (taxa + measurements) + </MenuItem> + ) +} diff --git a/frontend/src/components/Species/SpeciesTable.tsx b/frontend/src/components/Species/SpeciesTable.tsx index 8120952e1..50db86418 100755 --- a/frontend/src/components/Species/SpeciesTable.tsx +++ b/frontend/src/components/Species/SpeciesTable.tsx @@ -7,6 +7,7 @@ import type { ColumnVisibilityGroup } from '../TableView/TableToolBar' import { useGetAllSpeciesQuery } from '../../redux/speciesReducer' import { SynonymsModal } from './SynonymsModal' import { SpeciesCommentDialog } from './SpeciesCommentDialog' +import { SpeciesDwcExportMenuItem } from './SpeciesDwcExportMenuItem' const normalizeFilterValue = (value: unknown): string => { if (typeof value === 'string') { @@ -566,6 +567,7 @@ export const SpeciesTable = ({ selectorFn }: { selectorFn?: (id: Species) => voi tableRowAction={handleSpeciesRowActionClick} filterFns={synonymFilterFns} renderRowActionExtras={renderCommentAction} + renderExtraExportMenuItems={handleClose => <SpeciesDwcExportMenuItem handleClose={handleClose} />} /> ({ showNewButton, hideLeftButtons, columnVisibilityGroups, + renderExtraExportMenuItems, }: { table: MRT_TableInstance tableName: string @@ -48,6 +49,7 @@ export const TableToolBar = ({ showNewButton?: boolean hideLeftButtons?: boolean columnVisibilityGroups?: ColumnVisibilityGroup[] + renderExtraExportMenuItems?: (handleClose: () => void) => ReactNode }) => { const { previousTableUrls, setPreviousTableUrls } = usePageContext() const location = useLocation() @@ -297,6 +299,8 @@ export const TableToolBar = ({ )} + {renderExtraExportMenuItems ? 
renderExtraExportMenuItems(handleClose) : null} + {kmlExport && ( { diff --git a/frontend/src/components/TableView/TableView.tsx b/frontend/src/components/TableView/TableView.tsx index fa54fe5f2..05723e1f2 100755 --- a/frontend/src/components/TableView/TableView.tsx +++ b/frontend/src/components/TableView/TableView.tsx @@ -91,6 +91,7 @@ export const TableView = ({ paginationPlacement, tableContainerMaxHeight, columnVisibilityGroups, + renderExtraExportMenuItems, }: { data: T[] | undefined columns: MRT_ColumnDef[] @@ -114,6 +115,7 @@ export const TableView = ({ error?: FetchBaseQueryError | SerializedError filterFns?: Record> renderRowActionExtras?: ({ row }: { row: MRT_Row }) => ReactNode + renderExtraExportMenuItems?: (handleClose: () => void) => ReactNode paginationPlacement?: 'top' | 'bottom' | 'both' tableContainerMaxHeight?: string | number columnVisibilityGroups?: ColumnVisibilityGroup[] @@ -532,6 +534,7 @@ export const TableView = ({ selectorFn={selectorFn} hideLeftButtons={false} columnVisibilityGroups={columnVisibilityGroups} + renderExtraExportMenuItems={renderExtraExportMenuItems} />