From 00a28a26d763b5c6e5907b0d7509bd5fc2261141 Mon Sep 17 00:00:00 2001 From: karilint Date: Thu, 23 Apr 2026 12:42:52 +0300 Subject: [PATCH 01/23] Admin-only DwC-A export for species --- backend/package-lock.json | 77 ++++ backend/package.json | 1 + .../species/dwcArchiveExport.test.ts | 69 ++++ backend/src/routes/species.ts | 9 + backend/src/services/dwcArchiveExport.ts | 384 ++++++++++++++++++ .../src/unit-tests/dwcArchiveExport.test.ts | 110 +++++ documentation/functionality/dwc_export.md | 83 ++++ .../Species/SpeciesDwcExportMenuItem.tsx | 88 ++++ .../src/components/Species/SpeciesTable.tsx | 2 + .../src/components/TableView/TableToolBar.tsx | 4 + .../src/components/TableView/TableView.tsx | 3 + 11 files changed, 830 insertions(+) create mode 100644 backend/src/api-tests/species/dwcArchiveExport.test.ts create mode 100644 backend/src/services/dwcArchiveExport.ts create mode 100644 backend/src/unit-tests/dwcArchiveExport.test.ts create mode 100644 documentation/functionality/dwc_export.md create mode 100644 frontend/src/components/Species/SpeciesDwcExportMenuItem.tsx diff --git a/backend/package-lock.json b/backend/package-lock.json index d46e19c9d..5e8b3d394 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -26,6 +26,7 @@ "express-rate-limit": "^7.5.0", "fast-csv": "^5.0.2", "jsonwebtoken": "^9.0.2", + "jszip": "^3.10.1", "mariadb": "^3.3.0", "md5": "^2.3.0", "nodemailer": "^6.9.14", @@ -5155,6 +5156,11 @@ "url": "https://opencollective.com/core-js" } }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" + }, "node_modules/cors": { "version": "2.8.5", "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz", @@ -7088,6 +7094,11 @@ "node": ">= 4" } }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": 
"https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==" + }, "node_modules/import-fresh": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", @@ -9433,6 +9444,49 @@ "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + "node_modules/jszip/node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" + }, + "node_modules/jszip/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/jszip/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "node_modules/jszip/node_modules/string_decoder": { + "version": "1.1.1", + 
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, "node_modules/jwa": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/jwa/-/jwa-1.4.1.tgz", @@ -9496,6 +9550,14 @@ "node": ">= 0.8.0" } }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/lines-and-columns": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", @@ -10148,6 +10210,11 @@ "node": ">=6" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" + }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -10412,6 +10479,11 @@ "fsevents": "2.3.3" } }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" + }, "node_modules/prompts": { "version": "2.4.2", "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", @@ -10907,6 +10979,11 @@ "node": ">= 0.4" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" + }, 
"node_modules/setprototypeof": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", diff --git a/backend/package.json b/backend/package.json index 961df819a..a0e576d37 100644 --- a/backend/package.json +++ b/backend/package.json @@ -55,6 +55,7 @@ "express-rate-limit": "^7.5.0", "fast-csv": "^5.0.2", "jsonwebtoken": "^9.0.2", + "jszip": "^3.10.1", "mariadb": "^3.3.0", "md5": "^2.3.0", "nodemailer": "^6.9.14", diff --git a/backend/src/api-tests/species/dwcArchiveExport.test.ts b/backend/src/api-tests/species/dwcArchiveExport.test.ts new file mode 100644 index 000000000..fdabbd594 --- /dev/null +++ b/backend/src/api-tests/species/dwcArchiveExport.test.ts @@ -0,0 +1,69 @@ +import { afterAll, beforeAll, describe, expect, it } from '@jest/globals' +import request from 'supertest' +import JSZip from 'jszip' +import type { Response } from 'superagent' +import app from '../../app' +import { pool } from '../../utils/db' +import { noPermError, resetDatabase, resetDatabaseTimeout, send } from '../utils' + +type ResponseStream = { + on: (event: 'data', handler: (chunk: Buffer) => void) => void +} & { + on: (event: 'end', handler: () => void) => void +} + +const parseBinary = (res: Response, callback: (err: Error | null, body: Buffer) => void) => { + const data: Buffer[] = [] + const stream = res as unknown as ResponseStream + stream.on('data', chunk => data.push(chunk)) + stream.on('end', () => { + callback(null, Buffer.concat(data)) + }) +} + +describe('DwC-A species export (admin-only)', () => { + beforeAll(async () => { + await resetDatabase() + }, resetDatabaseTimeout) + + afterAll(async () => { + await pool.end() + }) + + it('returns a ZIP archive for admins', async () => { + const loginResult = await send<{ token: string }>('user/login', 'POST', { username: 'testSu', password: 'test' }) + expect(loginResult.status).toEqual(200) + + const result = await request(app) + .get('/species/export/dwc-archive') + 
.set('authorization', `bearer ${loginResult.body.token}`) + .buffer(true) + .parse(parseBinary) + + expect(result.status).toEqual(200) + expect(result.headers['content-type']).toMatch(/application\/zip/i) + expect(result.headers['content-disposition']).toMatch(/attachment;\s*filename="now_dwc_test_export_/i) + + const zip = await JSZip.loadAsync(result.body as unknown as Buffer) + expect(zip.file('taxon.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + + const taxonCsv = await zip.file('taxon.csv')!.async('string') + expect(taxonCsv).toContain('"taxonID"') + + const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') + expect(measurementCsv).toContain('"measurementID"') + + const metaXml = await zip.file('meta.xml')!.async('string') + expect(metaXml).toContain(' { + const result = await request(app).get('/species/export/dwc-archive') + expect(result.status).toEqual(403) + expect(result.body).toEqual(noPermError) + }) +}) diff --git a/backend/src/routes/species.ts b/backend/src/routes/species.ts index 8311c6089..d59558173 100644 --- a/backend/src/routes/species.ts +++ b/backend/src/routes/species.ts @@ -4,6 +4,8 @@ import { fixBigInt } from '../utils/common' import { EditMetaData, SpeciesDetailsType, Role } from '../../../frontend/src/shared/types' import { deleteSpecies, writeSpecies } from '../services/write/species' import { requireOneOf } from '../middlewares/authorizer' +import { buildDwcArchiveZipBuffer } from '../services/dwcArchiveExport' +import { currentDateAsString } from '../../../frontend/src/shared/currentDateAsString' const router = Router() @@ -17,6 +19,13 @@ router.get('/synonyms', async (_req, res) => { return res.status(200).send(fixBigInt(synonyms)) }) +router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res) => { + const zipBuffer = await buildDwcArchiveZipBuffer() + 
res.setHeader('Content-Type', 'application/zip') + res.setHeader('Content-Disposition', `attachment; filename="now_dwc_test_export_${currentDateAsString()}.zip"`) + return res.status(200).send(zipBuffer) +}) + router.get('/:id', async (req, res) => { const id = parseInt(req.params.id) const species = await getSpeciesDetails(id, req.user) diff --git a/backend/src/services/dwcArchiveExport.ts b/backend/src/services/dwcArchiveExport.ts new file mode 100644 index 000000000..6d66381bf --- /dev/null +++ b/backend/src/services/dwcArchiveExport.ts @@ -0,0 +1,384 @@ +import Prisma from '../../prisma/generated/now_test_client' +import { format } from 'fast-csv' +import { Writable } from 'stream' +import JSZip from 'jszip' + +const isMeaningfulString = (value: unknown): value is string => { + if (typeof value !== 'string') return false + const trimmed = value.trim() + if (!trimmed) return false + if (trimmed === '-') return false + return true +} + +const toDwcString = (value: unknown): string => { + if (value === null || value === undefined) return '' + if (typeof value === 'bigint') return value.toString() + if (typeof value === 'number') return Number.isFinite(value) ? value.toString() : '' + if (typeof value === 'boolean') return value ? 'true' : 'false' + if (typeof value === 'string') return value + if (typeof value === 'object') { + try { + return JSON.stringify(value) ?? 
'' + } catch { + return '' + } + } + return '' +} + +const writeCsvString = async (headers: string[], rows: Array>): Promise => { + return await new Promise((resolve, reject) => { + let output = '' + const csvStream = format({ + delimiter: ',', + headers, + quoteColumns: true, + quoteHeaders: true, + includeEndRowDelimiter: true, + }) + + const sink = new Writable({ + write(chunk: Buffer, _encoding, callback) { + output += chunk.toString('utf8') + callback() + }, + }) + + sink.on('finish', () => resolve(output)) + sink.on('error', reject) + csvStream.on('error', reject) + + csvStream.pipe(sink) + for (const row of rows) { + csvStream.write(row) + } + csvStream.end() + }) +} + +export const TAXON_HEADERS = [ + 'taxonID', + 'scientificName', + 'scientificNameAuthorship', + 'vernacularName', + 'taxonRank', + 'taxonomicStatus', + 'class', + 'order', + 'family', + 'genus', + 'specificEpithet', + 'infraspecificEpithet', + 'higherClassification', + 'taxonRemarks', + 'taxonConceptID', +] as const + +export type TaxonCsvHeader = (typeof TAXON_HEADERS)[number] +export type TaxonCsvRow = Record + +type SpeciesForTaxonExport = Pick< + Prisma.com_species, + | 'species_id' + | 'class_name' + | 'subclass_or_superorder_name' + | 'order_name' + | 'suborder_or_superfamily_name' + | 'family_name' + | 'subfamily_name' + | 'genus_name' + | 'species_name' + | 'unique_identifier' + | 'taxonomic_status' + | 'common_name' + | 'sp_author' + | 'sp_comment' +> + +export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRow => { + const genusName = isMeaningfulString(species.genus_name) ? species.genus_name.trim() : '' + const speciesName = isMeaningfulString(species.species_name) ? species.species_name.trim() : '' + const authorship = isMeaningfulString(species.sp_author) ? 
species.sp_author.trim() : '' + + const baseScientificName = [genusName, speciesName].filter(Boolean).join(' ').trim() + const scientificName = [baseScientificName, authorship].filter(Boolean).join(' ').trim() + + const higherClassification = [ + species.class_name, + species.subclass_or_superorder_name, + species.order_name, + species.suborder_or_superfamily_name, + species.family_name, + species.subfamily_name, + ] + .map(value => (isMeaningfulString(value) ? value.trim() : null)) + .filter((value): value is string => Boolean(value)) + .join('|') + + const infraspecificEpithet = isMeaningfulString(species.unique_identifier) ? species.unique_identifier.trim() : '' + + const taxonomicStatus = isMeaningfulString(species.taxonomic_status) ? species.taxonomic_status.trim() : 'accepted' + + return { + taxonID: species.species_id.toString(), + scientificName, + scientificNameAuthorship: authorship, + vernacularName: isMeaningfulString(species.common_name) ? species.common_name.trim() : '', + // TODO(#1150): Validate rank from taxonomic fields (indet./gen./sp. cases). + taxonRank: 'species', + taxonomicStatus, + class: isMeaningfulString(species.class_name) ? species.class_name.trim() : '', + order: isMeaningfulString(species.order_name) ? species.order_name.trim() : '', + family: isMeaningfulString(species.family_name) ? species.family_name.trim() : '', + genus: genusName, + specificEpithet: speciesName, + infraspecificEpithet, + higherClassification, + taxonRemarks: isMeaningfulString(species.sp_comment) ? species.sp_comment.trim() : '', + // TODO(#1150): Decide if any existing field should populate this. 
+ taxonConceptID: '', + } +} + +export const MEASUREMENT_HEADERS = [ + 'taxonID', + 'measurementID', + 'measurementType', + 'measurementValue', + 'measurementUnit', + 'measurementMethod', + 'measurementRemarks', +] as const + +export type MeasurementCsvHeader = (typeof MEASUREMENT_HEADERS)[number] +export type MeasurementCsvRow = Record + +type SpeciesForMeasurementExport = Pick< + Prisma.com_species, + | 'species_id' + | 'body_mass' + | 'brain_mass' + | 'diet1' + | 'diet2' + | 'diet3' + | 'diet_description' + | 'locomo1' + | 'locomo2' + | 'locomo3' + | 'activity' + | 'crowntype' + | 'microwear' + | 'mesowear' + | 'mw_value' +> + +const MEASUREMENT_FIELD_MAPPINGS: Array<{ + field: keyof SpeciesForMeasurementExport + measurementType: string + measurementUnit: string +}> = [ + { field: 'body_mass', measurementType: 'body mass', measurementUnit: 'g' }, + { field: 'brain_mass', measurementType: 'brain mass', measurementUnit: 'g' }, + { field: 'diet1', measurementType: 'diet category 1', measurementUnit: '' }, + { field: 'diet2', measurementType: 'diet category 2', measurementUnit: '' }, + { field: 'diet3', measurementType: 'diet category 3', measurementUnit: '' }, + { field: 'diet_description', measurementType: 'diet description', measurementUnit: '' }, + { field: 'locomo1', measurementType: 'locomotion 1', measurementUnit: '' }, + { field: 'locomo2', measurementType: 'locomotion 2', measurementUnit: '' }, + { field: 'locomo3', measurementType: 'locomotion 3', measurementUnit: '' }, + { field: 'activity', measurementType: 'activity', measurementUnit: '' }, + { field: 'crowntype', measurementType: 'crown type', measurementUnit: '' }, + { field: 'microwear', measurementType: 'microwear', measurementUnit: '' }, + { field: 'mesowear', measurementType: 'mesowear', measurementUnit: '' }, + { field: 'mw_value', measurementType: 'mesowear value', measurementUnit: '' }, +] + +export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport): MeasurementCsvRow[] 
=> { + const taxonID = species.species_id.toString() + + return MEASUREMENT_FIELD_MAPPINGS.flatMap(mapping => { + if (mapping.field === 'species_id') return [] + const rawValue = species[mapping.field] + if (rawValue === null || rawValue === undefined) return [] + + if (typeof rawValue === 'string' && !isMeaningfulString(rawValue)) return [] + + const measurementValue = toDwcString(rawValue).trim() + if (!measurementValue) return [] + + return [ + { + taxonID, + measurementID: `NOW:${taxonID}:${mapping.field.toString()}`, + measurementType: mapping.measurementType, + measurementValue, + measurementUnit: mapping.measurementUnit, + measurementMethod: '', + measurementRemarks: '', + }, + ] + }) +} + +const DWC_TERMS = { + taxon: { + rowType: 'http://rs.tdwg.org/dwc/terms/Taxon', + taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID', + scientificName: 'http://rs.tdwg.org/dwc/terms/scientificName', + scientificNameAuthorship: 'http://rs.tdwg.org/dwc/terms/scientificNameAuthorship', + vernacularName: 'http://rs.tdwg.org/dwc/terms/vernacularName', + taxonRank: 'http://rs.tdwg.org/dwc/terms/taxonRank', + taxonomicStatus: 'http://rs.tdwg.org/dwc/terms/taxonomicStatus', + class: 'http://rs.tdwg.org/dwc/terms/class', + order: 'http://rs.tdwg.org/dwc/terms/order', + family: 'http://rs.tdwg.org/dwc/terms/family', + genus: 'http://rs.tdwg.org/dwc/terms/genus', + specificEpithet: 'http://rs.tdwg.org/dwc/terms/specificEpithet', + infraspecificEpithet: 'http://rs.tdwg.org/dwc/terms/infraspecificEpithet', + higherClassification: 'http://rs.tdwg.org/dwc/terms/higherClassification', + taxonRemarks: 'http://rs.tdwg.org/dwc/terms/taxonRemarks', + taxonConceptID: 'http://rs.tdwg.org/dwc/terms/taxonConceptID', + }, + measurement: { + rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', + taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID', + measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', + measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', + measurementValue: 
'http://rs.tdwg.org/dwc/terms/measurementValue', + measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', + measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', + measurementRemarks: 'http://rs.tdwg.org/dwc/terms/measurementRemarks', + }, +} as const + +export const buildMetaXml = (): string => { + const taxonFields = TAXON_HEADERS.map((header, index) => { + const term = (DWC_TERMS.taxon as Record)[header] + return ` ` + }).join('\n') + + const measurementFields = MEASUREMENT_HEADERS.map((header, index) => { + const term = (DWC_TERMS.measurement as Record)[header] + return ` ` + }).join('\n') + + return ` + + + + taxon.csv + + +${taxonFields} + + + + measurementorfact.csv + + +${measurementFields} + + +` +} + +export const buildEmlXml = (publicationDateIso: string): string => { + return ` + + + + NOW database Darwin Core test export + + + NOW database + + + + + NOW database + + + ${publicationDateIso} + + Admin-only test Darwin Core Archive export from NOW database. Field mappings are intentionally limited for v1. + + + TODO(#1150): Add rights / license information. 
+ + + +` +} + +export const buildDwcArchiveZipBufferFromSpecies = async ( + speciesRows: Array +): Promise => { + const taxonRows = speciesRows.map(mapSpeciesToTaxonRow) + const measurementRows = speciesRows.flatMap(mapSpeciesToMeasurementRows) + + const taxonCsv = await writeCsvString([...TAXON_HEADERS], taxonRows) + const measurementCsv = await writeCsvString([...MEASUREMENT_HEADERS], measurementRows) + const metaXml = buildMetaXml() + const publicationDateIso = new Date().toISOString().slice(0, 10) + const emlXml = buildEmlXml(publicationDateIso) + + const zip = new JSZip() + zip.file('taxon.csv', taxonCsv) + zip.file('measurementorfact.csv', measurementCsv) + zip.file('meta.xml', metaXml) + zip.file('eml.xml', emlXml) + + return await zip.generateAsync({ type: 'nodebuffer', compression: 'DEFLATE', compressionOptions: { level: 6 } }) +} + +export const fetchSpeciesForDwcExport = async (): Promise< + Array +> => { + const { nowDb } = await import('../utils/db') + // NOTE: v1 intentionally exports only com_species rows as taxa. + // TODO(#1150): Add synonym export from com_taxa_synonym. 
+ return await nowDb.com_species.findMany({ + select: { + species_id: true, + class_name: true, + subclass_or_superorder_name: true, + order_name: true, + suborder_or_superfamily_name: true, + family_name: true, + subfamily_name: true, + genus_name: true, + species_name: true, + unique_identifier: true, + taxonomic_status: true, + common_name: true, + sp_author: true, + sp_comment: true, + body_mass: true, + brain_mass: true, + diet1: true, + diet2: true, + diet3: true, + diet_description: true, + locomo1: true, + locomo2: true, + locomo3: true, + activity: true, + crowntype: true, + microwear: true, + mesowear: true, + mw_value: true, + }, + }) +} + +export const buildDwcArchiveZipBuffer = async (): Promise => { + const speciesRows = await fetchSpeciesForDwcExport() + return await buildDwcArchiveZipBufferFromSpecies(speciesRows) +} diff --git a/backend/src/unit-tests/dwcArchiveExport.test.ts b/backend/src/unit-tests/dwcArchiveExport.test.ts new file mode 100644 index 000000000..6acdc1546 --- /dev/null +++ b/backend/src/unit-tests/dwcArchiveExport.test.ts @@ -0,0 +1,110 @@ +import { describe, expect, it } from '@jest/globals' +import JSZip from 'jszip' +import { + buildDwcArchiveZipBufferFromSpecies, + mapSpeciesToMeasurementRows, + mapSpeciesToTaxonRow, +} from '../services/dwcArchiveExport' + +describe('DwC-A export mapping', () => { + it('maps com_species row to a DwC Taxon row', () => { + const row = mapSpeciesToTaxonRow({ + species_id: 123, + class_name: 'Mammalia', + subclass_or_superorder_name: null, + order_name: 'Carnivora', + suborder_or_superfamily_name: null, + family_name: 'Felidae', + subfamily_name: 'Felinae', + genus_name: 'Felis', + species_name: 'catus', + unique_identifier: '-', + taxonomic_status: null, + common_name: 'Cat', + sp_author: 'Linnaeus, 1758', + sp_comment: 'Test comment', + }) + + expect(row.taxonID).toEqual('123') + expect(row.scientificName).toEqual('Felis catus Linnaeus, 1758') + 
expect(row.scientificNameAuthorship).toEqual('Linnaeus, 1758') + expect(row.vernacularName).toEqual('Cat') + expect(row.taxonRank).toEqual('species') + expect(row.taxonomicStatus).toEqual('accepted') + expect(row.higherClassification).toEqual('Mammalia|Carnivora|Felidae|Felinae') + expect(row.taxonRemarks).toEqual('Test comment') + }) + + it('generates measurement rows only for meaningful values', () => { + const rows = mapSpeciesToMeasurementRows({ + species_id: 123, + body_mass: BigInt(2500), + brain_mass: null, + diet1: '-', + diet2: 'Herbivore', + diet3: '', + diet_description: 'Leaves', + locomo1: null, + locomo2: 'Arboreal', + locomo3: null, + activity: 'Diurnal', + crowntype: null, + microwear: 'High', + mesowear: null, + mw_value: 1.5, + }) + + const ids = rows.map(row => row.measurementID) + expect(ids).toContain('NOW:123:body_mass') + expect(ids).toContain('NOW:123:diet2') + expect(ids).toContain('NOW:123:diet_description') + expect(ids).toContain('NOW:123:locomo2') + expect(ids).toContain('NOW:123:activity') + expect(ids).toContain('NOW:123:microwear') + expect(ids).toContain('NOW:123:mw_value') + expect(ids).not.toContain('NOW:123:brain_mass') + expect(ids).not.toContain('NOW:123:diet1') + expect(ids).not.toContain('NOW:123:diet3') + }) + + it('produces a ZIP containing the expected DwC-A files', async () => { + const zipBuffer = await buildDwcArchiveZipBufferFromSpecies([ + { + species_id: 1, + class_name: 'Mammalia', + subclass_or_superorder_name: null, + order_name: 'Primates', + suborder_or_superfamily_name: null, + family_name: 'Hominidae', + subfamily_name: null, + genus_name: 'Homo', + species_name: 'sapiens', + unique_identifier: '-', + taxonomic_status: 'accepted', + common_name: 'Human', + sp_author: null, + sp_comment: null, + body_mass: BigInt(70000), + brain_mass: 1350, + diet1: null, + diet2: null, + diet3: null, + diet_description: null, + locomo1: null, + locomo2: null, + locomo3: null, + activity: null, + crowntype: null, + microwear: 
null, + mesowear: null, + mw_value: null, + }, + ]) + + const zip = await JSZip.loadAsync(zipBuffer) + expect(zip.file('taxon.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + }) +}) diff --git a/documentation/functionality/dwc_export.md b/documentation/functionality/dwc_export.md new file mode 100644 index 000000000..679b40297 --- /dev/null +++ b/documentation/functionality/dwc_export.md @@ -0,0 +1,83 @@ +# Darwin Core Archive export (v1, admin-only) + +Issue: `nowcommunity/nowdatabase#1150` + +This repository includes an **admin-only** Darwin Core Archive (DwC-A) export intended for initial testing. + +## Access + +- Backend route: `GET /species/export/dwc-archive` (**Role.Admin only**) +- The frontend exposes this as an export option on the `/species` page for administrators. + +## Output + +The downloaded ZIP contains: + +- `taxon.csv` (DwC Taxon core) +- `measurementorfact.csv` (DwC MeasurementOrFact extension) +- `meta.xml` (DwC-A descriptor) +- `eml.xml` (minimal placeholder metadata; TODOs included) + +## v1 field mappings + +### `taxon.csv` + +One row per `com_species` record. 
+ +Columns: + +- `taxonID` = `com_species.species_id` +- `scientificName` = `${genus_name} ${species_name} ${sp_author}` (trimmed; authorship appended when present) +- `scientificNameAuthorship` = `sp_author` +- `vernacularName` = `common_name` +- `taxonRank` = `species` (TODO: validate for `indet.` / `gen.` / `sp.` cases) +- `taxonomicStatus` = `taxonomic_status` (fallback: `accepted`) +- `class` = `class_name` +- `order` = `order_name` +- `family` = `family_name` +- `genus` = `genus_name` +- `specificEpithet` = `species_name` +- `infraspecificEpithet` = `unique_identifier` (only when meaningful and not `-`) +- `higherClassification` = `class_name|subclass_or_superorder_name|order_name|suborder_or_superfamily_name|family_name|subfamily_name` (skip empty / `-`) +- `taxonRemarks` = `sp_comment` +- `taxonConceptID` = empty (TODO) + +Note: + +- v1 intentionally exports only `com_species` rows as taxa (no synonyms yet). + +### `measurementorfact.csv` + +Long-format measurements linked by `taxonID`. 
+ +Columns: + +- `taxonID` = `species_id` +- `measurementID` = `NOW::` +- `measurementType` / `measurementUnit` / `measurementValue` per field mapping +- `measurementMethod` = empty +- `measurementRemarks` = empty + +v1 includes only these `com_species` fields (rows emitted only when source value is non-null and non-empty; `-` is treated as empty): + +- `body_mass` → type: `body mass`, unit: `g` +- `brain_mass` → type: `brain mass`, unit: `g` +- `diet1` → type: `diet category 1` +- `diet2` → type: `diet category 2` +- `diet3` → type: `diet category 3` +- `diet_description` → type: `diet description` +- `locomo1` → type: `locomotion 1` +- `locomo2` → type: `locomotion 2` +- `locomo3` → type: `locomotion 3` +- `activity` → type: `activity` +- `crowntype` → type: `crown type` +- `microwear` → type: `microwear` +- `mesowear` → type: `mesowear` +- `mw_value` → type: `mesowear value` + +## Extension points (TODOs) + +- Add synonym export from `com_taxa_synonym` (either separate Taxon rows or a dedicated extension). +- Add additional traits/measurements from `com_species`. +- Replace the placeholder `eml.xml` generator with a real dataset-level EML implementation. 
+ diff --git a/frontend/src/components/Species/SpeciesDwcExportMenuItem.tsx b/frontend/src/components/Species/SpeciesDwcExportMenuItem.tsx new file mode 100644 index 000000000..6707f5e5c --- /dev/null +++ b/frontend/src/components/Species/SpeciesDwcExportMenuItem.tsx @@ -0,0 +1,88 @@ +import { useState } from 'react' +import { MenuItem } from '@mui/material' +import { useNotify } from '@/hooks/notification' +import { BACKEND_URL } from '@/util/config' +import { useUser } from '@/hooks/user' +import { Role } from '@/shared/types' +import { currentDateAsString } from '@/shared/currentDateAsString' + +export const SpeciesDwcExportMenuItem = ({ handleClose }: { handleClose: () => void }) => { + const [loading, setLoading] = useState(false) + const { notify, setMessage: setNotificationMessage } = useNotify() + const user = useUser() + + if (user.role !== Role.Admin) { + return null + } + + const fetchOptions = user.token ? { headers: { Authorization: `Bearer ${user.token}` } } : {} + const filename = `now_dwc_test_export_${currentDateAsString()}.zip` + + const fetchZipFile = async () => { + setLoading(true) + notify('Generating DwC-A ZIP export, please wait...', 'info', null) + + try { + const response = await fetch(`${BACKEND_URL}/species/export/dwc-archive`, fetchOptions) + if (!response.ok) { + throw new Error('Server response was not OK.') + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Missing response stream.') + } + + const file: Uint8Array[] = [] + let bytes = 0 + let closed = false + + const showDownloadProgress = () => { + if (!closed) { + setTimeout(() => { + setNotificationMessage(`Downloading DwC-A ZIP, ${Math.round((bytes / 1000000) * 10) / 10} MB`) + showDownloadProgress() + }, 500) + } + } + + notify('Downloading DwC-A ZIP...', 'info', null) + showDownloadProgress() + + while (true) { + const { done, value } = await reader.read() + if (done) break + bytes = bytes + value.length + file.push(value) + } + closed = true 
+ + const blobUrl = window.URL.createObjectURL(new Blob(file, { type: 'application/zip' })) + const downloadLink = document.createElement('a') + downloadLink.href = blobUrl + downloadLink.download = filename + document.body.appendChild(downloadLink) + downloadLink.click() + downloadLink.remove() + window.URL.revokeObjectURL(blobUrl) + + notify('Download finished.') + } catch { + notify('Downloading DwC-A export failed.', 'error') + } finally { + setLoading(false) + } + } + + return ( + { + void fetchZipFile() + handleClose() + }} + disabled={loading} + > + Export DwC-A (taxa + measurements) + + ) +} diff --git a/frontend/src/components/Species/SpeciesTable.tsx b/frontend/src/components/Species/SpeciesTable.tsx index 8120952e1..50db86418 100755 --- a/frontend/src/components/Species/SpeciesTable.tsx +++ b/frontend/src/components/Species/SpeciesTable.tsx @@ -7,6 +7,7 @@ import type { ColumnVisibilityGroup } from '../TableView/TableToolBar' import { useGetAllSpeciesQuery } from '../../redux/speciesReducer' import { SynonymsModal } from './SynonymsModal' import { SpeciesCommentDialog } from './SpeciesCommentDialog' +import { SpeciesDwcExportMenuItem } from './SpeciesDwcExportMenuItem' const normalizeFilterValue = (value: unknown): string => { if (typeof value === 'string') { @@ -566,6 +567,7 @@ export const SpeciesTable = ({ selectorFn }: { selectorFn?: (id: Species) => voi tableRowAction={handleSpeciesRowActionClick} filterFns={synonymFilterFns} renderRowActionExtras={renderCommentAction} + renderExtraExportMenuItems={handleClose => } /> ({ showNewButton, hideLeftButtons, columnVisibilityGroups, + renderExtraExportMenuItems, }: { table: MRT_TableInstance tableName: string @@ -48,6 +49,7 @@ export const TableToolBar = ({ showNewButton?: boolean hideLeftButtons?: boolean columnVisibilityGroups?: ColumnVisibilityGroup[] + renderExtraExportMenuItems?: (handleClose: () => void) => ReactNode }) => { const { previousTableUrls, setPreviousTableUrls } = usePageContext() const 
location = useLocation() @@ -297,6 +299,8 @@ export const TableToolBar = ({ )} + {renderExtraExportMenuItems ? renderExtraExportMenuItems(handleClose) : null} + {kmlExport && ( { diff --git a/frontend/src/components/TableView/TableView.tsx b/frontend/src/components/TableView/TableView.tsx index fa54fe5f2..05723e1f2 100755 --- a/frontend/src/components/TableView/TableView.tsx +++ b/frontend/src/components/TableView/TableView.tsx @@ -91,6 +91,7 @@ export const TableView = ({ paginationPlacement, tableContainerMaxHeight, columnVisibilityGroups, + renderExtraExportMenuItems, }: { data: T[] | undefined columns: MRT_ColumnDef[] @@ -114,6 +115,7 @@ export const TableView = ({ error?: FetchBaseQueryError | SerializedError filterFns?: Record> renderRowActionExtras?: ({ row }: { row: MRT_Row }) => ReactNode + renderExtraExportMenuItems?: (handleClose: () => void) => ReactNode paginationPlacement?: 'top' | 'bottom' | 'both' tableContainerMaxHeight?: string | number columnVisibilityGroups?: ColumnVisibilityGroup[] @@ -532,6 +534,7 @@ export const TableView = ({ selectorFn={selectorFn} hideLeftButtons={false} columnVisibilityGroups={columnVisibilityGroups} + renderExtraExportMenuItems={renderExtraExportMenuItems} /> From 8f8346151807a60952bec6d595f2074958e8e894 Mon Sep 17 00:00:00 2001 From: karilint Date: Thu, 23 Apr 2026 14:53:07 +0300 Subject: [PATCH 02/23] Add verbatimMeasurementType and methods --- .../species/dwcArchiveExport.test.ts | 1 + backend/src/services/dwcArchiveExport.ts | 118 +++++++++++++++--- documentation/functionality/dwc_export.md | 4 +- 3 files changed, 106 insertions(+), 17 deletions(-) diff --git a/backend/src/api-tests/species/dwcArchiveExport.test.ts b/backend/src/api-tests/species/dwcArchiveExport.test.ts index fdabbd594..393f8db4d 100644 --- a/backend/src/api-tests/species/dwcArchiveExport.test.ts +++ b/backend/src/api-tests/species/dwcArchiveExport.test.ts @@ -55,6 +55,7 @@ describe('DwC-A species export (admin-only)', () => { const measurementCsv = 
await zip.file('measurementorfact.csv')!.async('string') expect(measurementCsv).toContain('"measurementID"') + expect(measurementCsv).toContain('"verbatimMeasurementType"') const metaXml = await zip.file('meta.xml')!.async('string') expect(metaXml).toContain(' = [ - { field: 'body_mass', measurementType: 'body mass', measurementUnit: 'g' }, - { field: 'brain_mass', measurementType: 'brain mass', measurementUnit: 'g' }, - { field: 'diet1', measurementType: 'diet category 1', measurementUnit: '' }, - { field: 'diet2', measurementType: 'diet category 2', measurementUnit: '' }, - { field: 'diet3', measurementType: 'diet category 3', measurementUnit: '' }, - { field: 'diet_description', measurementType: 'diet description', measurementUnit: '' }, - { field: 'locomo1', measurementType: 'locomotion 1', measurementUnit: '' }, - { field: 'locomo2', measurementType: 'locomotion 2', measurementUnit: '' }, - { field: 'locomo3', measurementType: 'locomotion 3', measurementUnit: '' }, - { field: 'activity', measurementType: 'activity', measurementUnit: '' }, - { field: 'crowntype', measurementType: 'crown type', measurementUnit: '' }, - { field: 'microwear', measurementType: 'microwear', measurementUnit: '' }, - { field: 'mesowear', measurementType: 'mesowear', measurementUnit: '' }, - { field: 'mw_value', measurementType: 'mesowear value', measurementUnit: '' }, + { + field: 'body_mass', + measurementType: 'body mass', + measurementUnit: 'g', + measurementMethod: + 'The average adult body mass estimated for the species, in grams. Where there is sexual dimorphism in size, put the mean of the two sexes here and record the masses per sex, if known, in the Comment field. Confidence intervals, if known, can also be put there.', + }, + { + field: 'brain_mass', + measurementType: 'brain mass', + measurementUnit: 'g', + measurementMethod: + 'The average adult brain mass estimated for the species, in grams. 
Where there is sexual dimorphism in size, put the mean of the two sexes here and record the masses per sex, if known, in the Comment field. Confidence intervals, if known, can also be put there.', + }, + { + field: 'diet1', + measurementType: 'diet category 1', + measurementUnit: '', + measurementMethod: + 'The predominant food type in the diet of the species, at the coarsest level of resolution: Animal, Plant, Omnivore. See also Diet 3, Diet 2, Relative Fiber Content, Selectivity, Food Processing Mode, Digestion.', + }, + { + field: 'diet2', + measurementType: 'diet category 2', + measurementUnit: '', + measurementMethod: + 'The predominant food type in the diet of the species, at an intermediate level of resolution. See also Diet 1, Diet 3, Relative Fiber Content, Selectivity, Food Processing Mode, Digestion.', + }, + { + field: 'diet3', + measurementType: 'diet category 3', + measurementUnit: '', + measurementMethod: + 'The predominant, or most important or most characteristic, food type in the diet of the species, at a detailed level of resolution. At this scale, the diets of many species will not be clearly distinguishable from one another using only a single term for the most common dietary component. Nevertheless, highly variable food-type categories often delineate distinct ecological/adaptive/functional types (as in the case of mixed browsing/grazing ungulates). That is, calling something a "frugivore" may not explicitly describe other components of its diet, some of which may be of adaptive importance to the species; it does not allow one to distinguish among species within the frugivore category, either. But it does allow one to place the species between omnivores or insectivores, on the one hand, and browsers, on the other.', + }, + { + field: 'diet_description', + measurementType: 'diet description', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
+ measurementMethod: '', + }, + { + field: 'locomo1', + measurementType: 'locomotion 1', + measurementUnit: '', + measurementMethod: + 'The general substrate upon which locomotion characteristically takes place. These categories are the same as those in Feeding Habitat 1 and Shelter Habitat 1.', + }, + { + field: 'locomo2', + measurementType: 'locomotion 2', + measurementUnit: '', + measurementMethod: + 'For non-aquatic, non-aerial species the terrestrial substrate upon which locomotion characteristically takes place. "Arboreal" describes species that almost never come to the ground, or, if they do, it is almost always for the purpose of dispersing to another tree or trees. "Scansorial" is a broad category including those species that habitually use both trees and the ground in their movements. At the non-arboreal extreme, it includes species that rarely in practice use the trees, but are not morphologically prevented from doing so. [This category may eventually have to be split to distinguish species that exhibit some arboreal adaptations (e.g., squirrels), from those that could climb in a limited way if they had to (e.g., lions).] "Surficial" refers to those creatures who use only the ground surface in locomotion (e.g., sauropods, wildebeeste).', + }, + { + field: 'locomo3', + measurementType: 'locomotion 3', + measurementUnit: '', + measurementMethod: + 'The predominant mode of locomotor activity. [These categories are not necessarily complete at this time.] The categorization of flight locomotion in Locomotion 2 and Locomotion 3 is based on Norberg (1985).', + }, + { + field: 'activity', + measurementType: 'activity', + measurementUnit: '', + measurementMethod: + 'The primary time of day during which the species was active. 
Choices are Diurnal, Crepuscular, or Nocturnal.', + }, + { + field: 'crowntype', + measurementType: 'crown type', + measurementUnit: '', + measurementMethod: + 'This field describes the morphology of mammalian molar crowns, and is complimentary to the Tooth Shape - Multicuspid field. The latter presents a traditional classification of molar crown types (and other multicusped teeth) for vertebrates. Molar Crown Type, in contrast, uses a more recently developed classification scheme that is currently restricted to mammals. The scheme is phylogenetically neutral and descriptive, allowing functional interpretations and interpretations of underlying developmental mechanisms (see Jernvall, 1995). Currently, the values for the field consist of five-letter alphanumeric codes, described in Jernvall, et al. (1996), and the reader is referred to that paper for further explanation.', + }, + { + field: 'microwear', + measurementType: 'microwear', + measurementUnit: '', + measurementMethod: + "This field describes the kind of microwear (in terms of striations or pits) revealed by microscopic examination of the wear facets of the tooth crowns of the species. A considerable literature exists concerning the ways to infer aspects of a species' diet from patterns of microwear.", + }, + { + field: 'mesowear', + measurementType: 'mesowear', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_value', + measurementType: 'mesowear value', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
+ measurementMethod: '', + }, ] export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport): MeasurementCsvRow[] => { @@ -212,9 +298,10 @@ export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport taxonID, measurementID: `NOW:${taxonID}:${mapping.field.toString()}`, measurementType: mapping.measurementType, + verbatimMeasurementType: mapping.field.toString(), measurementValue, measurementUnit: mapping.measurementUnit, - measurementMethod: '', + measurementMethod: mapping.measurementMethod, measurementRemarks: '', }, ] @@ -245,6 +332,7 @@ const DWC_TERMS = { taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID', measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', + verbatimMeasurementType: 'http://rs.tdwg.org/dwc/terms/verbatimMeasurementType', measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', diff --git a/documentation/functionality/dwc_export.md b/documentation/functionality/dwc_export.md index 679b40297..7ae9e9629 100644 --- a/documentation/functionality/dwc_export.md +++ b/documentation/functionality/dwc_export.md @@ -55,7 +55,8 @@ Columns: - `taxonID` = `species_id` - `measurementID` = `NOW::` - `measurementType` / `measurementUnit` / `measurementValue` per field mapping -- `measurementMethod` = empty +- `verbatimMeasurementType` = original DB field name (e.g. 
`diet1`, `body_mass`) +- `measurementMethod` = Pantheria VSP field description where available (`https://www.pantherion.com/dbmanual97/VSP.html`) - `measurementRemarks` = empty v1 includes only these `com_species` fields (rows emitted only when source value is non-null and non-empty; `-` is treated as empty): @@ -80,4 +81,3 @@ v1 includes only these `com_species` fields (rows emitted only when source value - Add synonym export from `com_taxa_synonym` (either separate Taxon rows or a dedicated extension). - Add additional traits/measurements from `com_species`. - Replace the placeholder `eml.xml` generator with a real dataset-level EML implementation. - From 00277b3beebf4a0ce78d47e673338067caa1f2df Mon Sep 17 00:00:00 2001 From: karilint Date: Thu, 23 Apr 2026 15:49:01 +0300 Subject: [PATCH 03/23] Update DwC taxon mapping --- .../species/dwcArchiveExport.test.ts | 2 + backend/src/services/dwcArchiveExport.ts | 78 ++++++++++++++++++- .../src/unit-tests/dwcArchiveExport.test.ts | 8 ++ documentation/functionality/dwc_export.md | 15 +++- 4 files changed, 100 insertions(+), 3 deletions(-) diff --git a/backend/src/api-tests/species/dwcArchiveExport.test.ts b/backend/src/api-tests/species/dwcArchiveExport.test.ts index 393f8db4d..7ec7a85c6 100644 --- a/backend/src/api-tests/species/dwcArchiveExport.test.ts +++ b/backend/src/api-tests/species/dwcArchiveExport.test.ts @@ -52,6 +52,8 @@ describe('DwC-A species export (admin-only)', () => { const taxonCsv = await zip.file('taxon.csv')!.async('string') expect(taxonCsv).toContain('"taxonID"') + expect(taxonCsv).toContain('"nomenclaturalCode"') + expect(taxonCsv).toContain('"genericName"') const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') expect(measurementCsv).toContain('"measurementID"') diff --git a/backend/src/services/dwcArchiveExport.ts b/backend/src/services/dwcArchiveExport.ts index 4392fdc0f..aacfeb198 100644 --- a/backend/src/services/dwcArchiveExport.ts +++ 
b/backend/src/services/dwcArchiveExport.ts @@ -59,14 +59,22 @@ const writeCsvString = async (headers: string[], rows: Array +const endsWithSuffix = (value: string | null, suffix: string): boolean => { + if (!isMeaningfulString(value)) return false + return value.trim().toLowerCase().endsWith(suffix.toLowerCase()) +} + +const containsDot = (value: string): boolean => value.includes('.') +const containsSpaceOrDot = (value: string): boolean => value.includes(' ') || value.includes('.') + +const isSingleLowercaseWord = (value: string | null): boolean => { + if (!isMeaningfulString(value)) return false + const trimmed = value.trim() + return /^[a-z]+$/.test(trimmed) +} + +const resolveTaxonRank = ({ + family, + genus, + specificEpithet, + uniqueIdentifier, +}: { + family: string + genus: string + specificEpithet: string + uniqueIdentifier: string | null +}): string => { + if (family && containsDot(family)) return 'order' + if (genus && containsDot(genus)) return 'family' + if (specificEpithet && containsSpaceOrDot(specificEpithet)) return 'genus' + if (isSingleLowercaseWord(uniqueIdentifier)) return 'subspecies' + if (uniqueIdentifier === '-') return 'species' + return 'species' +} + export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRow => { const genusName = isMeaningfulString(species.genus_name) ? species.genus_name.trim() : '' const speciesName = isMeaningfulString(species.species_name) ? species.species_name.trim() : '' @@ -120,17 +161,42 @@ export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRo const taxonomicStatus = isMeaningfulString(species.taxonomic_status) ? species.taxonomic_status.trim() : 'accepted' + const superfamily = endsWithSuffix(species.subclass_or_superorder_name, 'oidea') + ? species.subclass_or_superorder_name!.trim() + : '' + + const subfamilyRaw = isMeaningfulString(species.subfamily_name) ? 
species.subfamily_name.trim() : '' + const subfamily = subfamilyRaw && subfamilyRaw.toLowerCase().endsWith('inae') ? subfamilyRaw : '' + const tribe = subfamilyRaw && subfamilyRaw.toLowerCase().endsWith('ini') ? subfamilyRaw : '' + const subtribe = subfamilyRaw && subfamilyRaw.toLowerCase().endsWith('ina') ? subfamilyRaw : '' + + const genericName = speciesName && !containsSpaceOrDot(speciesName) ? genusName : '' + + const taxonRank = resolveTaxonRank({ + family: isMeaningfulString(species.family_name) ? species.family_name.trim() : '', + genus: genusName, + specificEpithet: speciesName, + uniqueIdentifier: isMeaningfulString(species.unique_identifier) ? species.unique_identifier.trim() : null, + }) + return { taxonID: species.species_id.toString(), + nomenclaturalCode: 'ICZN', scientificName, + genericName, scientificNameAuthorship: authorship, vernacularName: isMeaningfulString(species.common_name) ? species.common_name.trim() : '', - // TODO(#1150): Validate rank from taxonomic fields (indet./gen./sp. cases). - taxonRank: 'species', + taxonRank, taxonomicStatus, + kingdom: 'Animalia', + phylum: 'Chordata', class: isMeaningfulString(species.class_name) ? species.class_name.trim() : '', order: isMeaningfulString(species.order_name) ? species.order_name.trim() : '', + superfamily, family: isMeaningfulString(species.family_name) ? 
species.family_name.trim() : '', + subfamily, + tribe, + subtribe, genus: genusName, specificEpithet: speciesName, infraspecificEpithet, @@ -312,14 +378,22 @@ const DWC_TERMS = { taxon: { rowType: 'http://rs.tdwg.org/dwc/terms/Taxon', taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID', + nomenclaturalCode: 'http://rs.tdwg.org/dwc/terms/nomenclaturalCode', scientificName: 'http://rs.tdwg.org/dwc/terms/scientificName', + genericName: 'http://rs.tdwg.org/dwc/terms/genericName', scientificNameAuthorship: 'http://rs.tdwg.org/dwc/terms/scientificNameAuthorship', vernacularName: 'http://rs.tdwg.org/dwc/terms/vernacularName', taxonRank: 'http://rs.tdwg.org/dwc/terms/taxonRank', taxonomicStatus: 'http://rs.tdwg.org/dwc/terms/taxonomicStatus', + kingdom: 'http://rs.tdwg.org/dwc/terms/kingdom', + phylum: 'http://rs.tdwg.org/dwc/terms/phylum', class: 'http://rs.tdwg.org/dwc/terms/class', order: 'http://rs.tdwg.org/dwc/terms/order', + superfamily: 'http://rs.tdwg.org/dwc/terms/superfamily', family: 'http://rs.tdwg.org/dwc/terms/family', + subfamily: 'http://rs.tdwg.org/dwc/terms/subfamily', + tribe: 'http://rs.tdwg.org/dwc/terms/tribe', + subtribe: 'http://rs.tdwg.org/dwc/terms/subtribe', genus: 'http://rs.tdwg.org/dwc/terms/genus', specificEpithet: 'http://rs.tdwg.org/dwc/terms/specificEpithet', infraspecificEpithet: 'http://rs.tdwg.org/dwc/terms/infraspecificEpithet', diff --git a/backend/src/unit-tests/dwcArchiveExport.test.ts b/backend/src/unit-tests/dwcArchiveExport.test.ts index 6acdc1546..64d6051c2 100644 --- a/backend/src/unit-tests/dwcArchiveExport.test.ts +++ b/backend/src/unit-tests/dwcArchiveExport.test.ts @@ -26,11 +26,19 @@ describe('DwC-A export mapping', () => { }) expect(row.taxonID).toEqual('123') + expect(row.nomenclaturalCode).toEqual('ICZN') expect(row.scientificName).toEqual('Felis catus Linnaeus, 1758') + expect(row.genericName).toEqual('Felis') expect(row.scientificNameAuthorship).toEqual('Linnaeus, 1758') expect(row.vernacularName).toEqual('Cat') 
expect(row.taxonRank).toEqual('species') expect(row.taxonomicStatus).toEqual('accepted') + expect(row.kingdom).toEqual('Animalia') + expect(row.phylum).toEqual('Chordata') + expect(row.superfamily).toEqual('') + expect(row.subfamily).toEqual('Felinae') + expect(row.tribe).toEqual('') + expect(row.subtribe).toEqual('') expect(row.higherClassification).toEqual('Mammalia|Carnivora|Felidae|Felinae') expect(row.taxonRemarks).toEqual('Test comment') }) diff --git a/documentation/functionality/dwc_export.md b/documentation/functionality/dwc_export.md index 7ae9e9629..f5fbb894c 100644 --- a/documentation/functionality/dwc_export.md +++ b/documentation/functionality/dwc_export.md @@ -27,14 +27,27 @@ One row per `com_species` record. Columns: - `taxonID` = `com_species.species_id` +- `nomenclaturalCode` = `ICZN` - `scientificName` = `${genus_name} ${species_name} ${sp_author}` (trimmed; authorship appended when present) +- `genericName` = `genus_name` (only when `species_name` is a simple epithet; no spaces or dots) - `scientificNameAuthorship` = `sp_author` - `vernacularName` = `common_name` -- `taxonRank` = `species` (TODO: validate for `indet.` / `gen.` / `sp.` cases) +- `taxonRank`: + - `order` if `family_name` contains `.` + - `family` if `genus_name` contains `.` + - `genus` if `species_name` contains a space or `.` + - `species` if `unique_identifier` is `-` + - `subspecies` if `unique_identifier` is a single lowercase word - `taxonomicStatus` = `taxonomic_status` (fallback: `accepted`) +- `kingdom` = `Animalia` +- `phylum` = `Chordata` - `class` = `class_name` - `order` = `order_name` +- `superfamily` = `subclass_or_superorder_name` (only when it ends with `-oidea`) - `family` = `family_name` +- `subfamily` = `subfamily_name` (only when it ends with `-inae`) +- `tribe` = `subfamily_name` (only when it ends with `-ini`) +- `subtribe` = `subfamily_name` (only when it ends with `-ina`) - `genus` = `genus_name` - `specificEpithet` = `species_name` - 
`infraspecificEpithet` = `unique_identifier` (only when meaningful and not `-`) From ce934d030cb453f45097956fefa5ae446e405fe0 Mon Sep 17 00:00:00 2001 From: karilint Date: Thu, 23 Apr 2026 15:56:04 +0300 Subject: [PATCH 04/23] Fix meta.xml fieldsEnclosedBy quoting --- backend/src/services/dwcArchiveExport.ts | 4 ++-- backend/src/unit-tests/dwcArchiveExport.test.ts | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/backend/src/services/dwcArchiveExport.ts b/backend/src/services/dwcArchiveExport.ts index aacfeb198..e4f2e8ed2 100644 --- a/backend/src/services/dwcArchiveExport.ts +++ b/backend/src/services/dwcArchiveExport.ts @@ -427,14 +427,14 @@ export const buildMetaXml = (): string => { return ` - + taxon.csv ${taxonFields} - + measurementorfact.csv diff --git a/backend/src/unit-tests/dwcArchiveExport.test.ts b/backend/src/unit-tests/dwcArchiveExport.test.ts index 64d6051c2..50df11ae2 100644 --- a/backend/src/unit-tests/dwcArchiveExport.test.ts +++ b/backend/src/unit-tests/dwcArchiveExport.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from '@jest/globals' import JSZip from 'jszip' import { buildDwcArchiveZipBufferFromSpecies, + buildMetaXml, mapSpeciesToMeasurementRows, mapSpeciesToTaxonRow, } from '../services/dwcArchiveExport' @@ -115,4 +116,10 @@ describe('DwC-A export mapping', () => { expect(zip.file('meta.xml')).toBeTruthy() expect(zip.file('eml.xml')).toBeTruthy() }) + + it('generates valid meta.xml attributes for enclosed fields', () => { + const metaXml = buildMetaXml() + expect(metaXml).toContain('fieldsEnclosedBy="""') + expect(metaXml).not.toContain('fieldsEnclosedBy="\\""') + }) }) From 6e8ebc940f41203465124332a6cb5fdadcce18c0 Mon Sep 17 00:00:00 2001 From: karilint Date: Thu, 23 Apr 2026 16:10:02 +0300 Subject: [PATCH 05/23] Expand measurement export and drop remarks --- .../species/dwcArchiveExport.test.ts | 1 + backend/src/services/dwcArchiveExport.ts | 345 +++++++++++++++++- 
.../src/unit-tests/dwcArchiveExport.test.ts | 76 ++++ documentation/functionality/dwc_export.md | 41 ++- 4 files changed, 458 insertions(+), 5 deletions(-) diff --git a/backend/src/api-tests/species/dwcArchiveExport.test.ts b/backend/src/api-tests/species/dwcArchiveExport.test.ts index 7ec7a85c6..de8352d87 100644 --- a/backend/src/api-tests/species/dwcArchiveExport.test.ts +++ b/backend/src/api-tests/species/dwcArchiveExport.test.ts @@ -58,6 +58,7 @@ describe('DwC-A species export (admin-only)', () => { const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') expect(measurementCsv).toContain('"measurementID"') expect(measurementCsv).toContain('"verbatimMeasurementType"') + expect(measurementCsv).not.toContain('"measurementRemarks"') const metaXml = await zip.file('meta.xml')!.async('string') expect(metaXml).toContain(' type SpeciesForMeasurementExport = Pick< Prisma.com_species, | 'species_id' + | 'strain' + | 'gene' + | 'taxon_status' | 'body_mass' | 'brain_mass' + | 'sv_length' + | 'sd_size' + | 'sd_display' + | 'tshm' + | 'symph_mob' + | 'relative_blade_length' + | 'tht' | 'diet1' | 'diet2' | 'diet3' | 'diet_description' + | 'rel_fib' + | 'selectivity' + | 'digestion' + | 'feedinghab1' + | 'feedinghab2' + | 'shelterhab1' + | 'shelterhab2' | 'locomo1' | 'locomo2' | 'locomo3' + | 'hunt_forage' | 'activity' | 'crowntype' | 'microwear' + | 'horizodonty' + | 'cusp_shape' + | 'cusp_count_buccal' + | 'cusp_count_lingual' + | 'loph_count_lon' + | 'loph_count_trs' + | 'fct_al' + | 'fct_ol' + | 'fct_sf' + | 'fct_ot' + | 'fct_cm' | 'mesowear' + | 'mw_or_high' + | 'mw_or_low' + | 'mw_cs_sharp' + | 'mw_cs_round' + | 'mw_cs_blunt' + | 'mw_scale_min' + | 'mw_scale_max' | 'mw_value' + | 'pop_struc' + | 'sp_status' > const MEASUREMENT_FIELD_MAPPINGS: Array<{ @@ -246,6 +283,29 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: string measurementMethod: string }> = [ + // NOTE: In v1, measurementMethod is populated from the Pantheria VSP manual 
where available: + // https://www.pantherion.com/dbmanual97/VSP.html + { + field: 'strain', + measurementType: 'strain', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'gene', + measurementType: 'gene', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'taxon_status', + measurementType: 'taxon status', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, { field: 'body_mass', measurementType: 'body mass', @@ -260,6 +320,53 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementMethod: 'The average adult brain mass estimated for the species, in grams. Where there is sexual dimorphism in size, put the mean of the two sexes here and record the masses per sex, if known, in the Comment field. Confidence intervals, if known, can also be put there.', }, + { + field: 'sv_length', + measurementType: 'snout-vent length', + measurementUnit: '', + measurementMethod: + 'For many species body-mass values will be unavailable or cannot be estimated with any confidence. However, every species should be classifiable into one of the gross size ranges listed below. This field will allow at least a crude characterization of body sizes for any fossil locality.', + }, + { + field: 'sd_size', + measurementType: 'sexual dimorphism - size', + measurementUnit: '', + measurementMethod: 'Whether there is sexual dimorphism in overall body size.', + }, + { + field: 'sd_display', + measurementType: 'sexual dimorphism - display', + measurementUnit: '', + measurementMethod: + 'Whether there is evidence of sexual dimorphism in display (or sexual combat) structures. (e. g., horns, antlers, dome-heads, canines). 
If the presence of these features is unknown, leave the field blank rather than enter "n."', + }, + { + field: 'tshm', + measurementType: 'tooth shape -- multicuspid', + measurementUnit: '', + measurementMethod: + 'A description of the morphology of the tooth crown, for multicusped teeth (if present). In concert with the other tooth morphology fields, this may allow functional interpretations to be made independently of whatever has been entered in the diet fields. Terminology for tooth-crown morphology is most highly developed for extant and fossil mammals, but no system has gained universal acceptance. The following reflects a compromise among many competing traditional systems, and is based partly on Fortelius (1985) and Janis and Fortelius (1988). This field is currently subject to further development. Improved nomenclature for some mammal groups, such as rodents and insectivores, might be more functionally indicative. Also, an expanded list of terms would be useful to characterize more fully the variation found among nonmammalian terrestrial vertebrates -- dinosaurs and therapsids in particular. The similar Molar Crown Type field is based on an alternative descriptive classification scheme, and currently applies only to mammals.', + }, + { + field: 'symph_mob', + measurementType: 'symphyseal mobility', + measurementUnit: '', + measurementMethod: 'Whether or not the mandibular symphysis is mobile.', + }, + { + field: 'relative_blade_length', + measurementType: 'relative blade length', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'tht', + measurementType: 'tooth height', + measurementUnit: '', + measurementMethod: + 'An indication of hypsodonty (tooth crown height) or the nature of other adaptations to deal with the problem of lifetime tooth wear. Tooth replacement, Tooth plates, and Hypselodont (ever-growing teeth) are absolute descriptors. 
The terms Brachydont, Mesodont and Hypsodont refer to different degrees of crown height of (mammalian) cheek teeth, and are subject to a variety of interpretations. Hypsodont (high-crowned) teeth may be defined objectively as those where the antero-posterior length is exceeded by the dorso-ventral height (Janis & Fortelius, 1988). "Somewhat hypsodont" teeth, intermediate between brachydont and hypsodont, are referred to as "mesodont," but there is no corresponding objective definition of this term. Quantitative indices of hypsodonty have been used (Janis, 1988), and might prove superior to the classification scheme presented here. Thus, this field is currently subject to further development.', + }, { field: 'diet1', measurementType: 'diet category 1', @@ -288,6 +395,55 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', }, + { + field: 'rel_fib', + measurementType: 'relative fiber content', + measurementUnit: '', + measurementMethod: + 'The relative amount of plant fiber in the food of the species. Plant food can be divided into cell contents such as sugars, proteins and storage carbohydrates, which are directly digestible by vertebrates. Plant cell-walls, however, are composed of material ("fiber") partially digestible only by microbial fermentation. Thus, the higher the fiber content, relative to the amount of energy contained in the easily-digested portion, the harder it is to obtain energy from the forage and the poorer the "quality" of the food on a per-unit basis. In addition, the proportion of the fiber digestible by fermentation also varies among plant species, plant parts, and growth stages. This field describes the food as having high, medium, and low levels of fiber. It is intended as a rough indication of the nutritional quality of a species\' diet. It refers only to herbivorous diets, or the plant portions of omnivorous diets. 
(The field basically functions to group various Diet 3 categories by relative fiber content.)', + }, + { + field: 'selectivity', + measurementType: 'selectivity', + measurementUnit: '', + measurementMethod: + 'Within its food-type category (Diet 1-3) a species may feed selectively or unselectively. Thus this field applies to any dietary category. Some food types impose selectivity restrictions on the species that feed on them. For example, most large grazers are less selective than mixed feeders or browsers. This is not what this field is meant to indicate! Rather, it applies within dietary categories. It could, for example, be used to distinguish between relatively selective and relatively unselective grazers.', + }, + { + field: 'digestion', + measurementType: 'digestion', + measurementUnit: '', + measurementMethod: + 'There are different broad strategies for breaking down plant material by means of microbial activity in the gut. Hindgut fermenters (hg) and foregut fermenters (fg) are found in a variety of living taxa. True ruminants (ru) are confined to the ruminant artiodactyls; they are separated here from other foregut fermenters, of which they form a special derived subclass.', + }, + { + field: 'feedinghab1', + measurementType: 'feeding habitat 1', + measurementUnit: '', + measurementMethod: + 'The general habitat from which the species obtains the major part of its trophic resources, and in which it ordinarily spends time feeding. The allowed values are identical to those for Shelter Habitat 1. See also Feeding Habitat 2.', + }, + { + field: 'feedinghab2', + measurementType: 'feeding habitat 2', + measurementUnit: '', + measurementMethod: + 'For the Terrestrial (te) entry in Feeding Habitat 1 only, a further breakdown into more specific feeding habitats. 
They are described more fully below.', + }, + { + field: 'shelterhab1', + measurementType: 'shelter habitat 1', + measurementUnit: '', + measurementMethod: + 'The general habitat in which the animal sleeps, shelters, or avoids predation when not feeding. The allowed values are identical to those for Feeding Habitat 1. See also Shelter Habitat 2.', + }, + { + field: 'shelterhab2', + measurementType: 'shelter habitat 2', + measurementUnit: '', + measurementMethod: + 'For the Terrestrial (te) entry in Shelter Habitat 1 only, a further breakdown into more specific shelter habitats. They are described more fully below, and are mostly identical to the fields for Feeding Habitat 2.', + }, { field: 'locomo1', measurementType: 'locomotion 1', @@ -309,6 +465,13 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementMethod: 'The predominant mode of locomotor activity. [These categories are not necessarily complete at this time.] The categorization of flight locomotion in Locomotion 2 and Locomotion 3 is based on Norberg (1985).', }, + { + field: 'hunt_forage', + measurementType: 'hunt/forage', + measurementUnit: '', + measurementMethod: + 'The predominant hunting or foraging mode for carnivores. These categories are based upon those of Van Valkenburgh (1985) and are described more fully there. This field might also be of eventual use in describing foraging modes of non-carnivores, but at present these cannot be determined directly upon morphological criteria (such inferences as can be made are already taken care of in Feeding Habitat, Diet and Locomotion.)', + }, { field: 'activity', measurementType: 'activity', @@ -330,6 +493,83 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementMethod: "This field describes the kind of microwear (in terms of striations or pits) revealed by microscopic examination of the wear facets of the tooth crowns of the species. 
A considerable literature exists concerning the ways to infer aspects of a species' diet from patterns of microwear.", }, + { + field: 'horizodonty', + measurementType: 'horizodonty', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'cusp_shape', + measurementType: 'cusp shape', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'cusp_count_buccal', + measurementType: 'cusp count (buccal)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'cusp_count_lingual', + measurementType: 'cusp count (lingual)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'loph_count_lon', + measurementType: 'loph count (longitudinal)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'loph_count_trs', + measurementType: 'loph count (transverse)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'fct_al', + measurementType: 'functional crown type (AL)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'fct_ol', + measurementType: 'functional crown type (OL)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
+ measurementMethod: '', + }, + { + field: 'fct_sf', + measurementType: 'functional crown type (SF)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'fct_ot', + measurementType: 'functional crown type (OT)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'fct_cm', + measurementType: 'functional crown type (CM)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, { field: 'mesowear', measurementType: 'mesowear', @@ -337,6 +577,55 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', }, + { + field: 'mw_or_high', + measurementType: 'cusp relief high (OR%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_or_low', + measurementType: 'cusp relief low (OR%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_cs_sharp', + measurementType: 'cusp shape sharp (CS%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_cs_round', + measurementType: 'cusp shape round (CS%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_cs_blunt', + measurementType: 'cusp shape blunt (CS%)', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
+ measurementMethod: '', + }, + { + field: 'mw_scale_min', + measurementType: 'mesowear scale min', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, + { + field: 'mw_scale_max', + measurementType: 'mesowear scale max', + measurementUnit: '', + // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. + measurementMethod: '', + }, { field: 'mw_value', measurementType: 'mesowear value', @@ -344,6 +633,20 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', }, + { + field: 'pop_struc', + measurementType: 'population structure', + measurementUnit: '', + measurementMethod: + 'Occasionally there will be evidence of herding or other gregarious behavior for a species. This could include evidence from mass deaths, well-preserved trace fossils (e.g., trackways), nesting-site or burrow aggregations, or association of individuals in burrows. It could also be based, less directly, on other aspects of the organism\'s biology -- for example, sexual dimorphism in sexual display or combat features. If so, indicate "soc" here and give details briefly in the Comment field. The choice "sol" (solitary) is allowed for completeness, but ordinarily there will be no positive evidence for solitary behavior, so the alternative to "soc" is usually a blank.', + }, + { + field: 'sp_status', + measurementType: 'species status', + measurementUnit: '', + // TODO(#1150): Add field description / meaning for NOW database usage. 
+ measurementMethod: '', + }, ] export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport): MeasurementCsvRow[] => { @@ -368,7 +671,6 @@ export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport measurementValue, measurementUnit: mapping.measurementUnit, measurementMethod: mapping.measurementMethod, - measurementRemarks: '', }, ] }) @@ -410,7 +712,6 @@ const DWC_TERMS = { measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', - measurementRemarks: 'http://rs.tdwg.org/dwc/terms/measurementRemarks', }, } as const @@ -522,20 +823,58 @@ export const fetchSpeciesForDwcExport = async (): Promise< common_name: true, sp_author: true, sp_comment: true, + strain: true, + gene: true, + taxon_status: true, body_mass: true, brain_mass: true, + sv_length: true, + sd_size: true, + sd_display: true, + tshm: true, + symph_mob: true, + relative_blade_length: true, + tht: true, diet1: true, diet2: true, diet3: true, diet_description: true, + rel_fib: true, + selectivity: true, + digestion: true, + feedinghab1: true, + feedinghab2: true, + shelterhab1: true, + shelterhab2: true, locomo1: true, locomo2: true, locomo3: true, + hunt_forage: true, activity: true, crowntype: true, microwear: true, mesowear: true, + horizodonty: true, + cusp_shape: true, + cusp_count_buccal: true, + cusp_count_lingual: true, + loph_count_lon: true, + loph_count_trs: true, + fct_al: true, + fct_ol: true, + fct_sf: true, + fct_ot: true, + fct_cm: true, + mw_or_high: true, + mw_or_low: true, + mw_cs_sharp: true, + mw_cs_round: true, + mw_cs_blunt: true, + mw_scale_min: true, + mw_scale_max: true, mw_value: true, + pop_struc: true, + sp_status: true, }, }) } diff --git a/backend/src/unit-tests/dwcArchiveExport.test.ts b/backend/src/unit-tests/dwcArchiveExport.test.ts index 50df11ae2..b12c85478 100644 --- 
a/backend/src/unit-tests/dwcArchiveExport.test.ts +++ b/backend/src/unit-tests/dwcArchiveExport.test.ts @@ -47,20 +47,58 @@ describe('DwC-A export mapping', () => { it('generates measurement rows only for meaningful values', () => { const rows = mapSpeciesToMeasurementRows({ species_id: 123, + strain: null, + gene: null, + taxon_status: null, body_mass: BigInt(2500), brain_mass: null, + sv_length: null, + sd_size: null, + sd_display: null, + tshm: null, + symph_mob: null, + relative_blade_length: null, + tht: null, diet1: '-', diet2: 'Herbivore', diet3: '', diet_description: 'Leaves', + rel_fib: null, + selectivity: null, + digestion: null, + feedinghab1: null, + feedinghab2: null, + shelterhab1: null, + shelterhab2: null, locomo1: null, locomo2: 'Arboreal', locomo3: null, + hunt_forage: null, activity: 'Diurnal', crowntype: null, microwear: 'High', + horizodonty: null, + cusp_shape: null, + cusp_count_buccal: null, + cusp_count_lingual: null, + loph_count_lon: null, + loph_count_trs: null, + fct_al: null, + fct_ol: null, + fct_sf: null, + fct_ot: null, + fct_cm: null, mesowear: null, + mw_or_high: null, + mw_or_low: null, + mw_cs_sharp: null, + mw_cs_round: null, + mw_cs_blunt: null, + mw_scale_min: null, + mw_scale_max: null, mw_value: 1.5, + pop_struc: null, + sp_status: null, }) const ids = rows.map(row => row.measurementID) @@ -93,20 +131,58 @@ describe('DwC-A export mapping', () => { common_name: 'Human', sp_author: null, sp_comment: null, + strain: null, + gene: null, + taxon_status: null, body_mass: BigInt(70000), brain_mass: 1350, + sv_length: null, + sd_size: null, + sd_display: null, + tshm: null, + symph_mob: null, + relative_blade_length: null, + tht: null, diet1: null, diet2: null, diet3: null, diet_description: null, + rel_fib: null, + selectivity: null, + digestion: null, + feedinghab1: null, + feedinghab2: null, + shelterhab1: null, + shelterhab2: null, locomo1: null, locomo2: null, locomo3: null, + hunt_forage: null, activity: null, crowntype: 
null, microwear: null, mesowear: null, + horizodonty: null, + cusp_shape: null, + cusp_count_buccal: null, + cusp_count_lingual: null, + loph_count_lon: null, + loph_count_trs: null, + fct_al: null, + fct_ol: null, + fct_sf: null, + fct_ot: null, + fct_cm: null, + mw_or_high: null, + mw_or_low: null, + mw_cs_sharp: null, + mw_cs_round: null, + mw_cs_blunt: null, + mw_scale_min: null, + mw_scale_max: null, mw_value: null, + pop_struc: null, + sp_status: null, }, ]) diff --git a/documentation/functionality/dwc_export.md b/documentation/functionality/dwc_export.md index f5fbb894c..98a9b4de6 100644 --- a/documentation/functionality/dwc_export.md +++ b/documentation/functionality/dwc_export.md @@ -70,24 +70,61 @@ Columns: - `measurementType` / `measurementUnit` / `measurementValue` per field mapping - `verbatimMeasurementType` = original DB field name (e.g. `diet1`, `body_mass`) - `measurementMethod` = Pantheria VSP field description where available (`https://www.pantherion.com/dbmanual97/VSP.html`) -- `measurementRemarks` = empty -v1 includes only these `com_species` fields (rows emitted only when source value is non-null and non-empty; `-` is treated as empty): +v1 includes these `com_species` fields (rows emitted only when source value is non-null and non-empty; `-` is treated as empty): +- `strain` +- `gene` +- `taxon_status` - `body_mass` → type: `body mass`, unit: `g` - `brain_mass` → type: `brain mass`, unit: `g` +- `sv_length` +- `sd_size` +- `sd_display` +- `tshm` +- `symph_mob` +- `relative_blade_length` +- `tht` - `diet1` → type: `diet category 1` - `diet2` → type: `diet category 2` - `diet3` → type: `diet category 3` - `diet_description` → type: `diet description` +- `rel_fib` +- `selectivity` +- `digestion` +- `feedinghab1` +- `feedinghab2` +- `shelterhab1` +- `shelterhab2` - `locomo1` → type: `locomotion 1` - `locomo2` → type: `locomotion 2` - `locomo3` → type: `locomotion 3` +- `hunt_forage` - `activity` → type: `activity` - `crowntype` → type: `crown 
type` - `microwear` → type: `microwear` - `mesowear` → type: `mesowear` +- `horizodonty` +- `cusp_shape` +- `cusp_count_buccal` +- `cusp_count_lingual` +- `loph_count_lon` +- `loph_count_trs` +- `fct_al` +- `fct_ol` +- `fct_sf` +- `fct_ot` +- `fct_cm` +- `mw_or_high` +- `mw_or_low` +- `mw_cs_sharp` +- `mw_cs_round` +- `mw_cs_blunt` +- `mw_scale_min` +- `mw_scale_max` - `mw_value` → type: `mesowear value` +- `pop_struc` +- `sp_status` ## Extension points (TODOs) From 2632ff0ddad0258079fb588be7eebbad6c6c9844 Mon Sep 17 00:00:00 2001 From: karilint Date: Thu, 23 Apr 2026 17:53:03 +0300 Subject: [PATCH 06/23] Adjust DwC scientificName and drop taxonConceptID --- backend/src/services/dwcArchiveExport.ts | 72 ++++++++++++++++++++--- documentation/functionality/dwc_export.md | 1 - 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/backend/src/services/dwcArchiveExport.ts b/backend/src/services/dwcArchiveExport.ts index d8c09a1a0..4dd20415e 100644 --- a/backend/src/services/dwcArchiveExport.ts +++ b/backend/src/services/dwcArchiveExport.ts @@ -80,7 +80,6 @@ export const TAXON_HEADERS = [ 'infraspecificEpithet', 'higherClassification', 'taxonRemarks', - 'taxonConceptID', ] as const export type TaxonCsvHeader = (typeof TAXON_HEADERS)[number] @@ -109,6 +108,14 @@ const endsWithSuffix = (value: string | null, suffix: string): boolean => { return value.trim().toLowerCase().endsWith(suffix.toLowerCase()) } +const isMeaningfulTaxonName = (value: string | null): boolean => { + if (!isMeaningfulString(value)) return false + const trimmed = value.trim() + if (trimmed.includes(' ')) return false + if (trimmed.includes('.')) return false + return true +} + const containsDot = (value: string): boolean => value.includes('.') const containsSpaceOrDot = (value: string): boolean => value.includes(' ') || value.includes('.') @@ -123,17 +130,37 @@ const resolveTaxonRank = ({ genus, specificEpithet, uniqueIdentifier, + superfamily, + subfamily, + tribe, + subtribe, }: { family: 
string genus: string specificEpithet: string uniqueIdentifier: string | null + superfamily: string + subfamily: string + tribe: string + subtribe: string }): string => { if (family && containsDot(family)) return 'order' if (genus && containsDot(genus)) return 'family' if (specificEpithet && containsSpaceOrDot(specificEpithet)) return 'genus' if (isSingleLowercaseWord(uniqueIdentifier)) return 'subspecies' if (uniqueIdentifier === '-') return 'species' + + // If lower taxa are not meaningful (e.g. contain '.'), but a higher rank is known, + // pick the most specific available rank. + const hasLowerTaxa = + isMeaningfulTaxonName(genus) || isMeaningfulTaxonName(specificEpithet) || isSingleLowercaseWord(uniqueIdentifier) + if (!hasLowerTaxa) { + if (subtribe) return 'subtribe' + if (tribe) return 'tribe' + if (subfamily) return 'subfamily' + if (superfamily) return 'superfamily' + } + return 'species' } @@ -142,9 +169,6 @@ export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRo const speciesName = isMeaningfulString(species.species_name) ? species.species_name.trim() : '' const authorship = isMeaningfulString(species.sp_author) ? species.sp_author.trim() : '' - const baseScientificName = [genusName, speciesName].filter(Boolean).join(' ').trim() - const scientificName = [baseScientificName, authorship].filter(Boolean).join(' ').trim() - const higherClassification = [ species.class_name, species.subclass_or_superorder_name, @@ -170,15 +194,50 @@ export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRo const tribe = subfamilyRaw && subfamilyRaw.toLowerCase().endsWith('ini') ? subfamilyRaw : '' const subtribe = subfamilyRaw && subfamilyRaw.toLowerCase().endsWith('ina') ? subfamilyRaw : '' - const genericName = speciesName && !containsSpaceOrDot(speciesName) ? genusName : '' + const genericName = isMeaningfulTaxonName(speciesName) ? 
genusName : '' const taxonRank = resolveTaxonRank({ family: isMeaningfulString(species.family_name) ? species.family_name.trim() : '', genus: genusName, specificEpithet: speciesName, uniqueIdentifier: isMeaningfulString(species.unique_identifier) ? species.unique_identifier.trim() : null, + superfamily, + subfamily, + tribe, + subtribe, }) + const scientificName = (() => { + const familyName = isMeaningfulString(species.family_name) ? species.family_name.trim() : '' + const orderName = isMeaningfulString(species.order_name) ? species.order_name.trim() : '' + const className = isMeaningfulString(species.class_name) ? species.class_name.trim() : '' + + switch (taxonRank) { + case 'subspecies': + return [genusName, speciesName, infraspecificEpithet, authorship].filter(Boolean).join(' ').trim() + case 'species': + return [genusName, speciesName, authorship].filter(Boolean).join(' ').trim() + case 'genus': + return [genusName, authorship].filter(Boolean).join(' ').trim() + case 'family': + return familyName + case 'superfamily': + return superfamily + case 'subfamily': + return subfamily + case 'tribe': + return tribe + case 'subtribe': + return subtribe + case 'order': + return orderName + case 'class': + return className + default: + return [genusName, speciesName, authorship].filter(Boolean).join(' ').trim() + } + })() + return { taxonID: species.species_id.toString(), nomenclaturalCode: 'ICZN', @@ -202,8 +261,6 @@ export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRo infraspecificEpithet, higherClassification, taxonRemarks: isMeaningfulString(species.sp_comment) ? species.sp_comment.trim() : '', - // TODO(#1150): Decide if any existing field should populate this. 
- taxonConceptID: '', } } @@ -701,7 +758,6 @@ const DWC_TERMS = { infraspecificEpithet: 'http://rs.tdwg.org/dwc/terms/infraspecificEpithet', higherClassification: 'http://rs.tdwg.org/dwc/terms/higherClassification', taxonRemarks: 'http://rs.tdwg.org/dwc/terms/taxonRemarks', - taxonConceptID: 'http://rs.tdwg.org/dwc/terms/taxonConceptID', }, measurement: { rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', diff --git a/documentation/functionality/dwc_export.md b/documentation/functionality/dwc_export.md index 98a9b4de6..bba7ea347 100644 --- a/documentation/functionality/dwc_export.md +++ b/documentation/functionality/dwc_export.md @@ -53,7 +53,6 @@ Columns: - `infraspecificEpithet` = `unique_identifier` (only when meaningful and not `-`) - `higherClassification` = `class_name|subclass_or_superorder_name|order_name|suborder_or_superfamily_name|family_name|subfamily_name` (skip empty / `-`) - `taxonRemarks` = `sp_comment` -- `taxonConceptID` = empty (TODO) Note: From 41240747f5e9eebe3da78982e3df339b4af6a139 Mon Sep 17 00:00:00 2001 From: karilint Date: Fri, 24 Apr 2026 13:22:56 +0300 Subject: [PATCH 07/23] Prefix taxonID and refine taxonRank heuristics --- backend/src/services/dwcArchiveExport.ts | 49 +++++++++++-------- .../src/unit-tests/dwcArchiveExport.test.ts | 4 +- documentation/functionality/dwc_export.md | 2 + 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/backend/src/services/dwcArchiveExport.ts b/backend/src/services/dwcArchiveExport.ts index 4dd20415e..2d973c8f3 100644 --- a/backend/src/services/dwcArchiveExport.ts +++ b/backend/src/services/dwcArchiveExport.ts @@ -116,21 +116,22 @@ const isMeaningfulTaxonName = (value: string | null): boolean => { return true } -const containsDot = (value: string): boolean => value.includes('.') -const containsSpaceOrDot = (value: string): boolean => value.includes(' ') || value.includes('.') - const isSingleLowercaseWord = (value: string | null): boolean => { if (!isMeaningfulString(value)) return 
false const trimmed = value.trim() return /^[a-z]+$/.test(trimmed) } +const isSpeciesSp = (value: string): boolean => /^sp\.?$/i.test(value.trim()) + +const includesIndet = (value: string): boolean => value.toLowerCase().includes('indet.') + const resolveTaxonRank = ({ family, genus, specificEpithet, uniqueIdentifier, - superfamily, + subclassOrSuperorderName, subfamily, tribe, subtribe, @@ -139,26 +140,32 @@ const resolveTaxonRank = ({ genus: string specificEpithet: string uniqueIdentifier: string | null - superfamily: string + subclassOrSuperorderName: string | null subfamily: string tribe: string subtribe: string }): string => { - if (family && containsDot(family)) return 'order' - if (genus && containsDot(genus)) return 'family' - if (specificEpithet && containsSpaceOrDot(specificEpithet)) return 'genus' - if (isSingleLowercaseWord(uniqueIdentifier)) return 'subspecies' - if (uniqueIdentifier === '-') return 'species' - - // If lower taxa are not meaningful (e.g. contain '.'), but a higher rank is known, - // pick the most specific available rank. - const hasLowerTaxa = - isMeaningfulTaxonName(genus) || isMeaningfulTaxonName(specificEpithet) || isSingleLowercaseWord(uniqueIdentifier) - if (!hasLowerTaxa) { + // Start from the lowest rank to the highest, but respect the indet.* rules which + // indicate that lower taxa are unknown and we should not emit species/subspecies ranks. 
+ const genusIndet = includesIndet(genus) + const epithetIndet = includesIndet(specificEpithet) + + if (!genusIndet && !epithetIndet) { + const speciesSp = isSpeciesSp(specificEpithet) + if (!speciesSp && isSingleLowercaseWord(uniqueIdentifier)) return 'subspecies' + if (uniqueIdentifier === '-') return 'species' + if (speciesSp) return 'species' + } + + if (!genusIndet && epithetIndet) return 'genus' + + if (genusIndet) { if (subtribe) return 'subtribe' if (tribe) return 'tribe' if (subfamily) return 'subfamily' - if (superfamily) return 'superfamily' + if (!isMeaningfulString(subclassOrSuperorderName)) return 'family' + if (includesIndet(family)) return 'order' + return 'family' } return 'species' @@ -201,10 +208,10 @@ export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRo genus: genusName, specificEpithet: speciesName, uniqueIdentifier: isMeaningfulString(species.unique_identifier) ? species.unique_identifier.trim() : null, - superfamily, subfamily, tribe, subtribe, + subclassOrSuperorderName: species.subclass_or_superorder_name, }) const scientificName = (() => { @@ -239,7 +246,7 @@ export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRo })() return { - taxonID: species.species_id.toString(), + taxonID: `NOW:${species.species_id}`, nomenclaturalCode: 'ICZN', scientificName, genericName, @@ -707,7 +714,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ ] export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport): MeasurementCsvRow[] => { - const taxonID = species.species_id.toString() + const taxonID = `NOW:${species.species_id}` return MEASUREMENT_FIELD_MAPPINGS.flatMap(mapping => { if (mapping.field === 'species_id') return [] @@ -722,7 +729,7 @@ export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport return [ { taxonID, - measurementID: `NOW:${taxonID}:${mapping.field.toString()}`, + measurementID: `NOW:${species.species_id}:${mapping.field.toString()}`, 
measurementType: mapping.measurementType, verbatimMeasurementType: mapping.field.toString(), measurementValue, diff --git a/backend/src/unit-tests/dwcArchiveExport.test.ts b/backend/src/unit-tests/dwcArchiveExport.test.ts index b12c85478..8b429fa19 100644 --- a/backend/src/unit-tests/dwcArchiveExport.test.ts +++ b/backend/src/unit-tests/dwcArchiveExport.test.ts @@ -26,7 +26,7 @@ describe('DwC-A export mapping', () => { sp_comment: 'Test comment', }) - expect(row.taxonID).toEqual('123') + expect(row.taxonID).toEqual('NOW:123') expect(row.nomenclaturalCode).toEqual('ICZN') expect(row.scientificName).toEqual('Felis catus Linnaeus, 1758') expect(row.genericName).toEqual('Felis') @@ -101,6 +101,8 @@ describe('DwC-A export mapping', () => { sp_status: null, }) + expect(rows.some(row => row.taxonID === 'NOW:123')).toEqual(true) + const ids = rows.map(row => row.measurementID) expect(ids).toContain('NOW:123:body_mass') expect(ids).toContain('NOW:123:diet2') diff --git a/documentation/functionality/dwc_export.md b/documentation/functionality/dwc_export.md index bba7ea347..29aa147b6 100644 --- a/documentation/functionality/dwc_export.md +++ b/documentation/functionality/dwc_export.md @@ -27,6 +27,7 @@ One row per `com_species` record. Columns: - `taxonID` = `com_species.species_id` +- `taxonID` = `NOW:` - `nomenclaturalCode` = `ICZN` - `scientificName` = `${genus_name} ${species_name} ${sp_author}` (trimmed; authorship appended when present) - `genericName` = `genus_name` (only when `species_name` is a simple epithet; no spaces or dots) @@ -65,6 +66,7 @@ Long-format measurements linked by `taxonID`. Columns: - `taxonID` = `species_id` +- `taxonID` = `NOW:` - `measurementID` = `NOW::` - `measurementType` / `measurementUnit` / `measurementValue` per field mapping - `verbatimMeasurementType` = original DB field name (e.g. 
`diet1`, `body_mass`) From 464a4d1a2fdbf5c13dcf740ed8fda92b21041ddc Mon Sep 17 00:00:00 2001 From: karilint Date: Fri, 24 Apr 2026 14:15:37 +0300 Subject: [PATCH 08/23] DwC-A: parentMeasurementID + crown type rows --- .../species/dwcArchiveExport.test.ts | 1 + backend/src/services/dwcArchiveExport.ts | 111 +++++++++++++++++- .../src/unit-tests/dwcArchiveExport.test.ts | 3 +- documentation/functionality/dwc_export.md | 7 +- 4 files changed, 117 insertions(+), 5 deletions(-) diff --git a/backend/src/api-tests/species/dwcArchiveExport.test.ts b/backend/src/api-tests/species/dwcArchiveExport.test.ts index de8352d87..271a03718 100644 --- a/backend/src/api-tests/species/dwcArchiveExport.test.ts +++ b/backend/src/api-tests/species/dwcArchiveExport.test.ts @@ -57,6 +57,7 @@ describe('DwC-A species export (admin-only)', () => { const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') expect(measurementCsv).toContain('"measurementID"') + expect(measurementCsv).toContain('"parentMeasurementID"') expect(measurementCsv).toContain('"verbatimMeasurementType"') expect(measurementCsv).not.toContain('"measurementRemarks"') diff --git a/backend/src/services/dwcArchiveExport.ts b/backend/src/services/dwcArchiveExport.ts index 2d973c8f3..d14e67f96 100644 --- a/backend/src/services/dwcArchiveExport.ts +++ b/backend/src/services/dwcArchiveExport.ts @@ -274,6 +274,7 @@ export const mapSpeciesToTaxonRow = (species: SpeciesForTaxonExport): TaxonCsvRo export const MEASUREMENT_HEADERS = [ 'taxonID', 'measurementID', + 'parentMeasurementID', 'measurementType', 'verbatimMeasurementType', 'measurementValue', @@ -341,11 +342,49 @@ type SpeciesForMeasurementExport = Pick< | 'sp_status' > +const isMeaningfulMeasurementValue = (value: unknown): boolean => { + if (value === null || value === undefined) return false + if (typeof value === 'string') { + return isMeaningfulString(value) + } + return true +} + +const buildCrownTypeMeasurementId = (speciesId: number, kind: 
'developmental_crown_type' | 'functional_crown_type') => + `NOW:${speciesId}:${kind}` + +type CrownSegment = string | number | null | undefined + +const mapCrownSegment = (segment: CrownSegment): string => { + if (segment === null || segment === undefined || segment === '') { + return '-' + } + + return String(segment) +} + +const formatDevelopmentalCrownType = (source: SpeciesForMeasurementExport): string => { + return [ + source.cusp_shape, + source.cusp_count_buccal, + source.cusp_count_lingual, + source.loph_count_lon, + source.loph_count_trs, + ] + .map(mapCrownSegment) + .join('') +} + +const formatFunctionalCrownType = (source: SpeciesForMeasurementExport): string => { + return [source.fct_al, source.fct_ol, source.fct_sf, source.fct_ot, source.fct_cm].map(mapCrownSegment).join('') +} + const MEASUREMENT_FIELD_MAPPINGS: Array<{ field: keyof SpeciesForMeasurementExport measurementType: string measurementUnit: string measurementMethod: string + parentKind?: 'developmental_crown_type' | 'functional_crown_type' }> = [ // NOTE: In v1, measurementMethod is populated from the Pantheria VSP manual where available: // https://www.pantherion.com/dbmanual97/VSP.html @@ -570,6 +609,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', + parentKind: 'developmental_crown_type', }, { field: 'cusp_count_buccal', @@ -577,6 +617,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', + parentKind: 'developmental_crown_type', }, { field: 'cusp_count_lingual', @@ -584,6 +625,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
measurementMethod: '', + parentKind: 'developmental_crown_type', }, { field: 'loph_count_lon', @@ -591,6 +633,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', + parentKind: 'developmental_crown_type', }, { field: 'loph_count_trs', @@ -598,6 +641,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', + parentKind: 'developmental_crown_type', }, { field: 'fct_al', @@ -605,6 +649,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', + parentKind: 'functional_crown_type', }, { field: 'fct_ol', @@ -612,6 +657,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', + parentKind: 'functional_crown_type', }, { field: 'fct_sf', @@ -619,6 +665,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', + parentKind: 'functional_crown_type', }, { field: 'fct_ot', @@ -626,6 +673,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. measurementMethod: '', + parentKind: 'functional_crown_type', }, { field: 'fct_cm', @@ -633,6 +681,7 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ measurementUnit: '', // TODO(#1150): No matching field description found on pantherion.com/dbmanual97/VSP.html. 
measurementMethod: '', + parentKind: 'functional_crown_type', }, { field: 'mesowear', @@ -715,8 +764,60 @@ const MEASUREMENT_FIELD_MAPPINGS: Array<{ export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport): MeasurementCsvRow[] => { const taxonID = `NOW:${species.species_id}` + const speciesId = species.species_id + + const developmentalSegments = [ + species.cusp_shape, + species.cusp_count_buccal, + species.cusp_count_lingual, + species.loph_count_lon, + species.loph_count_trs, + ] + const functionalSegments = [species.fct_al, species.fct_ol, species.fct_sf, species.fct_ot, species.fct_cm] + + const hasDevelopmentalCrownType = developmentalSegments.some(isMeaningfulMeasurementValue) + const hasFunctionalCrownType = functionalSegments.some(isMeaningfulMeasurementValue) - return MEASUREMENT_FIELD_MAPPINGS.flatMap(mapping => { + const parentIds = { + developmental_crown_type: hasDevelopmentalCrownType + ? buildCrownTypeMeasurementId(speciesId, 'developmental_crown_type') + : '', + functional_crown_type: hasFunctionalCrownType + ? buildCrownTypeMeasurementId(speciesId, 'functional_crown_type') + : '', + } as const + + const calculatedRows: MeasurementCsvRow[] = [] + + if (hasDevelopmentalCrownType) { + calculatedRows.push({ + taxonID, + measurementID: parentIds.developmental_crown_type, + parentMeasurementID: '', + measurementType: 'developmental crown type', + verbatimMeasurementType: 'developmental_crown_type', + measurementValue: formatDevelopmentalCrownType(species), + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }) + } + + if (hasFunctionalCrownType) { + calculatedRows.push({ + taxonID, + measurementID: parentIds.functional_crown_type, + parentMeasurementID: '', + measurementType: 'functional crown type', + verbatimMeasurementType: 'functional_crown_type', + measurementValue: formatFunctionalCrownType(species), + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }) + } + + const fieldRows = MEASUREMENT_FIELD_MAPPINGS.flatMap(mapping => { if (mapping.field === 'species_id') return [] const rawValue = species[mapping.field] if (rawValue === null || rawValue === undefined) return [] @@ -730,6 +831,7 @@ export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport { taxonID, measurementID: `NOW:${species.species_id}:${mapping.field.toString()}`, + parentMeasurementID: mapping.parentKind ? parentIds[mapping.parentKind] : '', measurementType: mapping.measurementType, verbatimMeasurementType: mapping.field.toString(), measurementValue, @@ -738,6 +840,8 @@ export const mapSpeciesToMeasurementRows = (species: SpeciesForMeasurementExport }, ] }) + + return [...calculatedRows, ...fieldRows] } const DWC_TERMS = { @@ -770,6 +874,7 @@ const DWC_TERMS = { rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID', measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', + parentMeasurementID: 'http://rs.tdwg.org/dwc/terms/parentMeasurementID', measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', verbatimMeasurementType: 'http://rs.tdwg.org/dwc/terms/verbatimMeasurementType', measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', @@ -791,14 +896,14 @@ export const buildMetaXml = (): string => { return ` - + taxon.csv ${taxonFields} - + measurementorfact.csv diff --git a/backend/src/unit-tests/dwcArchiveExport.test.ts b/backend/src/unit-tests/dwcArchiveExport.test.ts index 8b429fa19..2453a3156 100644 --- 
a/backend/src/unit-tests/dwcArchiveExport.test.ts +++ b/backend/src/unit-tests/dwcArchiveExport.test.ts @@ -197,7 +197,8 @@ describe('DwC-A export mapping', () => { it('generates valid meta.xml attributes for enclosed fields', () => { const metaXml = buildMetaXml() - expect(metaXml).toContain('fieldsEnclosedBy="""') + expect(metaXml).toContain("fieldsEnclosedBy='\"'") + expect(metaXml).not.toContain('fieldsEnclosedBy="""') expect(metaXml).not.toContain('fieldsEnclosedBy="\\""') }) }) diff --git a/documentation/functionality/dwc_export.md b/documentation/functionality/dwc_export.md index 29aa147b6..e1462a817 100644 --- a/documentation/functionality/dwc_export.md +++ b/documentation/functionality/dwc_export.md @@ -65,13 +65,18 @@ Long-format measurements linked by `taxonID`. Columns: -- `taxonID` = `species_id` - `taxonID` = `NOW:` - `measurementID` = `NOW::` +- `parentMeasurementID` = empty by default; for crown-type segments points to the calculated parent row - `measurementType` / `measurementUnit` / `measurementValue` per field mapping - `verbatimMeasurementType` = original DB field name (e.g. 
`diet1`, `body_mass`) - `measurementMethod` = Pantheria VSP field description where available (`https://www.pantherion.com/dbmanual97/VSP.html`) +Calculated tooth rows (emitted only when at least one segment field is present): + +- `NOW::developmental_crown_type` (value is a 5-char concatenation of `cusp_shape`, `cusp_count_buccal`, `cusp_count_lingual`, `loph_count_lon`, `loph_count_trs`, using `-` for missing) +- `NOW::functional_crown_type` (value is a 5-char concatenation of `fct_al`, `fct_ol`, `fct_sf`, `fct_ot`, `fct_cm`, using `-` for missing) + v1 includes these `com_species` fields (rows emitted only when source value is non-null and non-empty; `-` is treated as empty): - `strain` From 58091d42ad8cb524d29b313f6e8271321a4969e9 Mon Sep 17 00:00:00 2001 From: karilint Date: Mon, 27 Apr 2026 13:21:35 +0300 Subject: [PATCH 09/23] DwC-A: revise taxonRank heuristics --- backend/src/services/dwcArchiveExport.ts | 43 +++++++++++++++--------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/backend/src/services/dwcArchiveExport.ts b/backend/src/services/dwcArchiveExport.ts index d14e67f96..ea5226319 100644 --- a/backend/src/services/dwcArchiveExport.ts +++ b/backend/src/services/dwcArchiveExport.ts @@ -122,6 +122,12 @@ const isSingleLowercaseWord = (value: string | null): boolean => { return /^[a-z]+$/.test(trimmed) } +const isSinglePropercaseWord = (value: string | null): boolean => { + if (!isMeaningfulString(value)) return false + const trimmed = value.trim() + return /^[A-Z][a-z]+$/.test(trimmed) +} + const isSpeciesSp = (value: string): boolean => /^sp\.?$/i.test(value.trim()) const includesIndet = (value: string): boolean => value.toLowerCase().includes('indet.') @@ -145,29 +151,36 @@ const resolveTaxonRank = ({ tribe: string subtribe: string }): string => { - // Start from the lowest rank to the highest, but respect the indet.* rules which - // indicate that lower taxa are unknown and we should not emit species/subspecies ranks. 
- const genusIndet = includesIndet(genus) - const epithetIndet = includesIndet(specificEpithet) - - if (!genusIndet && !epithetIndet) { - const speciesSp = isSpeciesSp(specificEpithet) - if (!speciesSp && isSingleLowercaseWord(uniqueIdentifier)) return 'subspecies' - if (uniqueIdentifier === '-') return 'species' - if (speciesSp) return 'species' + const genusIsPropercaseWord = isSinglePropercaseWord(genus) + const specificEpithetIsLowercaseWord = isSingleLowercaseWord(specificEpithet) + const uniqueIdentifierIsSingleLowercaseWord = isSingleLowercaseWord(uniqueIdentifier) + + // IMPORTANT: Rule order matters; implement in the exact execution order requested. + if (uniqueIdentifierIsSingleLowercaseWord && specificEpithetIsLowercaseWord && genusIsPropercaseWord) { + return 'subspecies' } - if (!genusIndet && epithetIndet) return 'genus' + if (isSpeciesSp(specificEpithet) && genusIsPropercaseWord) return 'species' - if (genusIndet) { + if (uniqueIdentifier === '-' && specificEpithetIsLowercaseWord && genusIsPropercaseWord) return 'species' + + if (uniqueIdentifier !== null && specificEpithetIsLowercaseWord && genusIsPropercaseWord) return 'species' + + if (includesIndet(specificEpithet) && genusIsPropercaseWord) return 'genus' + + if (includesIndet(genus)) { if (subtribe) return 'subtribe' if (tribe) return 'tribe' if (subfamily) return 'subfamily' - if (!isMeaningfulString(subclassOrSuperorderName)) return 'family' - if (includesIndet(family)) return 'order' - return 'family' + + const familyTrimmed = family.trim() + const isIncertaeSedis = familyTrimmed.toLowerCase() === 'incertae sedis' + const endsWithIdae = endsWithSuffix(familyTrimmed, 'idae') + if (!isMeaningfulString(subclassOrSuperorderName) && (endsWithIdae || isIncertaeSedis)) return 'family' } + if (includesIndet(family)) return 'order' + return 'species' } From 41ce6e7b079a8563d006587a482e88eb41da18ca Mon Sep 17 00:00:00 2001 From: karilint Date: Mon, 27 Apr 2026 13:57:18 +0300 Subject: [PATCH 10/23] Add 
admin-only DwC-A locality export --- backend/src/routes/locality.ts | 12 + .../services/dwcArchiveExportLocalities.ts | 472 ++++++++++++++++++ .../dwcArchiveExportLocalities.test.ts | 90 ++++ .../functionality/dwc_export_localities.md | 56 +++ .../Locality/LocalityDwcExportMenuItem.tsx | 88 ++++ .../src/components/Locality/LocalityTable.tsx | 2 + 6 files changed, 720 insertions(+) create mode 100644 backend/src/services/dwcArchiveExportLocalities.ts create mode 100644 backend/src/unit-tests/dwcArchiveExportLocalities.test.ts create mode 100644 documentation/functionality/dwc_export_localities.md create mode 100644 frontend/src/components/Locality/LocalityDwcExportMenuItem.tsx diff --git a/backend/src/routes/locality.ts b/backend/src/routes/locality.ts index 55582977f..04460f1b2 100644 --- a/backend/src/routes/locality.ts +++ b/backend/src/routes/locality.ts @@ -10,6 +10,8 @@ import { fixBigInt } from '../utils/common' import { EditDataType, EditMetaData, LocalityDetailsType, Role } from '../../../frontend/src/shared/types' import { AccessError, requireOneOf } from '../middlewares/authorizer' import { deleteLocality, writeLocality } from '../services/write/locality' +import { buildDwcLocalityArchiveZipBuffer } from '../services/dwcArchiveExportLocalities' +import { currentDateAsString } from '../../../frontend/src/shared/currentDateAsString' const router = Router() @@ -18,6 +20,16 @@ router.get('/all', async (req, res) => { return res.status(200).send(fixBigInt(localities)) }) +router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res) => { + const zipBuffer = await buildDwcLocalityArchiveZipBuffer() + res.setHeader('Content-Type', 'application/zip') + res.setHeader( + 'Content-Disposition', + `attachment; filename="now_dwc_localities_test_export_${currentDateAsString()}.zip"` + ) + return res.status(200).send(zipBuffer) +}) + router.get('/:id', async (req, res) => { const id = parseInt(req.params.id) const locality = await 
getLocalityDetails(id, req.user) diff --git a/backend/src/services/dwcArchiveExportLocalities.ts b/backend/src/services/dwcArchiveExportLocalities.ts new file mode 100644 index 000000000..05a08f2ee --- /dev/null +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -0,0 +1,472 @@ +import Prisma from '../../prisma/generated/now_test_client' +import { format } from 'fast-csv' +import { Writable } from 'stream' +import JSZip from 'jszip' + +const isMeaningfulString = (value: unknown): value is string => { + if (typeof value !== 'string') return false + const trimmed = value.trim() + if (!trimmed) return false + if (trimmed === '-') return false + return true +} + +const toDwcString = (value: unknown): string => { + if (value === null || value === undefined) return '' + if (typeof value === 'bigint') return value.toString() + if (typeof value === 'number') return Number.isFinite(value) ? value.toString() : '' + if (typeof value === 'boolean') return value ? 'true' : 'false' + if (typeof value === 'string') return value + if (typeof value === 'object') { + try { + return JSON.stringify(value) ?? 
'' + } catch { + return '' + } + } + return '' +} + +const writeCsvString = async (headers: string[], rows: Array>): Promise => { + return await new Promise((resolve, reject) => { + let output = '' + const csvStream = format({ + delimiter: ',', + headers, + quoteColumns: true, + quoteHeaders: true, + includeEndRowDelimiter: true, + }) + + const sink = new Writable({ + write(chunk: Buffer | string, _encoding: BufferEncoding, callback: (error?: Error | null) => void) { + if (typeof chunk === 'string') { + output += chunk + } else { + output += chunk.toString('utf8') + } + callback() + }, + }) + + sink.on('finish', () => resolve(output)) + sink.on('error', reject) + csvStream.on('error', reject) + + csvStream.pipe(sink) + for (const row of rows) { + csvStream.write(row) + } + csvStream.end() + }) +} + +export const LOCATION_HEADERS = [ + 'locationID', + 'locality', + 'country', + 'stateProvince', + 'county', + 'decimalLatitude', + 'decimalLongitude', + 'verbatimLatitude', + 'verbatimLongitude', + 'locationRemarks', +] as const + +export type LocationCsvHeader = (typeof LOCATION_HEADERS)[number] +export type LocationCsvRow = Record + +export const GEOLOGICAL_CONTEXT_HEADERS = [ + 'locationID', + 'geologicalContextID', + 'lithostratigraphicTerms', + 'group', + 'formation', + 'member', + 'bed', + 'earliestAgeOrLowestStage', + 'latestAgeOrHighestStage', +] as const + +export type GeologicalContextCsvHeader = (typeof GEOLOGICAL_CONTEXT_HEADERS)[number] +export type GeologicalContextCsvRow = Record + +export const LOCALITY_MEASUREMENT_HEADERS = [ + 'locationID', + 'measurementID', + 'measurementType', + 'verbatimMeasurementType', + 'measurementValue', + 'measurementUnit', + 'measurementMethod', + 'measurementRemarks', +] as const + +export type LocalityMeasurementCsvHeader = (typeof LOCALITY_MEASUREMENT_HEADERS)[number] +export type LocalityMeasurementCsvRow = Record + +type TimeUnitForLocalityExport = Pick + +type LocalityForExport = Pick< + Prisma.now_loc, + | 'lid' + | 
'loc_name' + | 'country' + | 'state' + | 'county' + | 'dec_lat' + | 'dec_long' + | 'dms_lat' + | 'dms_long' + | 'loc_detail' + | 'chron' + | 'lgroup' + | 'formation' + | 'member' + | 'bed' + | 'bfa_max' + | 'bfa_min' + | 'max_age' + | 'min_age' + | 'date_meth' + | 'age_comm' +> & { + now_time_unit_now_loc_bfa_maxTonow_time_unit: TimeUnitForLocalityExport | null + now_time_unit_now_loc_bfa_minTonow_time_unit: TimeUnitForLocalityExport | null +} + +const locationIdForLocality = (lid: number): string => `NOW:LOC:${lid}` + +const toMaybeMeaningful = (value: string | null | undefined): string => (isMeaningfulString(value) ? value.trim() : '') + +const toMaybeMeaningfulNumber = (value: number | null | undefined): string => { + if (value === null || value === undefined) return '' + if (!Number.isFinite(value)) return '' + // In NOW, many numeric fields default to 0 for "unknown". Treat 0 as empty for export. + if (value === 0) return '' + return value.toString() +} + +export const mapLocalityToLocationRow = (locality: LocalityForExport): LocationCsvRow => { + const locationID = locationIdForLocality(locality.lid) + + return { + locationID, + locality: toMaybeMeaningful(locality.loc_name), + country: toMaybeMeaningful(locality.country), + stateProvince: toMaybeMeaningful(locality.state), + county: toMaybeMeaningful(locality.county), + decimalLatitude: toMaybeMeaningfulNumber(locality.dec_lat), + decimalLongitude: toMaybeMeaningfulNumber(locality.dec_long), + verbatimLatitude: toMaybeMeaningful(locality.dms_lat), + verbatimLongitude: toMaybeMeaningful(locality.dms_long), + locationRemarks: [toMaybeMeaningful(locality.loc_detail), toMaybeMeaningful(locality.age_comm)] + .filter(Boolean) + .join(' | '), + } +} + +const timeUnitDisplayOrName = (timeUnit: TimeUnitForLocalityExport | null, fallbackName: string | null): string => { + if (timeUnit) { + return isMeaningfulString(timeUnit.tu_display_name) ? 
timeUnit.tu_display_name.trim() : timeUnit.tu_name.trim() + } + return isMeaningfulString(fallbackName) ? fallbackName.trim() : '' +} + +export const mapLocalityToGeologicalContextRow = (locality: LocalityForExport): GeologicalContextCsvRow => { + const locationID = locationIdForLocality(locality.lid) + + const lithostratigraphicTerms = [ + toMaybeMeaningful(locality.chron), + toMaybeMeaningful(locality.lgroup), + toMaybeMeaningful(locality.formation), + toMaybeMeaningful(locality.member), + toMaybeMeaningful(locality.bed), + ] + .filter(Boolean) + .join(' | ') + + return { + locationID, + geologicalContextID: `NOW:LOC:${locality.lid}:geology`, + lithostratigraphicTerms, + group: toMaybeMeaningful(locality.lgroup), + formation: toMaybeMeaningful(locality.formation), + member: toMaybeMeaningful(locality.member), + bed: toMaybeMeaningful(locality.bed), + earliestAgeOrLowestStage: timeUnitDisplayOrName( + locality.now_time_unit_now_loc_bfa_maxTonow_time_unit, + locality.bfa_max + ), + latestAgeOrHighestStage: timeUnitDisplayOrName( + locality.now_time_unit_now_loc_bfa_minTonow_time_unit, + locality.bfa_min + ), + } +} + +const isMeaningfulMeasurementValue = (value: unknown): boolean => { + if (value === null || value === undefined) return false + if (typeof value === 'string') return isMeaningfulString(value) + if (typeof value === 'number') return Number.isFinite(value) && value !== 0 + return true +} + +const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ + field: keyof LocalityForExport + measurementType: string + measurementUnit: string + measurementMethod: string +}> = [ + { + field: 'max_age', + measurementType: 'maximum age', + measurementUnit: 'Ma', + // TODO(#1150): Add authoritative definition for NOW locality max_age/min_age semantics. + measurementMethod: '', + }, + { + field: 'min_age', + measurementType: 'minimum age', + measurementUnit: 'Ma', + // TODO(#1150): Add authoritative definition for NOW locality max_age/min_age semantics. 
+ measurementMethod: '', + }, + { + field: 'date_meth', + measurementType: 'dating method', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary / description. + measurementMethod: '', + }, + { + field: 'chron', + measurementType: 'chron', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'bfa_max', + measurementType: 'BFA max', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'bfa_min', + measurementType: 'BFA min', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, +] + +export const mapLocalityToMeasurementRows = (locality: LocalityForExport): LocalityMeasurementCsvRow[] => { + const locationID = locationIdForLocality(locality.lid) + + return LOCALITY_MEASUREMENT_MAPPINGS.flatMap(mapping => { + const rawValue = locality[mapping.field] + if (!isMeaningfulMeasurementValue(rawValue)) return [] + + const measurementValue = toDwcString(rawValue).trim() + if (!measurementValue) return [] + + return [ + { + locationID, + measurementID: `NOW:LOC:${locality.lid}:${mapping.field.toString()}`, + measurementType: mapping.measurementType, + verbatimMeasurementType: mapping.field.toString(), + measurementValue, + measurementUnit: mapping.measurementUnit, + measurementMethod: mapping.measurementMethod, + // TODO(#1150): Decide whether any locality measurementRemarks should be emitted and from which columns. 
+ measurementRemarks: '', + }, + ] + }) +} + +const DWC_TERMS = { + location: { + rowType: 'http://rs.tdwg.org/dwc/terms/Location', + locationID: 'http://rs.tdwg.org/dwc/terms/locationID', + locality: 'http://rs.tdwg.org/dwc/terms/locality', + country: 'http://rs.tdwg.org/dwc/terms/country', + stateProvince: 'http://rs.tdwg.org/dwc/terms/stateProvince', + county: 'http://rs.tdwg.org/dwc/terms/county', + decimalLatitude: 'http://rs.tdwg.org/dwc/terms/decimalLatitude', + decimalLongitude: 'http://rs.tdwg.org/dwc/terms/decimalLongitude', + verbatimLatitude: 'http://rs.tdwg.org/dwc/terms/verbatimLatitude', + verbatimLongitude: 'http://rs.tdwg.org/dwc/terms/verbatimLongitude', + locationRemarks: 'http://rs.tdwg.org/dwc/terms/locationRemarks', + }, + geologicalContext: { + rowType: 'http://rs.tdwg.org/dwc/terms/GeologicalContext', + locationID: 'http://rs.tdwg.org/dwc/terms/locationID', + geologicalContextID: 'http://rs.tdwg.org/dwc/terms/geologicalContextID', + lithostratigraphicTerms: 'http://rs.tdwg.org/dwc/terms/lithostratigraphicTerms', + group: 'http://rs.tdwg.org/dwc/terms/group', + formation: 'http://rs.tdwg.org/dwc/terms/formation', + member: 'http://rs.tdwg.org/dwc/terms/member', + bed: 'http://rs.tdwg.org/dwc/terms/bed', + earliestAgeOrLowestStage: 'http://rs.tdwg.org/dwc/terms/earliestAgeOrLowestStage', + latestAgeOrHighestStage: 'http://rs.tdwg.org/dwc/terms/latestAgeOrHighestStage', + }, + measurement: { + rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', + locationID: 'http://rs.tdwg.org/dwc/terms/locationID', + measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', + measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', + verbatimMeasurementType: 'http://rs.tdwg.org/dwc/terms/verbatimMeasurementType', + measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', + measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', + measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', + measurementRemarks: 
'http://rs.tdwg.org/dwc/terms/measurementRemarks', + }, +} as const + +export const buildLocalityMetaXml = (): string => { + const locationFields = LOCATION_HEADERS.map((header, index) => { + const term = (DWC_TERMS.location as Record)[header] + return ` ` + }).join('\n') + + const geologyFields = GEOLOGICAL_CONTEXT_HEADERS.map((header, index) => { + const term = (DWC_TERMS.geologicalContext as Record)[header] + return ` ` + }).join('\n') + + const measurementFields = LOCALITY_MEASUREMENT_HEADERS.map((header, index) => { + const term = (DWC_TERMS.measurement as Record)[header] + return ` ` + }).join('\n') + + return ` + + + + location.csv + + +${locationFields} + + + + geologicalcontext.csv + + +${geologyFields} + + + + measurementorfact.csv + + +${measurementFields} + + +` +} + +export const buildLocalityEmlXml = (publicationDateIso: string): string => { + return ` + + + + NOW database Darwin Core test export (localities) + + + NOW database + + + + + NOW database + + + ${publicationDateIso} + + Admin-only test Darwin Core Archive export for localities, mapping Location + GeologicalContext + MeasurementOrFact terms. Field mappings are intentionally limited for v1. + + + TODO(#1150): Add rights / license information. 
+ + + +` +} + +export const buildDwcLocalityArchiveZipBufferFromLocalities = async ( + localities: LocalityForExport[] +): Promise => { + const locationRows = localities.map(mapLocalityToLocationRow) + const geologicalContextRows = localities.map(mapLocalityToGeologicalContextRow) + const measurementRows = localities.flatMap(mapLocalityToMeasurementRows) + + const locationCsv = await writeCsvString([...LOCATION_HEADERS], locationRows) + const geologyCsv = await writeCsvString([...GEOLOGICAL_CONTEXT_HEADERS], geologicalContextRows) + const measurementCsv = await writeCsvString([...LOCALITY_MEASUREMENT_HEADERS], measurementRows) + const metaXml = buildLocalityMetaXml() + + const publicationDateIso = new Date().toISOString().slice(0, 10) + const emlXml = buildLocalityEmlXml(publicationDateIso) + + const zip = new JSZip() + zip.file('location.csv', locationCsv) + zip.file('geologicalcontext.csv', geologyCsv) + zip.file('measurementorfact.csv', measurementCsv) + zip.file('meta.xml', metaXml) + zip.file('eml.xml', emlXml) + + return await zip.generateAsync({ type: 'nodebuffer' }) +} + +export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { + const prisma = new Prisma.PrismaClient() + try { + const localities = await prisma.now_loc.findMany({ + select: { + lid: true, + loc_name: true, + country: true, + state: true, + county: true, + dec_lat: true, + dec_long: true, + dms_lat: true, + dms_long: true, + loc_detail: true, + chron: true, + lgroup: true, + formation: true, + member: true, + bed: true, + bfa_max: true, + bfa_min: true, + max_age: true, + min_age: true, + date_meth: true, + age_comm: true, + now_time_unit_now_loc_bfa_maxTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + now_time_unit_now_loc_bfa_minTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + }, + }) + + return await buildDwcLocalityArchiveZipBufferFromLocalities(localities as 
unknown as LocalityForExport[]) + } finally { + await prisma.$disconnect() + } +} diff --git a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts new file mode 100644 index 000000000..84b06ba41 --- /dev/null +++ b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts @@ -0,0 +1,90 @@ +import { describe, expect, it } from '@jest/globals' +import JSZip from 'jszip' +import { + buildDwcLocalityArchiveZipBufferFromLocalities, + buildLocalityMetaXml, + mapLocalityToGeologicalContextRow, + mapLocalityToLocationRow, + mapLocalityToMeasurementRows, +} from '../services/dwcArchiveExportLocalities' + +describe('DwC-A locality export mapping', () => { + const baseLocality = { + lid: 42, + loc_name: 'Test locality', + country: 'Finland', + state: 'Uusimaa', + county: 'Helsinki', + dec_lat: 60.1699, + dec_long: 24.9384, + dms_lat: null, + dms_long: null, + loc_detail: 'Some notes', + chron: 'Test chron', + lgroup: 'Test group', + formation: 'Test formation', + member: 'Test member', + bed: 'Test bed', + bfa_max: 'BFA_MAX', + bfa_min: 'BFA_MIN', + max_age: 12.3, + min_age: 4.5, + date_meth: 'radioisotope', + age_comm: 'Age comment', + now_time_unit_now_loc_bfa_maxTonow_time_unit: null, + now_time_unit_now_loc_bfa_minTonow_time_unit: null, + } as const + + it('maps now_loc row to a DwC Location row', () => { + const row = mapLocalityToLocationRow(baseLocality) + expect(row.locationID).toEqual('NOW:LOC:42') + expect(row.locality).toEqual('Test locality') + expect(row.country).toEqual('Finland') + expect(row.stateProvince).toEqual('Uusimaa') + expect(row.county).toEqual('Helsinki') + expect(row.decimalLatitude).toEqual('60.1699') + expect(row.decimalLongitude).toEqual('24.9384') + expect(row.locationRemarks).toContain('Some notes') + }) + + it('maps now_loc row to a DwC GeologicalContext row', () => { + const row = mapLocalityToGeologicalContextRow(baseLocality) + expect(row.locationID).toEqual('NOW:LOC:42') + 
expect(row.geologicalContextID).toEqual('NOW:LOC:42:geology') + expect(row.group).toEqual('Test group') + expect(row.formation).toEqual('Test formation') + expect(row.member).toEqual('Test member') + expect(row.bed).toEqual('Test bed') + expect(row.earliestAgeOrLowestStage).toEqual('BFA_MAX') + expect(row.latestAgeOrHighestStage).toEqual('BFA_MIN') + }) + + it('emits locality measurements only for meaningful values', () => { + const rows = mapLocalityToMeasurementRows({ + ...baseLocality, + chron: '-', + bfa_min: null, + }) + expect(rows.some(r => r.verbatimMeasurementType === 'max_age')).toEqual(true) + expect(rows.some(r => r.verbatimMeasurementType === 'min_age')).toEqual(true) + expect(rows.some(r => r.verbatimMeasurementType === 'chron')).toEqual(false) + expect(rows.some(r => r.verbatimMeasurementType === 'bfa_min')).toEqual(false) + }) + + it('generates a ZIP archive with expected files', async () => { + const zipBuffer = await buildDwcLocalityArchiveZipBufferFromLocalities([baseLocality]) + const zip = await JSZip.loadAsync(zipBuffer) + expect(zip.file('location.csv')).toBeTruthy() + expect(zip.file('geologicalcontext.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + }) + + it('generates valid meta.xml attributes for enclosed fields', () => { + const metaXml = buildLocalityMetaXml() + expect(metaXml).toContain("fieldsEnclosedBy='\"'") + expect(metaXml).not.toContain('fieldsEnclosedBy="""') + expect(metaXml).not.toContain('fieldsEnclosedBy="\\""') + }) +}) diff --git a/documentation/functionality/dwc_export_localities.md b/documentation/functionality/dwc_export_localities.md new file mode 100644 index 000000000..72467d72c --- /dev/null +++ b/documentation/functionality/dwc_export_localities.md @@ -0,0 +1,56 @@ +# DwC-A export: localities (v1) + +This document describes the admin-only Darwin Core Archive (DwC-A) test export for localities. 
+ +## Files + +The export ZIP contains: + +- `location.csv` (DwC `Location` core) +- `geologicalcontext.csv` (DwC `GeologicalContext` extension) +- `measurementorfact.csv` (DwC `MeasurementOrFact` extension) +- `meta.xml` (DwC-A metadata) +- `eml.xml` (minimal placeholder EML metadata) + +## Core: `location.csv` + +Core rowType: `http://rs.tdwg.org/dwc/terms/Location` + +v1 columns: + +- `locationID` = `NOW:LOC:` +- `locality` = `loc_name` +- `country` = `country` +- `stateProvince` = `state` +- `county` = `county` +- `decimalLatitude` / `decimalLongitude` = `dec_lat` / `dec_long` (0 treated as empty for v1) +- `verbatimLatitude` / `verbatimLongitude` = `dms_lat` / `dms_long` +- `locationRemarks` = `loc_detail` and `age_comm` (joined with ` | `) + +## Extension: `geologicalcontext.csv` + +Extension rowType: `http://rs.tdwg.org/dwc/terms/GeologicalContext` + +v1 columns: + +- `locationID` = `NOW:LOC:` (core id) +- `geologicalContextID` = `NOW:LOC::geology` +- `lithostratigraphicTerms` = `chron`, `lgroup`, `formation`, `member`, `bed` (joined with ` | `) +- `group` / `formation` / `member` / `bed` mapped from locality columns +- `earliestAgeOrLowestStage` = `bfa_max` (uses related `now_time_unit.tu_display_name` when available) +- `latestAgeOrHighestStage` = `bfa_min` (uses related `now_time_unit.tu_display_name` when available) + +## Extension: `measurementorfact.csv` + +Extension rowType: `http://rs.tdwg.org/dwc/terms/MeasurementOrFact` + +v1 emits a small set of locality/time-related measurements. Each row has: + +- `locationID` = `NOW:LOC:` (core id) +- `measurementID` = `NOW:LOC::` +- `verbatimMeasurementType` = original DB field name + +## Admin-only + +The backend route is restricted to `Role.Admin`. 
+ diff --git a/frontend/src/components/Locality/LocalityDwcExportMenuItem.tsx b/frontend/src/components/Locality/LocalityDwcExportMenuItem.tsx new file mode 100644 index 000000000..25f601356 --- /dev/null +++ b/frontend/src/components/Locality/LocalityDwcExportMenuItem.tsx @@ -0,0 +1,88 @@ +import { useState } from 'react' +import { MenuItem } from '@mui/material' +import { useNotify } from '@/hooks/notification' +import { BACKEND_URL } from '@/util/config' +import { useUser } from '@/hooks/user' +import { Role } from '@/shared/types' +import { currentDateAsString } from '@/shared/currentDateAsString' + +export const LocalityDwcExportMenuItem = ({ handleClose }: { handleClose: () => void }) => { + const [loading, setLoading] = useState(false) + const { notify, setMessage: setNotificationMessage } = useNotify() + const user = useUser() + + if (user.role !== Role.Admin) { + return null + } + + const fetchOptions = user.token ? { headers: { Authorization: `Bearer ${user.token}` } } : {} + const filename = `now_dwc_localities_test_export_${currentDateAsString()}.zip` + + const fetchZipFile = async () => { + setLoading(true) + notify('Generating DwC-A ZIP export, please wait...', 'info', null) + + try { + const response = await fetch(`${BACKEND_URL}/locality/export/dwc-archive`, fetchOptions) + if (!response.ok) { + throw new Error('Server response was not OK.') + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Missing response stream.') + } + + const file: Uint8Array[] = [] + let bytes = 0 + let closed = false + + const showDownloadProgress = () => { + if (!closed) { + setTimeout(() => { + setNotificationMessage(`Downloading DwC-A ZIP, ${Math.round((bytes / 1000000) * 10) / 10} MB`) + showDownloadProgress() + }, 500) + } + } + + notify('Downloading DwC-A ZIP...', 'info', null) + showDownloadProgress() + + while (true) { + const { done, value } = await reader.read() + if (done) break + bytes = bytes + value.length + file.push(value) + 
} + closed = true + + const blobUrl = window.URL.createObjectURL(new Blob(file, { type: 'application/zip' })) + const downloadLink = document.createElement('a') + downloadLink.href = blobUrl + downloadLink.download = filename + document.body.appendChild(downloadLink) + downloadLink.click() + downloadLink.remove() + window.URL.revokeObjectURL(blobUrl) + + notify('Download finished.') + } catch { + notify('Downloading DwC-A export failed.', 'error') + } finally { + setLoading(false) + } + } + + return ( + { + void fetchZipFile() + handleClose() + }} + disabled={loading} + > + Export DwC-A (localities) + + ) +} diff --git a/frontend/src/components/Locality/LocalityTable.tsx b/frontend/src/components/Locality/LocalityTable.tsx index e76ff9798..efc158c97 100755 --- a/frontend/src/components/Locality/LocalityTable.tsx +++ b/frontend/src/components/Locality/LocalityTable.tsx @@ -10,6 +10,7 @@ import { usePageContext } from '../Page' import { LocalitySynonymsModal } from './LocalitySynonymsModal' import { currentDateAsString } from '@/shared/currentDateAsString' import { matchesCountryOrContinent } from '@/shared/validators/countryContinents' +import { LocalityDwcExportMenuItem } from './LocalityDwcExportMenuItem' const LocalitiesMap = lazy(async () => { const module = await import('../Map/LocalitiesMap') @@ -523,6 +524,7 @@ export const LocalityTable = ({ selectorFn }: { selectorFn?: (newObject: Localit url="locality" kmlExport={kmlExport} svgExport={svgExport} + renderExtraExportMenuItems={handleClose => } enableColumnFilterModes={true} tableRowAction={handleLocalityRowActionClick} /> From a8b0178301a0cf4b5ddfa51877605d58c20296ca Mon Sep 17 00:00:00 2001 From: karilint Date: Mon, 27 Apr 2026 14:12:23 +0300 Subject: [PATCH 11/23] Fix locality export Prisma client usage --- .../services/dwcArchiveExportLocalities.ts | 74 +++++++++---------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/backend/src/services/dwcArchiveExportLocalities.ts 
b/backend/src/services/dwcArchiveExportLocalities.ts index 05a08f2ee..98dfee33c 100644 --- a/backend/src/services/dwcArchiveExportLocalities.ts +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -1,4 +1,4 @@ -import Prisma from '../../prisma/generated/now_test_client' +import type { now_loc, now_time_unit } from '../../prisma/generated/now_test_client' import { format } from 'fast-csv' import { Writable } from 'stream' import JSZip from 'jszip' @@ -106,10 +106,10 @@ export const LOCALITY_MEASUREMENT_HEADERS = [ export type LocalityMeasurementCsvHeader = (typeof LOCALITY_MEASUREMENT_HEADERS)[number] export type LocalityMeasurementCsvRow = Record -type TimeUnitForLocalityExport = Pick +type TimeUnitForLocalityExport = Pick type LocalityForExport = Pick< - Prisma.now_loc, + now_loc, | 'lid' | 'loc_name' | 'country' @@ -431,42 +431,38 @@ export const buildDwcLocalityArchiveZipBufferFromLocalities = async ( } export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { - const prisma = new Prisma.PrismaClient() - try { - const localities = await prisma.now_loc.findMany({ - select: { - lid: true, - loc_name: true, - country: true, - state: true, - county: true, - dec_lat: true, - dec_long: true, - dms_lat: true, - dms_long: true, - loc_detail: true, - chron: true, - lgroup: true, - formation: true, - member: true, - bed: true, - bfa_max: true, - bfa_min: true, - max_age: true, - min_age: true, - date_meth: true, - age_comm: true, - now_time_unit_now_loc_bfa_maxTonow_time_unit: { - select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, - }, - now_time_unit_now_loc_bfa_minTonow_time_unit: { - select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, - }, + const { nowDb } = await import('../utils/db') + const localities = await nowDb.now_loc.findMany({ + select: { + lid: true, + loc_name: true, + country: true, + state: true, + county: true, + dec_lat: true, + dec_long: true, + dms_lat: true, + dms_long: true, + 
loc_detail: true, + chron: true, + lgroup: true, + formation: true, + member: true, + bed: true, + bfa_max: true, + bfa_min: true, + max_age: true, + min_age: true, + date_meth: true, + age_comm: true, + now_time_unit_now_loc_bfa_maxTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, }, - }) + now_time_unit_now_loc_bfa_minTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + }, + }) - return await buildDwcLocalityArchiveZipBufferFromLocalities(localities as unknown as LocalityForExport[]) - } finally { - await prisma.$disconnect() - } + return await buildDwcLocalityArchiveZipBufferFromLocalities(localities as unknown as LocalityForExport[]) } From 0df9384714ab1bfefef25ec7b686f7a64ec1bef2 Mon Sep 17 00:00:00 2001 From: karilint Date: Mon, 27 Apr 2026 14:30:42 +0300 Subject: [PATCH 12/23] DwC-A localities: add continent/higherGeography/elevation --- .../services/dwcArchiveExportLocalities.ts | 28 +++++++++++++++++++ .../dwcArchiveExportLocalities.test.ts | 6 ++++ .../functionality/dwc_export_localities.md | 4 ++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/backend/src/services/dwcArchiveExportLocalities.ts b/backend/src/services/dwcArchiveExportLocalities.ts index 98dfee33c..038599b15 100644 --- a/backend/src/services/dwcArchiveExportLocalities.ts +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -2,6 +2,7 @@ import type { now_loc, now_time_unit } from '../../prisma/generated/now_test_cli import { format } from 'fast-csv' import { Writable } from 'stream' import JSZip from 'jszip' +import { getContinentByCountry } from '../../../frontend/src/shared/validators/countryContinents' const isMeaningfulString = (value: unknown): value is string => { if (typeof value !== 'string') return false @@ -64,13 +65,16 @@ const writeCsvString = async (headers: string[], rows: Array { export const mapLocalityToLocationRow = (locality: LocalityForExport): 
LocationCsvRow => { const locationID = locationIdForLocality(locality.lid) + const continent = getContinentByCountry(locality.country) ?? '' + + const higherGeography = [ + continent, + toMaybeMeaningful(locality.country), + toMaybeMeaningful(locality.state), + toMaybeMeaningful(locality.county), + toMaybeMeaningful(locality.basin), + toMaybeMeaningful(locality.subbasin), + ] + .filter(Boolean) + .join('|') return { locationID, locality: toMaybeMeaningful(locality.loc_name), + continent, country: toMaybeMeaningful(locality.country), stateProvince: toMaybeMeaningful(locality.state), county: toMaybeMeaningful(locality.county), + higherGeography, decimalLatitude: toMaybeMeaningfulNumber(locality.dec_lat), decimalLongitude: toMaybeMeaningfulNumber(locality.dec_long), verbatimLatitude: toMaybeMeaningful(locality.dms_lat), verbatimLongitude: toMaybeMeaningful(locality.dms_long), + verbatimElevation: locality.altitude === null || locality.altitude === undefined ? '' : String(locality.altitude), locationRemarks: [toMaybeMeaningful(locality.loc_detail), toMaybeMeaningful(locality.age_comm)] .filter(Boolean) .join(' | '), @@ -294,13 +316,16 @@ const DWC_TERMS = { rowType: 'http://rs.tdwg.org/dwc/terms/Location', locationID: 'http://rs.tdwg.org/dwc/terms/locationID', locality: 'http://rs.tdwg.org/dwc/terms/locality', + continent: 'http://rs.tdwg.org/dwc/terms/continent', country: 'http://rs.tdwg.org/dwc/terms/country', stateProvince: 'http://rs.tdwg.org/dwc/terms/stateProvince', county: 'http://rs.tdwg.org/dwc/terms/county', + higherGeography: 'http://rs.tdwg.org/dwc/terms/higherGeography', decimalLatitude: 'http://rs.tdwg.org/dwc/terms/decimalLatitude', decimalLongitude: 'http://rs.tdwg.org/dwc/terms/decimalLongitude', verbatimLatitude: 'http://rs.tdwg.org/dwc/terms/verbatimLatitude', verbatimLongitude: 'http://rs.tdwg.org/dwc/terms/verbatimLongitude', + verbatimElevation: 'http://rs.tdwg.org/dwc/terms/verbatimElevation', locationRemarks: 
'http://rs.tdwg.org/dwc/terms/locationRemarks', }, geologicalContext: { @@ -436,6 +461,8 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { select: { lid: true, loc_name: true, + basin: true, + subbasin: true, country: true, state: true, county: true, @@ -443,6 +470,7 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { dec_long: true, dms_lat: true, dms_long: true, + altitude: true, loc_detail: true, chron: true, lgroup: true, diff --git a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts index 84b06ba41..537ad506c 100644 --- a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts +++ b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts @@ -12,6 +12,8 @@ describe('DwC-A locality export mapping', () => { const baseLocality = { lid: 42, loc_name: 'Test locality', + basin: 'Test basin', + subbasin: 'Test subbasin', country: 'Finland', state: 'Uusimaa', county: 'Helsinki', @@ -19,6 +21,7 @@ describe('DwC-A locality export mapping', () => { dec_long: 24.9384, dms_lat: null, dms_long: null, + altitude: 123, loc_detail: 'Some notes', chron: 'Test chron', lgroup: 'Test group', @@ -39,11 +42,14 @@ describe('DwC-A locality export mapping', () => { const row = mapLocalityToLocationRow(baseLocality) expect(row.locationID).toEqual('NOW:LOC:42') expect(row.locality).toEqual('Test locality') + expect(row.continent).toEqual('Europe') expect(row.country).toEqual('Finland') expect(row.stateProvince).toEqual('Uusimaa') expect(row.county).toEqual('Helsinki') + expect(row.higherGeography).toEqual('Europe|Finland|Uusimaa|Helsinki|Test basin|Test subbasin') expect(row.decimalLatitude).toEqual('60.1699') expect(row.decimalLongitude).toEqual('24.9384') + expect(row.verbatimElevation).toEqual('123') expect(row.locationRemarks).toContain('Some notes') }) diff --git a/documentation/functionality/dwc_export_localities.md 
b/documentation/functionality/dwc_export_localities.md index 72467d72c..66c8486c0 100644 --- a/documentation/functionality/dwc_export_localities.md +++ b/documentation/functionality/dwc_export_localities.md @@ -20,11 +20,14 @@ v1 columns: - `locationID` = `NOW:LOC:` - `locality` = `loc_name` +- `continent` = derived from `country` (via shared country→continent map) - `country` = `country` - `stateProvince` = `state` - `county` = `county` +- `higherGeography` = `continent|country|state|county|basin|subbasin` (skip empty) - `decimalLatitude` / `decimalLongitude` = `dec_lat` / `dec_long` (0 treated as empty for v1) - `verbatimLatitude` / `verbatimLongitude` = `dms_lat` / `dms_long` +- `verbatimElevation` = `altitude` - `locationRemarks` = `loc_detail` and `age_comm` (joined with ` | `) ## Extension: `geologicalcontext.csv` @@ -53,4 +56,3 @@ v1 emits a small set of locality/time-related measurements. Each row has: ## Admin-only The backend route is restricted to `Role.Admin`. - From 75579122a61562f7007cf2fae66ff5ef95cdcb2d Mon Sep 17 00:00:00 2001 From: karilint Date: Mon, 27 Apr 2026 15:26:50 +0300 Subject: [PATCH 13/23] DwC-A localities: expand MeasurementOrFact + parents --- .../services/dwcArchiveExportLocalities.ts | 402 ++++++++++++++++-- .../dwcArchiveExportLocalities.test.ts | 35 ++ .../functionality/dwc_export_localities.md | 13 +- 3 files changed, 416 insertions(+), 34 deletions(-) diff --git a/backend/src/services/dwcArchiveExportLocalities.ts b/backend/src/services/dwcArchiveExportLocalities.ts index 038599b15..8beb99d82 100644 --- a/backend/src/services/dwcArchiveExportLocalities.ts +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -97,14 +97,14 @@ export type GeologicalContextCsvHeader = (typeof GEOLOGICAL_CONTEXT_HEADERS)[num export type GeologicalContextCsvRow = Record export const LOCALITY_MEASUREMENT_HEADERS = [ - 'locationID', + 'taxonID', 'measurementID', + 'parentMeasurementID', 'measurementType', 'verbatimMeasurementType', 
'measurementValue', 'measurementUnit', 'measurementMethod', - 'measurementRemarks', ] as const export type LocalityMeasurementCsvHeader = (typeof LOCALITY_MEASUREMENT_HEADERS)[number] @@ -134,13 +134,48 @@ type LocalityForExport = Pick< | 'bed' | 'bfa_max' | 'bfa_min' + | 'bfa_max_abs' + | 'bfa_min_abs' + | 'frac_max' + | 'frac_min' | 'max_age' | 'min_age' | 'date_meth' | 'age_comm' + | 'site_area' + | 'appr_num_spm' + | 'num_spm' + | 'true_quant' + | 'complete' + | 'num_quad' + | 'rock_type' + | 'rt_adj' + | 'lith_comm' + | 'depo_context1' + | 'depo_context2' + | 'depo_context3' + | 'depo_context4' + | 'depo_comm' + | 'sed_env_1' + | 'sed_env_2' + | 'event_circum' + | 'se_comm' + | 'assem_fm' + | 'transport' + | 'trans_mod' + | 'weath_trmp' + | 'pt_conc' + | 'size_type' + | 'vert_pres' + | 'plant_pres' + | 'invert_pres' + | 'time_rep' + | 'taph_comm' > & { now_time_unit_now_loc_bfa_maxTonow_time_unit: TimeUnitForLocalityExport | null now_time_unit_now_loc_bfa_minTonow_time_unit: TimeUnitForLocalityExport | null + now_syn_loc: ReadonlyArray<{ synonym: string | null }> + now_ss: ReadonlyArray<{ sed_struct: string }> } const locationIdForLocality = (lid: number): string => `NOW:LOC:${lid}` @@ -235,6 +270,22 @@ const isMeaningfulMeasurementValue = (value: unknown): boolean => { return true } +const concatMeaningful = (values: Array): string => { + const parts = values.map(toMaybeMeaningful).filter(Boolean) + return parts.join('|') +} + +const buildLocalityMeasurementId = (lid: number, kind: string): string => `NOW:LOC:${lid}:${kind}` + +const formatAgeRange = (locality: LocalityForExport): string => { + const minAge = toMaybeMeaningfulNumber(locality.min_age) + const maxAge = toMaybeMeaningfulNumber(locality.max_age) + if (minAge && maxAge) return `${minAge}-${maxAge}` + if (minAge) return minAge + if (maxAge) return maxAge + return '' +} + const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ field: keyof LocalityForExport measurementType: string @@ -242,43 +293,99 @@ const 
LOCALITY_MEASUREMENT_MAPPINGS: Array<{ measurementMethod: string }> = [ { - field: 'max_age', - measurementType: 'maximum age', - measurementUnit: 'Ma', - // TODO(#1150): Add authoritative definition for NOW locality max_age/min_age semantics. + field: 'bfa_max', + measurementType: 'Basis for age (Time Unit)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. measurementMethod: '', }, { - field: 'min_age', - measurementType: 'minimum age', - measurementUnit: 'Ma', - // TODO(#1150): Add authoritative definition for NOW locality max_age/min_age semantics. + field: 'bfa_min', + measurementType: 'Basis for age (Time Unit)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. measurementMethod: '', }, { - field: 'date_meth', - measurementType: 'dating method', + field: 'bfa_max_abs', + measurementType: 'Basis for age (Absolute)', measurementUnit: '', - // TODO(#1150): Add controlled vocabulary / description. + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. measurementMethod: '', }, { - field: 'chron', - measurementType: 'chron', + field: 'bfa_min_abs', + measurementType: 'Basis for age (Absolute)', measurementUnit: '', - // TODO(#1150): Add field description. + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. measurementMethod: '', }, { - field: 'bfa_max', - measurementType: 'BFA max', + field: 'frac_max', + measurementType: 'Basis for age (Fraction)', measurementUnit: '', - // TODO(#1150): Add field description. + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. measurementMethod: '', }, { - field: 'bfa_min', - measurementType: 'BFA min', + field: 'frac_min', + measurementType: 'Basis for age (Fraction)', + measurementUnit: '', + // TODO(#1150): Add authoritative definition for NOW locality basis-for-age fields. 
+ measurementMethod: '', + }, + { + field: 'site_area', + measurementType: 'site area', + measurementUnit: '', + // TODO(#1150): Add unit and definition for NOW locality site_area. + measurementMethod: '', + }, + { + field: 'appr_num_spm', + measurementType: 'approximate number of specimens', + measurementUnit: '', + // TODO(#1150): Confirm whether this is a sample-unit or locality-level count. + measurementMethod: '', + }, + { + field: 'num_spm', + measurementType: 'number of specimens', + measurementUnit: '', + // TODO(#1150): Confirm whether this is a sample-unit or locality-level count. + measurementMethod: '', + }, + { + field: 'true_quant', + measurementType: 'true quantification', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary for NOW locality true_quant. + measurementMethod: '', + }, + { + field: 'complete', + measurementType: 'complete sampling', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary for NOW locality complete. + measurementMethod: '', + }, + { + field: 'num_quad', + measurementType: 'number of quadrats', + measurementUnit: '', + // TODO(#1150): Add definition for NOW locality num_quad. + measurementMethod: '', + }, + { + field: 'date_meth', + measurementType: 'dating method', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary / description. + measurementMethod: '', + }, + { + field: 'chron', + measurementType: 'chron', measurementUnit: '', // TODO(#1150): Add field description. 
measurementMethod: '', @@ -286,29 +393,225 @@ const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ ] export const mapLocalityToMeasurementRows = (locality: LocalityForExport): LocalityMeasurementCsvRow[] => { - const locationID = locationIdForLocality(locality.lid) + const taxonID = locationIdForLocality(locality.lid) + const lid = locality.lid + + const ageParentId = buildLocalityMeasurementId(lid, 'age') + const maxAgeId = buildLocalityMeasurementId(lid, 'max_age') + const minAgeId = buildLocalityMeasurementId(lid, 'min_age') + + const hasMaxAgeGroup = + isMeaningfulMeasurementValue(locality.max_age) || + isMeaningfulMeasurementValue(locality.bfa_max) || + isMeaningfulMeasurementValue(locality.bfa_max_abs) || + isMeaningfulMeasurementValue(locality.frac_max) + + const hasMinAgeGroup = + isMeaningfulMeasurementValue(locality.min_age) || + isMeaningfulMeasurementValue(locality.bfa_min) || + isMeaningfulMeasurementValue(locality.bfa_min_abs) || + isMeaningfulMeasurementValue(locality.frac_min) + + const hasAnyAgeBasis = hasMaxAgeGroup || hasMinAgeGroup + + const rows: LocalityMeasurementCsvRow[] = [] + + if (hasAnyAgeBasis) { + rows.push({ + taxonID, + measurementID: ageParentId, + parentMeasurementID: '', + measurementType: 'age range', + verbatimMeasurementType: 'age_range', + measurementValue: formatAgeRange(locality), + measurementUnit: 'Ma', + // TODO(#1150): Add authoritative definition for NOW locality age range semantics. + measurementMethod: '', + }) + } + + if (hasMaxAgeGroup) { + rows.push({ + taxonID, + measurementID: maxAgeId, + parentMeasurementID: hasAnyAgeBasis ? ageParentId : '', + measurementType: 'maximum age', + verbatimMeasurementType: 'max_age', + measurementValue: toMaybeMeaningfulNumber(locality.max_age), + measurementUnit: 'Ma', + // TODO(#1150): Add authoritative definition for NOW locality max_age semantics. 
+ measurementMethod: '', + }) + } - return LOCALITY_MEASUREMENT_MAPPINGS.flatMap(mapping => { + if (hasMinAgeGroup) { + rows.push({ + taxonID, + measurementID: minAgeId, + parentMeasurementID: hasAnyAgeBasis ? ageParentId : '', + measurementType: 'minimum age', + verbatimMeasurementType: 'min_age', + measurementValue: toMaybeMeaningfulNumber(locality.min_age), + measurementUnit: 'Ma', + // TODO(#1150): Add authoritative definition for NOW locality min_age semantics. + measurementMethod: '', + }) + } + + const coreRows = LOCALITY_MEASUREMENT_MAPPINGS.flatMap(mapping => { const rawValue = locality[mapping.field] if (!isMeaningfulMeasurementValue(rawValue)) return [] const measurementValue = toDwcString(rawValue).trim() if (!measurementValue) return [] + const verbatimMeasurementType = mapping.field.toString() + + const parentMeasurementID = (() => { + if (verbatimMeasurementType === 'bfa_max') return hasMaxAgeGroup ? maxAgeId : '' + if (verbatimMeasurementType === 'bfa_max_abs') return hasMaxAgeGroup ? maxAgeId : '' + if (verbatimMeasurementType === 'frac_max') return hasMaxAgeGroup ? maxAgeId : '' + + if (verbatimMeasurementType === 'bfa_min') return hasMinAgeGroup ? minAgeId : '' + if (verbatimMeasurementType === 'bfa_min_abs') return hasMinAgeGroup ? minAgeId : '' + if (verbatimMeasurementType === 'frac_min') return hasMinAgeGroup ? minAgeId : '' + + return '' + })() + + const measurementID = buildLocalityMeasurementId(lid, verbatimMeasurementType) + return [ { - locationID, - measurementID: `NOW:LOC:${locality.lid}:${mapping.field.toString()}`, + taxonID, + measurementID, + parentMeasurementID, measurementType: mapping.measurementType, - verbatimMeasurementType: mapping.field.toString(), + verbatimMeasurementType, measurementValue, measurementUnit: mapping.measurementUnit, measurementMethod: mapping.measurementMethod, - // TODO(#1150): Decide whether any locality measurementRemarks should be emitted and from which columns. 
- measurementRemarks: '', }, ] }) + + rows.push(...coreRows) + + const localitySynonyms = locality.now_syn_loc + .map(row => row.synonym) + .filter((syn): syn is string => isMeaningfulString(syn)) + .map(syn => syn.trim()) + + if (localitySynonyms.length) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'synonyms'), + parentMeasurementID: '', + measurementType: 'synonyms', + verbatimMeasurementType: 'synonym', + measurementValue: localitySynonyms.join('|'), + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + const lithologyValue = concatMeaningful([locality.rock_type, locality.rt_adj, locality.lith_comm]) + if (lithologyValue) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'lithology'), + parentMeasurementID: '', + measurementType: 'lithology', + verbatimMeasurementType: 'rock_type|rt_adj|lith_comm', + measurementValue: lithologyValue, + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + const depositionalContextValue = concatMeaningful([ + locality.depo_context1, + locality.depo_context2, + locality.depo_context3, + locality.depo_context4, + locality.depo_comm, + ]) + if (depositionalContextValue) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'depositional_context'), + parentMeasurementID: '', + measurementType: 'depositional context', + verbatimMeasurementType: 'depo_context1|depo_context2|depo_context3|depo_context4|depo_comm', + measurementValue: depositionalContextValue, + measurementUnit: '', + // TODO(#1150): Add field description. 
+ measurementMethod: '', + }) + } + + const sedimentaryEnvironmentValue = concatMeaningful([ + locality.sed_env_1, + locality.sed_env_2, + locality.event_circum, + locality.se_comm, + ]) + if (sedimentaryEnvironmentValue) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'sedimentary_environment'), + parentMeasurementID: '', + measurementType: 'sedimentary environment', + verbatimMeasurementType: 'sed_env_1|sed_env_2|event_circum|se_comm', + measurementValue: sedimentaryEnvironmentValue, + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + const sedimentaryStructures = locality.now_ss + .map(row => row.sed_struct) + .filter(value => isMeaningfulString(value)) + .map(value => value.trim()) + + const taphonomicDetailValue = concatMeaningful([ + locality.assem_fm, + locality.transport, + locality.trans_mod, + locality.weath_trmp, + locality.pt_conc, + locality.size_type, + locality.vert_pres, + locality.plant_pres, + locality.invert_pres, + locality.time_rep, + locality.taph_comm, + ]) + + const sedimentaryStructureAndTaphonomicDetailValue = [ + ...sedimentaryStructures, + ...(taphonomicDetailValue ? [taphonomicDetailValue] : []), + ].join('|') + + if (sedimentaryStructureAndTaphonomicDetailValue) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'sedimentary_structure_taphonomic_detail'), + parentMeasurementID: '', + measurementType: 'sedimentary structure & taphonomic detail', + verbatimMeasurementType: + 'now_ss.sed_struct|assem_fm|transport|trans_mod|weath_trmp|pt_conc|size_type|vert_pres|plant_pres|invert_pres|time_rep|taph_comm', + measurementValue: sedimentaryStructureAndTaphonomicDetailValue, + measurementUnit: '', + // TODO(#1150): Add field description. 
+ measurementMethod: '', + }) + } + + return rows } const DWC_TERMS = { @@ -342,14 +645,14 @@ const DWC_TERMS = { }, measurement: { rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', - locationID: 'http://rs.tdwg.org/dwc/terms/locationID', + taxonID: 'http://rs.tdwg.org/dwc/terms/locationID', measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', + parentMeasurementID: 'http://rs.tdwg.org/dwc/terms/parentMeasurementID', measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', verbatimMeasurementType: 'http://rs.tdwg.org/dwc/terms/verbatimMeasurementType', measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', - measurementRemarks: 'http://rs.tdwg.org/dwc/terms/measurementRemarks', }, } as const @@ -479,16 +782,55 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { bed: true, bfa_max: true, bfa_min: true, + bfa_max_abs: true, + bfa_min_abs: true, + frac_max: true, + frac_min: true, max_age: true, min_age: true, date_meth: true, age_comm: true, + site_area: true, + appr_num_spm: true, + num_spm: true, + true_quant: true, + complete: true, + num_quad: true, + rock_type: true, + rt_adj: true, + lith_comm: true, + depo_context1: true, + depo_context2: true, + depo_context3: true, + depo_context4: true, + depo_comm: true, + sed_env_1: true, + sed_env_2: true, + event_circum: true, + se_comm: true, + assem_fm: true, + transport: true, + trans_mod: true, + weath_trmp: true, + pt_conc: true, + size_type: true, + vert_pres: true, + plant_pres: true, + invert_pres: true, + time_rep: true, + taph_comm: true, now_time_unit_now_loc_bfa_maxTonow_time_unit: { select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, }, now_time_unit_now_loc_bfa_minTonow_time_unit: { select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, }, + now_syn_loc: { + select: { 
synonym: true }, + }, + now_ss: { + select: { sed_struct: true }, + }, }, }) diff --git a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts index 537ad506c..23b0685c5 100644 --- a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts +++ b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts @@ -30,10 +30,45 @@ describe('DwC-A locality export mapping', () => { bed: 'Test bed', bfa_max: 'BFA_MAX', bfa_min: 'BFA_MIN', + bfa_max_abs: null, + bfa_min_abs: null, + frac_max: null, + frac_min: null, max_age: 12.3, min_age: 4.5, date_meth: 'radioisotope', age_comm: 'Age comment', + site_area: null, + appr_num_spm: null, + num_spm: null, + true_quant: null, + complete: null, + num_quad: null, + rock_type: null, + rt_adj: null, + lith_comm: null, + depo_context1: null, + depo_context2: null, + depo_context3: null, + depo_context4: null, + depo_comm: null, + sed_env_1: null, + sed_env_2: null, + event_circum: null, + se_comm: null, + assem_fm: null, + transport: null, + trans_mod: null, + weath_trmp: null, + pt_conc: null, + size_type: null, + vert_pres: null, + plant_pres: null, + invert_pres: null, + time_rep: null, + taph_comm: null, + now_syn_loc: [], + now_ss: [], now_time_unit_now_loc_bfa_maxTonow_time_unit: null, now_time_unit_now_loc_bfa_minTonow_time_unit: null, } as const diff --git a/documentation/functionality/dwc_export_localities.md b/documentation/functionality/dwc_export_localities.md index 66c8486c0..6ca1b4356 100644 --- a/documentation/functionality/dwc_export_localities.md +++ b/documentation/functionality/dwc_export_localities.md @@ -47,11 +47,16 @@ v1 columns: Extension rowType: `http://rs.tdwg.org/dwc/terms/MeasurementOrFact` -v1 emits a small set of locality/time-related measurements. Each row has: +`measurementorfact.csv` uses the same column headings as the taxa export (for consistency), but the `taxonID` column contains the locality `locationID` value (`NOW:LOC:`). 
-- `locationID` = `NOW:LOC:` (core id) -- `measurementID` = `NOW:LOC::` -- `verbatimMeasurementType` = original DB field name +Each emitted row has: + +- `taxonID` = `NOW:LOC:` (core id for Location) +- `measurementID` = `NOW:LOC::` (or calculated group id) +- `parentMeasurementID` is used for the age hierarchy: + - `NOW:LOC::age` (parent) → `max_age` / `min_age` + - `bfa_*`, `*_abs`, `frac_*` link to the relevant `max_age` / `min_age` +- `verbatimMeasurementType` = original DB field name (or `|`-joined source field list for concatenated group rows) ## Admin-only From 9459dc805844f6b5f61bc04c6e933e1bcdd98769 Mon Sep 17 00:00:00 2001 From: Kari Lintulaakso Date: Mon, 27 Apr 2026 15:41:24 +0300 Subject: [PATCH 14/23] DwC-A localities: only concat requested fields --- .../services/dwcArchiveExportLocalities.ts | 140 +++++++++++------- .../functionality/dwc_export_localities.md | 7 +- 2 files changed, 90 insertions(+), 57 deletions(-) diff --git a/backend/src/services/dwcArchiveExportLocalities.ts b/backend/src/services/dwcArchiveExportLocalities.ts index 8beb99d82..ab826c504 100644 --- a/backend/src/services/dwcArchiveExportLocalities.ts +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -383,6 +383,90 @@ const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ // TODO(#1150): Add controlled vocabulary / description. measurementMethod: '', }, + { + field: 'rock_type', + measurementType: 'rock type', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'rt_adj', + measurementType: 'rock type adjective', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'lith_comm', + measurementType: 'lithology comment', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }, + { + field: 'sed_env_1', + measurementType: 'sedimentary environment 1', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'sed_env_2', + measurementType: 'sedimentary environment 2', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'event_circum', + measurementType: 'event circumstances', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'se_comm', + measurementType: 'sedimentary environment comment', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_context1', + measurementType: 'depositional context 1', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_context2', + measurementType: 'depositional context 2', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_context3', + measurementType: 'depositional context 3', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_context4', + measurementType: 'depositional context 4', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'depo_comm', + measurementType: 'depositional context comment', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }, { field: 'chron', measurementType: 'chron', @@ -516,62 +600,6 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local }) } - const lithologyValue = concatMeaningful([locality.rock_type, locality.rt_adj, locality.lith_comm]) - if (lithologyValue) { - rows.push({ - taxonID, - measurementID: buildLocalityMeasurementId(lid, 'lithology'), - parentMeasurementID: '', - measurementType: 'lithology', - verbatimMeasurementType: 'rock_type|rt_adj|lith_comm', - measurementValue: lithologyValue, - measurementUnit: '', - // TODO(#1150): Add field description. - measurementMethod: '', - }) - } - - const depositionalContextValue = concatMeaningful([ - locality.depo_context1, - locality.depo_context2, - locality.depo_context3, - locality.depo_context4, - locality.depo_comm, - ]) - if (depositionalContextValue) { - rows.push({ - taxonID, - measurementID: buildLocalityMeasurementId(lid, 'depositional_context'), - parentMeasurementID: '', - measurementType: 'depositional context', - verbatimMeasurementType: 'depo_context1|depo_context2|depo_context3|depo_context4|depo_comm', - measurementValue: depositionalContextValue, - measurementUnit: '', - // TODO(#1150): Add field description. - measurementMethod: '', - }) - } - - const sedimentaryEnvironmentValue = concatMeaningful([ - locality.sed_env_1, - locality.sed_env_2, - locality.event_circum, - locality.se_comm, - ]) - if (sedimentaryEnvironmentValue) { - rows.push({ - taxonID, - measurementID: buildLocalityMeasurementId(lid, 'sedimentary_environment'), - parentMeasurementID: '', - measurementType: 'sedimentary environment', - verbatimMeasurementType: 'sed_env_1|sed_env_2|event_circum|se_comm', - measurementValue: sedimentaryEnvironmentValue, - measurementUnit: '', - // TODO(#1150): Add field description. 
- measurementMethod: '', - }) - } - const sedimentaryStructures = locality.now_ss .map(row => row.sed_struct) .filter(value => isMeaningfulString(value)) diff --git a/documentation/functionality/dwc_export_localities.md b/documentation/functionality/dwc_export_localities.md index 6ca1b4356..97148efd7 100644 --- a/documentation/functionality/dwc_export_localities.md +++ b/documentation/functionality/dwc_export_localities.md @@ -56,7 +56,12 @@ Each emitted row has: - `parentMeasurementID` is used for the age hierarchy: - `NOW:LOC::age` (parent) → `max_age` / `min_age` - `bfa_*`, `*_abs`, `frac_*` link to the relevant `max_age` / `min_age` -- `verbatimMeasurementType` = original DB field name (or `|`-joined source field list for concatenated group rows) +- `verbatimMeasurementType` = original DB field name + +Concatenation rules (v1): + +- Locality synonyms are concatenated with `|` into a single `synonyms` measurement row. +- Sedimentary Structure & Taphonomic Detail values are concatenated with `|` into a single measurement row. 
## Admin-only From 101fac67b7d34fb80af63b71d5921cbf7ac72d40 Mon Sep 17 00:00:00 2001 From: karilint Date: Mon, 27 Apr 2026 15:57:35 +0300 Subject: [PATCH 15/23] DwC-A localities: export climate/ecometrics/archaeology facts --- .../dwcArchiveExportLocalities.test.ts | 65 +++ .../services/dwcArchiveExportLocalities.ts | 534 ++++++++++++++++-- .../dwcArchiveExportLocalities.test.ts | 43 ++ .../functionality/dwc_export_localities.md | 7 +- 4 files changed, 615 insertions(+), 34 deletions(-) create mode 100644 backend/src/api-tests/locality/dwcArchiveExportLocalities.test.ts diff --git a/backend/src/api-tests/locality/dwcArchiveExportLocalities.test.ts b/backend/src/api-tests/locality/dwcArchiveExportLocalities.test.ts new file mode 100644 index 000000000..b030f7340 --- /dev/null +++ b/backend/src/api-tests/locality/dwcArchiveExportLocalities.test.ts @@ -0,0 +1,65 @@ +import { afterAll, beforeAll, describe, expect, it } from '@jest/globals' +import request from 'supertest' +import JSZip from 'jszip' +import type { Response } from 'superagent' +import app from '../../app' +import { pool } from '../../utils/db' +import { noPermError, resetDatabase, resetDatabaseTimeout, send } from '../utils' + +type ResponseStream = { + on: (event: 'data', handler: (chunk: Buffer) => void) => void +} & { + on: (event: 'end', handler: () => void) => void +} + +const parseBinary = (res: Response, callback: (err: Error | null, body: Buffer) => void) => { + const data: Buffer[] = [] + const stream = res as unknown as ResponseStream + stream.on('data', chunk => data.push(chunk)) + stream.on('end', () => { + callback(null, Buffer.concat(data)) + }) +} + +describe('DwC-A locality export (admin-only)', () => { + beforeAll(async () => { + await resetDatabase() + }, resetDatabaseTimeout) + + afterAll(async () => { + await pool.end() + }) + + it('returns a ZIP archive for admins', async () => { + const loginResult = await send<{ token: string }>('user/login', 'POST', { username: 'testSu', password: 
'test' }) + expect(loginResult.status).toEqual(200) + + const result = await request(app) + .get('/locality/export/dwc-archive') + .set('authorization', `bearer ${loginResult.body.token}`) + .buffer(true) + .parse(parseBinary) + + expect(result.status).toEqual(200) + expect(result.headers['content-type']).toMatch(/application\/zip/i) + expect(result.headers['content-disposition']).toMatch(/attachment;\s*filename="now_dwc_localities_test_export_/i) + + const zip = await JSZip.loadAsync(result.body as unknown as Buffer) + expect(zip.file('location.csv')).toBeTruthy() + expect(zip.file('geologicalcontext.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + + const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') + expect(measurementCsv).toContain('"measurementID"') + expect(measurementCsv).toContain('"parentMeasurementID"') + expect(measurementCsv).toContain('"verbatimMeasurementType"') + }) + + it('rejects non-admin requests', async () => { + const result = await request(app).get('/locality/export/dwc-archive') + expect(result.status).toEqual(403) + expect(result.body).toEqual(noPermError) + }) +}) diff --git a/backend/src/services/dwcArchiveExportLocalities.ts b/backend/src/services/dwcArchiveExportLocalities.ts index ab826c504..61eece810 100644 --- a/backend/src/services/dwcArchiveExportLocalities.ts +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -19,6 +19,11 @@ const toDwcString = (value: unknown): string => { if (typeof value === 'boolean') return value ? 
'true' : 'false' if (typeof value === 'string') return value if (typeof value === 'object') { + if (value instanceof Date) return value.toISOString() + if (typeof (value as { toString?: unknown }).toString === 'function') { + const asString = (value as { toString: () => string }).toString() + if (asString && asString !== '[object Object]') return asString + } try { return JSON.stringify(value) ?? '' } catch { @@ -171,11 +176,51 @@ type LocalityForExport = Pick< | 'invert_pres' | 'time_rep' | 'taph_comm' + | 'climate_type' + | 'biome' + | 'v_ht' + | 'v_struct' + | 'v_envi_det' + | 'disturb' + | 'nutrients' + | 'water' + | 'seasonality' + | 'seas_intens' + | 'pri_prod' + | 'moisture' + | 'temperature' + | 'estimate_precip' + | 'estimate_temp' + | 'estimate_npp' + | 'pers_woody_cover' + | 'pers_pollen_ap' + | 'pers_pollen_nap' + | 'pers_pollen_other' + | 'stone_tool_cut_marks_on_bones' + | 'bipedal_footprints' + | 'stone_tool_technology' + | 'technological_mode_1' + | 'technological_mode_2' + | 'technological_mode_3' + | 'cultural_stage_1' + | 'cultural_stage_2' + | 'cultural_stage_3' + | 'regional_culture_1' + | 'regional_culture_2' + | 'regional_culture_3' > & { now_time_unit_now_loc_bfa_maxTonow_time_unit: TimeUnitForLocalityExport | null now_time_unit_now_loc_bfa_minTonow_time_unit: TimeUnitForLocalityExport | null now_syn_loc: ReadonlyArray<{ synonym: string | null }> now_ss: ReadonlyArray<{ sed_struct: string }> + now_coll_meth: ReadonlyArray<{ coll_meth: string }> + now_ls: ReadonlyArray<{ + com_species: { + order_name: string + tht: string | null + genus_name: string | null + } + }> } const locationIdForLocality = (lid: number): string => `NOW:LOC:${lid}` @@ -267,14 +312,10 @@ const isMeaningfulMeasurementValue = (value: unknown): boolean => { if (value === null || value === undefined) return false if (typeof value === 'string') return isMeaningfulString(value) if (typeof value === 'number') return Number.isFinite(value) && value !== 0 + if (typeof value === 
'boolean') return value return true } -const concatMeaningful = (values: Array): string => { - const parts = values.map(toMaybeMeaningful).filter(Boolean) - return parts.join('|') -} - const buildLocalityMeasurementId = (lid: number, kind: string): string => `NOW:LOC:${lid}:${kind}` const formatAgeRange = (locality: LocalityForExport): string => { @@ -286,11 +327,80 @@ const formatAgeRange = (locality: LocalityForExport): string => { return '' } +const calculateMeanHypsodontyForExport = (locality: Pick): number => { + const relevantOrderNames = [ + 'Perissodactyla', + 'Artiodactyla', + 'Primates', + 'Proboscidea', + 'Hyracoidea', + 'Dinocerata', + 'Embrithopoda', + 'Notoungulata', + 'Astrapotheria', + 'Pyrotheria', + 'Litopterna', + 'Condylarthra', + 'Pantodonta', + ] + + const thtToValue = { + bra: 1.0, + mes: 2.0, + hyp: 3.0, + hys: 3.0, + none: 0.0, + } as Record + + const values = locality.now_ls + .map(row => row.com_species) + .filter(species => relevantOrderNames.includes(species.order_name)) + .map(species => thtToValue[species.tht ?? 'none']) + + const sum = values.reduce((acc, cur) => acc + cur, 0.0) + const mean = values.length > 0 ? 
sum / values.length : 0.0 + return parseFloat((Math.floor(mean * 100) / 100).toFixed(2)) +} + +const hasHomininSkeletalRemainsForExport = (locality: Pick): boolean => { + const hominins = [ + 'sahelanthropus', + 'orrorin', + 'ardipithecus', + 'kenyanthropus', + 'australopithecus', + 'paranthropus', + 'homo', + ] + + return locality.now_ls.some(({ com_species }) => { + const genusName = com_species.genus_name + if (!genusName) return false + return hominins.includes(genusName.toLowerCase()) + }) +} + +const isNumberMeaningful = (value: number | null | undefined, { allowZero }: { allowZero: boolean }): boolean => { + if (value === null || value === undefined) return false + if (!Number.isFinite(value)) return false + if (!allowZero && value === 0) return false + return true +} + +const toMaybeMeaningfulNumberWithZeroOption = ( + value: number | null | undefined, + { allowZero }: { allowZero: boolean } +): string => { + if (!isNumberMeaningful(value, { allowZero })) return '' + return value!.toString() +} + const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ field: keyof LocalityForExport measurementType: string measurementUnit: string measurementMethod: string + allowZero?: boolean }> = [ { field: 'bfa_max', @@ -474,6 +584,287 @@ const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ // TODO(#1150): Add field description. measurementMethod: '', }, + { + field: 'assem_fm', + measurementType: 'Assemblage Formation', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'transport', + measurementType: 'Transport', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'trans_mod', + measurementType: 'Abrasion', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }, + { + field: 'weath_trmp', + measurementType: 'Weathering / Trampling', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'pt_conc', + measurementType: 'Part Concentration', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'size_type', + measurementType: 'Assemblage Component Size', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'time_rep', + measurementType: 'Time Represented (years)', + measurementUnit: '', + // TODO(#1150): Add unit and definition (years bins). + measurementMethod: '', + }, + { + field: 'vert_pres', + measurementType: 'Vertebrate Preservation', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary. + measurementMethod: '', + }, + { + field: 'taph_comm', + measurementType: 'Taphonomy comment', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'climate_type', + measurementType: 'Climate Type', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'temperature', + measurementType: 'Temperature', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'moisture', + measurementType: 'Moisture', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'disturb', + measurementType: 'Agent(s) of Disturbance', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'v_envi_det', + measurementType: 'Environment & Vegetation Detail', + measurementUnit: '', + // TODO(#1150): Add field description. 
+ measurementMethod: '', + }, + { + field: 'seasonality', + measurementType: 'Seasonality', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'seas_intens', + measurementType: 'Seasonality Intensity', + measurementUnit: '', + // TODO(#1150): Add unit and definition. + measurementMethod: '', + }, + { + field: 'biome', + measurementType: 'Biome', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'v_ht', + measurementType: 'Vegetation Height', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'v_struct', + measurementType: 'Vegetation Structure', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'pri_prod', + measurementType: 'Primary Productivity Level', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'nutrients', + measurementType: 'Nutrient Availability', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'water', + measurementType: 'Water Availability', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. 
+ measurementMethod: '', + }, + { + field: 'pers_pollen_ap', + measurementType: 'Arboreal pollen (AP%)', + measurementUnit: '%', + measurementMethod: '', + allowZero: true, + }, + { + field: 'pers_pollen_nap', + measurementType: 'Non-arboreal pollen (NAP%)', + measurementUnit: '%', + measurementMethod: '', + allowZero: true, + }, + { + field: 'pers_pollen_other', + measurementType: 'Other pollen (OP%)', + measurementUnit: '%', + measurementMethod: '', + allowZero: true, + }, + { + field: 'estimate_precip', + measurementType: 'Estimate of annual precipitation (mm)', + measurementUnit: 'mm', + measurementMethod: '', + allowZero: true, + }, + { + field: 'estimate_temp', + measurementType: 'Estimate of mean annual temperature (°C)', + measurementUnit: '°C', + measurementMethod: '', + allowZero: true, + }, + { + field: 'estimate_npp', + measurementType: 'Estimate of net primary productivity (g/m2/yr)', + measurementUnit: 'g/m2/yr', + measurementMethod: '', + allowZero: true, + }, + { + field: 'pers_woody_cover', + measurementType: 'Woody cover percentage', + measurementUnit: '%', + measurementMethod: '', + allowZero: true, + }, + { + field: 'stone_tool_cut_marks_on_bones', + measurementType: 'Stone tool cut marks on bones', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'bipedal_footprints', + measurementType: 'Bipedal footprints', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, + { + field: 'stone_tool_technology', + measurementType: 'Stone tool technology', + measurementUnit: '', + // TODO(#1150): Add field description. 
+ measurementMethod: '', + }, + { + field: 'technological_mode_1', + measurementType: 'Technological mode 1', + measurementUnit: '', + measurementMethod: '', + allowZero: true, + }, + { + field: 'technological_mode_2', + measurementType: 'Technological mode 2', + measurementUnit: '', + measurementMethod: '', + allowZero: true, + }, + { + field: 'technological_mode_3', + measurementType: 'Technological mode 3', + measurementUnit: '', + measurementMethod: '', + allowZero: true, + }, + { + field: 'cultural_stage_1', + measurementType: 'Cultural stage 1', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'cultural_stage_2', + measurementType: 'Cultural stage 2', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'cultural_stage_3', + measurementType: 'Cultural stage 3', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'regional_culture_1', + measurementType: 'Regional culture 1', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'regional_culture_2', + measurementType: 'Regional culture 2', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'regional_culture_3', + measurementType: 'Regional culture 3', + measurementUnit: '', + measurementMethod: '', + }, ] export const mapLocalityToMeasurementRows = (locality: LocalityForExport): LocalityMeasurementCsvRow[] => { @@ -544,9 +935,15 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local const coreRows = LOCALITY_MEASUREMENT_MAPPINGS.flatMap(mapping => { const rawValue = locality[mapping.field] - if (!isMeaningfulMeasurementValue(rawValue)) return [] - const measurementValue = toDwcString(rawValue).trim() + const measurementValue = (() => { + if (typeof rawValue === 'number') { + return toMaybeMeaningfulNumberWithZeroOption(rawValue, { allowZero: mapping.allowZero ?? 
false }) + } + if (!isMeaningfulMeasurementValue(rawValue)) return '' + return toDwcString(rawValue).trim() + })() + if (!measurementValue) return [] const verbatimMeasurementType = mapping.field.toString() @@ -581,6 +978,55 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local rows.push(...coreRows) + const collectingMethods = locality.now_coll_meth + .map(method => method.coll_meth) + .filter(value => isMeaningfulString(value)) + .map(value => value.trim()) + + if (collectingMethods.length) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'collecting_methods'), + parentMeasurementID: '', + measurementType: 'Collecting Methods', + verbatimMeasurementType: 'now_coll_meth.coll_meth', + measurementValue: collectingMethods.join('|'), + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + const meanHypsodonty = calculateMeanHypsodontyForExport(locality) + if (isNumberMeaningful(meanHypsodonty, { allowZero: true })) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'mean_hypsodonty'), + parentMeasurementID: '', + measurementType: 'Mean hypsodonty', + verbatimMeasurementType: 'calculated_mean_hypsodonty', + measurementValue: meanHypsodonty.toString(), + measurementUnit: '', + // TODO(#1150): Document calculation provenance (see frontend shared calculations). + measurementMethod: '', + }) + } + + const homininSkeletalRemains = hasHomininSkeletalRemainsForExport(locality) + if (homininSkeletalRemains) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'hominin_skeletal_remains'), + parentMeasurementID: '', + measurementType: 'Hominin skeletal remains', + verbatimMeasurementType: 'calculated_hominin_skeletal_remains', + measurementValue: 'true', + measurementUnit: '', + // TODO(#1150): Document calculation provenance (see frontend shared calculations). 
+ measurementMethod: '', + }) + } + const localitySynonyms = locality.now_syn_loc .map(row => row.synonym) .filter((syn): syn is string => isMeaningfulString(syn)) @@ -605,36 +1051,16 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local .filter(value => isMeaningfulString(value)) .map(value => value.trim()) - const taphonomicDetailValue = concatMeaningful([ - locality.assem_fm, - locality.transport, - locality.trans_mod, - locality.weath_trmp, - locality.pt_conc, - locality.size_type, - locality.vert_pres, - locality.plant_pres, - locality.invert_pres, - locality.time_rep, - locality.taph_comm, - ]) - - const sedimentaryStructureAndTaphonomicDetailValue = [ - ...sedimentaryStructures, - ...(taphonomicDetailValue ? [taphonomicDetailValue] : []), - ].join('|') - - if (sedimentaryStructureAndTaphonomicDetailValue) { + if (sedimentaryStructures.length) { rows.push({ taxonID, - measurementID: buildLocalityMeasurementId(lid, 'sedimentary_structure_taphonomic_detail'), + measurementID: buildLocalityMeasurementId(lid, 'sedimentary_structures'), parentMeasurementID: '', - measurementType: 'sedimentary structure & taphonomic detail', - verbatimMeasurementType: - 'now_ss.sed_struct|assem_fm|transport|trans_mod|weath_trmp|pt_conc|size_type|vert_pres|plant_pres|invert_pres|time_rep|taph_comm', - measurementValue: sedimentaryStructureAndTaphonomicDetailValue, + measurementType: 'Sedimentary structures', + verbatimMeasurementType: 'now_ss.sed_struct', + measurementValue: sedimentaryStructures.join('|'), measurementUnit: '', - // TODO(#1150): Add field description. + // TODO(#1150): Add field description / controlled vocabulary. 
measurementMethod: '', }) } @@ -847,6 +1273,38 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { invert_pres: true, time_rep: true, taph_comm: true, + climate_type: true, + biome: true, + v_ht: true, + v_struct: true, + v_envi_det: true, + disturb: true, + nutrients: true, + water: true, + seasonality: true, + seas_intens: true, + pri_prod: true, + moisture: true, + temperature: true, + estimate_precip: true, + estimate_temp: true, + estimate_npp: true, + pers_woody_cover: true, + pers_pollen_ap: true, + pers_pollen_nap: true, + pers_pollen_other: true, + stone_tool_cut_marks_on_bones: true, + bipedal_footprints: true, + stone_tool_technology: true, + technological_mode_1: true, + technological_mode_2: true, + technological_mode_3: true, + cultural_stage_1: true, + cultural_stage_2: true, + cultural_stage_3: true, + regional_culture_1: true, + regional_culture_2: true, + regional_culture_3: true, now_time_unit_now_loc_bfa_maxTonow_time_unit: { select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, }, @@ -859,6 +1317,16 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { now_ss: { select: { sed_struct: true }, }, + now_coll_meth: { + select: { coll_meth: true }, + }, + now_ls: { + select: { + com_species: { + select: { order_name: true, tht: true, genus_name: true }, + }, + }, + }, }, }) diff --git a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts index 23b0685c5..4deb1056a 100644 --- a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts +++ b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts @@ -67,8 +67,42 @@ describe('DwC-A locality export mapping', () => { invert_pres: null, time_rep: null, taph_comm: null, + climate_type: null, + biome: null, + v_ht: null, + v_struct: null, + v_envi_det: null, + disturb: null, + nutrients: null, + water: null, + seasonality: null, + seas_intens: null, + pri_prod: null, + 
moisture: null, + temperature: null, + estimate_precip: null, + estimate_temp: null, + estimate_npp: null, + pers_woody_cover: null, + pers_pollen_ap: null, + pers_pollen_nap: null, + pers_pollen_other: null, + stone_tool_cut_marks_on_bones: false, + bipedal_footprints: false, + stone_tool_technology: false, + technological_mode_1: null, + technological_mode_2: null, + technological_mode_3: null, + cultural_stage_1: null, + cultural_stage_2: null, + cultural_stage_3: null, + regional_culture_1: null, + regional_culture_2: null, + regional_culture_3: null, now_syn_loc: [], now_ss: [], + now_coll_meth: [], + now_ls: [], now_time_unit_now_loc_bfa_maxTonow_time_unit: null, now_time_unit_now_loc_bfa_minTonow_time_unit: null, } as const @@ -112,6 +146,15 @@ describe('DwC-A locality export mapping', () => { expect(rows.some(r => r.verbatimMeasurementType === 'bfa_min')).toEqual(false) }) + it('concatenates collecting methods with |', () => { + const rows = mapLocalityToMeasurementRows({ + ...baseLocality, + now_coll_meth: [{ coll_meth: 'screenwash' }, { coll_meth: 'quarry' }], + }) + const collectingMethodsRow = rows.find(r => r.verbatimMeasurementType === 'now_coll_meth.coll_meth') + expect(collectingMethodsRow?.measurementValue).toEqual('screenwash|quarry') + }) + it('generates a ZIP archive with expected files', async () => { const zipBuffer = await buildDwcLocalityArchiveZipBufferFromLocalities([baseLocality]) const zip = await JSZip.loadAsync(zipBuffer) diff --git a/documentation/functionality/dwc_export_localities.md b/documentation/functionality/dwc_export_localities.md index 97148efd7..15a795efb 100644 --- a/documentation/functionality/dwc_export_localities.md +++ b/documentation/functionality/dwc_export_localities.md @@ -61,7 +61,12 @@ Each emitted row has: Concatenation rules (v1): - Locality synonyms are concatenated with `|` into a single `synonyms` measurement row. 
-- Sedimentary Structure & Taphonomic Detail values are concatenated with `|` into a single measurement row. +- Collecting methods (`now_coll_meth`) are concatenated with `|` into a single measurement row. +- Sedimentary structures (`now_ss`) are concatenated with `|` into a single measurement row. + +Field coverage (v1): + +- Fossil Assemblage, Taphonomy, Climate, Ecometrics, and Archaeology tab fields are exported as `MeasurementOrFact` rows when populated (plus selected calculated values such as mean hypsodonty). ## Admin-only From 30040eaeeb88efcffd45936c85a59a1160b30347 Mon Sep 17 00:00:00 2001 From: karilint Date: Tue, 28 Apr 2026 10:38:21 +0300 Subject: [PATCH 16/23] DwC-A localities: include remaining now_loc fields --- .../services/dwcArchiveExportLocalities.ts | 296 +++++++++++++++++- .../dwcArchiveExportLocalities.test.ts | 27 ++ 2 files changed, 322 insertions(+), 1 deletion(-) diff --git a/backend/src/services/dwcArchiveExportLocalities.ts b/backend/src/services/dwcArchiveExportLocalities.ts index 61eece810..d2c5b3102 100644 --- a/backend/src/services/dwcArchiveExportLocalities.ts +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -130,6 +130,7 @@ type LocalityForExport = Pick< | 'dec_long' | 'dms_lat' | 'dms_long' + | 'approx_coord' | 'altitude' | 'loc_detail' | 'chron' @@ -148,6 +149,8 @@ type LocalityForExport = Pick< | 'date_meth' | 'age_comm' | 'site_area' + | 'gen_loc' + | 'plate' | 'appr_num_spm' | 'num_spm' | 'true_quant' @@ -176,6 +179,12 @@ type LocalityForExport = Pick< | 'invert_pres' | 'time_rep' | 'taph_comm' + | 'tax_comm' + | 'datum_plane' + | 'tos' + | 'bos' + | 'loc_status' + | 'hominin_skeletal_remains' | 'climate_type' | 'biome' | 'v_ht' @@ -214,6 +223,29 @@ type LocalityForExport = Pick< now_syn_loc: ReadonlyArray<{ synonym: string | null }> now_ss: ReadonlyArray<{ sed_struct: string }> now_coll_meth: ReadonlyArray<{ coll_meth: string }> + now_mus: ReadonlyArray<{ + museum: string + com_mlist: { + institution: string + 
alt_int_name: string | null + city: string | null + state: string | null + country: string | null + } + }> + now_plr: ReadonlyArray<{ + now_proj: { + proj_code: string | null + proj_name: string | null + proj_status: string | null + } + }> + now_lau: ReadonlyArray<{ + lau_date: Date | null + lau_comment: string | null + com_people_now_lau_lau_coordinatorTocom_people: { full_name: string } + com_people_now_lau_lau_authorizerTocom_people: { full_name: string } + }> now_ls: ReadonlyArray<{ com_species: { order_name: string @@ -263,7 +295,11 @@ export const mapLocalityToLocationRow = (locality: LocalityForExport): LocationC verbatimLatitude: toMaybeMeaningful(locality.dms_lat), verbatimLongitude: toMaybeMeaningful(locality.dms_long), verbatimElevation: locality.altitude === null || locality.altitude === undefined ? '' : String(locality.altitude), - locationRemarks: [toMaybeMeaningful(locality.loc_detail), toMaybeMeaningful(locality.age_comm)] + locationRemarks: [ + toMaybeMeaningful(locality.loc_detail), + toMaybeMeaningful(locality.age_comm), + toMaybeMeaningful(locality.tax_comm), + ] .filter(Boolean) .join(' | '), } @@ -451,6 +487,27 @@ const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ // TODO(#1150): Add unit and definition for NOW locality site_area. measurementMethod: '', }, + { + field: 'approx_coord', + measurementType: 'approximate coordinates', + measurementUnit: '', + // TODO(#1150): Add definition (what qualifies as approximate). + measurementMethod: '', + }, + { + field: 'gen_loc', + measurementType: 'general locality', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary / definition. + measurementMethod: '', + }, + { + field: 'plate', + measurementType: 'tectonic plate', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary / definition. 
+ measurementMethod: '', + }, { field: 'appr_num_spm', measurementType: 'approximate number of specimens', @@ -584,6 +641,20 @@ const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ // TODO(#1150): Add field description. measurementMethod: '', }, + { + field: 'loc_status', + measurementType: 'locality status', + measurementUnit: '', + // TODO(#1150): Add definition. + measurementMethod: '', + }, + { + field: 'hominin_skeletal_remains', + measurementType: 'Hominin skeletal remains (field)', + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }, { field: 'assem_fm', measurementType: 'Assemblage Formation', @@ -875,6 +946,13 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local const maxAgeId = buildLocalityMeasurementId(lid, 'max_age') const minAgeId = buildLocalityMeasurementId(lid, 'min_age') + const stratigraphyParentId = buildLocalityMeasurementId(lid, 'stratigraphy') + const tosId = buildLocalityMeasurementId(lid, 'tos') + const bosId = buildLocalityMeasurementId(lid, 'bos') + const datumPlaneId = buildLocalityMeasurementId(lid, 'datum_plane') + + const lastUpdateParentId = buildLocalityMeasurementId(lid, 'last_update') + const hasMaxAgeGroup = isMeaningfulMeasurementValue(locality.max_age) || isMeaningfulMeasurementValue(locality.bfa_max) || @@ -891,6 +969,135 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local const rows: LocalityMeasurementCsvRow[] = [] + const hasAnyStratigraphy = + isMeaningfulMeasurementValue(locality.datum_plane) || + isMeaningfulMeasurementValue(locality.tos) || + isMeaningfulMeasurementValue(locality.bos) + + if (hasAnyStratigraphy) { + rows.push({ + taxonID, + measurementID: stratigraphyParentId, + parentMeasurementID: '', + measurementType: 'stratigraphic section', + verbatimMeasurementType: 'stratigraphy', + measurementValue: '', + measurementUnit: '', + // TODO(#1150): Add field description. 
+ measurementMethod: '', + }) + } + + if (isMeaningfulMeasurementValue(locality.datum_plane)) { + rows.push({ + taxonID, + measurementID: datumPlaneId, + parentMeasurementID: hasAnyStratigraphy ? stratigraphyParentId : '', + measurementType: 'datum plane', + verbatimMeasurementType: 'datum_plane', + measurementValue: toMaybeMeaningful(locality.datum_plane), + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + if (typeof locality.tos === 'number') { + const value = toMaybeMeaningfulNumberWithZeroOption(locality.tos, { allowZero: true }) + if (value) { + rows.push({ + taxonID, + measurementID: tosId, + parentMeasurementID: hasAnyStratigraphy ? stratigraphyParentId : '', + measurementType: 'top of section', + verbatimMeasurementType: 'tos', + measurementValue: value, + measurementUnit: '', + // TODO(#1150): Add unit and definition. + measurementMethod: '', + }) + } + } + + if (typeof locality.bos === 'number') { + const value = toMaybeMeaningfulNumberWithZeroOption(locality.bos, { allowZero: true }) + if (value) { + rows.push({ + taxonID, + measurementID: bosId, + parentMeasurementID: hasAnyStratigraphy ? stratigraphyParentId : '', + measurementType: 'bottom of section', + verbatimMeasurementType: 'bos', + measurementValue: value, + measurementUnit: '', + // TODO(#1150): Add unit and definition. 
+ measurementMethod: '', + }) + } + } + + const lastUpdate = locality.now_lau[0] + if (lastUpdate) { + rows.push({ + taxonID, + measurementID: lastUpdateParentId, + parentMeasurementID: '', + measurementType: 'last update', + verbatimMeasurementType: 'now_lau', + measurementValue: '', + measurementUnit: '', + measurementMethod: '', + }) + + if (lastUpdate.lau_date) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'last_update_date'), + parentMeasurementID: lastUpdateParentId, + measurementType: 'last update date', + verbatimMeasurementType: 'now_lau.lau_date', + measurementValue: lastUpdate.lau_date.toISOString().slice(0, 10), + measurementUnit: '', + measurementMethod: '', + }) + } + + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'last_update_coordinator'), + parentMeasurementID: lastUpdateParentId, + measurementType: 'last update coordinator', + verbatimMeasurementType: 'now_lau.lau_coordinator', + measurementValue: toMaybeMeaningful(lastUpdate.com_people_now_lau_lau_coordinatorTocom_people.full_name), + measurementUnit: '', + measurementMethod: '', + }) + + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'last_update_authorizer'), + parentMeasurementID: lastUpdateParentId, + measurementType: 'last update authorizer', + verbatimMeasurementType: 'now_lau.lau_authorizer', + measurementValue: toMaybeMeaningful(lastUpdate.com_people_now_lau_lau_authorizerTocom_people.full_name), + measurementUnit: '', + measurementMethod: '', + }) + + if (isMeaningfulString(lastUpdate.lau_comment)) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'last_update_comment'), + parentMeasurementID: lastUpdateParentId, + measurementType: 'last update comment', + verbatimMeasurementType: 'now_lau.lau_comment', + measurementValue: lastUpdate.lau_comment.trim(), + measurementUnit: '', + measurementMethod: '', + }) + } + } + if (hasAnyAgeBasis) { rows.push({ taxonID, @@ -978,6 +1185,63 @@ 
export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local rows.push(...coreRows) + const museums = locality.now_mus + .map(row => { + const institution = row.com_mlist.alt_int_name ?? row.com_mlist.institution + const locationBits = [ + toMaybeMeaningful(row.com_mlist.city), + toMaybeMeaningful(row.com_mlist.state), + toMaybeMeaningful(row.com_mlist.country), + ] + .filter(Boolean) + .join(', ') + return [row.museum, institution, locationBits ? `(${locationBits})` : ''].filter(Boolean).join(' ') + }) + .filter(isMeaningfulString) + .map(value => value.trim()) + + if (museums.length) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'museums'), + parentMeasurementID: '', + measurementType: 'Museums', + verbatimMeasurementType: 'now_mus.museum', + measurementValue: museums.join('|'), + measurementUnit: '', + // TODO(#1150): Add field description. + measurementMethod: '', + }) + } + + const projects = locality.now_plr + .map(row => row.now_proj) + .map(project => + [ + toMaybeMeaningful(project.proj_code), + toMaybeMeaningful(project.proj_name), + toMaybeMeaningful(project.proj_status), + ] + .filter(Boolean) + .join(' - ') + ) + .filter(isMeaningfulString) + .map(value => value.trim()) + + if (projects.length) { + rows.push({ + taxonID, + measurementID: buildLocalityMeasurementId(lid, 'projects'), + parentMeasurementID: '', + measurementType: 'Projects', + verbatimMeasurementType: 'now_plr.pid', + measurementValue: projects.join('|'), + measurementUnit: '', + // TODO(#1150): Add field description. 
+ measurementMethod: '', + }) + } + const collectingMethods = locality.now_coll_meth .map(method => method.coll_meth) .filter(value => isMeaningfulString(value)) @@ -1227,6 +1491,7 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { dec_long: true, dms_lat: true, dms_long: true, + approx_coord: true, altitude: true, loc_detail: true, chron: true, @@ -1245,6 +1510,8 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { date_meth: true, age_comm: true, site_area: true, + gen_loc: true, + plate: true, appr_num_spm: true, num_spm: true, true_quant: true, @@ -1273,6 +1540,12 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { invert_pres: true, time_rep: true, taph_comm: true, + tax_comm: true, + datum_plane: true, + tos: true, + bos: true, + loc_status: true, + hominin_skeletal_remains: true, climate_type: true, biome: true, v_ht: true, @@ -1320,6 +1593,27 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { now_coll_meth: { select: { coll_meth: true }, }, + now_mus: { + select: { + museum: true, + com_mlist: { select: { institution: true, alt_int_name: true, city: true, state: true, country: true } }, + }, + }, + now_plr: { + select: { + now_proj: { select: { proj_code: true, proj_name: true, proj_status: true } }, + }, + }, + now_lau: { + take: 1, + orderBy: { lau_date: 'desc' }, + select: { + lau_date: true, + lau_comment: true, + com_people_now_lau_lau_coordinatorTocom_people: { select: { full_name: true } }, + com_people_now_lau_lau_authorizerTocom_people: { select: { full_name: true } }, + }, + }, now_ls: { select: { com_species: { diff --git a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts index 4deb1056a..1144876ee 100644 --- a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts +++ b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts @@ -21,6 +21,7 @@ describe('DwC-A locality export mapping', 
() => { dec_long: 24.9384, dms_lat: null, dms_long: null, + approx_coord: null, altitude: 123, loc_detail: 'Some notes', chron: 'Test chron', @@ -39,6 +40,8 @@ describe('DwC-A locality export mapping', () => { date_meth: 'radioisotope', age_comm: 'Age comment', site_area: null, + gen_loc: null, + plate: null, appr_num_spm: null, num_spm: null, true_quant: null, @@ -67,6 +70,12 @@ describe('DwC-A locality export mapping', () => { invert_pres: null, time_rep: null, taph_comm: null, + tax_comm: null, + datum_plane: null, + tos: null, + bos: null, + loc_status: null, + hominin_skeletal_remains: false, climate_type: null, biome: null, v_ht: null, @@ -102,6 +111,9 @@ describe('DwC-A locality export mapping', () => { now_syn_loc: [], now_ss: [], now_coll_meth: [], + now_mus: [], + now_plr: [], + now_lau: [], now_ls: [], now_time_unit_now_loc_bfa_maxTonow_time_unit: null, now_time_unit_now_loc_bfa_minTonow_time_unit: null, @@ -155,6 +167,21 @@ describe('DwC-A locality export mapping', () => { expect(collectingMethodsRow?.measurementValue).toEqual('screenwash|quarry') }) + it('uses parentMeasurementID for stratigraphy fields', () => { + const rows = mapLocalityToMeasurementRows({ + ...baseLocality, + datum_plane: 'Datum', + tos: 0, + bos: 12.5, + }) + const parent = rows.find(r => r.verbatimMeasurementType === 'stratigraphy') + expect(parent).toBeTruthy() + const tosRow = rows.find(r => r.verbatimMeasurementType === 'tos') + const bosRow = rows.find(r => r.verbatimMeasurementType === 'bos') + expect(tosRow?.parentMeasurementID).toEqual(parent?.measurementID) + expect(bosRow?.parentMeasurementID).toEqual(parent?.measurementID) + }) + it('generates a ZIP archive with expected files', async () => { const zipBuffer = await buildDwcLocalityArchiveZipBufferFromLocalities([baseLocality]) const zip = await JSZip.loadAsync(zipBuffer) From ec4edd4699cf174dd82c3db3a0e58f6a5e4edb8e Mon Sep 17 00:00:00 2001 From: karilint Date: Tue, 28 Apr 2026 13:42:35 +0300 Subject: [PATCH 17/23] 
Adjust locality DwC export fields --- .../services/dwcArchiveExportLocalities.ts | 148 ++++-------------- .../dwcArchiveExportLocalities.test.ts | 33 +++- .../functionality/dwc_export_localities.md | 2 + 3 files changed, 61 insertions(+), 122 deletions(-) diff --git a/backend/src/services/dwcArchiveExportLocalities.ts b/backend/src/services/dwcArchiveExportLocalities.ts index d2c5b3102..5c257a4c6 100644 --- a/backend/src/services/dwcArchiveExportLocalities.ts +++ b/backend/src/services/dwcArchiveExportLocalities.ts @@ -233,19 +233,6 @@ type LocalityForExport = Pick< country: string | null } }> - now_plr: ReadonlyArray<{ - now_proj: { - proj_code: string | null - proj_name: string | null - proj_status: string | null - } - }> - now_lau: ReadonlyArray<{ - lau_date: Date | null - lau_comment: string | null - com_people_now_lau_lau_coordinatorTocom_people: { full_name: string } - com_people_now_lau_lau_authorizerTocom_people: { full_name: string } - }> now_ls: ReadonlyArray<{ com_species: { order_name: string @@ -550,6 +537,20 @@ const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ // TODO(#1150): Add controlled vocabulary / description. measurementMethod: '', }, + { + field: 'basin', + measurementType: 'basin', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, + { + field: 'subbasin', + measurementType: 'subbasin', + measurementUnit: '', + // TODO(#1150): Add field description / controlled vocabulary. + measurementMethod: '', + }, { field: 'rock_type', measurementType: 'rock type', @@ -711,6 +712,20 @@ const LOCALITY_MEASUREMENT_MAPPINGS: Array<{ // TODO(#1150): Add controlled vocabulary. measurementMethod: '', }, + { + field: 'plant_pres', + measurementType: 'Plant Preservation', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary. 
+ measurementMethod: '', + }, + { + field: 'invert_pres', + measurementType: 'Invertebrate Preservation', + measurementUnit: '', + // TODO(#1150): Add controlled vocabulary. + measurementMethod: '', + }, { field: 'taph_comm', measurementType: 'Taphonomy comment', @@ -951,8 +966,6 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local const bosId = buildLocalityMeasurementId(lid, 'bos') const datumPlaneId = buildLocalityMeasurementId(lid, 'datum_plane') - const lastUpdateParentId = buildLocalityMeasurementId(lid, 'last_update') - const hasMaxAgeGroup = isMeaningfulMeasurementValue(locality.max_age) || isMeaningfulMeasurementValue(locality.bfa_max) || @@ -1036,68 +1049,6 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local } } - const lastUpdate = locality.now_lau[0] - if (lastUpdate) { - rows.push({ - taxonID, - measurementID: lastUpdateParentId, - parentMeasurementID: '', - measurementType: 'last update', - verbatimMeasurementType: 'now_lau', - measurementValue: '', - measurementUnit: '', - measurementMethod: '', - }) - - if (lastUpdate.lau_date) { - rows.push({ - taxonID, - measurementID: buildLocalityMeasurementId(lid, 'last_update_date'), - parentMeasurementID: lastUpdateParentId, - measurementType: 'last update date', - verbatimMeasurementType: 'now_lau.lau_date', - measurementValue: lastUpdate.lau_date.toISOString().slice(0, 10), - measurementUnit: '', - measurementMethod: '', - }) - } - - rows.push({ - taxonID, - measurementID: buildLocalityMeasurementId(lid, 'last_update_coordinator'), - parentMeasurementID: lastUpdateParentId, - measurementType: 'last update coordinator', - verbatimMeasurementType: 'now_lau.lau_coordinator', - measurementValue: toMaybeMeaningful(lastUpdate.com_people_now_lau_lau_coordinatorTocom_people.full_name), - measurementUnit: '', - measurementMethod: '', - }) - - rows.push({ - taxonID, - measurementID: buildLocalityMeasurementId(lid, 'last_update_authorizer'), - 
parentMeasurementID: lastUpdateParentId, - measurementType: 'last update authorizer', - verbatimMeasurementType: 'now_lau.lau_authorizer', - measurementValue: toMaybeMeaningful(lastUpdate.com_people_now_lau_lau_authorizerTocom_people.full_name), - measurementUnit: '', - measurementMethod: '', - }) - - if (isMeaningfulString(lastUpdate.lau_comment)) { - rows.push({ - taxonID, - measurementID: buildLocalityMeasurementId(lid, 'last_update_comment'), - parentMeasurementID: lastUpdateParentId, - measurementType: 'last update comment', - verbatimMeasurementType: 'now_lau.lau_comment', - measurementValue: lastUpdate.lau_comment.trim(), - measurementUnit: '', - measurementMethod: '', - }) - } - } - if (hasAnyAgeBasis) { rows.push({ taxonID, @@ -1214,34 +1165,6 @@ export const mapLocalityToMeasurementRows = (locality: LocalityForExport): Local }) } - const projects = locality.now_plr - .map(row => row.now_proj) - .map(project => - [ - toMaybeMeaningful(project.proj_code), - toMaybeMeaningful(project.proj_name), - toMaybeMeaningful(project.proj_status), - ] - .filter(Boolean) - .join(' - ') - ) - .filter(isMeaningfulString) - .map(value => value.trim()) - - if (projects.length) { - rows.push({ - taxonID, - measurementID: buildLocalityMeasurementId(lid, 'projects'), - parentMeasurementID: '', - measurementType: 'Projects', - verbatimMeasurementType: 'now_plr.pid', - measurementValue: projects.join('|'), - measurementUnit: '', - // TODO(#1150): Add field description. 
- measurementMethod: '', - }) - } - const collectingMethods = locality.now_coll_meth .map(method => method.coll_meth) .filter(value => isMeaningfulString(value)) @@ -1599,21 +1522,6 @@ export const buildDwcLocalityArchiveZipBuffer = async (): Promise => { com_mlist: { select: { institution: true, alt_int_name: true, city: true, state: true, country: true } }, }, }, - now_plr: { - select: { - now_proj: { select: { proj_code: true, proj_name: true, proj_status: true } }, - }, - }, - now_lau: { - take: 1, - orderBy: { lau_date: 'desc' }, - select: { - lau_date: true, - lau_comment: true, - com_people_now_lau_lau_coordinatorTocom_people: { select: { full_name: true } }, - com_people_now_lau_lau_authorizerTocom_people: { select: { full_name: true } }, - }, - }, now_ls: { select: { com_species: { diff --git a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts index 1144876ee..5489229db 100644 --- a/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts +++ b/backend/src/unit-tests/dwcArchiveExportLocalities.test.ts @@ -112,8 +112,6 @@ describe('DwC-A locality export mapping', () => { now_ss: [], now_coll_meth: [], now_mus: [], - now_plr: [], - now_lau: [], now_ls: [], now_time_unit_now_loc_bfa_maxTonow_time_unit: null, now_time_unit_now_loc_bfa_minTonow_time_unit: null, @@ -158,6 +156,37 @@ describe('DwC-A locality export mapping', () => { expect(rows.some(r => r.verbatimMeasurementType === 'bfa_min')).toEqual(false) }) + it('exports requested locality fields and omits project and last-update rows', () => { + const rows = mapLocalityToMeasurementRows({ + ...baseLocality, + basin: 'Basin value', + subbasin: 'Subbasin value', + bipedal_footprints: true, + invert_pres: 'Invert preservation', + nutrients: 'High', + pers_pollen_ap: 10, + pers_pollen_nap: 20, + pers_pollen_other: 0, + plant_pres: 'Plant preservation', + }) + + expect(rows).toEqual( + expect.arrayContaining([ + expect.objectContaining({ 
verbatimMeasurementType: 'basin', measurementValue: 'Basin value' }), + expect.objectContaining({ verbatimMeasurementType: 'subbasin', measurementValue: 'Subbasin value' }), + expect.objectContaining({ verbatimMeasurementType: 'bipedal_footprints', measurementValue: 'true' }), + expect.objectContaining({ verbatimMeasurementType: 'invert_pres', measurementValue: 'Invert preservation' }), + expect.objectContaining({ verbatimMeasurementType: 'nutrients', measurementValue: 'High' }), + expect.objectContaining({ verbatimMeasurementType: 'pers_pollen_ap', measurementValue: '10' }), + expect.objectContaining({ verbatimMeasurementType: 'pers_pollen_nap', measurementValue: '20' }), + expect.objectContaining({ verbatimMeasurementType: 'pers_pollen_other', measurementValue: '0' }), + expect.objectContaining({ verbatimMeasurementType: 'plant_pres', measurementValue: 'Plant preservation' }), + ]) + ) + expect(rows.some(r => r.verbatimMeasurementType.startsWith('now_plr'))).toEqual(false) + expect(rows.some(r => r.verbatimMeasurementType.startsWith('now_lau'))).toEqual(false) + }) + it('concatenates collecting methods with |', () => { const rows = mapLocalityToMeasurementRows({ ...baseLocality, diff --git a/documentation/functionality/dwc_export_localities.md b/documentation/functionality/dwc_export_localities.md index 15a795efb..4606eadbb 100644 --- a/documentation/functionality/dwc_export_localities.md +++ b/documentation/functionality/dwc_export_localities.md @@ -67,6 +67,8 @@ Concatenation rules (v1): Field coverage (v1): - Fossil Assemblage, Taphonomy, Climate, Ecometrics, and Archaeology tab fields are exported as `MeasurementOrFact` rows when populated (plus selected calculated values such as mean hypsodonty). +- `basin` and `subbasin` are exported both as part of `higherGeography` and as explicit `MeasurementOrFact` rows. +- `now_plr` project links and `now_lau` last-update rows are intentionally excluded. 
## Admin-only From 51fca00a5ef715ba83d6d94572ebb6c376a5e56d Mon Sep 17 00:00:00 2001 From: karilint Date: Tue, 28 Apr 2026 17:56:47 +0300 Subject: [PATCH 18/23] Add DwC occurrence export --- .../dwcArchiveExportOccurrences.test.ts | 70 ++ backend/src/routes/occurrence.ts | 12 + .../services/dwcArchiveExportOccurrences.ts | 635 ++++++++++++++++++ .../dwcArchiveExportOccurrences.test.ts | 215 ++++++ .../functionality/dwc_export_occurrences.md | 51 ++ .../CrossSearch/CrossSearchTable.tsx | 2 + .../OccurrenceDwcExportMenuItem.tsx | 88 +++ 7 files changed, 1073 insertions(+) create mode 100644 backend/src/api-tests/occurrence/dwcArchiveExportOccurrences.test.ts create mode 100644 backend/src/services/dwcArchiveExportOccurrences.ts create mode 100644 backend/src/unit-tests/dwcArchiveExportOccurrences.test.ts create mode 100644 documentation/functionality/dwc_export_occurrences.md create mode 100644 frontend/src/components/Occurrence/OccurrenceDwcExportMenuItem.tsx diff --git a/backend/src/api-tests/occurrence/dwcArchiveExportOccurrences.test.ts b/backend/src/api-tests/occurrence/dwcArchiveExportOccurrences.test.ts new file mode 100644 index 000000000..4566ff0af --- /dev/null +++ b/backend/src/api-tests/occurrence/dwcArchiveExportOccurrences.test.ts @@ -0,0 +1,70 @@ +import { afterAll, beforeAll, describe, expect, it } from '@jest/globals' +import request from 'supertest' +import JSZip from 'jszip' +import type { Response } from 'superagent' +import app from '../../app' +import { pool } from '../../utils/db' +import { noPermError, resetDatabase, resetDatabaseTimeout, send } from '../utils' + +type ResponseStream = { + on: (event: 'data', handler: (chunk: Buffer) => void) => void +} & { + on: (event: 'end', handler: () => void) => void +} + +const parseBinary = (res: Response, callback: (err: Error | null, body: Buffer) => void) => { + const data: Buffer[] = [] + const stream = res as unknown as ResponseStream + stream.on('data', chunk => data.push(chunk)) + 
stream.on('end', () => { + callback(null, Buffer.concat(data)) + }) +} + +describe('DwC-A occurrence export (admin-only)', () => { + beforeAll(async () => { + await resetDatabase() + }, resetDatabaseTimeout) + + afterAll(async () => { + await pool.end() + }) + + it('returns a ZIP archive for admins', async () => { + const loginResult = await send<{ token: string }>('user/login', 'POST', { username: 'testSu', password: 'test' }) + expect(loginResult.status).toEqual(200) + + const result = await request(app) + .get('/occurrence/export/dwc-archive') + .set('authorization', `bearer ${loginResult.body.token}`) + .buffer(true) + .parse(parseBinary) + + expect(result.status).toEqual(200) + expect(result.headers['content-type']).toMatch(/application\/zip/i) + expect(result.headers['content-disposition']).toMatch(/attachment;\s*filename="now_dwc_occurrences_test_export_/i) + + const zip = await JSZip.loadAsync(result.body as unknown as Buffer) + expect(zip.file('location.csv')).toBeTruthy() + expect(zip.file('geologicalcontext.csv')).toBeTruthy() + expect(zip.file('taxon.csv')).toBeTruthy() + expect(zip.file('occurrence.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + + const occurrenceCsv = await zip.file('occurrence.csv')!.async('string') + expect(occurrenceCsv).toContain('"occurrenceID"') + expect(occurrenceCsv).toContain('"locationID"') + expect(occurrenceCsv).toContain('"taxonID"') + + const measurementCsv = await zip.file('measurementorfact.csv')!.async('string') + expect(measurementCsv).toContain('"verbatimMeasurementType"') + }) + + it('rejects non-admin requests', async () => { + const result = await request(app).get('/occurrence/export/dwc-archive') + expect(result.status).toEqual(403) + expect(result.body).toEqual(noPermError) + }) +}) diff --git a/backend/src/routes/occurrence.ts b/backend/src/routes/occurrence.ts index b1663d683..13383c5df 100644 
--- a/backend/src/routes/occurrence.ts +++ b/backend/src/routes/occurrence.ts @@ -2,9 +2,21 @@ import { Router } from 'express' import { getOccurrenceDetail, updateOccurrenceDetail } from '../controllers/occurrenceController' import { requireOneOf } from '../middlewares/authorizer' import { Role } from '../../../frontend/src/shared/types' +import { buildDwcOccurrenceArchiveZipBuffer } from '../services/dwcArchiveExportOccurrences' +import { currentDateAsString } from '../../../frontend/src/shared/currentDateAsString' const router = Router() +router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res) => { + const zipBuffer = await buildDwcOccurrenceArchiveZipBuffer() + res.setHeader('Content-Type', 'application/zip') + res.setHeader( + 'Content-Disposition', + `attachment; filename="now_dwc_occurrences_test_export_${currentDateAsString()}.zip"` + ) + res.send(zipBuffer) +}) + router.get('/:lid/:speciesId', getOccurrenceDetail) router.put( '/:lid/:speciesId', diff --git a/backend/src/services/dwcArchiveExportOccurrences.ts b/backend/src/services/dwcArchiveExportOccurrences.ts new file mode 100644 index 000000000..186a69ce5 --- /dev/null +++ b/backend/src/services/dwcArchiveExportOccurrences.ts @@ -0,0 +1,635 @@ +import Prisma from '../../prisma/generated/now_test_client' +import { format } from 'fast-csv' +import { Writable } from 'stream' +import JSZip from 'jszip' +import { + GEOLOGICAL_CONTEXT_HEADERS, + LOCATION_HEADERS, + mapLocalityToGeologicalContextRow, + mapLocalityToLocationRow, +} from './dwcArchiveExportLocalities' +import { MEASUREMENT_HEADERS, TAXON_HEADERS, mapSpeciesToTaxonRow, type MeasurementCsvRow } from './dwcArchiveExport' + +const writeCsvString = async (headers: string[], rows: Array<Record<string, string>>): Promise<string> => { + if (rows.length === 0) { + return `${headers.map(header => `"${header.replace(/"/g, '""')}"`).join(',')}\n` + } + + return await new Promise<string>((resolve, reject) => { + let output = '' + const csvStream = format({ + delimiter:
',', + headers, + quoteColumns: true, + quoteHeaders: true, + includeEndRowDelimiter: true, + }) + + const sink = new Writable({ + write(chunk: Buffer | string, _encoding: BufferEncoding, callback: (error?: Error | null) => void) { + output += typeof chunk === 'string' ? chunk : chunk.toString('utf8') + callback() + }, + }) + + sink.on('finish', () => resolve(output)) + sink.on('error', reject) + csvStream.on('error', reject) + + csvStream.pipe(sink) + for (const row of rows) { + csvStream.write(row) + } + csvStream.end() + }) +} + +const isMeaningfulString = (value: unknown): value is string => { + if (typeof value !== 'string') return false + const trimmed = value.trim() + if (!trimmed) return false + if (trimmed === '-') return false + return true +} + +const toDwcString = (value: unknown): string => { + if (value === null || value === undefined) return '' + if (typeof value === 'bigint') return value.toString() + if (typeof value === 'number') return Number.isFinite(value) ? value.toString() : '' + if (typeof value === 'boolean') return value ? 'true' : 'false' + if (typeof value === 'string') return value + if (typeof value === 'object' && typeof (value as { toString?: unknown }).toString === 'function') { + const asString = (value as { toString: () => string }).toString() + return asString === '[object Object]' ? '' : asString + } + return '' +} + +const toMaybeMeaningful = (value: string | null | undefined): string => (isMeaningfulString(value) ? 
value.trim() : '') + +const occurrenceIdForRow = (lid: number, speciesId: number): string => `NOW:OCC:${lid}:${speciesId}` + +const taxonIdForSpecies = (speciesId: number): string => `NOW:${speciesId}` + +export const OCCURRENCE_HEADERS = [ + 'occurrenceID', + 'locationID', + 'taxonID', + 'scientificName', + 'occurrenceStatus', + 'organismQuantity', + 'organismQuantityType', + 'identificationQualifier', + 'occurrenceRemarks', +] as const + +export type OccurrenceCsvHeader = (typeof OCCURRENCE_HEADERS)[number] +export type OccurrenceCsvRow = Record + +type LocalityForOccurrenceExport = Parameters[0] +type SpeciesForOccurrenceExport = Parameters[0] + +type OccurrenceForExport = Pick< + Prisma.now_ls, + | 'lid' + | 'species_id' + | 'nis' + | 'pct' + | 'quad' + | 'mni' + | 'qua' + | 'id_status' + | 'orig_entry' + | 'source_name' + | 'body_mass' + | 'mesowear' + | 'mw_or_high' + | 'mw_or_low' + | 'mw_cs_sharp' + | 'mw_cs_round' + | 'mw_cs_blunt' + | 'mw_scale_min' + | 'mw_scale_max' + | 'mw_value' + | 'microwear' + | 'dc13_mean' + | 'dc13_n' + | 'dc13_max' + | 'dc13_min' + | 'dc13_stdev' + | 'do18_mean' + | 'do18_n' + | 'do18_max' + | 'do18_min' + | 'do18_stdev' +> & { + now_loc: LocalityForOccurrenceExport + com_species: SpeciesForOccurrenceExport +} + +const scientificNameForOccurrence = (species: SpeciesForOccurrenceExport): string => { + const nameParts = [ + toMaybeMeaningful(species.genus_name), + toMaybeMeaningful(species.species_name), + toMaybeMeaningful(species.unique_identifier), + ].filter(Boolean) + const authorship = toMaybeMeaningful(species.sp_author) + return [nameParts.join(' '), authorship].filter(Boolean).join(' ').trim() +} + +const occurrenceQuantity = ( + occurrence: OccurrenceForExport +): Pick => { + if (occurrence.mni !== null) { + return { organismQuantity: occurrence.mni.toString(), organismQuantityType: 'minimum number of individuals' } + } + if (occurrence.nis !== null) { + return { organismQuantity: occurrence.nis.toString(), 
organismQuantityType: 'number of identified specimens' } + } + if (occurrence.pct !== null) { + return { organismQuantity: occurrence.pct.toString(), organismQuantityType: 'percentage' } + } + if (occurrence.quad !== null) { + return { organismQuantity: occurrence.quad.toString(), organismQuantityType: 'quadrat count' } + } + return { organismQuantity: '', organismQuantityType: '' } +} + +export const mapOccurrenceToOccurrenceRow = (occurrence: OccurrenceForExport): OccurrenceCsvRow => { + const quantity = occurrenceQuantity(occurrence) + const occurrenceRemarks = [ + toMaybeMeaningful(occurrence.orig_entry), + toMaybeMeaningful(occurrence.source_name), + toMaybeMeaningful(occurrence.qua), + ] + .filter(Boolean) + .join(' | ') + + return { + occurrenceID: occurrenceIdForRow(occurrence.lid, occurrence.species_id), + locationID: `NOW:LOC:${occurrence.lid}`, + taxonID: taxonIdForSpecies(occurrence.species_id), + scientificName: scientificNameForOccurrence(occurrence.com_species), + occurrenceStatus: 'present', + organismQuantity: quantity.organismQuantity, + organismQuantityType: quantity.organismQuantityType, + identificationQualifier: toMaybeMeaningful(occurrence.id_status), + occurrenceRemarks, + } +} + +const NOW_LS_MEASUREMENT_MAPPINGS: Array<{ + field: keyof OccurrenceForExport + measurementType: string + measurementUnit: string + measurementMethod: string +}> = [ + { field: 'nis', measurementType: 'number of identified specimens', measurementUnit: '', measurementMethod: '' }, + { field: 'pct', measurementType: 'percentage', measurementUnit: '%', measurementMethod: '' }, + { field: 'quad', measurementType: 'quadrat count', measurementUnit: '', measurementMethod: '' }, + { field: 'mni', measurementType: 'minimum number of individuals', measurementUnit: '', measurementMethod: '' }, + { field: 'body_mass', measurementType: 'occurrence body mass', measurementUnit: 'g', measurementMethod: '' }, + { field: 'mesowear', measurementType: 'occurrence mesowear', 
measurementUnit: '', measurementMethod: '' }, + { + field: 'mw_or_high', + measurementType: 'occurrence mesowear high occlusal relief', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_or_low', + measurementType: 'occurrence mesowear low occlusal relief', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_cs_sharp', + measurementType: 'occurrence mesowear sharp cusp shape', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_cs_round', + measurementType: 'occurrence mesowear round cusp shape', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_cs_blunt', + measurementType: 'occurrence mesowear blunt cusp shape', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_scale_min', + measurementType: 'occurrence mesowear scale minimum', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'mw_scale_max', + measurementType: 'occurrence mesowear scale maximum', + measurementUnit: '', + measurementMethod: '', + }, + { field: 'mw_value', measurementType: 'occurrence mesowear value', measurementUnit: '', measurementMethod: '' }, + { field: 'microwear', measurementType: 'occurrence microwear', measurementUnit: '', measurementMethod: '' }, + { + field: 'dc13_mean', + measurementType: 'occurrence delta C13 mean', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { field: 'dc13_n', measurementType: 'occurrence delta C13 sample count', measurementUnit: '', measurementMethod: '' }, + { + field: 'dc13_max', + measurementType: 'occurrence delta C13 maximum', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { + field: 'dc13_min', + measurementType: 'occurrence delta C13 minimum', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { + field: 'dc13_stdev', + measurementType: 'occurrence delta C13 standard deviation', + measurementUnit: '', + measurementMethod: '', + }, + { + field: 'do18_mean', + measurementType: 'occurrence delta O18 
mean', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { field: 'do18_n', measurementType: 'occurrence delta O18 sample count', measurementUnit: '', measurementMethod: '' }, + { + field: 'do18_max', + measurementType: 'occurrence delta O18 maximum', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { + field: 'do18_min', + measurementType: 'occurrence delta O18 minimum', + measurementUnit: 'per mille', + measurementMethod: '', + }, + { + field: 'do18_stdev', + measurementType: 'occurrence delta O18 standard deviation', + measurementUnit: '', + measurementMethod: '', + }, +] + +export const mapOccurrenceToMeasurementRows = (occurrence: OccurrenceForExport): MeasurementCsvRow[] => { + const occurrenceID = occurrenceIdForRow(occurrence.lid, occurrence.species_id) + + return NOW_LS_MEASUREMENT_MAPPINGS.flatMap(mapping => { + const rawValue = occurrence[mapping.field] + if (rawValue === null || rawValue === undefined) return [] + if (typeof rawValue === 'string' && !isMeaningfulString(rawValue)) return [] + + const measurementValue = toDwcString(rawValue).trim() + if (!measurementValue) return [] + + const verbatimMeasurementType = `now_ls.${mapping.field.toString()}` + return [ + { + taxonID: occurrenceID, + measurementID: `${occurrenceID}:${verbatimMeasurementType}`, + parentMeasurementID: '', + measurementType: mapping.measurementType, + verbatimMeasurementType, + measurementValue, + measurementUnit: mapping.measurementUnit, + measurementMethod: mapping.measurementMethod, + }, + ] + }) +} + +const uniqueBy = (rows: T[], keyFn: (row: T) => string): T[] => { + const byKey = new Map() + for (const row of rows) { + const key = keyFn(row) + if (!byKey.has(key)) byKey.set(key, row) + } + return [...byKey.values()] +} + +const DWC_TERMS = { + occurrence: { + rowType: 'http://rs.tdwg.org/dwc/terms/Occurrence', + occurrenceID: 'http://rs.tdwg.org/dwc/terms/occurrenceID', + locationID: 'http://rs.tdwg.org/dwc/terms/locationID', + taxonID: 
'http://rs.tdwg.org/dwc/terms/taxonID', + scientificName: 'http://rs.tdwg.org/dwc/terms/scientificName', + occurrenceStatus: 'http://rs.tdwg.org/dwc/terms/occurrenceStatus', + organismQuantity: 'http://rs.tdwg.org/dwc/terms/organismQuantity', + organismQuantityType: 'http://rs.tdwg.org/dwc/terms/organismQuantityType', + identificationQualifier: 'http://rs.tdwg.org/dwc/terms/identificationQualifier', + occurrenceRemarks: 'http://rs.tdwg.org/dwc/terms/occurrenceRemarks', + }, + measurement: { + rowType: 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact', + taxonID: 'http://rs.tdwg.org/dwc/terms/occurrenceID', + measurementID: 'http://rs.tdwg.org/dwc/terms/measurementID', + parentMeasurementID: 'http://rs.tdwg.org/dwc/terms/parentMeasurementID', + measurementType: 'http://rs.tdwg.org/dwc/terms/measurementType', + verbatimMeasurementType: 'http://rs.tdwg.org/dwc/terms/verbatimMeasurementType', + measurementValue: 'http://rs.tdwg.org/dwc/terms/measurementValue', + measurementUnit: 'http://rs.tdwg.org/dwc/terms/measurementUnit', + measurementMethod: 'http://rs.tdwg.org/dwc/terms/measurementMethod', + }, +} as const + +export const buildOccurrenceMetaXml = (): string => { + const occurrenceFields = OCCURRENCE_HEADERS.map((header, index) => { + const term = (DWC_TERMS.occurrence as Record)[header] + return ` ` + }).join('\n') + + const measurementFields = MEASUREMENT_HEADERS.map((header, index) => { + const term = (DWC_TERMS.measurement as Record)[header] + return ` ` + }).join('\n') + + return ` + + + + occurrence.csv + + +${occurrenceFields} + + + + measurementorfact.csv + + +${measurementFields} + + +` +} + +export const buildOccurrenceEmlXml = (publicationDateIso: string): string => { + return ` + + + + NOW database Darwin Core test export (occurrences) + + + NOW database + + + + + NOW database + + + ${publicationDateIso} + + Admin-only test Darwin Core Archive export for occurrence records from now_ls. 
Location and taxon lookup files are included with the same structures as the locality and taxon exports. + + + TODO(#1150): Add rights / license information. + + + +` +} + +export const buildDwcOccurrenceArchiveZipBufferFromOccurrences = async ( + occurrences: OccurrenceForExport[] +): Promise => { + const localities = uniqueBy( + occurrences.map(occurrence => occurrence.now_loc), + locality => locality.lid.toString() + ) + const speciesRows = uniqueBy( + occurrences.map(occurrence => occurrence.com_species), + species => species.species_id.toString() + ) + + const locationCsv = await writeCsvString([...LOCATION_HEADERS], localities.map(mapLocalityToLocationRow)) + const geologicalContextCsv = await writeCsvString( + [...GEOLOGICAL_CONTEXT_HEADERS], + localities.map(mapLocalityToGeologicalContextRow) + ) + const taxonCsv = await writeCsvString([...TAXON_HEADERS], speciesRows.map(mapSpeciesToTaxonRow)) + const occurrenceCsv = await writeCsvString([...OCCURRENCE_HEADERS], occurrences.map(mapOccurrenceToOccurrenceRow)) + const measurementCsv = await writeCsvString( + [...MEASUREMENT_HEADERS], + occurrences.flatMap(mapOccurrenceToMeasurementRows) + ) + const metaXml = buildOccurrenceMetaXml() + const emlXml = buildOccurrenceEmlXml(new Date().toISOString().slice(0, 10)) + + const zip = new JSZip() + zip.file('location.csv', locationCsv) + zip.file('geologicalcontext.csv', geologicalContextCsv) + zip.file('taxon.csv', taxonCsv) + zip.file('occurrence.csv', occurrenceCsv) + zip.file('measurementorfact.csv', measurementCsv) + zip.file('meta.xml', metaXml) + zip.file('eml.xml', emlXml) + + return await zip.generateAsync({ type: 'nodebuffer', compression: 'DEFLATE', compressionOptions: { level: 6 } }) +} + +export const buildDwcOccurrenceArchiveZipBuffer = async (): Promise => { + const { nowDb } = await import('../utils/db') + const occurrences = await nowDb.now_ls.findMany({ + select: { + lid: true, + species_id: true, + nis: true, + pct: true, + quad: true, + mni: true, + 
qua: true, + id_status: true, + orig_entry: true, + source_name: true, + body_mass: true, + mesowear: true, + mw_or_high: true, + mw_or_low: true, + mw_cs_sharp: true, + mw_cs_round: true, + mw_cs_blunt: true, + mw_scale_min: true, + mw_scale_max: true, + mw_value: true, + microwear: true, + dc13_mean: true, + dc13_n: true, + dc13_max: true, + dc13_min: true, + dc13_stdev: true, + do18_mean: true, + do18_n: true, + do18_max: true, + do18_min: true, + do18_stdev: true, + now_loc: { + select: { + lid: true, + loc_name: true, + basin: true, + subbasin: true, + country: true, + state: true, + county: true, + dec_lat: true, + dec_long: true, + dms_lat: true, + dms_long: true, + approx_coord: true, + altitude: true, + loc_detail: true, + chron: true, + lgroup: true, + formation: true, + member: true, + bed: true, + bfa_max: true, + bfa_min: true, + bfa_max_abs: true, + bfa_min_abs: true, + frac_max: true, + frac_min: true, + max_age: true, + min_age: true, + date_meth: true, + age_comm: true, + site_area: true, + gen_loc: true, + plate: true, + appr_num_spm: true, + num_spm: true, + true_quant: true, + complete: true, + num_quad: true, + rock_type: true, + rt_adj: true, + lith_comm: true, + depo_context1: true, + depo_context2: true, + depo_context3: true, + depo_context4: true, + depo_comm: true, + sed_env_1: true, + sed_env_2: true, + event_circum: true, + se_comm: true, + assem_fm: true, + transport: true, + trans_mod: true, + weath_trmp: true, + pt_conc: true, + size_type: true, + vert_pres: true, + plant_pres: true, + invert_pres: true, + time_rep: true, + taph_comm: true, + tax_comm: true, + datum_plane: true, + tos: true, + bos: true, + loc_status: true, + hominin_skeletal_remains: true, + climate_type: true, + biome: true, + v_ht: true, + v_struct: true, + v_envi_det: true, + disturb: true, + nutrients: true, + water: true, + seasonality: true, + seas_intens: true, + pri_prod: true, + moisture: true, + temperature: true, + estimate_precip: true, + estimate_temp: 
true, + estimate_npp: true, + pers_woody_cover: true, + pers_pollen_ap: true, + pers_pollen_nap: true, + pers_pollen_other: true, + stone_tool_cut_marks_on_bones: true, + bipedal_footprints: true, + stone_tool_technology: true, + technological_mode_1: true, + technological_mode_2: true, + technological_mode_3: true, + cultural_stage_1: true, + cultural_stage_2: true, + cultural_stage_3: true, + regional_culture_1: true, + regional_culture_2: true, + regional_culture_3: true, + now_time_unit_now_loc_bfa_maxTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + now_time_unit_now_loc_bfa_minTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + now_syn_loc: { select: { synonym: true } }, + now_ss: { select: { sed_struct: true } }, + now_coll_meth: { select: { coll_meth: true } }, + now_mus: { + select: { + museum: true, + com_mlist: { select: { institution: true, alt_int_name: true, city: true, state: true, country: true } }, + }, + }, + now_ls: { + select: { + com_species: { + select: { order_name: true, tht: true, genus_name: true }, + }, + }, + }, + }, + }, + com_species: { + select: { + species_id: true, + class_name: true, + subclass_or_superorder_name: true, + order_name: true, + suborder_or_superfamily_name: true, + family_name: true, + subfamily_name: true, + genus_name: true, + species_name: true, + unique_identifier: true, + taxonomic_status: true, + common_name: true, + sp_author: true, + sp_comment: true, + }, + }, + }, + }) + + return await buildDwcOccurrenceArchiveZipBufferFromOccurrences(occurrences as unknown as OccurrenceForExport[]) +} diff --git a/backend/src/unit-tests/dwcArchiveExportOccurrences.test.ts b/backend/src/unit-tests/dwcArchiveExportOccurrences.test.ts new file mode 100644 index 000000000..122cab4e6 --- /dev/null +++ b/backend/src/unit-tests/dwcArchiveExportOccurrences.test.ts @@ -0,0 +1,215 @@ +import { describe, expect, it } from 
'@jest/globals' +import JSZip from 'jszip' +import { + buildDwcOccurrenceArchiveZipBufferFromOccurrences, + mapOccurrenceToMeasurementRows, + mapOccurrenceToOccurrenceRow, +} from '../services/dwcArchiveExportOccurrences' + +describe('DwC-A occurrence export mapping', () => { + const baseOccurrence = { + lid: 42, + species_id: 21052, + nis: 7, + pct: null, + quad: null, + mni: 2, + qua: 'A', + id_status: 'confirmed', + orig_entry: 'Original occurrence note', + source_name: 'Source collection', + body_mass: BigInt(1234), + mesowear: 'mix', + mw_or_high: 1, + mw_or_low: null, + mw_cs_sharp: null, + mw_cs_round: 2, + mw_cs_blunt: null, + mw_scale_min: 0, + mw_scale_max: 3, + mw_value: 2, + microwear: 'scratch', + dc13_mean: -11.2, + dc13_n: 4, + dc13_max: -10.1, + dc13_min: -12.3, + dc13_stdev: 0.4, + do18_mean: 1.2, + do18_n: 3, + do18_max: 2.4, + do18_min: 0.4, + do18_stdev: 0.5, + now_loc: { + lid: 42, + loc_name: 'Test locality', + basin: 'Test basin', + subbasin: 'Test subbasin', + country: 'Finland', + state: 'Uusimaa', + county: 'Helsinki', + dec_lat: 60.1699, + dec_long: 24.9384, + dms_lat: null, + dms_long: null, + approx_coord: null, + altitude: 123, + loc_detail: 'Some notes', + chron: 'Test chron', + lgroup: 'Test group', + formation: 'Test formation', + member: 'Test member', + bed: 'Test bed', + bfa_max: 'BFA_MAX', + bfa_min: 'BFA_MIN', + bfa_max_abs: null, + bfa_min_abs: null, + frac_max: null, + frac_min: null, + max_age: 12.3, + min_age: 4.5, + date_meth: 'radioisotope', + age_comm: 'Age comment', + site_area: null, + gen_loc: null, + plate: null, + appr_num_spm: null, + num_spm: null, + true_quant: null, + complete: null, + num_quad: null, + rock_type: null, + rt_adj: null, + lith_comm: null, + depo_context1: null, + depo_context2: null, + depo_context3: null, + depo_context4: null, + depo_comm: null, + sed_env_1: null, + sed_env_2: null, + event_circum: null, + se_comm: null, + assem_fm: null, + transport: null, + trans_mod: null, + weath_trmp: 
null, + pt_conc: null, + size_type: null, + vert_pres: null, + plant_pres: null, + invert_pres: null, + time_rep: null, + taph_comm: null, + tax_comm: null, + datum_plane: null, + tos: null, + bos: null, + loc_status: null, + hominin_skeletal_remains: false, + climate_type: null, + biome: null, + v_ht: null, + v_struct: null, + v_envi_det: null, + disturb: null, + nutrients: null, + water: null, + seasonality: null, + seas_intens: null, + pri_prod: null, + moisture: null, + temperature: null, + estimate_precip: null, + estimate_temp: null, + estimate_npp: null, + pers_woody_cover: null, + pers_pollen_ap: null, + pers_pollen_nap: null, + pers_pollen_other: null, + stone_tool_cut_marks_on_bones: false, + bipedal_footprints: false, + stone_tool_technology: false, + technological_mode_1: null, + technological_mode_2: null, + technological_mode_3: null, + cultural_stage_1: null, + cultural_stage_2: null, + cultural_stage_3: null, + regional_culture_1: null, + regional_culture_2: null, + regional_culture_3: null, + now_syn_loc: [], + now_ss: [], + now_coll_meth: [], + now_mus: [], + now_ls: [], + now_time_unit_now_loc_bfa_maxTonow_time_unit: null, + now_time_unit_now_loc_bfa_minTonow_time_unit: null, + }, + com_species: { + species_id: 21052, + class_name: 'Mammalia', + subclass_or_superorder_name: null, + order_name: 'Rodentia', + suborder_or_superfamily_name: null, + family_name: 'Testidae', + subfamily_name: null, + genus_name: 'Simplomys', + species_name: 'simplicidens', + unique_identifier: '-', + taxonomic_status: null, + common_name: null, + sp_author: 'Test Author', + sp_comment: null, + }, + } as const + + it('maps now_ls row to a DwC Occurrence row', () => { + const row = mapOccurrenceToOccurrenceRow(baseOccurrence) + expect(row).toEqual( + expect.objectContaining({ + occurrenceID: 'NOW:OCC:42:21052', + locationID: 'NOW:LOC:42', + taxonID: 'NOW:21052', + scientificName: 'Simplomys simplicidens Test Author', + occurrenceStatus: 'present', + organismQuantity: 
'2', + organismQuantityType: 'minimum number of individuals', + identificationQualifier: 'confirmed', + occurrenceRemarks: 'Original occurrence note | Source collection | A', + }) + ) + }) + + it('prefixes now_ls measurement verbatim names to avoid com_species collisions', () => { + const rows = mapOccurrenceToMeasurementRows(baseOccurrence) + expect(rows).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + taxonID: 'NOW:OCC:42:21052', + measurementID: 'NOW:OCC:42:21052:now_ls.body_mass', + verbatimMeasurementType: 'now_ls.body_mass', + measurementValue: '1234', + measurementUnit: 'g', + }), + expect.objectContaining({ + measurementID: 'NOW:OCC:42:21052:now_ls.mesowear', + verbatimMeasurementType: 'now_ls.mesowear', + measurementValue: 'mix', + }), + ]) + ) + }) + + it('generates a ZIP archive with occurrence export files', async () => { + const zipBuffer = await buildDwcOccurrenceArchiveZipBufferFromOccurrences([baseOccurrence]) + const zip = await JSZip.loadAsync(zipBuffer) + expect(zip.file('location.csv')).toBeTruthy() + expect(zip.file('geologicalcontext.csv')).toBeTruthy() + expect(zip.file('taxon.csv')).toBeTruthy() + expect(zip.file('occurrence.csv')).toBeTruthy() + expect(zip.file('measurementorfact.csv')).toBeTruthy() + expect(zip.file('meta.xml')).toBeTruthy() + expect(zip.file('eml.xml')).toBeTruthy() + }) +}) diff --git a/documentation/functionality/dwc_export_occurrences.md b/documentation/functionality/dwc_export_occurrences.md new file mode 100644 index 000000000..fe38347ce --- /dev/null +++ b/documentation/functionality/dwc_export_occurrences.md @@ -0,0 +1,51 @@ +# DwC-A export: occurrences (v1) + +This document describes the admin-only Darwin Core Archive (DwC-A) test export for occurrence records (`now_ls`). 
+ +## Files + +The export ZIP contains: + +- `occurrence.csv` (DwC `Occurrence` core) +- `measurementorfact.csv` (DwC `MeasurementOrFact` extension for `now_ls` facts) +- `location.csv` (companion file using the same structure as the locality export) +- `geologicalcontext.csv` (companion file using the same structure as the locality export) +- `taxon.csv` (companion file using the same structure as the taxon export) +- `meta.xml` +- `eml.xml` (minimal placeholder EML metadata) + +## Core: `occurrence.csv` + +Core rowType: `http://rs.tdwg.org/dwc/terms/Occurrence` + +v1 columns: + +- `occurrenceID` = `NOW:OCC:<lid>:<species_id>` +- `locationID` = `NOW:LOC:<lid>` +- `taxonID` = `NOW:<species_id>` +- `scientificName` = genus, species, optional unique identifier, and authorship from `com_species` +- `occurrenceStatus` = `present` +- `organismQuantity` / `organismQuantityType` = first available quantity from `mni`, `nis`, `pct`, then `quad` +- `identificationQualifier` = `id_status` +- `occurrenceRemarks` = `orig_entry`, `source_name`, and `qua` (joined with `|`) + +## Extension: `measurementorfact.csv` + +The occurrence export uses the same `measurementorfact.csv` column structure as the taxon and locality exports. + +For occurrence-level measurements, the `taxonID` column contains the occurrence core id (`NOW:OCC:<lid>:<species_id>`). `verbatimMeasurementType` values from `now_ls` are prefixed with `now_ls.` so they do not collide with same-named `com_species` fields such as `body_mass`, `mesowear`, `mw_value`, or `microwear`. 
+ +Included `now_ls` fields for v1: + +- count / abundance fields: `nis`, `pct`, `quad`, `mni` +- body mass: `body_mass` +- wear fields: `mesowear`, `mw_or_high`, `mw_or_low`, `mw_cs_sharp`, `mw_cs_round`, `mw_cs_blunt`, `mw_scale_min`, `mw_scale_max`, `mw_value`, `microwear` +- isotope fields: `dc13_mean`, `dc13_n`, `dc13_max`, `dc13_min`, `dc13_stdev`, `do18_mean`, `do18_n`, `do18_max`, `do18_min`, `do18_stdev` + +## Companion Files + +`location.csv`, `geologicalcontext.csv`, and `taxon.csv` are included as lookup/context files for the occurrence rows and intentionally reuse the existing locality and taxon export structures. + +## Admin-only + +The backend route is restricted to `Role.Admin`. diff --git a/frontend/src/components/CrossSearch/CrossSearchTable.tsx b/frontend/src/components/CrossSearch/CrossSearchTable.tsx index a3cf690cf..28ec70266 100755 --- a/frontend/src/components/CrossSearch/CrossSearchTable.tsx +++ b/frontend/src/components/CrossSearch/CrossSearchTable.tsx @@ -8,6 +8,7 @@ import { usePageContext } from '../Page' import { LocalitiesMap } from '../Map/LocalitiesMap' import { formatWithMaxThreeDecimals } from '@/util/numberFormatting' import { occurrenceLabels } from '@/constants/occurrenceLabels' +import { OccurrenceDwcExportMenuItem } from '@/components/Occurrence/OccurrenceDwcExportMenuItem' import { matchesCountryOrContinent } from '@/shared/validators/countryContinents' export const CrossSearchTable = ({ selectorFn }: { selectorFn?: (newObject: CrossSearch) => void }) => { @@ -1089,6 +1090,7 @@ export const CrossSearchTable = ({ selectorFn }: { selectorFn?: (newObject: Cros isCrossSearchTable={true} isError={isError} error={error} + renderExtraExportMenuItems={handleClose => } /> ) diff --git a/frontend/src/components/Occurrence/OccurrenceDwcExportMenuItem.tsx b/frontend/src/components/Occurrence/OccurrenceDwcExportMenuItem.tsx new file mode 100644 index 000000000..2461b7b9b --- /dev/null +++ 
b/frontend/src/components/Occurrence/OccurrenceDwcExportMenuItem.tsx @@ -0,0 +1,88 @@ +import { useState } from 'react' +import { MenuItem } from '@mui/material' +import { useNotify } from '@/hooks/notification' +import { BACKEND_URL } from '@/util/config' +import { useUser } from '@/hooks/user' +import { Role } from '@/shared/types' +import { currentDateAsString } from '@/shared/currentDateAsString' + +export const OccurrenceDwcExportMenuItem = ({ handleClose }: { handleClose: () => void }) => { + const [loading, setLoading] = useState(false) + const { notify, setMessage: setNotificationMessage } = useNotify() + const user = useUser() + + if (user.role !== Role.Admin) { + return null + } + + const fetchOptions = user.token ? { headers: { Authorization: `Bearer ${user.token}` } } : {} + const filename = `now_dwc_occurrences_test_export_${currentDateAsString()}.zip` + + const fetchZipFile = async () => { + setLoading(true) + notify('Generating DwC-A ZIP export, please wait...', 'info', null) + + try { + const response = await fetch(`${BACKEND_URL}/occurrence/export/dwc-archive`, fetchOptions) + if (!response.ok) { + throw new Error('Server response was not OK.') + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('Missing response stream.') + } + + const file: Uint8Array[] = [] + let bytes = 0 + let closed = false + + const showDownloadProgress = () => { + if (!closed) { + setTimeout(() => { + setNotificationMessage(`Downloading DwC-A ZIP, ${Math.round((bytes / 1000000) * 10) / 10} MB`) + showDownloadProgress() + }, 500) + } + } + + notify('Downloading DwC-A ZIP...', 'info', null) + showDownloadProgress() + + while (true) { + const { done, value } = await reader.read() + if (done) break + bytes = bytes + value.length + file.push(value) + } + closed = true + + const blobUrl = window.URL.createObjectURL(new Blob(file, { type: 'application/zip' })) + const downloadLink = document.createElement('a') + downloadLink.href = blobUrl + 
downloadLink.download = filename + document.body.appendChild(downloadLink) + downloadLink.click() + downloadLink.remove() + window.URL.revokeObjectURL(blobUrl) + + notify('Download finished.') + } catch { + notify('Downloading DwC-A export failed.', 'error') + } finally { + setLoading(false) + } + } + + return ( + { + void fetchZipFile() + handleClose() + }} + disabled={loading} + > + Export DwC-A (occurrences) + + ) +} From e90e24d2b88f6d53afc5c8c0e198e3e977e9889b Mon Sep 17 00:00:00 2001 From: karilint Date: Tue, 28 Apr 2026 18:10:12 +0300 Subject: [PATCH 19/23] Stream DwC occurrence export --- backend/src/routes/occurrence.ts | 15 +- .../services/dwcArchiveExportOccurrences.ts | 476 +++++++++++------- 2 files changed, 306 insertions(+), 185 deletions(-) diff --git a/backend/src/routes/occurrence.ts b/backend/src/routes/occurrence.ts index 13383c5df..036af5fde 100644 --- a/backend/src/routes/occurrence.ts +++ b/backend/src/routes/occurrence.ts @@ -1,20 +1,27 @@ import { Router } from 'express' +import { pipeline } from 'stream' import { getOccurrenceDetail, updateOccurrenceDetail } from '../controllers/occurrenceController' import { requireOneOf } from '../middlewares/authorizer' import { Role } from '../../../frontend/src/shared/types' -import { buildDwcOccurrenceArchiveZipBuffer } from '../services/dwcArchiveExportOccurrences' +import { buildDwcOccurrenceArchiveZipStream } from '../services/dwcArchiveExportOccurrences' import { currentDateAsString } from '../../../frontend/src/shared/currentDateAsString' +import { logger } from '../utils/logger' const router = Router() -router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res) => { - const zipBuffer = await buildDwcOccurrenceArchiveZipBuffer() +router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res, next) => { + const archive = await buildDwcOccurrenceArchiveZipStream() res.setHeader('Content-Type', 'application/zip') res.setHeader( 'Content-Disposition', 
`attachment; filename="now_dwc_occurrences_test_export_${currentDateAsString()}.zip"` ) - res.send(zipBuffer) + pipeline(archive.stream, res, error => { + archive.cleanup().catch(cleanupError => { + logger.error(`Failed to clean up occurrence DwC export temp files: ${String(cleanupError)}`) + }) + if (error) next(error) + }) }) router.get('/:lid/:speciesId', getOccurrenceDetail) diff --git a/backend/src/services/dwcArchiveExportOccurrences.ts b/backend/src/services/dwcArchiveExportOccurrences.ts index 186a69ce5..9c4f3a324 100644 --- a/backend/src/services/dwcArchiveExportOccurrences.ts +++ b/backend/src/services/dwcArchiveExportOccurrences.ts @@ -1,5 +1,10 @@ import Prisma from '../../prisma/generated/now_test_client' import { format } from 'fast-csv' +import { createReadStream, createWriteStream } from 'fs' +import { mkdtemp, rm } from 'fs/promises' +import { tmpdir } from 'os' +import path from 'path' +import { once } from 'events' import { Writable } from 'stream' import JSZip from 'jszip' import { @@ -123,10 +128,18 @@ type OccurrenceForExport = Pick< | 'do18_min' | 'do18_stdev' > & { - now_loc: LocalityForOccurrenceExport com_species: SpeciesForOccurrenceExport } +type OccurrenceWithLocalityForExport = OccurrenceForExport & { + now_loc: LocalityForOccurrenceExport +} + +type DwcOccurrenceArchiveStream = { + stream: NodeJS.ReadableStream + cleanup: () => Promise +} + const scientificNameForOccurrence = (species: SpeciesForOccurrenceExport): string => { const nameParts = [ toMaybeMeaningful(species.genus_name), @@ -322,6 +335,154 @@ const uniqueBy = (rows: T[], keyFn: (row: T) => string): T[] => { return [...byKey.values()] } +const OCCURRENCE_EXPORT_PAGE_SIZE = 1000 +const LOOKUP_EXPORT_CHUNK_SIZE = 1000 + +const occurrenceSelect = { + lid: true, + species_id: true, + nis: true, + pct: true, + quad: true, + mni: true, + qua: true, + id_status: true, + orig_entry: true, + source_name: true, + body_mass: true, + mesowear: true, + mw_or_high: true, + mw_or_low: 
true, + mw_cs_sharp: true, + mw_cs_round: true, + mw_cs_blunt: true, + mw_scale_min: true, + mw_scale_max: true, + mw_value: true, + microwear: true, + dc13_mean: true, + dc13_n: true, + dc13_max: true, + dc13_min: true, + dc13_stdev: true, + do18_mean: true, + do18_n: true, + do18_max: true, + do18_min: true, + do18_stdev: true, + com_species: { + select: { + species_id: true, + class_name: true, + subclass_or_superorder_name: true, + order_name: true, + suborder_or_superfamily_name: true, + family_name: true, + subfamily_name: true, + genus_name: true, + species_name: true, + unique_identifier: true, + taxonomic_status: true, + common_name: true, + sp_author: true, + sp_comment: true, + }, + }, +} as const + +const localityLookupSelect = { + lid: true, + loc_name: true, + basin: true, + subbasin: true, + country: true, + state: true, + county: true, + dec_lat: true, + dec_long: true, + dms_lat: true, + dms_long: true, + altitude: true, + loc_detail: true, + age_comm: true, + tax_comm: true, + chron: true, + lgroup: true, + formation: true, + member: true, + bed: true, + bfa_max: true, + bfa_min: true, + now_time_unit_now_loc_bfa_maxTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, + now_time_unit_now_loc_bfa_minTonow_time_unit: { + select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, + }, +} as const + +const speciesLookupSelect = occurrenceSelect.com_species.select + +const csvCell = (value: unknown): string => `"${toDwcString(value).replace(/"/g, '""')}"` + +const csvLine = (headers: readonly string[], row: Record): string => + `${headers.map(header => csvCell(row[header])).join(',')}\n` + +const createCsvFileWriter = async (filePath: string, headers: readonly string[]) => { + const stream = createWriteStream(filePath, { encoding: 'utf8' }) + await new Promise((resolve, reject) => { + stream.once('open', () => resolve()) + stream.once('error', reject) + }) + + const write = async 
(line: string): Promise => { + if (!stream.write(line)) await once(stream, 'drain') + } + + await write(`${headers.map(csvCell).join(',')}\n`) + + return { + writeRow: async (row: Record): Promise => { + await write(csvLine(headers, row)) + }, + close: async (): Promise => { + stream.end() + await once(stream, 'finish') + }, + } +} + +async function* iterateOccurrenceRows(): AsyncGenerator { + const { nowDb } = await import('../utils/db') + let cursor: { lid: number; species_id: number } | undefined + + while (true) { + const page = await nowDb.now_ls.findMany({ + take: OCCURRENCE_EXPORT_PAGE_SIZE, + ...(cursor ? { cursor: { lid_species_id: cursor }, skip: 1 } : {}), + orderBy: [{ lid: 'asc' }, { species_id: 'asc' }], + select: occurrenceSelect, + }) + + if (page.length === 0) return + + for (const occurrence of page) { + yield occurrence as unknown as OccurrenceForExport + } + + const last = page[page.length - 1] + cursor = { lid: last.lid, species_id: last.species_id } + } +} + +const chunk = (values: T[], size: number): T[][] => { + const chunks: T[][] = [] + for (let index = 0; index < values.length; index += size) { + chunks.push(values.slice(index, index + size)) + } + return chunks +} + const DWC_TERMS = { occurrence: { rowType: 'http://rs.tdwg.org/dwc/terms/Occurrence', @@ -414,7 +575,7 @@ export const buildOccurrenceEmlXml = (publicationDateIso: string): string => { } export const buildDwcOccurrenceArchiveZipBufferFromOccurrences = async ( - occurrences: OccurrenceForExport[] + occurrences: OccurrenceWithLocalityForExport[] ): Promise => { const localities = uniqueBy( occurrences.map(occurrence => occurrence.now_loc), @@ -451,185 +612,138 @@ export const buildDwcOccurrenceArchiveZipBufferFromOccurrences = async ( return await zip.generateAsync({ type: 'nodebuffer', compression: 'DEFLATE', compressionOptions: { level: 6 } }) } -export const buildDwcOccurrenceArchiveZipBuffer = async (): Promise => { +const writeOccurrenceAndMeasurementFiles = async ({ + 
occurrenceFilePath, + measurementFilePath, +}: { + occurrenceFilePath: string + measurementFilePath: string +}): Promise<{ localityIds: number[]; speciesIds: number[] }> => { + const occurrenceWriter = await createCsvFileWriter(occurrenceFilePath, OCCURRENCE_HEADERS) + const measurementWriter = await createCsvFileWriter(measurementFilePath, MEASUREMENT_HEADERS) + const localityIds = new Set() + const speciesIds = new Set() + + try { + for await (const occurrence of iterateOccurrenceRows()) { + localityIds.add(occurrence.lid) + speciesIds.add(occurrence.species_id) + await occurrenceWriter.writeRow(mapOccurrenceToOccurrenceRow(occurrence)) + + for (const measurementRow of mapOccurrenceToMeasurementRows(occurrence)) { + await measurementWriter.writeRow(measurementRow) + } + } + } finally { + await occurrenceWriter.close() + await measurementWriter.close() + } + + return { + localityIds: [...localityIds].sort((a, b) => a - b), + speciesIds: [...speciesIds].sort((a, b) => a - b), + } +} + +const writeLocalityLookupFiles = async ({ + localityIds, + locationFilePath, + geologicalContextFilePath, +}: { + localityIds: number[] + locationFilePath: string + geologicalContextFilePath: string +}): Promise => { const { nowDb } = await import('../utils/db') - const occurrences = await nowDb.now_ls.findMany({ - select: { - lid: true, - species_id: true, - nis: true, - pct: true, - quad: true, - mni: true, - qua: true, - id_status: true, - orig_entry: true, - source_name: true, - body_mass: true, - mesowear: true, - mw_or_high: true, - mw_or_low: true, - mw_cs_sharp: true, - mw_cs_round: true, - mw_cs_blunt: true, - mw_scale_min: true, - mw_scale_max: true, - mw_value: true, - microwear: true, - dc13_mean: true, - dc13_n: true, - dc13_max: true, - dc13_min: true, - dc13_stdev: true, - do18_mean: true, - do18_n: true, - do18_max: true, - do18_min: true, - do18_stdev: true, - now_loc: { - select: { - lid: true, - loc_name: true, - basin: true, - subbasin: true, - country: true, - 
state: true, - county: true, - dec_lat: true, - dec_long: true, - dms_lat: true, - dms_long: true, - approx_coord: true, - altitude: true, - loc_detail: true, - chron: true, - lgroup: true, - formation: true, - member: true, - bed: true, - bfa_max: true, - bfa_min: true, - bfa_max_abs: true, - bfa_min_abs: true, - frac_max: true, - frac_min: true, - max_age: true, - min_age: true, - date_meth: true, - age_comm: true, - site_area: true, - gen_loc: true, - plate: true, - appr_num_spm: true, - num_spm: true, - true_quant: true, - complete: true, - num_quad: true, - rock_type: true, - rt_adj: true, - lith_comm: true, - depo_context1: true, - depo_context2: true, - depo_context3: true, - depo_context4: true, - depo_comm: true, - sed_env_1: true, - sed_env_2: true, - event_circum: true, - se_comm: true, - assem_fm: true, - transport: true, - trans_mod: true, - weath_trmp: true, - pt_conc: true, - size_type: true, - vert_pres: true, - plant_pres: true, - invert_pres: true, - time_rep: true, - taph_comm: true, - tax_comm: true, - datum_plane: true, - tos: true, - bos: true, - loc_status: true, - hominin_skeletal_remains: true, - climate_type: true, - biome: true, - v_ht: true, - v_struct: true, - v_envi_det: true, - disturb: true, - nutrients: true, - water: true, - seasonality: true, - seas_intens: true, - pri_prod: true, - moisture: true, - temperature: true, - estimate_precip: true, - estimate_temp: true, - estimate_npp: true, - pers_woody_cover: true, - pers_pollen_ap: true, - pers_pollen_nap: true, - pers_pollen_other: true, - stone_tool_cut_marks_on_bones: true, - bipedal_footprints: true, - stone_tool_technology: true, - technological_mode_1: true, - technological_mode_2: true, - technological_mode_3: true, - cultural_stage_1: true, - cultural_stage_2: true, - cultural_stage_3: true, - regional_culture_1: true, - regional_culture_2: true, - regional_culture_3: true, - now_time_unit_now_loc_bfa_maxTonow_time_unit: { - select: { tu_name: true, tu_display_name: true, 
rank: true, sequence: true }, - }, - now_time_unit_now_loc_bfa_minTonow_time_unit: { - select: { tu_name: true, tu_display_name: true, rank: true, sequence: true }, - }, - now_syn_loc: { select: { synonym: true } }, - now_ss: { select: { sed_struct: true } }, - now_coll_meth: { select: { coll_meth: true } }, - now_mus: { - select: { - museum: true, - com_mlist: { select: { institution: true, alt_int_name: true, city: true, state: true, country: true } }, - }, - }, - now_ls: { - select: { - com_species: { - select: { order_name: true, tht: true, genus_name: true }, - }, - }, - }, - }, - }, - com_species: { - select: { - species_id: true, - class_name: true, - subclass_or_superorder_name: true, - order_name: true, - suborder_or_superfamily_name: true, - family_name: true, - subfamily_name: true, - genus_name: true, - species_name: true, - unique_identifier: true, - taxonomic_status: true, - common_name: true, - sp_author: true, - sp_comment: true, - }, - }, - }, - }) + const locationWriter = await createCsvFileWriter(locationFilePath, LOCATION_HEADERS) + const geologicalContextWriter = await createCsvFileWriter(geologicalContextFilePath, GEOLOGICAL_CONTEXT_HEADERS) + + try { + for (const ids of chunk(localityIds, LOOKUP_EXPORT_CHUNK_SIZE)) { + const localities = await nowDb.now_loc.findMany({ + where: { lid: { in: ids } }, + orderBy: { lid: 'asc' }, + select: localityLookupSelect, + }) + + for (const locality of localities) { + const localityForExport = locality as unknown as LocalityForOccurrenceExport + await locationWriter.writeRow(mapLocalityToLocationRow(localityForExport)) + await geologicalContextWriter.writeRow(mapLocalityToGeologicalContextRow(localityForExport)) + } + } + } finally { + await locationWriter.close() + await geologicalContextWriter.close() + } +} - return await buildDwcOccurrenceArchiveZipBufferFromOccurrences(occurrences as unknown as OccurrenceForExport[]) +const writeTaxonLookupFile = async ({ + speciesIds, + taxonFilePath, +}: { + 
speciesIds: number[] + taxonFilePath: string +}): Promise => { + const { nowDb } = await import('../utils/db') + const taxonWriter = await createCsvFileWriter(taxonFilePath, TAXON_HEADERS) + + try { + for (const ids of chunk(speciesIds, LOOKUP_EXPORT_CHUNK_SIZE)) { + const speciesRows = await nowDb.com_species.findMany({ + where: { species_id: { in: ids } }, + orderBy: { species_id: 'asc' }, + select: speciesLookupSelect, + }) + + for (const species of speciesRows) { + await taxonWriter.writeRow(mapSpeciesToTaxonRow(species)) + } + } + } finally { + await taxonWriter.close() + } +} + +export const buildDwcOccurrenceArchiveZipStream = async (): Promise => { + const tempDirectory = await mkdtemp(path.join(tmpdir(), 'now-dwc-occurrences-')) + const files = { + location: path.join(tempDirectory, 'location.csv'), + geologicalContext: path.join(tempDirectory, 'geologicalcontext.csv'), + taxon: path.join(tempDirectory, 'taxon.csv'), + occurrence: path.join(tempDirectory, 'occurrence.csv'), + measurement: path.join(tempDirectory, 'measurementorfact.csv'), + } + + try { + const { localityIds, speciesIds } = await writeOccurrenceAndMeasurementFiles({ + occurrenceFilePath: files.occurrence, + measurementFilePath: files.measurement, + }) + await writeLocalityLookupFiles({ + localityIds, + locationFilePath: files.location, + geologicalContextFilePath: files.geologicalContext, + }) + await writeTaxonLookupFile({ speciesIds, taxonFilePath: files.taxon }) + + const zip = new JSZip() + zip.file('location.csv', createReadStream(files.location)) + zip.file('geologicalcontext.csv', createReadStream(files.geologicalContext)) + zip.file('taxon.csv', createReadStream(files.taxon)) + zip.file('occurrence.csv', createReadStream(files.occurrence)) + zip.file('measurementorfact.csv', createReadStream(files.measurement)) + zip.file('meta.xml', buildOccurrenceMetaXml()) + zip.file('eml.xml', buildOccurrenceEmlXml(new Date().toISOString().slice(0, 10))) + + return { + stream: 
zip.generateNodeStream({ type: 'nodebuffer', streamFiles: true, compression: 'DEFLATE' }), + cleanup: async () => { + await rm(tempDirectory, { recursive: true, force: true }) + }, + } + } catch (error) { + await rm(tempDirectory, { recursive: true, force: true }) + throw error + } } From 3a5e68fc86adf174633740ada6836a90ebce726a Mon Sep 17 00:00:00 2001 From: karilint Date: Wed, 29 Apr 2026 10:56:11 +0300 Subject: [PATCH 20/23] Show DwC occurrence export progress --- backend/src/routes/occurrence.ts | 45 +++++++++- .../services/dwcArchiveExportOccurrences.ts | 88 ++++++++++++++++++- .../OccurrenceDwcExportMenuItem.tsx | 44 +++++++++- 3 files changed, 170 insertions(+), 7 deletions(-) diff --git a/backend/src/routes/occurrence.ts b/backend/src/routes/occurrence.ts index 036af5fde..2d2e83286 100644 --- a/backend/src/routes/occurrence.ts +++ b/backend/src/routes/occurrence.ts @@ -3,14 +3,44 @@ import { pipeline } from 'stream' import { getOccurrenceDetail, updateOccurrenceDetail } from '../controllers/occurrenceController' import { requireOneOf } from '../middlewares/authorizer' import { Role } from '../../../frontend/src/shared/types' -import { buildDwcOccurrenceArchiveZipStream } from '../services/dwcArchiveExportOccurrences' +import { + buildDwcOccurrenceArchiveZipStream, + type DwcOccurrenceExportProgress, +} from '../services/dwcArchiveExportOccurrences' import { currentDateAsString } from '../../../frontend/src/shared/currentDateAsString' import { logger } from '../utils/logger' const router = Router() -router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res, next) => { - const archive = await buildDwcOccurrenceArchiveZipStream() +const occurrenceExportProgress = new Map() + +const scheduleProgressCleanup = (exportId: string) => { + setTimeout( + () => { + occurrenceExportProgress.delete(exportId) + }, + 5 * 60 * 1000 + ) +} + +router.get('/export/dwc-archive/progress/:exportId', requireOneOf([Role.Admin]), (req, res) => { + const progress 
= occurrenceExportProgress.get(req.params.exportId) + if (!progress) return res.status(404).send({ message: 'Occurrence export progress not found.' }) + return res.status(200).send(progress) +}) + +router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (req, res, next) => { + const exportId = typeof req.query.exportId === 'string' ? req.query.exportId : undefined + const reportProgress = exportId + ? (progress: DwcOccurrenceExportProgress) => { + occurrenceExportProgress.set(exportId, progress) + } + : undefined + + const archive = await buildDwcOccurrenceArchiveZipStream({ reportProgress }).catch(error => { + if (exportId) scheduleProgressCleanup(exportId) + throw error + }) res.setHeader('Content-Type', 'application/zip') res.setHeader( 'Content-Disposition', @@ -20,6 +50,15 @@ router.get('/export/dwc-archive', requireOneOf([Role.Admin]), async (_req, res, archive.cleanup().catch(cleanupError => { logger.error(`Failed to clean up occurrence DwC export temp files: ${String(cleanupError)}`) }) + if (exportId) { + occurrenceExportProgress.set(exportId, { + stage: 'complete', + generated: 1, + total: 1, + message: 'DwC-A ZIP export ready.', + }) + scheduleProgressCleanup(exportId) + } if (error) next(error) }) }) diff --git a/backend/src/services/dwcArchiveExportOccurrences.ts b/backend/src/services/dwcArchiveExportOccurrences.ts index 9c4f3a324..d17213596 100644 --- a/backend/src/services/dwcArchiveExportOccurrences.ts +++ b/backend/src/services/dwcArchiveExportOccurrences.ts @@ -140,6 +140,15 @@ type DwcOccurrenceArchiveStream = { cleanup: () => Promise } +export type DwcOccurrenceExportProgress = { + stage: 'occurrences' | 'localities' | 'taxa' | 'zipping' | 'complete' + generated: number + total: number | null + message: string +} + +type DwcOccurrenceExportProgressReporter = (progress: DwcOccurrenceExportProgress) => void + const scientificNameForOccurrence = (species: SpeciesForOccurrenceExport): string => { const nameParts = [ 
toMaybeMeaningful(species.genus_name), @@ -475,6 +484,11 @@ async function* iterateOccurrenceRows(): AsyncGenerator { } } +const countOccurrenceRows = async (): Promise => { + const { nowDb } = await import('../utils/db') + return await nowDb.now_ls.count() +} + const chunk = (values: T[], size: number): T[][] => { const chunks: T[][] = [] for (let index = 0; index < values.length; index += size) { @@ -615,14 +629,25 @@ export const buildDwcOccurrenceArchiveZipBufferFromOccurrences = async ( const writeOccurrenceAndMeasurementFiles = async ({ occurrenceFilePath, measurementFilePath, + reportProgress, }: { occurrenceFilePath: string measurementFilePath: string + reportProgress?: DwcOccurrenceExportProgressReporter }): Promise<{ localityIds: number[]; speciesIds: number[] }> => { const occurrenceWriter = await createCsvFileWriter(occurrenceFilePath, OCCURRENCE_HEADERS) const measurementWriter = await createCsvFileWriter(measurementFilePath, MEASUREMENT_HEADERS) const localityIds = new Set() const speciesIds = new Set() + const totalOccurrences = await countOccurrenceRows() + let generatedOccurrences = 0 + + reportProgress?.({ + stage: 'occurrences', + generated: generatedOccurrences, + total: totalOccurrences, + message: `Generating occurrence rows: ${generatedOccurrences}/${totalOccurrences} generated`, + }) try { for await (const occurrence of iterateOccurrenceRows()) { @@ -633,6 +658,16 @@ const writeOccurrenceAndMeasurementFiles = async ({ for (const measurementRow of mapOccurrenceToMeasurementRows(occurrence)) { await measurementWriter.writeRow(measurementRow) } + + generatedOccurrences += 1 + if (generatedOccurrences === totalOccurrences || generatedOccurrences % OCCURRENCE_EXPORT_PAGE_SIZE === 0) { + reportProgress?.({ + stage: 'occurrences', + generated: generatedOccurrences, + total: totalOccurrences, + message: `Generating occurrence rows: ${generatedOccurrences}/${totalOccurrences} generated`, + }) + } } } finally { await occurrenceWriter.close() @@ 
-649,14 +684,24 @@ const writeLocalityLookupFiles = async ({ localityIds, locationFilePath, geologicalContextFilePath, + reportProgress, }: { localityIds: number[] locationFilePath: string geologicalContextFilePath: string + reportProgress?: DwcOccurrenceExportProgressReporter }): Promise => { const { nowDb } = await import('../utils/db') const locationWriter = await createCsvFileWriter(locationFilePath, LOCATION_HEADERS) const geologicalContextWriter = await createCsvFileWriter(geologicalContextFilePath, GEOLOGICAL_CONTEXT_HEADERS) + let generatedLocalities = 0 + + reportProgress?.({ + stage: 'localities', + generated: generatedLocalities, + total: localityIds.length, + message: `Generating location lookup rows: ${generatedLocalities}/${localityIds.length} generated`, + }) try { for (const ids of chunk(localityIds, LOOKUP_EXPORT_CHUNK_SIZE)) { @@ -670,7 +715,15 @@ const writeLocalityLookupFiles = async ({ const localityForExport = locality as unknown as LocalityForOccurrenceExport await locationWriter.writeRow(mapLocalityToLocationRow(localityForExport)) await geologicalContextWriter.writeRow(mapLocalityToGeologicalContextRow(localityForExport)) + generatedLocalities += 1 } + + reportProgress?.({ + stage: 'localities', + generated: generatedLocalities, + total: localityIds.length, + message: `Generating location lookup rows: ${generatedLocalities}/${localityIds.length} generated`, + }) } } finally { await locationWriter.close() @@ -681,12 +734,22 @@ const writeLocalityLookupFiles = async ({ const writeTaxonLookupFile = async ({ speciesIds, taxonFilePath, + reportProgress, }: { speciesIds: number[] taxonFilePath: string + reportProgress?: DwcOccurrenceExportProgressReporter }): Promise => { const { nowDb } = await import('../utils/db') const taxonWriter = await createCsvFileWriter(taxonFilePath, TAXON_HEADERS) + let generatedTaxa = 0 + + reportProgress?.({ + stage: 'taxa', + generated: generatedTaxa, + total: speciesIds.length, + message: `Generating taxon lookup 
rows: ${generatedTaxa}/${speciesIds.length} generated`, + }) try { for (const ids of chunk(speciesIds, LOOKUP_EXPORT_CHUNK_SIZE)) { @@ -698,14 +761,26 @@ const writeTaxonLookupFile = async ({ for (const species of speciesRows) { await taxonWriter.writeRow(mapSpeciesToTaxonRow(species)) + generatedTaxa += 1 } + + reportProgress?.({ + stage: 'taxa', + generated: generatedTaxa, + total: speciesIds.length, + message: `Generating taxon lookup rows: ${generatedTaxa}/${speciesIds.length} generated`, + }) } } finally { await taxonWriter.close() } } -export const buildDwcOccurrenceArchiveZipStream = async (): Promise => { +export const buildDwcOccurrenceArchiveZipStream = async ({ + reportProgress, +}: { + reportProgress?: DwcOccurrenceExportProgressReporter +} = {}): Promise => { const tempDirectory = await mkdtemp(path.join(tmpdir(), 'now-dwc-occurrences-')) const files = { location: path.join(tempDirectory, 'location.csv'), @@ -719,13 +794,22 @@ export const buildDwcOccurrenceArchiveZipStream = async (): Promise { + if (window.crypto?.randomUUID) return window.crypto.randomUUID() + return `${Date.now()}-${Math.random().toString(36).slice(2)}` +} + export const OccurrenceDwcExportMenuItem = ({ handleClose }: { handleClose: () => void }) => { const [loading, setLoading] = useState(false) const { notify, setMessage: setNotificationMessage } = useNotify() @@ -20,10 +29,40 @@ export const OccurrenceDwcExportMenuItem = ({ handleClose }: { handleClose: () = const fetchZipFile = async () => { setLoading(true) - notify('Generating DwC-A ZIP export, please wait...', 'info', null) + const exportId = createExportId() + let generationProgressTimer: number | undefined + notify('Generating DwC-A ZIP export...', 'info', null) + + const updateGenerationProgress = async () => { + try { + const response = await fetch(`${BACKEND_URL}/occurrence/export/dwc-archive/progress/${exportId}`, fetchOptions) + if (!response.ok) return + + const progress = (await response.json()) as 
OccurrenceExportProgress + setNotificationMessage(progress.message) + } catch { + // The download request owns the final success/failure notification. + } + } + + const stopGenerationProgress = () => { + if (generationProgressTimer !== undefined) { + window.clearInterval(generationProgressTimer) + generationProgressTimer = undefined + } + } try { - const response = await fetch(`${BACKEND_URL}/occurrence/export/dwc-archive`, fetchOptions) + generationProgressTimer = window.setInterval(() => { + void updateGenerationProgress() + }, 1000) + + const response = await fetch( + `${BACKEND_URL}/occurrence/export/dwc-archive?${new URLSearchParams({ exportId })}`, + fetchOptions + ) + stopGenerationProgress() + if (!response.ok) { throw new Error('Server response was not OK.') } @@ -68,6 +107,7 @@ export const OccurrenceDwcExportMenuItem = ({ handleClose }: { handleClose: () = notify('Download finished.') } catch { + stopGenerationProgress() notify('Downloading DwC-A export failed.', 'error') } finally { setLoading(false) From 190859c3b30b5136d7c55c62c6a5aa2eb7fe0af9 Mon Sep 17 00:00:00 2001 From: karilint Date: Wed, 29 Apr 2026 11:19:39 +0300 Subject: [PATCH 21/23] Tolerate invalid locality reference dates --- backend/src/services/locality.ts | 33 +++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/backend/src/services/locality.ts b/backend/src/services/locality.ts index f27c8cded..f0c184bd2 100644 --- a/backend/src/services/locality.ts +++ b/backend/src/services/locality.ts @@ -259,7 +259,31 @@ export const getLocalityDetails = async (id: number, user: User | undefined) => now_lr: { include: { ref_ref: { - include: { + select: { + rid: true, + ref_type_id: true, + journal_id: true, + title_primary: true, + date_primary: true, + volume: true, + issue: true, + start_page: true, + end_page: true, + publisher: true, + pub_place: true, + title_secondary: true, + date_secondary: true, + title_series: true, + issn_isbn: true, + 
ref_abstract: true, + web_url: true, + misc_1: true, + misc_2: true, + gen_notes: true, + printed_language: true, + used_morph: true, + used_now: true, + used_gene: true, ref_authors: true, ref_journal: true, }, @@ -291,6 +315,13 @@ export const getLocalityDetails = async (id: number, user: User | undefined) => ...lau, lau_coordinator: getPersonDisplayName(coordinatorPerson, lau.lau_coordinator), lau_authorizer: getPersonDisplayName(authorizerPerson, lau.lau_authorizer), + now_lr: lau.now_lr.map(lr => ({ + ...lr, + ref_ref: { + ...lr.ref_ref, + exact_date: null, + }, + })), updates, } }) From 172aa16c03809b745afe5c19ca71ab74a90d63bb Mon Sep 17 00:00:00 2001 From: karilint Date: Wed, 29 Apr 2026 11:25:06 +0300 Subject: [PATCH 22/23] Tolerate invalid occurrence reference dates --- backend/src/services/occurrenceService.ts | 46 +++++++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/backend/src/services/occurrenceService.ts b/backend/src/services/occurrenceService.ts index 459894f2b..b45460d4b 100644 --- a/backend/src/services/occurrenceService.ts +++ b/backend/src/services/occurrenceService.ts @@ -119,6 +119,44 @@ type OccurrenceUpdate = { updates: OccurrenceLogRow[] } +const referenceWithoutExactDateSelect = { + rid: true, + ref_type_id: true, + journal_id: true, + title_primary: true, + date_primary: true, + volume: true, + issue: true, + start_page: true, + end_page: true, + publisher: true, + pub_place: true, + title_secondary: true, + date_secondary: true, + title_series: true, + issn_isbn: true, + ref_abstract: true, + web_url: true, + misc_1: true, + misc_2: true, + gen_notes: true, + printed_language: true, + used_morph: true, + used_now: true, + used_gene: true, + ref_authors: true, + ref_journal: true, +} as const + +const addNullExactDateToReferences = (references: T[]) => + references.map(reference => ({ + ...reference, + ref_ref: { + ...reference.ref_ref, + exact_date: null, + }, + })) + const stringifyLogValue = (value: 
unknown) => { if (value === null || value === undefined) return '' if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') return String(value) @@ -193,7 +231,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { now_lr: { include: { ref_ref: { - include: { ref_authors: true, ref_journal: true }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -207,7 +245,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { now_sr: { include: { ref_ref: { - include: { ref_authors: true, ref_journal: true }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -233,7 +271,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { update.lau_coordinator ), occ_comment: update.lau_comment ?? '', - references: update.now_lr as unknown as AnyReference[], + references: addNullExactDateToReferences(update.now_lr) as unknown as AnyReference[], updates: nowLsLogs.filter(logRow => logRow.luid === update.luid), })), ...speciesUpdates.map(update => ({ @@ -247,7 +285,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { update.sau_coordinator ), occ_comment: update.sau_comment ?? 
'', - references: update.now_sr as unknown as AnyReference[], + references: addNullExactDateToReferences(update.now_sr) as unknown as AnyReference[], updates: nowLsLogs.filter(logRow => logRow.suid === update.suid), })), ]) From 647773d1c906b659f5611b3f4092a17833a58e72 Mon Sep 17 00:00:00 2001 From: karilint Date: Wed, 29 Apr 2026 11:38:45 +0300 Subject: [PATCH 23/23] Harden reference date reads --- backend/src/services/locality.ts | 38 ++---------------- backend/src/services/occurrenceService.ts | 43 ++------------------- backend/src/services/reference.ts | 30 +++++++++----- backend/src/services/timeBound.ts | 7 ++-- backend/src/services/timeUnit.ts | 7 ++-- backend/src/services/utils/referenceDate.ts | 39 +++++++++++++++++++ 6 files changed, 72 insertions(+), 92 deletions(-) create mode 100644 backend/src/services/utils/referenceDate.ts diff --git a/backend/src/services/locality.ts b/backend/src/services/locality.ts index f0c184bd2..88f51343d 100644 --- a/backend/src/services/locality.ts +++ b/backend/src/services/locality.ts @@ -20,6 +20,7 @@ import { logDb, nowDb } from '../utils/db' import { validateCollectingMethodValues } from '../utils/validation/collectingMethodValues' import { buildPersonLookupByInitials, getPersonDisplayName, getPersonFromLookup } from './utils/person' import { getReferenceDetails } from './reference' +import { addNullExactDateToReferenceJoins, referenceWithoutExactDateSelect } from './utils/referenceDate' const normalizeNumberField = (value: unknown) => { if (typeof value === 'string') { @@ -259,34 +260,7 @@ export const getLocalityDetails = async (id: number, user: User | undefined) => now_lr: { include: { ref_ref: { - select: { - rid: true, - ref_type_id: true, - journal_id: true, - title_primary: true, - date_primary: true, - volume: true, - issue: true, - start_page: true, - end_page: true, - publisher: true, - pub_place: true, - title_secondary: true, - date_secondary: true, - title_series: true, - issn_isbn: true, - ref_abstract: 
true, - web_url: true, - misc_1: true, - misc_2: true, - gen_notes: true, - printed_language: true, - used_morph: true, - used_now: true, - used_gene: true, - ref_authors: true, - ref_journal: true, - }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -315,13 +289,7 @@ export const getLocalityDetails = async (id: number, user: User | undefined) => ...lau, lau_coordinator: getPersonDisplayName(coordinatorPerson, lau.lau_coordinator), lau_authorizer: getPersonDisplayName(authorizerPerson, lau.lau_authorizer), - now_lr: lau.now_lr.map(lr => ({ - ...lr, - ref_ref: { - ...lr.ref_ref, - exact_date: null, - }, - })), + now_lr: addNullExactDateToReferenceJoins(lau.now_lr), updates, } }) diff --git a/backend/src/services/occurrenceService.ts b/backend/src/services/occurrenceService.ts index b45460d4b..914c5ef5d 100644 --- a/backend/src/services/occurrenceService.ts +++ b/backend/src/services/occurrenceService.ts @@ -4,6 +4,7 @@ import { AccessError } from '../middlewares/authorizer' import { logDb, nowDb } from '../utils/db' import { buildPersonLookupByInitials, getPersonDisplayName, getPersonFromLookup } from './utils/person' import { generateOccurrenceDetailSql } from './queries/crossSearchQuery' +import { addNullExactDateToReferenceJoins, referenceWithoutExactDateSelect } from './utils/referenceDate' const getAllowedLocalities = async (user: User) => { const usersProjects = await nowDb.now_proj_people.findMany({ @@ -119,44 +120,6 @@ type OccurrenceUpdate = { updates: OccurrenceLogRow[] } -const referenceWithoutExactDateSelect = { - rid: true, - ref_type_id: true, - journal_id: true, - title_primary: true, - date_primary: true, - volume: true, - issue: true, - start_page: true, - end_page: true, - publisher: true, - pub_place: true, - title_secondary: true, - date_secondary: true, - title_series: true, - issn_isbn: true, - ref_abstract: true, - web_url: true, - misc_1: true, - misc_2: true, - gen_notes: true, - printed_language: true, - used_morph: true, - 
used_now: true, - used_gene: true, - ref_authors: true, - ref_journal: true, -} as const - -const addNullExactDateToReferences = (references: T[]) => - references.map(reference => ({ - ...reference, - ref_ref: { - ...reference.ref_ref, - exact_date: null, - }, - })) - const stringifyLogValue = (value: unknown) => { if (value === null || value === undefined) return '' if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') return String(value) @@ -271,7 +234,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { update.lau_coordinator ), occ_comment: update.lau_comment ?? '', - references: addNullExactDateToReferences(update.now_lr) as unknown as AnyReference[], + references: addNullExactDateToReferenceJoins(update.now_lr) as unknown as AnyReference[], updates: nowLsLogs.filter(logRow => logRow.luid === update.luid), })), ...speciesUpdates.map(update => ({ @@ -285,7 +248,7 @@ const getOccurrenceUpdates = async (lid: number, speciesId: number) => { update.sau_coordinator ), occ_comment: update.sau_comment ?? 
'', - references: addNullExactDateToReferences(update.now_sr) as unknown as AnyReference[], + references: addNullExactDateToReferenceJoins(update.now_sr) as unknown as AnyReference[], updates: nowLsLogs.filter(logRow => logRow.suid === update.suid), })), ]) diff --git a/backend/src/services/reference.ts b/backend/src/services/reference.ts index 50d91a1ad..f1c9f51c4 100644 --- a/backend/src/services/reference.ts +++ b/backend/src/services/reference.ts @@ -7,6 +7,20 @@ import { } from './referenceValidation' import { Role, User } from '../../../frontend/src/shared/types' import { getIdsOfUsersProjects } from './locality' +import { referenceWithoutExactDateSelect } from './utils/referenceDate' + +type RawReferenceExactDate = { + exact_date: string | null +} + +const normalizeRawExactDate = (value: string | null | undefined): string | null => { + if (!value) return null + const date = value.slice(0, 10) + const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(date) + if (!match) return null + if (match[2] === '00' || match[3] === '00') return null + return date +} export const getAllReferences = async () => { const result = await nowDb.ref_ref.findMany({ @@ -41,22 +55,20 @@ export const getAllReferences = async () => { export const getReferenceDetails = async (id: number, _user?: User) => { const result = await nowDb.ref_ref.findUnique({ where: { rid: id }, - include: { ref_authors: true, ref_journal: true }, + select: referenceWithoutExactDateSelect, }) if (!result) { return null } - //changing exact_date to yyyy-mm-dd string since frontend uses that + we don't want to display ISO string in frontend - if (result && result.exact_date) { - const date = new Date(result.exact_date) - const formattedDate = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')}` + const exactDateRows = await nowDb.$queryRaw` + SELECT CAST(exact_date AS CHAR) AS exact_date + FROM ref_ref + WHERE rid = ${id} + ` - return { ...result, 
exact_date: formattedDate } - } - - return result + return { ...result, exact_date: normalizeRawExactDate(exactDateRows[0]?.exact_date) } } // Fetch localities that have been updated by the given reference id diff --git a/backend/src/services/timeBound.ts b/backend/src/services/timeBound.ts index 5457ee7f1..5d8b2025c 100644 --- a/backend/src/services/timeBound.ts +++ b/backend/src/services/timeBound.ts @@ -6,6 +6,7 @@ import { ValidationObject, referenceValidator } from '../../../frontend/src/shar import { getReferenceDetails } from './reference' import { buildPersonLookupByInitials, getPersonDisplayName, getPersonFromLookup } from './utils/person' import { TabListQueryOptions } from './tabularQuery' +import { addNullExactDateToReferenceJoins, referenceWithoutExactDateSelect } from './utils/referenceDate' export const getAllTimeBounds = async () => { const result = await nowDb.now_tu_bound.findMany({ @@ -30,10 +31,7 @@ export const getTimeBoundDetails = async (id: number) => { now_br: { include: { ref_ref: { - include: { - ref_authors: true, - ref_journal: true, - }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -61,6 +59,7 @@ export const getTimeBoundDetails = async (id: number) => { ...bau, bau_coordinator: getPersonDisplayName(coordinatorPerson, bau.bau_coordinator), bau_authorizer: getPersonDisplayName(authorizerPerson, bau.bau_authorizer), + now_br: addNullExactDateToReferenceJoins(bau.now_br), updates, } }) diff --git a/backend/src/services/timeUnit.ts b/backend/src/services/timeUnit.ts index dca55afaf..f627e1323 100644 --- a/backend/src/services/timeUnit.ts +++ b/backend/src/services/timeUnit.ts @@ -5,6 +5,7 @@ import { validateTimeUnit } from '../../../frontend/src/shared/validators/timeUn import { getReferenceDetails } from './reference' import { buildPersonLookupByInitials, getPersonDisplayName, getPersonFromLookup } from './utils/person' import { TabListQueryOptions } from './tabularQuery' +import { addNullExactDateToReferenceJoins, 
referenceWithoutExactDateSelect } from './utils/referenceDate' export const getAllTimeUnits = async () => { const result = await nowDb.now_time_unit.findMany({ @@ -53,10 +54,7 @@ export const getTimeUnitDetails = async (id: string) => { now_tr: { include: { ref_ref: { - include: { - ref_authors: true, - ref_journal: true, - }, + select: referenceWithoutExactDateSelect, }, }, }, @@ -85,6 +83,7 @@ export const getTimeUnitDetails = async (id: string) => { ...tau, tau_coordinator: getPersonDisplayName(coordinatorPerson, tau.tau_coordinator), tau_authorizer: getPersonDisplayName(authorizerPerson, tau.tau_authorizer), + now_tr: addNullExactDateToReferenceJoins(tau.now_tr), updates, } }) diff --git a/backend/src/services/utils/referenceDate.ts b/backend/src/services/utils/referenceDate.ts new file mode 100644 index 000000000..2fbe4bd8d --- /dev/null +++ b/backend/src/services/utils/referenceDate.ts @@ -0,0 +1,39 @@ +export const referenceWithoutExactDateSelect = { + rid: true, + ref_type_id: true, + journal_id: true, + title_primary: true, + date_primary: true, + volume: true, + issue: true, + start_page: true, + end_page: true, + publisher: true, + pub_place: true, + title_secondary: true, + date_secondary: true, + title_series: true, + issn_isbn: true, + ref_abstract: true, + web_url: true, + misc_1: true, + misc_2: true, + gen_notes: true, + printed_language: true, + used_morph: true, + used_now: true, + used_gene: true, + ref_authors: true, + ref_journal: true, +} as const + +export const addNullExactDateToReference = (reference: T) => ({ + ...reference, + exact_date: null, +}) + +export const addNullExactDateToReferenceJoins = (references: T[]) => + references.map(reference => ({ + ...reference, + ref_ref: addNullExactDateToReference(reference.ref_ref), + }))