IQSS · jo-pol · May 26, 2026 · May 26, 2026
diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
@@ -3245,7 +3245,7 @@ The fully expanded example above (without environment variables) looks like this
 
 .. code-block:: bash
 
-  curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/:persistentId/files/metadata?:persistentId=doi:10.5072/FK2/J8SJZB" --upload-file file-metadata-update.json
+  curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/:persistentId/files/metadata?persistentId=doi:10.5072/FK2/J8SJZB" --upload-file file-metadata-update.json
 
 The ``file-metadata-update.json`` file should contain a JSON array of objects, each representing a file to be updated. Here's an example structure:
 

diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst
@@ -145,7 +145,7 @@ Beginning with Dataverse Software 5.0, the way a Dataverse installation handles
 - Files with the same checksum can be included in a dataset, even if the files are in the same directory.
 - Files with the same filename can be included in a dataset as long as the files are in different directories.
 - If a user uploads a file to a directory where a file already exists with that directory/filename combination, the Dataverse installation will adjust the file path and names by adding "-1" or "-2" as applicable. This change will be visible in the list of files being uploaded. 
-- If the directory or name of an existing or newly uploaded file is edited in such a way that would create a directory/filename combination that already exists, the Dataverse installation will display an error.
+- If the directory or name of an existing or newly uploaded file is edited in such a way that would create a directory/filename combination that already exists, or that is already in use as the name of a directory, the Dataverse installation will display an error.
 - If a user attempts to replace a file with another file that has the same checksum, an error message will be displayed and the file will not be able to be replaced.
 - If a user attempts to replace a file with a file that has the same checksum as a different file in the dataset, a warning will be displayed.
 

diff --git a/scripts/issues/12407/find_duplicates.py b/scripts/issues/12407/find_duplicates.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+import argparse
+import psycopg2
+from pathlib import Path
+from textwrap import dedent
+
+def read_sql(path: Path) -> str:
+    text = path.read_text(encoding="utf-8")
+    return "\n".join(
+        line for line in text.splitlines() if not line.lstrip().startswith("\\")
+    )
+
+
+def fetch_dv_ids(conn, find_dv_ids_sql: str) -> list[int]:
+    with conn.cursor() as cur:
+        cur.execute(find_dv_ids_sql)
+        rows = cur.fetchall()
+
+    # Query returns dv_id as first selected column in your file.
+    return [int(row[0]) for row in rows]
+
+
+def fetch_dataset_info(conn, datasetversion_id: int):
+    dataset_query = """
+                    SELECT dso.protocol, dso.authority, dso.identifier, dv.versionnumber, dv.minorversionnumber
+                    FROM datasetversion dv
+                             JOIN dvobject dso ON dso.id = dv.dataset_id
+                    WHERE dv.id = %s \
+                    """
+    with conn.cursor() as cur:
+        cur.execute(dataset_query, (datasetversion_id,))
+        return cur.fetchone()
+    return None
+
+
+def run_find_duplicates(conn, find_duplicates_sql: str):
+    last_dv_id = None
+    last_info = ("", "", "", "", "")
+
+    with conn.cursor() as cur:
+        cur.execute(find_duplicates_sql)
+        cols = [d[0] for d in cur.description]
+
+        extra_cols = ["protocol", "authority", "dataset_id", "versionnumber", "minorversionnumber"]
+        print("\t".join(cols + extra_cols))
+
+        for row in cur:
+            dv_id = int(row[0])  # datasetversion_id
+
+            if dv_id != last_dv_id:
+                fetched = fetch_dataset_info(conn, dv_id)
+                last_info = fetched if fetched is not None else ("", "", "", "", "")
+                last_dv_id = dv_id
+
+            print("\t".join("" if v is None else str(v) for v in (tuple(row) + tuple(last_info))))
+
+
+def main():
+    class RawDefaultsFormatter(
+        argparse.ArgumentDefaultsHelpFormatter,
+        argparse.RawDescriptionHelpFormatter,
+    ):
+        pass
+
+    parser = argparse.ArgumentParser(
+        description=dedent("""
+            Execute as owner of dvndb.
+
+            `find_duplicates.sql` is executed for dv_ids returned by `find_dv_ids.sql`.
+            `find_dv_ids.sql` returns the latest version per dataset.
+        """),
+        formatter_class=RawDefaultsFormatter,
+    )
+    parser.add_argument("--min-id", type=int, default=0, help="first dataset-version-id examined by `find_dv_ids.sql`")
+    parser.add_argument("--nr-of-ids", type=int, default=50, help="number of ID's returned by `find_dv_ids.sql`")
+    args = parser.parse_args()
+    conn_kwargs = {"dbname": 'dvndb'}
+
+    script_dir = Path(__file__).resolve().parent
+
+    dup_sql_raw = read_sql(script_dir / "find_duplicates.sql")
+
+    dv_sql = read_sql(script_dir / "find_dv_ids.sql")
+    dv_sql = dv_sql.replace(":min_id", str(args.min_id))
+    dv_sql = dv_sql.replace(":nr_of_ids", str(args.nr_of_ids))
+
+    try:
+        with psycopg2.connect(**conn_kwargs) as conn:
+            dv_ids = fetch_dv_ids(conn, dv_sql)
+
+            if not dv_ids:
+                print("No dv_id values returned by find_dv_ids.sql")
+                return
+
+            ids_csv = ",".join(str(i) for i in dv_ids)
+            print(f"dataset version ids: {ids_csv}")
+            run_find_duplicates(conn, dup_sql_raw.replace(":ids", ids_csv))
+    except psycopg2.OperationalError as e:
+        msg = str(e)
+        if "no password supplied" in msg.lower():
+            parser.print_help()
+            raise SystemExit(2)
+        print(f"Database connection failed: {e}")
+        raise SystemExit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/issues/12407/find_duplicates.sql b/scripts/issues/12407/find_duplicates.sql
@@ -0,0 +1,35 @@
+\set ids 5,7,9
+WITH dir_ancestors AS (  -- each directory of the selected versions, expanded into all of its leading sub-paths
+        SELECT DISTINCT
+            datasetversion_id,
+            array_to_string((string_to_array(path, '/'))[1:n], '/') AS path  -- first n segments, re-joined
+        FROM (
+                 SELECT DISTINCT
+                     datasetversion_id,
+                     NULLIF(BTRIM(directorylabel), '') AS path  -- trimmed label, NULL when blank
+                 FROM filemetadata
+                 WHERE datasetversion_id IN (:ids)
+                   AND NULLIF(BTRIM(directorylabel), '') IS NOT NULL  -- files without a directory add no directories
+             ) dirs
+        CROSS JOIN LATERAL generate_series(  -- n runs from 1 to the number of segments in path
+                1, cardinality(string_to_array(path, '/'))
+            ) AS g(n)
+    ),
+    file_paths AS (  -- full path of every file in the selected versions
+        SELECT DISTINCT
+            datasetversion_id,
+            CASE
+            WHEN NULLIF(BTRIM(directorylabel), '') IS NULL THEN label  -- no directory, so the label alone
+            ELSE NULLIF(BTRIM(directorylabel), '') || '/' || label
+            END AS path
+        FROM filemetadata
+        WHERE datasetversion_id IN (:ids)
+    )
+SELECT datasetversion_id, path
+FROM dir_ancestors
+
+INTERSECT  -- keeps only the paths that exist both as a directory and as a file
+
+SELECT datasetversion_id, path
+FROM file_paths
+ORDER BY datasetversion_id, path;
diff --git a/scripts/issues/12407/find_dv_ids.sql b/scripts/issues/12407/find_dv_ids.sql
@@ -0,0 +1,35 @@
+\set min_id 0
+\set nr_of_ids 50
+
+WITH ranked AS (  -- numbers the versions of each dataset, highest version numbers first
+    SELECT
+        dso.id AS dso_id,
+        dso.protocol,
+        dso.authority,
+        dso.identifier,
+        dv.id AS dv_id,
+        dv.versionnumber,
+        dv.minorversionnumber,
+        ROW_NUMBER() OVER (
+            PARTITION BY dso.id
+            ORDER BY
+                dv.versionnumber DESC,  -- NOTE(review) NULLs sort first with DESC, presumably ranking unnumbered drafts on top, to be confirmed
+                dv.minorversionnumber DESC,
+                dv.id DESC  -- tie-breaker when the version numbers are equal
+        ) AS rn
+    FROM datasetversion dv
+    JOIN dvobject dso ON dso.id = dv.dataset_id
+)
+SELECT
+    dv_id,  -- must stay the first column, find_duplicates.py reads row[0]
+    dso_id,
+    protocol,
+    authority,
+    identifier,
+    versionnumber,
+    minorversionnumber
+FROM ranked
+WHERE rn = 1  -- only the top-ranked version of each dataset
+  AND dv_id >= :min_id
+ORDER BY dv_id
+    LIMIT :nr_of_ids;
diff --git a/scripts/issues/12407/test-apis.py b/scripts/issues/12407/test-apis.py
@@ -0,0 +1,158 @@
+import requests
+from requests_toolbelt.multipart.encoder import MultipartEncoder
+from datetime import datetime
+import json
+
+########################## configuration for a draft dataset without files
+
+dataverse_server = 'https://dev.archaeology.datastations.nl'
+api_key = '5623d6e3-bc94-40a5-8de0-8ebdf9f58cbc'
+persistentId = 'doi:10.5072/DAR/HBGPN5'
+
+####################
+print (' preparation: add file foo/bar  ' + ('-' * 40))
+
+url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+files = {'file': ('bar', ('content2: %s' % datetime.now()))}
+jason_data = {"jsonData": json.dumps({"directoryLabel": "foo"})}# conflicting dir
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+print (r.status_code)
+print (r.json())
+
+####################
+print (' preparation: add file foo.tab/bar  ' + ('-' * 40))
+
+url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+files = {'file': ('bar', ('content2: %s' % datetime.now()))}
+jason_data = {"jsonData": json.dumps({"directoryLabel": "foo.tab"})}# conflicting dir
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+print (r.status_code)
+print (r.json())
+
+####################
+print (' preparation: add file x to have a file to change  ' + ('-' * 40))
+
+###
+url = '%s/api/datasets/:persistentId/add?&persistentId=%s' % (dataverse_server, persistentId)
+unique_content = 'content2: %s' % datetime.now()
+files = {'file': ('x', unique_content)}
+jason_data = {"jsonData": json.dumps({"label": "x"})}
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+print (r.status_code)
+print (r.json())
+
+file_id = r.json()['data']['files'][0]['dataFile']['id']
+
+####################
+print (' file conflicting with existing dir gets sequence number  ' + ('-' * 40))
+
+###
+url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+files = {'file': ('foo', ('content2: %s' % datetime.now()))}
+jason_data = {"jsonData": json.dumps({"label": "foo"})}
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+
+print (r.json())
+print (r.status_code)
+
+####################
+print (' tabular file conflicting with existing dir gets seq nr once converted to .tab ' + ('-' * 40))
+
+url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+files = {'file': ('foo.csv', ('header1,header2\nvalue1,%s' % datetime.now()))}
+jason_data = {"jsonData": json.dumps({"label": "foo.csv"})}
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+print (r.status_code)
+print (r.json())
+
+####################
+print (' files API metadata:  dir foo/bar conflicts with previously created file foo/bar: returns bad-request  ' + ('-' * 40))
+
+### files API https://guides.dataverse.org/en/latest/api/native-api.html#updating-file-metadata
+url = f'{dataverse_server}/api/files/{file_id}/metadata'
+files = {'jsonData': (None, '{"directoryLabel": "foo/bar", "label": "files-api.txt"}  ' + ('-' * 40))}
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, files=files, verify=False)
+
+print(r.status_code)
+print(r.text)
+
+####################
+print ('datasets API update existing file into name conflicting with existing dir: returns bad-request  ' + ('-' * 40))
+
+### datasets API https://guides.dataverse.org/en/latest/api/native-api.html#update-file-metadata
+url = f'{dataverse_server}/api/datasets/:persistentId/files/metadata?key={api_key}&persistentId={persistentId}'
+json_content = [{"dataFileId": file_id, "directoryLabel": "foo/bar", "label": "datasets-api.txt"}]
+headers = {'X-Dataverse-key': api_key, 'Content-Type': 'application/json'}
+r = requests.post(url, headers=headers, json=json_content, verify=False)
+
+print(r.status_code)
+print(r.text)
+
+####################
+print ('datasets API add file conflicting with existing file: gets seq nr  ' + ('-' * 40))
+
+url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+files = {'file': ('fox', ('content2: %s' % datetime.now()))}
+jason_data = {"jsonData": json.dumps({"label": "x"})}
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+
+print (r.json())
+print (r.status_code)
+
+####################
+print ('dataset API add dir conflicting with existing file: returns bad-request  ' + ('-' * 40))
+
+url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+files = {'file': ('foo', ('content2: %s' % datetime.now()))}
+jason_data = {"jsonData": json.dumps({"label": "dir-conflicts-with-file.txt", "directoryLabel": "foo/bar"})}
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+
+print (r.json())
+print (r.status_code)
+
+####################
+print (' datasets API: another file on existing dir is OK  ' + ('-' * 40))
+
+url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+files = {'file': ('beer', ('content2: %s' % datetime.now()))}
+jason_data = {"jsonData": json.dumps({"directoryLabel": "foo"})}# conflicting dir
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+print (r.status_code)
+print (r.json())
+
+####################
+print (' datasets API: a file with different capitalization is OK  ' + ('-' * 40))
+
+url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+files = {'file': ('Beer', ('content2: %s' % datetime.now()))}
+jason_data = {"jsonData": json.dumps({"directoryLabel": "foo"})}# conflicting dir
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, data=jason_data, files=files, verify=False)
+print (r.status_code)
+print (r.json())
+
+####################
+print (' files API replace: dir foo/bar conflicts with previously created file: returns bad-request  ' + ('-' * 40))
+
+url = f'{dataverse_server}/api/files/{file_id}/replace'
+files = {
+    'jsonData': (None, '{"directoryLabel": "foo/bar", "label": "x", "forceReplace":true}  ' + ('-' * 40)),
+    'file': ('foo', ('content2: %s' % datetime.now()))
+}
+r = requests.post(url, headers={'X-Dataverse-key': api_key}, files=files, verify=False)
+
+print(r.status_code)
+print(r.text)
+
+####################
+# not configured on DANS VM? Might also have no added value over previous test.
+#
+# print (' datasets API remote file: file foo conflicts with previously created dir: returns bad-request ????  ' + ('-' * 40))
+#
+# url = '%s/api/datasets/:persistentId/add?persistentId=%s' % (dataverse_server, persistentId)
+# files = {
+#     'jsonData': (None, '{"directoryLabel": "foo/bar", "label": "x", "forceReplace":true, "description":"A remote image.","storageIdentifier":"file://themes/custom/qdr/images/01234567890-012345678901","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","fileName":"testlogo.png","mimeType":"image/png"}  ' + ('-' * 40)),
+# }
+# r = requests.post(url, headers={'X-Dataverse-key': api_key}, files=files, verify=False)
+#
+# print(r.status_code)
+# print(r.text)
diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
@@ -1068,7 +1068,9 @@ public String save() {
         storageSizeStr = null; // Let this re-calculate after the calling save()
         Collection<String> duplicates = IngestUtil.findDuplicateFilenames(workingVersion, newFiles);
         if (!duplicates.isEmpty()) {
-            JH.addMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.message.filesFailure"), BundleUtil.getStringFromBundle("dataset.message.editMetadata.duplicateFilenames", new ArrayList<>(duplicates)));
+            var arguments = List.of(String.join(", ", duplicates));
+            JH.addMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.message.filesFailure"), BundleUtil.getStringFromBundle("dataset.message.editMetadata.duplicateFilenames",
+                arguments));
             return null;
         }
         if (!saveEnabled) {

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -4899,9 +4899,10 @@ public Response updateMultipleFileMetadata(@Context ContainerRequestContext crc,
 
                 List<FileMetadata> fmdListMinusCurrentFile = new ArrayList<>(fileMetadataMapCopy.values());
 
-                if (IngestUtil.conflictsWithExistingFilenames(pathPlusFilename, fmdListMinusCurrentFile)) {
+                var conflictingPart = IngestUtil.findConflictingPathPart(pathPlusFilename, fmdListMinusCurrentFile);
+                if (conflictingPart.isPresent()) {
                     return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.metadata.update.duplicateFile",
-                            Arrays.asList(pathPlusFilename)));
+                            conflictingPart.stream().toList()));
                 }
 
                 // Apply optional params

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
@@ -512,8 +512,10 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa
                     }
                 }
 
-                if (IngestUtil.conflictsWithExistingFilenames(pathPlusFilename, fmdListMinusCurrentFile)) {
-                    return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.metadata.update.duplicateFile", Arrays.asList(pathPlusFilename)));
+                var conflictingPart = IngestUtil.findConflictingPathPart(pathPlusFilename, fmdListMinusCurrentFile);
+                if (conflictingPart.isPresent()) {
+                    return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.metadata.update.duplicateFile",
+                        conflictingPart.stream().toList()));
                 }
 
                 optionalFileParams.addOptionalParams(upFmd);