From 95a19d869354d67617924f0e7cc7e998a4d9e65b Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:39:33 -0400 Subject: [PATCH 01/20] feat(S3AccessIT): scrub minio from S3AccessIT test --- .../harvard/iq/dataverse/api/S3AccessIT.java | 43 +++---------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 48a64490796..1d15f87b131 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -46,7 +46,7 @@ import org.junit.jupiter.api.Test; /** - * This test requires LocalStack and Minio to be running. Developers can use our + * This test requires LocalStack to be running. Developers can use our * docker-compose file, which has all the necessary configuration. */ public class S3AccessIT { @@ -55,7 +55,6 @@ public class S3AccessIT { static final String BUCKET_NAME = "mybucket"; static S3Client s3localstack = null; - static S3Client s3minio = null; @BeforeAll public static void setUp() { @@ -71,45 +70,21 @@ public static void setUp() { .region(Region.US_EAST_2) .build(); - String accessKeyMinio = "4cc355_k3y"; - String secretKeyMinio = "s3cr3t_4cc355_k3y"; - s3minio = S3Client.builder() - .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKeyMinio, secretKeyMinio))) - .endpointOverride(URI.create("http://localhost:9000")) - .region(Region.US_EAST_1) - .forcePathStyle(true) - .build(); - // create bucket if it doesn't exist try { s3localstack.headBucket(HeadBucketRequest.builder().bucket(BUCKET_NAME).build()); } catch (NoSuchBucketException ex) { s3localstack.createBucket(CreateBucketRequest.builder().bucket(BUCKET_NAME).build()); } - - try { - s3minio.headBucket(HeadBucketRequest.builder().bucket(BUCKET_NAME).build()); - } catch (NoSuchBucketException ex) { - try { - CreateBucketResponse 
createBucketResponse = s3minio.createBucket(CreateBucketRequest.builder().bucket(BUCKET_NAME).build()); - if (createBucketResponse.sdkHttpResponse().isSuccessful()) { - System.out.println("Bucket created successfully"); - } else { - System.err.println("Failed to create bucket: " + createBucketResponse.sdkHttpResponse().statusCode()); - } - } catch (S3Exception e) { - System.err.println("Error creating bucket: " + e.getMessage()); - } - } } /** - * We're using MinIO for testing non-direct upload. + * We're using Localstack for testing non-direct upload. */ @Test public void testNonDirectUpload() { - String driverId = "minio1"; - String driverLabel = "MinIO"; + String driverId = "localstack1"; + String driverLabel = "LocalStack"; Response createSuperuser = UtilIT.createRandomUser(); createSuperuser.then().assertThat().statusCode(200); @@ -124,7 +99,6 @@ public void testNonDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -191,7 +165,7 @@ public void testNonDirectUpload() { String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse; String s3Object = null; try { - ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -220,7 +194,7 @@ public void testNonDirectUpload() { S3Exception expectedException = null; try { - ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -258,7 +232,6 @@ public void testDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -441,7 +414,7 @@ public void testDirectUpload() { S3Exception expectedException = null; try { - 
ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -476,7 +449,6 @@ public void testDirectUploadDetectStataFile() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -663,7 +635,6 @@ public void testDirectUploadWithFileCountLimit() throws JsonParseException { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } From 8c25335f9ad0bb5ef5aee655c1901d7946cfc4c2 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:42:41 -0400 Subject: [PATCH 02/20] feat(docker-compose-dev): scrub minio from docker-compose-dev.yml --- docker-compose-dev.yml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index b24bf0ed6f6..809c0954e89 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -44,16 +44,6 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default - -Ddataverse.files.minio1.type=s3 - -Ddataverse.files.minio1.label=MinIO - -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 - -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 - -Ddataverse.files.minio1.bucket-name=mybucket - -Ddataverse.files.minio1.path-style-access=true - -Ddataverse.files.minio1.upload-redirect=false - -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE @@ -252,23 +242,6 @@ services: tmpfs: - /localstack:mode=770,size=128M,uid=1000,gid=1000 - dev_minio: - container_name: 
"dev_minio" - hostname: "minio" - image: minio/minio - restart: on-failure - ports: - - "9000:9000" - - "9001:9001" - networks: - - dataverse - volumes: - - ./docker-dev-volumes/minio_storage:/data - environment: - MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y - command: server /data - previewers-provider: container_name: previewers-provider hostname: previewers-provider From 183f728673e6a99732c0ce0bf827e2fa4c5f9f1e Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:43:20 -0400 Subject: [PATCH 03/20] feat(conf/keycloak/docker-compose-dev): scrub minio from conf/keycloak/docker-compose-dev.yml --- conf/keycloak/docker-compose-dev.yml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index 7356161ec47..b12aa6adbb6 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -53,16 +53,6 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default - -Ddataverse.files.minio1.type=s3 - -Ddataverse.files.minio1.label=MinIO - -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 - -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 - -Ddataverse.files.minio1.bucket-name=mybucket - -Ddataverse.files.minio1.path-style-access=true - -Ddataverse.files.minio1.upload-redirect=false - -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE @@ -260,23 +250,6 @@ services: tmpfs: - /localstack:mode=770,size=128M,uid=1000,gid=1000 - dev_minio: - container_name: "dev_minio" - hostname: "minio" - image: minio/minio - restart: on-failure - ports: - - "9000:9000" - - "9001:9001" - 
networks: - - dataverse - volumes: - - ./docker-dev-volumes/minio_storage:/data - environment: - MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y - command: server /data - previewers-provider: container_name: previewers-provider hostname: previewers-provider From 26b1c08eecd5d83bc230545fc5aafb5ed9a19443 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:04:54 -0400 Subject: [PATCH 04/20] feat(docker-compose-dev): add localstack_noredirect storage driver config --- docker-compose-dev.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 809c0954e89..a7dc55e9ec4 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -44,6 +44,16 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.localstack_noredirect.type=s3 + -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect + -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.path-style-access=true + -Ddataverse.files.localstack_noredirect.upload-redirect=false + -Ddataverse.files.localstack_noredirect.download-redirect=false + -Ddataverse.files.localstack_noredirect.access-key=default + -Ddataverse.files.localstack_noredirect.secret-key=default -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE From 7f523777c0cd09fe200244ecfaed7430cb6cccd3 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:05:13 -0400 Subject: [PATCH 05/20] feat(keycloak/docker-compose-dev): add localstack_noredirect storage driver config --- conf/keycloak/docker-compose-dev.yml | 10 ++++++++++ 1 file 
changed, 10 insertions(+) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index b12aa6adbb6..81e3878c7b1 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -53,6 +53,16 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.localstack_noredirect.type=s3 + -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect + -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.path-style-access=true + -Ddataverse.files.localstack_noredirect.upload-redirect=false + -Ddataverse.files.localstack_noredirect.download-redirect=false + -Ddataverse.files.localstack_noredirect.access-key=default + -Ddataverse.files.localstack_noredirect.secret-key=default -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE From 954a6cc3d7079e5892844f38a903216c30e0f238 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:07:13 -0400 Subject: [PATCH 06/20] test(S3AccessIT): use localstack_noredirect driver in testNonDirectUpload Switch testNonDirectUpload from localstack1 (upload-redirect=true, download-redirect=true) to the new localstack_noredirect driver (both redirects disabled), so the test genuinely exercises the non-redirect proxy-through-Dataverse code path. Also replace the plain downloadFile call with downloadFileNoRedirect and assert statusCode(200). This makes the assertion self-documenting: a 303 response would now cause an explicit test failure instead of being silently followed by RestAssured. 
--- .../edu/harvard/iq/dataverse/api/S3AccessIT.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 1d15f87b131..f0834ceebbf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -79,12 +79,16 @@ public static void setUp() { } /** - * We're using Localstack for testing non-direct upload. + * We're using LocalStack (with redirects disabled) for testing non-direct + * upload. Using localstack_noredirect ensures the non-redirect + * (proxy-through-Dataverse) code path is actually exercised. If localstack1 + * (redirect-enabled) were used, RestAssured would silently follow the 303 + * redirect and the proxy path would never be tested. */ @Test public void testNonDirectUpload() { - String driverId = "localstack1"; - String driverLabel = "LocalStack"; + String driverId = "localstack_noredirect"; + String driverLabel = "LocalStackNoRedirect"; Response createSuperuser = UtilIT.createRandomUser(); createSuperuser.then().assertThat().statusCode(200); @@ -99,6 +103,7 @@ public void testNonDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -181,8 +186,11 @@ public void testNonDirectUpload() { fail("Failed to read S3 object content: " + ex.getMessage()); } + // Use downloadFileNoRedirect to verify Dataverse serves the content directly + // (status 200). If the driver were misconfigured with download-redirect=true, + // this would return 303 instead, causing the test to fail explicitly. 
System.out.println("non-direct download..."); - Response downloadFile = UtilIT.downloadFile(Integer.valueOf(fileId), apiToken); + Response downloadFile = UtilIT.downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); downloadFile.then().assertThat().statusCode(200); String contentsOfDownloadedFile = downloadFile.getBody().asString(); From eea0b5f5b66fb63633d94cf295ac30122eeb4c83 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:16:29 -0400 Subject: [PATCH 07/20] test(S3AccessIT): update drivers doc strings to include localstack_noredirect --- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index f0834ceebbf..5beebc2f177 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -240,6 +240,7 @@ public void testDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -457,6 +458,7 @@ public void testDirectUploadDetectStataFile() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -643,6 +645,7 @@ public void testDirectUploadWithFileCountLimit() throws JsonParseException { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } From 2118491899384c689f58770371027b819c97c874 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 14:10:48 -0400 Subject: [PATCH 08/20] fix(S3AccessIT): use distinct bucket name mybucket-noredirect for localstack_noredirect Using the same bucket name as localstack1 would cause a collision in the test environment when 
tasks/localstack_create_bucket.yml runs aws s3 mb on each bucket entry. Use mybucket-noredirect to avoid this. Update driver configs in both docker-compose files and switch S3AccessIT.testNonDirectUpload to use the new BUCKET_NAME_NOREDIRECT constant. --- conf/keycloak/docker-compose-dev.yml | 2 +- docker-compose-dev.yml | 2 +- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index 81e3878c7b1..7e57cd7d83c 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -57,7 +57,7 @@ services: -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 - -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket-noredirect -Ddataverse.files.localstack_noredirect.path-style-access=true -Ddataverse.files.localstack_noredirect.upload-redirect=false -Ddataverse.files.localstack_noredirect.download-redirect=false diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index a7dc55e9ec4..c176597c990 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -48,7 +48,7 @@ services: -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 - -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket-noredirect -Ddataverse.files.localstack_noredirect.path-style-access=true -Ddataverse.files.localstack_noredirect.upload-redirect=false -Ddataverse.files.localstack_noredirect.download-redirect=false diff --git 
a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 5beebc2f177..ed800ede727 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -54,6 +54,7 @@ public class S3AccessIT { private static final Logger logger = Logger.getLogger(S3AccessIT.class.getCanonicalName()); static final String BUCKET_NAME = "mybucket"; + static final String BUCKET_NAME_NOREDIRECT = "mybucket-noredirect"; static S3Client s3localstack = null; @BeforeAll @@ -165,13 +166,13 @@ public void testNonDirectUpload() { String storageIdentifier = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.storageIdentifier"); String keyInDataverse = storageIdentifier.split(":")[2]; - Assertions.assertEquals(driverId + "://" + BUCKET_NAME + ":" + keyInDataverse, storageIdentifier); + Assertions.assertEquals(driverId + "://" + BUCKET_NAME_NOREDIRECT + ":" + keyInDataverse, storageIdentifier); String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse; String s3Object = null; try { ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() - .bucket(BUCKET_NAME) + .bucket(BUCKET_NAME_NOREDIRECT) .key(keyInS3) .build()); // Read the content of the object into a string @@ -203,7 +204,7 @@ public void testNonDirectUpload() { S3Exception expectedException = null; try { ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() - .bucket(BUCKET_NAME) + .bucket(BUCKET_NAME_NOREDIRECT) .key(keyInS3) .build()); // Read the content of the object into a string From 665110eef444cf690fa42aa85f042b0990df11a1 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:30:15 -0400 Subject: [PATCH 09/20] docs(big-data-support): remove MinIO references --- .../source/installation/big-data-support.rst | 9 +-------- 1 file changed, 1 insertion(+), 8 
deletions(-) diff --git a/doc/sphinx-guides/source/installation/big-data-support.rst b/doc/sphinx-guides/source/installation/big-data-support.rst index 45b94f71a9f..411bd6b54d8 100644 --- a/doc/sphinx-guides/source/installation/big-data-support.rst +++ b/doc/sphinx-guides/source/installation/big-data-support.rst @@ -68,7 +68,6 @@ If the bucket allows the wildcard ``*`` but the Dataverse application only allow Detailed information for the most common S3 admin tools around CORS: - `AWS `_ -- `Minio mc `_ - `s3cmd `_ Get Current CORS Policy on Bucket @@ -80,9 +79,6 @@ If you'd like to check the CORS configuration on your bucket before making chang .. group-tab:: AWS CLI :code:`aws s3api get-bucket-cors --bucket ` - .. group-tab:: Minio Client (mc) - :code:`mc cors get /` - Set CORS Policy on Bucket +++++++++++++++++++++++++ @@ -107,9 +103,6 @@ Both JSON and XML format are explained in detail in `AWS Docs ` as follows: - .. literalinclude:: /_static/installation/cors/cors.xml :name: xml-cors :language: xml @@ -124,7 +117,7 @@ Both JSON and XML format are explained in detail in `AWS Docs Date: Fri, 15 May 2026 12:32:43 -0400 Subject: [PATCH 10/20] docs(config): remove MinIO references --- doc/sphinx-guides/source/installation/config.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f2a6fdfa324..2517e635006 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1218,7 +1218,7 @@ You can configure this redirect properly in your cloud environment to generate a Amazon S3 Storage (or Compatible) +++++++++++++++++++++++++++++++++ -The Dataverse Software supports Amazon S3 storage as well as other S3-compatible stores (like Minio, Ceph RADOS S3 Gateway and many more) for files uploaded to your Dataverse installation. 
+The Dataverse Software supports Amazon S3 storage as well as other S3-compatible stores (like Ceph RADOS S3 Gateway and many more) for files uploaded to your Dataverse installation. The Dataverse Software S3 driver supports multi-part upload for large files (over 1 GB by default - see the min-part-size option in the table below to change this). @@ -1264,7 +1264,7 @@ Please make note of the following details: - **Endpoint URL** - consult the documentation of your service on how to find it. - * Example: https://play.minio.io:9000 + * Example: http://localhost.localstack.cloud:4566 - **Region:** Optional, but some services might use it. Consult your service documentation. @@ -1461,11 +1461,6 @@ You may provide the values for these via any `supported MicroProfile Config API Reported Working S3-Compatible Storage ###################################### -`Minio v2018-09-12 `_ - Set ``dataverse.files..path-style-access=true``, as Minio works path-based. Works pretty smooth, easy to setup. - **Can be used for quick testing, too:** just use the example values above. Uses the public (read: unsecure and - possibly slow) https://play.minio.io:9000 service. - `StorJ Object Store `_ StorJ is a distributed object store that can be configured with an S3 gateway. Per the S3 Storage instructions above, you'll first set up the StorJ S3 store by defining the id, type, and label. After following the general installation, set the following configuration to use a StorJ object store: ``dataverse.files..chunked-encoding=false``. 
For step-by-step instructions see https://docs.storj.io/dcs/how-tos/dataverse-integration-guide/ From 630f466d933e6437baa8c179b16bcb7ea930205b Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:33:41 -0400 Subject: [PATCH 11/20] docs(S3AccessIO): remove MinIO references --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..f8eb9f91bdf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in 
S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - // .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result 
deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,21 +610,22 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This implementation - * is problematic, because S3 cannot save an object of an unknown length. This - * effectively nullifies any benefits of streaming; as we cannot start saving - * until we have read the entire stream. One way of solving this would be to - * buffer the entire stream as byte[], in memory, then save it... Which of - * course would be limited by the amount of memory available, and thus would not - * work for streams larger than that. So we have eventually decided to save save - * the stream to a temp file, then save to S3. This is slower, but guaranteed to - * work on any size stream. 
An alternative we may want to consider is to not - * implement this method in the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle attempts - * to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This + * implementation is problematic, because S3 cannot save an object of an + * unknown length. This effectively nullifies any benefits of streaming; as + * we cannot start saving until we have read the entire stream. One way of + * solving this would be to buffer the entire stream as byte[], in memory, + * then save it... Which of course would be limited by the amount of memory + * available, and thus would not work for streams larger than that. So we + * have eventually decided to save the stream to a temp file, then save + * to S3. This is slower, but guaranteed to work on any size stream. An + * alternative we may want to consider is to not implement this method in + * the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle + * attempts to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. 
*/ @Override @@ -759,7 +760,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -910,13 +911,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key and - * bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key + * and bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname and - * protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname + * and protocol" + * * @return Main File Key * @throws IOException */ @@ -979,12 +980,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. - * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the main - * file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the + * main file label. * @return redirect url * @throws IOException. 
*/ @@ -1003,9 +1004,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1270,7 +1271,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1331,8 +1332,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all (e.g. 
by - // Minio) so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all, + // so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1521,12 +1522,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if(responseInputStream!= null && responseInputStream.available()>0) { + if (responseInputStream != null && responseInputStream.available() > 0) { responseInputStream.abort(); } } catch (IOException e) { From 4e2691fd38969fd3baa81bce34f27251a9aa001b Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:34:47 -0400 Subject: [PATCH 12/20] docs(S3AccessIT): remove MinIO references --- .../harvard/iq/dataverse/api/S3AccessIT.java | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index ed800ede727..2597e77474b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -319,18 +319,6 @@ public void testDirectUpload() { InputStream inputStream = new ByteArrayInputStream(contentsOfFile.getBytes(StandardCharsets.UTF_8)); Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); - /* - Direct upload to MinIO is failing with errors like this: - - SignatureDoesNotMatch - The request signature we calculated does not match the signature you provided. Check your key and signing method. 
- 10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - mybucket - /mybucket/10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - 1793915CCC5BC95C - dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8 - - */ uploadFileDirect.then().assertThat().statusCode(200); // TODO: Use MD5 or whatever Dataverse is configured for and @@ -533,18 +521,6 @@ public void testDirectUploadDetectStataFile() { } Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); - /* - Direct upload to MinIO is failing with errors like this: - - SignatureDoesNotMatch - The request signature we calculated does not match the signature you provided. Check your key and signing method. - 10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - mybucket - /mybucket/10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - 1793915CCC5BC95C - dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8 - - */ uploadFileDirect.then().assertThat().statusCode(200); // TODO: Use MD5 or whatever Dataverse is configured for and From ce11a11aafd99c962ba318180f05fc9ad5acfb43 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Tue, 26 May 2026 16:03:53 -0400 Subject: [PATCH 13/20] fix: remove unnecessary whitespace changes --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index f8eb9f91bdf..6d3fe205639 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... 
options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - 
// .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void 
saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,22 +610,21 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This - * implementation is problematic, because S3 cannot save an object of an - * unknown length. This effectively nullifies any benefits of streaming; as - * we cannot start saving until we have read the entire stream. One way of - * solving this would be to buffer the entire stream as byte[], in memory, - * then save it... Which of course would be limited by the amount of memory - * available, and thus would not work for streams larger than that. So we - * have eventually decided to save save the stream to a temp file, then save - * to S3. This is slower, but guaranteed to work on any size stream. An - * alternative we may want to consider is to not implement this method in - * the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle - * attempts to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This implementation + * is problematic, because S3 cannot save an object of an unknown length. This + * effectively nullifies any benefits of streaming; as we cannot start saving + * until we have read the entire stream. One way of solving this would be to + * buffer the entire stream as byte[], in memory, then save it... Which of + * course would be limited by the amount of memory available, and thus would not + * work for streams larger than that. So we have eventually decided to save save + * the stream to a temp file, then save to S3. 
This is slower, but guaranteed to + * work on any size stream. An alternative we may want to consider is to not + * implement this method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. */ @Override @@ -760,7 +759,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -911,13 +910,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key - * and bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key and + * bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname - * and protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname and + * protocol" + * * @return Main File Key * @throws IOException */ @@ -980,12 +979,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. 
- * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the - * main file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the main + * file label. * @return redirect url * @throws IOException. */ @@ -1004,9 +1003,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1271,7 +1270,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1332,8 +1331,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all, - // so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all (e.g. 
by + // Minio) so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1522,12 +1521,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if (responseInputStream != null && responseInputStream.available() > 0) { + if(responseInputStream!= null && responseInputStream.available()>0) { responseInputStream.abort(); } } catch (IOException e) { From 5b9906b639feec3a8fd17d84e433f34f65f5c077 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Tue, 26 May 2026 16:05:29 -0400 Subject: [PATCH 14/20] docs(S3AccessIO): remove MinIO references --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..2594d762000 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... 
options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - 
// .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void 
saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,21 +610,22 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This implementation - * is problematic, because S3 cannot save an object of an unknown length. This - * effectively nullifies any benefits of streaming; as we cannot start saving - * until we have read the entire stream. One way of solving this would be to - * buffer the entire stream as byte[], in memory, then save it... Which of - * course would be limited by the amount of memory available, and thus would not - * work for streams larger than that. So we have eventually decided to save save - * the stream to a temp file, then save to S3. This is slower, but guaranteed to - * work on any size stream. An alternative we may want to consider is to not - * implement this method in the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle attempts - * to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This + * implementation is problematic, because S3 cannot save an object of an + * unknown length. This effectively nullifies any benefits of streaming; as + * we cannot start saving until we have read the entire stream. One way of + * solving this would be to buffer the entire stream as byte[], in memory, + * then save it... Which of course would be limited by the amount of memory + * available, and thus would not work for streams larger than that. So we + * have eventually decided to save save the stream to a temp file, then save + * to S3. 
This is slower, but guaranteed to work on any size stream. An + * alternative we may want to consider is to not implement this method in + * the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle + * attempts to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. */ @Override @@ -759,7 +760,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -910,13 +911,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key and - * bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key + * and bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname and - * protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname + * and protocol" + * * @return Main File Key * @throws IOException */ @@ -979,12 +980,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. 
- * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the main - * file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the + * main file label. * @return redirect url * @throws IOException. */ @@ -1003,9 +1004,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1270,7 +1271,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1331,8 +1332,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all (e.g. 
by - // Minio) so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all + // so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1521,12 +1522,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if(responseInputStream!= null && responseInputStream.available()>0) { + if (responseInputStream != null && responseInputStream.available() > 0) { responseInputStream.abort(); } } catch (IOException e) { From 12fd041b0dee25189412fea6c18c3821891f3146 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Tue, 26 May 2026 16:06:02 -0400 Subject: [PATCH 15/20] feat(dev-start-frd.sh): remove MinIO initialization --- scripts/dev/dev-start-frd.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/dev/dev-start-frd.sh b/scripts/dev/dev-start-frd.sh index d113f677bad..d76bed85770 100755 --- a/scripts/dev/dev-start-frd.sh +++ b/scripts/dev/dev-start-frd.sh @@ -28,7 +28,6 @@ mkdir -p docker-dev-volumes/app/secrets mkdir -p docker-dev-volumes/postgresql/data mkdir -p docker-dev-volumes/solr/data mkdir -p docker-dev-volumes/solr/conf -mkdir -p docker-dev-volumes/minio_storage # Only disable DDL generation if database is already initialized # (on first run, we need create-tables to bootstrap the schema) From 53b4f476ecb6655f2bf00e9b6732765f03ffba32 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Tue, 26 May 2026 16:10:12 -0400 Subject: [PATCH 16/20] feat: remove whitespaces --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 57 insertions(+), 58 deletions(-) 
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 2594d762000..6d3fe205639 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | 
ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - // .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, 
asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,22 +610,21 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This - * implementation is problematic, because S3 cannot save an object of an - * unknown length. This effectively nullifies any benefits of streaming; as - * we cannot start saving until we have read the entire stream. One way of - * solving this would be to buffer the entire stream as byte[], in memory, - * then save it... Which of course would be limited by the amount of memory - * available, and thus would not work for streams larger than that. So we - * have eventually decided to save save the stream to a temp file, then save - * to S3. This is slower, but guaranteed to work on any size stream. An - * alternative we may want to consider is to not implement this method in - * the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle - * attempts to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This implementation + * is problematic, because S3 cannot save an object of an unknown length. 
This + * effectively nullifies any benefits of streaming; as we cannot start saving + * until we have read the entire stream. One way of solving this would be to + * buffer the entire stream as byte[], in memory, then save it... Which of + * course would be limited by the amount of memory available, and thus would not + * work for streams larger than that. So we have eventually decided to save save + * the stream to a temp file, then save to S3. This is slower, but guaranteed to + * work on any size stream. An alternative we may want to consider is to not + * implement this method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. */ @Override @@ -760,7 +759,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -911,13 +910,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key - * and bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key and + * bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. 
Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname - * and protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname and + * protocol" + * * @return Main File Key * @throws IOException */ @@ -980,12 +979,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. - * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the - * main file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the main + * file label. * @return redirect url * @throws IOException. */ @@ -1004,9 +1003,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1271,7 +1270,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1332,8 +1331,8 @@ public void 
removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all - // so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all (e.g. by + // Minio) so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1522,12 +1521,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if (responseInputStream != null && responseInputStream.available() > 0) { + if(responseInputStream!= null && responseInputStream.available()>0) { responseInputStream.abort(); } } catch (IOException e) { From 420101b59ee9049a3d5f3404b1cf290edf730515 Mon Sep 17 00:00:00 2001 From: Ash Manda Date: Tue, 26 May 2026 16:11:45 -0400 Subject: [PATCH 17/20] Clarify comment on S3 tags support Updated comment for clarity regarding S3 tags implementation. 
--- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..74c22f4ce3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1331,8 +1331,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all (e.g. by - // Minio) so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all + // so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing From c564762168d30da3e3acda9c12bff1f35cc564c3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 26 May 2026 22:43:33 +0000 Subject: [PATCH 18/20] Ensure LocalStack no-redirect bucket exists for S3 tests --- conf/localstack/buckets.sh | 1 + .../edu/harvard/iq/dataverse/api/S3AccessIT.java | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/conf/localstack/buckets.sh b/conf/localstack/buckets.sh index fe940d9890d..bd901c19634 100755 --- a/conf/localstack/buckets.sh +++ b/conf/localstack/buckets.sh @@ -1,3 +1,4 @@ #!/usr/bin/env bash # https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack awslocal s3 mb s3://mybucket +awslocal s3 mb s3://mybucket-noredirect diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java 
b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 2597e77474b..7ecc3c7d480 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -71,11 +71,21 @@ public static void setUp() { .region(Region.US_EAST_2) .build(); - // create bucket if it doesn't exist + ensureBucketExists(BUCKET_NAME); + ensureBucketExists(BUCKET_NAME_NOREDIRECT); + } + + private static void ensureBucketExists(String bucketName) { try { - s3localstack.headBucket(HeadBucketRequest.builder().bucket(BUCKET_NAME).build()); + s3localstack.headBucket(HeadBucketRequest.builder().bucket(bucketName).build()); } catch (NoSuchBucketException ex) { - s3localstack.createBucket(CreateBucketRequest.builder().bucket(BUCKET_NAME).build()); + s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); + } catch (S3Exception ex) { + if (ex.statusCode() == 404) { + s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); + } else { + throw ex; + } } } From c319bbc024d6aa4674d2767506a64f0a7745eed0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 26 May 2026 22:44:22 +0000 Subject: [PATCH 19/20] Handle generic S3 404 in S3AccessIT bucket setup --- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 7ecc3c7d480..7363837b8d5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -78,10 +78,8 @@ public static void setUp() { private static void ensureBucketExists(String bucketName) { try { s3localstack.headBucket(HeadBucketRequest.builder().bucket(bucketName).build()); - } catch (NoSuchBucketException ex) { - 
s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); } catch (S3Exception ex) { - if (ex.statusCode() == 404) { + if (ex.statusCode() == 404 || "NoSuchBucket".equals(ex.awsErrorDetails().errorCode())) { s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); } else { throw ex; From ca0b4cf4c8bb37ed035259dff5cbf42f03f6e8f5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 26 May 2026 22:44:55 +0000 Subject: [PATCH 20/20] Null-check S3 error details in bucket existence helper --- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 7363837b8d5..bdc9049e519 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -79,7 +79,8 @@ private static void ensureBucketExists(String bucketName) { try { s3localstack.headBucket(HeadBucketRequest.builder().bucket(bucketName).build()); } catch (S3Exception ex) { - if (ex.statusCode() == 404 || "NoSuchBucket".equals(ex.awsErrorDetails().errorCode())) { + String errorCode = ex.awsErrorDetails() == null ? null : ex.awsErrorDetails().errorCode(); + if (ex.statusCode() == 404 || "NoSuchBucket".equals(errorCode)) { s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); } else { throw ex;