From ecebf5aa09ef686c876bcaddc9ed0fc44fee8b80 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 2 Jul 2025 16:26:23 -0400 Subject: [PATCH 1/3] Failed PID logging --- .../source/installation/config.rst | 6 + .../iq/dataverse/DatasetServiceBean.java | 26 ++- .../iq/dataverse/api/AbstractApiBean.java | 14 +- ...FailedPIDResolutionLoggingServiceBean.java | 161 ++++++++++++++++++ .../iq/dataverse/settings/FeatureFlags.java | 12 ++ 5 files changed, 216 insertions(+), 3 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/pidproviders/FailedPIDResolutionLoggingServiceBean.java diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 592988aa693..c56e760e8d7 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3740,6 +3740,12 @@ please find all known feature flags below. Any of these flags can be activated u * - enable-version-note - Turns on the ability to add/view/edit/delete per-dataset-version notes intended to provide :ref:`provenance` information about why the dataset/version was created. - ``Off`` + * - add-local-contexts-permission-check + - Adds a permission check to ensure that the user calling the /api/localcontexts/datasets/{id} API can edit the dataset with that id. This is currently the only use case - see https://github.com/gdcc/dataverse-external-vocab-support/tree/main/packages/local_contexts. The flag adds additional security to stop other uses, but would currently have to be used in conjunction with the api-session-auth feature flag (the security implications of which have not been fully investigated) to still allow adding Local Contexts metadata to a dataset. + - ``Off`` + * - enable-pid-failure-log + - Turns on creation of a monthly log file (``logs/PIDFailures_<yyyy-MM>.log``) showing failed requests for dataset/file PIDs. 
Can be used directly or with scripts at https://github.com/gdcc/dataverse-recipes/tree/main/python/pid_reports to alert admins. + - ``Off`` **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file. diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 303a6d8a5ac..7495f22f368 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -19,7 +19,10 @@ import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; +import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean; +import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -37,6 +40,8 @@ import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; import jakarta.ejb.TransactionAttributeType; +import jakarta.faces.context.FacesContext; +import jakarta.inject.Inject; import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; @@ -44,6 +49,7 @@ import jakarta.persistence.PersistenceContext; import jakarta.persistence.Query; import jakarta.persistence.TypedQuery; +import jakarta.servlet.http.HttpServletRequest; 
import org.apache.commons.lang3.StringUtils; /** @@ -86,6 +92,9 @@ public class DatasetServiceBean implements java.io.Serializable { @EJB SystemConfig systemConfig; + + @Inject + FailedPIDResolutionLoggingServiceBean fprLogService; @EJB GlobusServiceBean globusServiceBean; @@ -94,6 +103,8 @@ public class DatasetServiceBean implements java.io.Serializable { UserNotificationServiceBean userNotificationService; private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + + private static final boolean pidFailureLoggingEnabled = FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled(); @PersistenceContext(unitName = "VDCNet-ejbPU") protected EntityManager em; @@ -298,8 +309,19 @@ public Dataset findByGlobalId(String globalId) { if (retVal != null){ return retVal; } else { - //try to find with alternative PID - return (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset); + // try to find with alternative PID + retVal = (Dataset) dvObjectService.findByAltGlobalId(globalId, DvObject.DType.Dataset); + if (retVal == null && pidFailureLoggingEnabled) { + try { + + HttpServletRequest httpRequest = ((HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest()); + FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(globalId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); + fprLogService.logEntry(entry); + } catch (NullPointerException npe) { + // Do nothing - this is an API call with no FacesContext + } + } + return retVal; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 018657bff4d..89c0b6620e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -2,6 +2,7 @@ import 
edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; + import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; @@ -22,10 +23,13 @@ import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; +import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry; import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean; import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -36,6 +40,7 @@ import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; +import jakarta.inject.Inject; import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.persistence.EntityManager; @@ -230,6 +235,9 @@ String getWrappedMessageWhenJson() { @EJB GuestbookResponseServiceBean gbRespSvc; + @Inject + FailedPIDResolutionLoggingServiceBean fprLogService; + @PersistenceContext(unitName = "VDCNet-ejbPU") protected EntityManager em; @@ -405,7 +413,9 @@ protected Dataset findDatasetOrDie(String id, boolean deep) throws WrappedRespon if (datasetId == null) { datasetId = dvObjSvc.findIdByAltGlobalId(globalId, DvObject.DType.Dataset); } - if (datasetId == null) { + if (datasetId == null && 
FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) { + FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(),httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); + fprLogService.logEntry(entry); throw new WrappedResponse( notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1))))); } @@ -465,6 +475,8 @@ protected DataFile findDataFileOrDie(String id) throws WrappedResponse { } datafile = fileService.findByGlobalId(persistentId); if (datafile == null) { + FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(),httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); + fprLogService.logEntry(entry); throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.datafile.error.dataset.not.found.persistentId", Collections.singletonList(persistentId)))); } return datafile; diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FailedPIDResolutionLoggingServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/FailedPIDResolutionLoggingServiceBean.java new file mode 100644 index 00000000000..364a424f819 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/FailedPIDResolutionLoggingServiceBean.java @@ -0,0 +1,161 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package edu.harvard.iq.dataverse.pidproviders; + +import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.sql.Timestamp; +import java.text.SimpleDateFormat; +import java.util.Date; +import jakarta.enterprise.context.RequestScoped; +import jakarta.inject.Named; + +/** + * + * @author qqmyers + */ + +@Named +@RequestScoped +public class FailedPIDResolutionLoggingServiceBean { + + public static final String LOG_HEADER = "#Fields: pid\trequestURI\tHTTP method\tclient_ip\teventTime\n"; + + + public void logEntry(FailedPIDResolutionEntry entry) { + LoggingUtil.saveLogFileAppendWithHeader(entry.toString(), "../logs", getLogFileName(), LOG_HEADER); + } + + public String getLogFileName() { + return "PIDFailures_" + new SimpleDateFormat("yyyy-MM").format(new Timestamp(new Date().getTime())) + ".log"; + } + + public static class FailedPIDResolutionEntry { + + private String eventTime; + private String clientIp; + private String requestUrl; + private String identifier; + private String method; + + public FailedPIDResolutionEntry() { + + } + + public FailedPIDResolutionEntry(String persistentId, String requestURI, String method, IpAddress sourceAddress) { + try { + setIdentifier(URLEncoder.encode(persistentId, StandardCharsets.UTF_8.toString())); + } catch (UnsupportedEncodingException e) { + // Should never happen + e.printStackTrace(); + } + setRequestUrl(requestURI); + setMethod(method); + setClientIp(sourceAddress.toString()); + setEventTime(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ").format(new Timestamp(new Date().getTime()))); + } + + @Override + public String toString() { + return getIdentifier() + "\t" + + getRequestUrl() + "\t" + + getMethod() + "\t" + + getClientIp() + "\t" + + getEventTime() + "\n"; + } + + /** + * @return the eventTime + */ + 
public String getEventTime() { + if (eventTime == null) { + return "-"; + } + return eventTime; + } + + /** + * @param eventTime + * the eventTime to set + */ + public final void setEventTime(String eventTime) { + this.eventTime = eventTime; + } + + /** + * @return the clientIp + */ + public String getClientIp() { + if (clientIp == null) { + return "-"; + } + return clientIp; + } + + /** + * @param clientIp + * the clientIp to set + */ + public final void setClientIp(String clientIp) { + this.clientIp = clientIp; + } + + /** + * @return the HTTP Method + */ + public String getMethod() { + return method; + } + + /** + * @param method + * - the HTTP Method used + */ + public final void setMethod(String method) { + this.method = method; + } + + /** + * @return the requestUrl + */ + public String getRequestUrl() { + if (requestUrl == null) { + return "-"; + } + return requestUrl; + } + + /** + * @param requestUrl + * the requestUrl to set + */ + public final void setRequestUrl(String requestUrl) { + this.requestUrl = requestUrl; + } + + /** + * @return the identifier + */ + public String getIdentifier() { + if (identifier == null) { + return "-"; + } + return identifier; + } + + /** + * @param identifier + * the identifier to set + */ + public final void setIdentifier(String identifier) { + this.identifier = identifier; + } + + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 27c65ed067c..7b51eb30971 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -166,6 +166,18 @@ public enum FeatureFlags { * @since Dataverse 6.5 */ ADD_LOCAL_CONTEXTS_PERMISSION_CHECK("add-local-contexts-permission-check"), + + /** + * This flag turns on creation of a monthly log file that tracks when requests for + * datasets/files with PIDs fail due to the PIDs not existing. 
This helps in catching + * cases where the DOI of a draft dataset has been cited, etc. + * + * @apiNote Raise flag by setting + * "dataverse.feature.enable-pid-failure-log" + * @since Dataverse 6.8 + */ + ENABLE_PID_FAILURE_LOG("enable-pid-failure-log"), + ; From 4aa12ca7f2df3a89d10906e8e9268cb05ffb8cd2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 4 Sep 2025 17:15:07 -0400 Subject: [PATCH 2/3] release note --- doc/release-notes/11601-pid-fail-tracking.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/11601-pid-fail-tracking.md diff --git a/doc/release-notes/11601-pid-fail-tracking.md b/doc/release-notes/11601-pid-fail-tracking.md new file mode 100644 index 00000000000..8a020c18c8e --- /dev/null +++ b/doc/release-notes/11601-pid-fail-tracking.md @@ -0,0 +1,3 @@ +This version of Dataverse includes a new feature flag - ``dataverse.feature.enable-pid-failure-log``. When set, Dataverse will log failed requests for dataset and file pages via persistentId in monthly log files of the form `PIDFailures_<yyyy-MM>.log`. These potentially indicate when someone has shared a draft PID without publishing or cases where a '.' or other character has been added to the PID, which may be of interest to site administrators. + +The new log files can be used in concert with the pidreport.py script at https://github.com/gdcc/dataverse-recipes/tree/main/python/pid_reports to generate and email monthly PID failure reports. 
\ No newline at end of file From 8eb4f623e7a5d103275b4672b9ea146b6c586a87 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 9 Sep 2025 17:16:29 -0400 Subject: [PATCH 3/3] fix ifs --- .../iq/dataverse/api/AbstractApiBean.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 2eb49bc27a7..46e8263da15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -415,11 +415,14 @@ protected Dataset findDatasetOrDie(String id, boolean deep) throws WrappedRespon if (datasetId == null) { datasetId = dvObjSvc.findIdByAltGlobalId(globalId, DvObject.DType.Dataset); } - if (datasetId == null && FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) { - FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(),httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); - fprLogService.logEntry(entry); + if (datasetId == null) { + if (FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) { + + FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); + fprLogService.logEntry(entry); + } throw new WrappedResponse( - notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1))))); + notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1))))); } } if (deep) { @@ -489,8 +492,11 @@ protected DataFile findDataFileOrDie(String id) throws WrappedResponse { } datafile = fileService.findByGlobalId(persistentId); 
if (datafile == null) { - FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(),httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); - fprLogService.logEntry(entry); + if (FeatureFlags.ENABLE_PID_FAILURE_LOG.enabled()) { + + FailedPIDResolutionLoggingServiceBean.FailedPIDResolutionEntry entry = new FailedPIDResolutionEntry(persistentId, httpRequest.getRequestURI(), httpRequest.getMethod(), new DataverseRequest(null, httpRequest).getSourceAddress()); + fprLogService.logEntry(entry); + } throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.datafile.error.dataset.not.found.persistentId", Collections.singletonList(persistentId)))); } return datafile;