diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0564c9f7..d68a8cd4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -48,6 +48,7 @@ jobs:
module:
- docling-serve-api
- docling-serve-client
+ - docling-serve-grpc
- docling-testcontainers
- docling-version-tests
name: jvm-build-test-${{ matrix.module }}-java${{ matrix.java }}
diff --git a/.gitignore b/.gitignore
index 03f1fb3d..2db7ec4d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ build
bin/
!**/src/main/**/bin/
!**/src/test/**/bin/
+/.ai/mcp/mcp.json
diff --git a/docling-serve/docling-serve-grpc/build.gradle.kts b/docling-serve/docling-serve-grpc/build.gradle.kts
new file mode 100644
index 00000000..c2fc1ce0
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/build.gradle.kts
@@ -0,0 +1,59 @@
+import com.google.protobuf.gradle.id
+
+// Build script for the docling-serve-grpc module: applies the shared project
+// convention plugins plus the protobuf Gradle plugin, which generates Java
+// message classes and gRPC stubs from this module's .proto sources.
+plugins {
+ id("docling-java-shared")
+ id("docling-lombok")
+ id("docling-release")
+ id("com.google.protobuf") version "0.9.4"
+}
+
+description = "Docling Serve gRPC API"
+
+// grpcVersion is shared by the grpc runtime/test artifacts and the
+// protoc-gen-grpc-java plugin below; protocVersion is shared by protobuf-java
+// and the protoc compiler artifact, keeping generator and runtime aligned.
+val grpcVersion = "1.72.0"
+val protocVersion = "4.29.3"
+val javaxAnnotationVersion = "1.3.2"
+
+dependencies {
+ api(project(":docling-serve-api"))
+ api("io.grpc:grpc-stub:$grpcVersion")
+ api("io.grpc:grpc-protobuf:$grpcVersion")
+ api("com.google.protobuf:protobuf-java:$protocVersion")
+ api(libs.slf4j.api)
+ // NOTE(review): compile-only; presumably needed for the javax.annotation
+ // annotations referenced by grpc-generated code — confirm.
+ compileOnly("javax.annotation:javax.annotation-api:$javaxAnnotationVersion")
+ api(platform(libs.jackson.bom))
+ api(libs.jackson.databind)
+
+ testImplementation("org.mockito:mockito-core:5.17.0")
+ testImplementation("org.mockito:mockito-junit-jupiter:5.17.0")
+ testImplementation("io.grpc:grpc-testing:$grpcVersion")
+ testImplementation("io.grpc:grpc-inprocess:$grpcVersion")
+ testRuntimeOnly(libs.slf4j.simple)
+
+ // Integration test dependencies
+ testImplementation(platform(libs.testcontainers.bom))
+ testImplementation(libs.testcontainers.junit.jupiter)
+ testImplementation(project(":docling-testcontainers"))
+ testImplementation(project(":docling-serve-client"))
+ // NOTE(review): tests also pull the jackson2 BOM/databind in addition to the
+ // main jackson dependency above — presumably required by the client used in
+ // integration tests; confirm.
+ testImplementation(platform(libs.jackson2.bom))
+ testImplementation(libs.jackson2.databind)
+}
+
+protobuf {
+ // Compiler used to generate the protobuf message classes.
+ protoc {
+ artifact = "com.google.protobuf:protoc:$protocVersion"
+ }
+
+ // Registers the grpc-java code generator under the plugin id "grpc".
+ plugins {
+ id("grpc") {
+ artifact = "io.grpc:protoc-gen-grpc-java:$grpcVersion"
+ }
+ }
+
+ // Enables the "grpc" plugin registered above on every proto generation task.
+ generateProtoTasks {
+ all().forEach { task ->
+ task.plugins {
+ id("grpc")
+ }
+ }
+ }
+}
diff --git a/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/AsyncTaskSubmitter.java b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/AsyncTaskSubmitter.java
new file mode 100644
index 00000000..9a9299e2
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/AsyncTaskSubmitter.java
@@ -0,0 +1,14 @@
+package ai.docling.serve.grpc.v1;
+
+import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
+import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
+
+/**
+ * Submits conversion and chunking work as asynchronous tasks.
+ *
+ * <p>Each method hands a request to the backend and returns the
+ * {@link TaskStatusPollResponse} describing the task that was created.
+ */
+interface AsyncTaskSubmitter {
+
+  /**
+   * Submits a document conversion as an asynchronous task.
+   *
+   * @param request the conversion request to submit
+   * @return the status of the submitted task
+   */
+  TaskStatusPollResponse submitConvertSource(ConvertDocumentRequest request);
+
+  /**
+   * Submits a hierarchical chunking job as an asynchronous task.
+   *
+   * @param request the hierarchical chunking request to submit
+   * @return the status of the submitted task
+   */
+  TaskStatusPollResponse submitChunkHierarchicalSource(HierarchicalChunkDocumentRequest request);
+
+  /**
+   * Submits a hybrid chunking job as an asynchronous task.
+   *
+   * @param request the hybrid chunking request to submit
+   * @return the status of the submitted task
+   */
+  TaskStatusPollResponse submitChunkHybridSource(HybridChunkDocumentRequest request);
+}
diff --git a/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/DoclingServeGrpcService.java b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/DoclingServeGrpcService.java
new file mode 100644
index 00000000..2d555224
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/DoclingServeGrpcService.java
@@ -0,0 +1,488 @@
+package ai.docling.serve.grpc.v1;
+
+import ai.docling.serve.api.DoclingServeApi;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
+import ai.docling.serve.api.task.request.TaskStatusPollRequest;
+import ai.docling.serve.api.task.response.TaskStatus;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
+import ai.docling.serve.grpc.v1.mapping.ServeApiMapper;
+import ai.docling.serve.v1.ChunkHierarchicalSourceAsyncRequest;
+import ai.docling.serve.v1.ChunkHierarchicalSourceAsyncResponse;
+import ai.docling.serve.v1.ChunkHierarchicalSourceRequest;
+import ai.docling.serve.v1.ChunkHierarchicalSourceResponse;
+import ai.docling.serve.v1.ChunkHybridSourceAsyncRequest;
+import ai.docling.serve.v1.ChunkHybridSourceAsyncResponse;
+import ai.docling.serve.v1.ChunkHybridSourceRequest;
+import ai.docling.serve.v1.ChunkHybridSourceResponse;
+import ai.docling.serve.v1.ClearConvertersRequest;
+import ai.docling.serve.v1.ClearConvertersResponse;
+import ai.docling.serve.v1.ClearResultsRequest;
+import ai.docling.serve.v1.ClearResultsResponse;
+import ai.docling.serve.v1.ConvertSourceAsyncRequest;
+import ai.docling.serve.v1.ConvertSourceAsyncResponse;
+import ai.docling.serve.v1.ConvertSourceRequest;
+import ai.docling.serve.v1.ConvertSourceResponse;
+import ai.docling.serve.v1.ConvertSourceStreamRequest;
+import ai.docling.serve.v1.ConvertSourceStreamResponse;
+import ai.docling.serve.v1.DoclingServeServiceGrpc;
+import ai.docling.serve.v1.GetChunkResultRequest;
+import ai.docling.serve.v1.GetChunkResultResponse;
+import ai.docling.serve.v1.GetConvertResultRequest;
+import ai.docling.serve.v1.GetConvertResultResponse;
+import ai.docling.serve.v1.HealthRequest;
+import ai.docling.serve.v1.HealthResponse;
+import ai.docling.serve.v1.PollTaskStatusRequest;
+import ai.docling.serve.v1.PollTaskStatusResponse;
+import ai.docling.serve.v1.WatchChunkHierarchicalSourceRequest;
+import ai.docling.serve.v1.WatchChunkHierarchicalSourceResponse;
+import ai.docling.serve.v1.WatchChunkHybridSourceRequest;
+import ai.docling.serve.v1.WatchChunkHybridSourceResponse;
+import ai.docling.serve.v1.WatchConvertSourceRequest;
+import ai.docling.serve.v1.WatchConvertSourceResponse;
+
+import io.grpc.Status;
+import io.grpc.stub.StreamObserver;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.URI;
+import java.time.Duration;
+
+import org.jspecify.annotations.Nullable;
+
+/**
+ * gRPC service implementation for DoclingServe.
+ *
+ * <p>This service wraps the {@link DoclingServeApi} (REST client) and provides
+ * a gRPC interface. All RPCs are implemented as proxies:
+ * proto request → Java API → REST → Java response → proto response.
+ *
+ * <p>Includes Watch RPCs that leverage gRPC server-streaming to internally manage
+ * the poll loop, streaming each status update to the client until completion.
+ * The async and Watch RPCs need an {@link AsyncTaskSubmitter}; when none was
+ * configured they fail with {@code FAILED_PRECONDITION}.
+ *
+ * <p>Error mapping: an exception that already carries a gRPC status is forwarded
+ * unchanged; every other exception is reported as {@code INTERNAL}.
+ *
+ * <p>Adheres to Buf linting by using unique request/response wrappers for every RPC.
+ */
+public class DoclingServeGrpcService extends DoclingServeServiceGrpc.DoclingServeServiceImplBase {
+
+  private static final Logger LOG = LoggerFactory.getLogger(DoclingServeGrpcService.class);
+  private static final Duration DEFAULT_POLL_INTERVAL = Duration.ofSeconds(2);
+  private static final Duration DEFAULT_POLL_TIMEOUT = Duration.ofMinutes(5);
+
+  private final DoclingServeApi api;
+  private final @Nullable AsyncTaskSubmitter asyncSubmitter;
+  private final Duration pollInterval;
+  private final Duration pollTimeout;
+
+  /**
+   * Creates a service without async support, using the default poll settings.
+   *
+   * @param api the API every RPC delegates to
+   */
+  public DoclingServeGrpcService(DoclingServeApi api) {
+    this(api, (AsyncTaskSubmitter) null, DEFAULT_POLL_INTERVAL, DEFAULT_POLL_TIMEOUT);
+  }
+
+  /**
+   * Creates a service without async support and with custom poll settings.
+   *
+   * @param api the API every RPC delegates to
+   * @param pollInterval pause between two status polls of a Watch RPC
+   * @param pollTimeout maximum time a Watch RPC keeps polling
+   */
+  public DoclingServeGrpcService(DoclingServeApi api, Duration pollInterval, Duration pollTimeout) {
+    this(api, (AsyncTaskSubmitter) null, pollInterval, pollTimeout);
+  }
+
+  /**
+   * Creates a service that submits async tasks over HTTP to {@code baseUrl} without an API key.
+   *
+   * @param api the API every RPC delegates to
+   * @param baseUrl base URL handed to the HTTP async submitter
+   */
+  public DoclingServeGrpcService(DoclingServeApi api, URI baseUrl) {
+    this(api, baseUrl, null, DEFAULT_POLL_INTERVAL, DEFAULT_POLL_TIMEOUT);
+  }
+
+  /**
+   * Creates a service that submits async tasks over HTTP to {@code baseUrl}.
+   *
+   * @param api the API every RPC delegates to
+   * @param baseUrl base URL handed to the HTTP async submitter
+   * @param apiKey optional API key handed to the HTTP async submitter
+   */
+  public DoclingServeGrpcService(DoclingServeApi api, URI baseUrl, @Nullable String apiKey) {
+    this(api, baseUrl, apiKey, DEFAULT_POLL_INTERVAL, DEFAULT_POLL_TIMEOUT);
+  }
+
+  /**
+   * Creates a service that submits async tasks over HTTP, with custom poll settings.
+   *
+   * @param api the API every RPC delegates to
+   * @param baseUrl base URL handed to the HTTP async submitter
+   * @param apiKey optional API key handed to the HTTP async submitter
+   * @param pollInterval pause between two status polls of a Watch RPC
+   * @param pollTimeout maximum time a Watch RPC keeps polling
+   */
+  public DoclingServeGrpcService(
+      DoclingServeApi api,
+      URI baseUrl,
+      @Nullable String apiKey,
+      Duration pollInterval,
+      Duration pollTimeout) {
+    this(api, new HttpAsyncTaskSubmitter(baseUrl, apiKey), pollInterval, pollTimeout);
+  }
+
+  /**
+   * Creates a service with an explicit (possibly absent) async submitter.
+   *
+   * @param api the API every RPC delegates to
+   * @param asyncSubmitter submitter used by the async and Watch RPCs, or {@code null} to disable them
+   * @param pollInterval pause between two status polls of a Watch RPC
+   * @param pollTimeout maximum time a Watch RPC keeps polling
+   * @throws NullPointerException if {@code api}, {@code pollInterval} or {@code pollTimeout} is null
+   */
+  public DoclingServeGrpcService(
+      DoclingServeApi api,
+      @Nullable AsyncTaskSubmitter asyncSubmitter,
+      Duration pollInterval,
+      Duration pollTimeout) {
+    // Fail at construction time instead of on the first RPC that touches the field.
+    this.api = java.util.Objects.requireNonNull(api, "api");
+    this.asyncSubmitter = asyncSubmitter;
+    this.pollInterval = java.util.Objects.requireNonNull(pollInterval, "pollInterval");
+    this.pollTimeout = java.util.Objects.requireNonNull(pollTimeout, "pollTimeout");
+  }
+
+  // ==================== Health ====================
+
+  @Override
+  public void health(HealthRequest request, StreamObserver<HealthResponse> responseObserver) {
+    try {
+      var javaResponse = api.health();
+      responseObserver.onNext(ServeApiMapper.toProto(javaResponse));
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "Health check failed", e);
+    }
+  }
+
+  // ==================== Convert ====================
+
+  @Override
+  public void convertSource(
+      ConvertSourceRequest request,
+      StreamObserver<ConvertSourceResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      ConvertDocumentResponse javaResponse = api.convertSource(javaRequest);
+      responseObserver.onNext(ConvertSourceResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ConvertSource failed", e);
+    }
+  }
+
+  @Override
+  public void convertSourceAsync(
+      ConvertSourceAsyncRequest request,
+      StreamObserver<ConvertSourceAsyncResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      var javaResponse = requireAsyncSubmitter("ConvertSourceAsync").submitConvertSource(javaRequest);
+      responseObserver.onNext(ConvertSourceAsyncResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ConvertSourceAsync failed", e);
+    }
+  }
+
+  @Override
+  public void convertSourceStream(
+      ConvertSourceStreamRequest request,
+      StreamObserver<ConvertSourceStreamResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      // NOTE for Demo: This is currently a "logical" stream — the whole result is
+      // produced by one blocking call and emitted as a single message.
+      ConvertDocumentResponse javaResponse = api.convertSource(javaRequest);
+      responseObserver.onNext(ConvertSourceStreamResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ConvertSourceStream failed", e);
+    }
+  }
+
+  // ==================== Chunk ====================
+
+  @Override
+  public void chunkHierarchicalSource(
+      ChunkHierarchicalSourceRequest request,
+      StreamObserver<ChunkHierarchicalSourceResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      ChunkDocumentResponse javaResponse = api.chunkSourceWithHierarchicalChunker(javaRequest);
+      responseObserver.onNext(ChunkHierarchicalSourceResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ChunkHierarchicalSource failed", e);
+    }
+  }
+
+  @Override
+  public void chunkHybridSource(
+      ChunkHybridSourceRequest request,
+      StreamObserver<ChunkHybridSourceResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      ChunkDocumentResponse javaResponse = api.chunkSourceWithHybridChunker(javaRequest);
+      responseObserver.onNext(ChunkHybridSourceResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ChunkHybridSource failed", e);
+    }
+  }
+
+  @Override
+  public void chunkHierarchicalSourceAsync(
+      ChunkHierarchicalSourceAsyncRequest request,
+      StreamObserver<ChunkHierarchicalSourceAsyncResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      var javaResponse = requireAsyncSubmitter("ChunkHierarchicalSourceAsync")
+          .submitChunkHierarchicalSource(javaRequest);
+      responseObserver.onNext(ChunkHierarchicalSourceAsyncResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ChunkHierarchicalSourceAsync failed", e);
+    }
+  }
+
+  @Override
+  public void chunkHybridSourceAsync(
+      ChunkHybridSourceAsyncRequest request,
+      StreamObserver<ChunkHybridSourceAsyncResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      var javaResponse = requireAsyncSubmitter("ChunkHybridSourceAsync")
+          .submitChunkHybridSource(javaRequest);
+      responseObserver.onNext(ChunkHybridSourceAsyncResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ChunkHybridSourceAsync failed", e);
+    }
+  }
+
+  // ==================== Task ====================
+
+  @Override
+  public void pollTaskStatus(
+      PollTaskStatusRequest request,
+      StreamObserver<PollTaskStatusResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      TaskStatusPollResponse javaResponse = api.pollTaskStatus(javaRequest);
+      responseObserver.onNext(PollTaskStatusResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "PollTaskStatus failed", e);
+    }
+  }
+
+  @Override
+  public void getConvertResult(
+      GetConvertResultRequest request,
+      StreamObserver<GetConvertResultResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      ConvertDocumentResponse javaResponse = api.convertTaskResult(javaRequest);
+      responseObserver.onNext(GetConvertResultResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "GetConvertResult failed", e);
+    }
+  }
+
+  @Override
+  public void getChunkResult(
+      GetChunkResultRequest request,
+      StreamObserver<GetChunkResultResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      ChunkDocumentResponse javaResponse = api.chunkTaskResult(javaRequest);
+      responseObserver.onNext(GetChunkResultResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "GetChunkResult failed", e);
+    }
+  }
+
+  // ==================== Watch (streaming async) ====================
+
+  @Override
+  public void watchConvertSource(
+      WatchConvertSourceRequest request,
+      StreamObserver<WatchConvertSourceResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      var initialStatus = requireAsyncSubmitter("WatchConvertSource").submitConvertSource(javaRequest);
+      pollAndStream(initialStatus, responseObserver, "WatchConvertSource", status ->
+          WatchConvertSourceResponse.newBuilder().setResponse(ServeApiMapper.toProto(status)).build());
+    } catch (Exception e) {
+      fail(responseObserver, "WatchConvertSource failed", e);
+    }
+  }
+
+  @Override
+  public void watchChunkHierarchicalSource(
+      WatchChunkHierarchicalSourceRequest request,
+      StreamObserver<WatchChunkHierarchicalSourceResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      var initialStatus = requireAsyncSubmitter("WatchChunkHierarchicalSource")
+          .submitChunkHierarchicalSource(javaRequest);
+      pollAndStream(initialStatus, responseObserver, "WatchChunkHierarchicalSource", status ->
+          WatchChunkHierarchicalSourceResponse.newBuilder().setResponse(ServeApiMapper.toProto(status)).build());
+    } catch (Exception e) {
+      fail(responseObserver, "WatchChunkHierarchicalSource failed", e);
+    }
+  }
+
+  @Override
+  public void watchChunkHybridSource(
+      WatchChunkHybridSourceRequest request,
+      StreamObserver<WatchChunkHybridSourceResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request.getRequest());
+      var initialStatus = requireAsyncSubmitter("WatchChunkHybridSource")
+          .submitChunkHybridSource(javaRequest);
+      pollAndStream(initialStatus, responseObserver, "WatchChunkHybridSource", status ->
+          WatchChunkHybridSourceResponse.newBuilder().setResponse(ServeApiMapper.toProto(status)).build());
+    } catch (Exception e) {
+      fail(responseObserver, "WatchChunkHybridSource failed", e);
+    }
+  }
+
+  /**
+   * Streams the initial task status, then polls the task until it reaches a terminal
+   * status, the poll timeout elapses, the client cancels, or polling fails.
+   *
+   * @param initialStatus status returned when the task was submitted
+   * @param responseObserver stream that receives one message per observed status
+   * @param rpcName RPC name used in log messages
+   * @param mapper converts a polled status into the RPC-specific response message
+   * @param <T> the RPC-specific response message type
+   */
+  private <T> void pollAndStream(
+      TaskStatusPollResponse initialStatus,
+      StreamObserver<T> responseObserver,
+      String rpcName,
+      java.util.function.Function<TaskStatusPollResponse, T> mapper) {
+
+    if (io.grpc.Context.current().isCancelled()) {
+      LOG.info("{}: client cancelled the request early", rpcName);
+      return;
+    }
+
+    responseObserver.onNext(mapper.apply(initialStatus));
+    LOG.info("{}: task {} submitted with status {}",
+        rpcName, initialStatus.getTaskId(), initialStatus.getTaskStatus());
+
+    if (isTerminal(initialStatus.getTaskStatus())) {
+      responseObserver.onCompleted();
+      return;
+    }
+
+    var taskId = initialStatus.getTaskId();
+    var pollRequest = TaskStatusPollRequest.builder()
+        .taskId(taskId)
+        .build();
+
+    // System.nanoTime() is monotonic, so the deadline is immune to wall-clock adjustments.
+    final long timeoutNanos = pollTimeout.toNanos();
+    final long startNanos = System.nanoTime();
+
+    while (true) {
+      long remainingNanos = timeoutNanos - (System.nanoTime() - startNanos);
+      if (remainingNanos <= 0) {
+        break;
+      }
+
+      if (io.grpc.Context.current().isCancelled()) {
+        LOG.info("{}: client cancelled the request", rpcName);
+        return;
+      }
+
+      try {
+        // Never sleep past the deadline: cap the pause at the time that is left.
+        Thread.sleep(Math.min(pollInterval.toMillis(), Duration.ofNanos(remainingNanos).toMillis()));
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        responseObserver.onError(
+            Status.CANCELLED.withDescription("Polling interrupted").asRuntimeException());
+        return;
+      }
+
+      TaskStatusPollResponse status;
+      try {
+        status = api.pollTaskStatus(pollRequest);
+      } catch (Exception e) {
+        LOG.error("{}: polling failed for task {}", rpcName, taskId, e);
+        responseObserver.onError(toStatusException(e));
+        return;
+      }
+
+      responseObserver.onNext(mapper.apply(status));
+      LOG.debug("{}: task {} status: {}", rpcName, taskId, status.getTaskStatus());
+
+      if (isTerminal(status.getTaskStatus())) {
+        responseObserver.onCompleted();
+        return;
+      }
+    }
+
+    LOG.warn("{}: task {} timed out after {}", rpcName, taskId, pollTimeout);
+    responseObserver.onError(
+        Status.DEADLINE_EXCEEDED
+            .withDescription("Task %s did not complete within %s".formatted(taskId, pollTimeout))
+            .asRuntimeException());
+  }
+
+  /** Returns whether {@code status} is {@code SUCCESS} or {@code FAILURE}, i.e. polling can stop. */
+  private static boolean isTerminal(TaskStatus status) {
+    return status == TaskStatus.SUCCESS || status == TaskStatus.FAILURE;
+  }
+
+  /**
+   * Returns the configured async submitter.
+   *
+   * @param rpcName RPC name used in the error description
+   * @return the non-null async submitter
+   * @throws io.grpc.StatusRuntimeException with {@code FAILED_PRECONDITION} if none is configured
+   */
+  private AsyncTaskSubmitter requireAsyncSubmitter(String rpcName) {
+    if (this.asyncSubmitter == null) {
+      throw Status.FAILED_PRECONDITION
+          .withDescription("%s requires an async submitter (configure DoclingServeGrpcService with a base URL)".formatted(rpcName))
+          .asRuntimeException();
+    }
+    return this.asyncSubmitter;
+  }
+
+  /**
+   * Logs a failed RPC and terminates its stream with the matching gRPC error.
+   *
+   * @param responseObserver stream to terminate
+   * @param logMessage message written to the error log
+   * @param e the failure
+   */
+  private static void fail(StreamObserver<?> responseObserver, String logMessage, Exception e) {
+    LOG.error(logMessage, e);
+    responseObserver.onError(toStatusException(e));
+  }
+
+  /**
+   * Converts a failure into the exception reported to the gRPC client.
+   *
+   * <p>An exception that already carries a gRPC status (such as the
+   * {@code FAILED_PRECONDITION} raised by {@link #requireAsyncSubmitter}) is returned
+   * as-is so its status code is not masked; anything else becomes {@code INTERNAL}.
+   */
+  private static io.grpc.StatusRuntimeException toStatusException(Exception e) {
+    if (e instanceof io.grpc.StatusRuntimeException) {
+      return (io.grpc.StatusRuntimeException) e;
+    }
+    return Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException();
+  }
+
+  // ==================== Clear ====================
+
+  @Override
+  public void clearConverters(
+      ClearConvertersRequest request,
+      StreamObserver<ClearConvertersResponse> responseObserver) {
+    try {
+      // Fully qualified: the Java API request shares its simple name with the proto message.
+      var javaRequest = ai.docling.serve.api.clear.request.ClearConvertersRequest.builder().build();
+      var javaResponse = api.clearConverters(javaRequest);
+      responseObserver.onNext(ClearConvertersResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ClearConverters failed", e);
+    }
+  }
+
+  @Override
+  public void clearResults(
+      ClearResultsRequest request,
+      StreamObserver<ClearResultsResponse> responseObserver) {
+    try {
+      var javaRequest = ServeApiMapper.toJava(request);
+      var javaResponse = api.clearResults(javaRequest);
+      responseObserver.onNext(ClearResultsResponse.newBuilder()
+          .setResponse(ServeApiMapper.toProto(javaResponse))
+          .build());
+      responseObserver.onCompleted();
+    } catch (Exception e) {
+      fail(responseObserver, "ClearResults failed", e);
+    }
+  }
+}
diff --git a/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/HttpAsyncTaskSubmitter.java b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/HttpAsyncTaskSubmitter.java
new file mode 100644
index 00000000..4c914e0f
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/HttpAsyncTaskSubmitter.java
@@ -0,0 +1,92 @@
+package ai.docling.serve.grpc.v1;
+
+import tools.jackson.core.JacksonException;
+import tools.jackson.databind.DeserializationFeature;
+import tools.jackson.databind.json.JsonMapper;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.nio.charset.StandardCharsets;
+import java.util.Objects;
+
+import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
+import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
+
+/**
+ * {@link AsyncTaskSubmitter} that serializes each request to JSON, POSTs it to the
+ * matching async endpoint below the configured base URL, and parses the JSON reply
+ * into a {@link TaskStatusPollResponse}.
+ *
+ * <p>When a non-blank API key is configured it is sent in the {@code X-Api-Key} header.
+ */
+final class HttpAsyncTaskSubmitter implements AsyncTaskSubmitter {
+  private static final String API_KEY_HEADER_NAME = "X-Api-Key";
+
+  private final URI baseUrl;
+  /** Optional API key; {@code null} or blank means no key header is sent. */
+  private final String apiKey;
+  private final HttpClient httpClient;
+  private final JsonMapper jsonMapper;
+
+  /**
+   * @param baseUrl base URL of the server; a trailing slash is appended when missing
+   * @param apiKey optional API key, may be {@code null}
+   * @throws NullPointerException if {@code baseUrl} is null
+   */
+  HttpAsyncTaskSubmitter(URI baseUrl, String apiKey) {
+    Objects.requireNonNull(baseUrl, "baseUrl");
+    // A trailing slash makes the last path segment part of the base when resolving.
+    this.baseUrl = baseUrl.toString().endsWith("/") ? baseUrl : URI.create(baseUrl + "/");
+    this.apiKey = apiKey;
+    this.httpClient = HttpClient.newHttpClient();
+    this.jsonMapper = JsonMapper.builder()
+        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
+        .build();
+  }
+
+  @Override
+  public TaskStatusPollResponse submitConvertSource(ConvertDocumentRequest request) {
+    return submit("/v1/convert/source/async", request);
+  }
+
+  @Override
+  public TaskStatusPollResponse submitChunkHierarchicalSource(HierarchicalChunkDocumentRequest request) {
+    return submit("/v1/chunk/hierarchical/source/async", request);
+  }
+
+  @Override
+  public TaskStatusPollResponse submitChunkHybridSource(HybridChunkDocumentRequest request) {
+    return submit("/v1/chunk/hybrid/source/async", request);
+  }
+
+  /**
+   * POSTs {@code request} as JSON to {@code path} below the base URL.
+   *
+   * @param path endpoint path, with or without a leading slash
+   * @param request payload object to serialize
+   * @return the parsed task status returned by the server
+   * @throws RuntimeException if serialization, the HTTP exchange, or response parsing
+   *     fails, or if the server answers with a non-2xx status
+   */
+  private TaskStatusPollResponse submit(String path, Object request) {
+    String payload;
+    try {
+      payload = this.jsonMapper.writeValueAsString(request);
+    } catch (JacksonException e) {
+      throw new RuntimeException("Failed to serialize async request payload", e);
+    }
+
+    // URI.resolve treats a reference starting with "/" as root-relative and would drop
+    // any path prefix of the base URL (e.g. "http://host/docling/"), so resolve the
+    // endpoint as a relative reference instead.
+    String relativePath = path.startsWith("/") ? path.substring(1) : path;
+    HttpRequest.Builder requestBuilder = HttpRequest.newBuilder(this.baseUrl.resolve(relativePath))
+        .header("Content-Type", "application/json")
+        .POST(HttpRequest.BodyPublishers.ofString(payload, StandardCharsets.UTF_8));
+
+    if (this.apiKey != null && !this.apiKey.isBlank()) {
+      requestBuilder.header(API_KEY_HEADER_NAME, this.apiKey);
+    }
+
+    HttpResponse<String> response;
+    try {
+      response = this.httpClient.send(
+          requestBuilder.build(),
+          HttpResponse.BodyHandlers.ofString(StandardCharsets.UTF_8));
+    } catch (InterruptedException e) {
+      // Restore the interrupt flag so callers further up can observe the interruption.
+      Thread.currentThread().interrupt();
+      throw new RuntimeException("Async submit interrupted", e);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to submit async request", e);
+    }
+
+    if (response.statusCode() < 200 || response.statusCode() >= 300) {
+      throw new RuntimeException("Async submit failed with status %s: %s"
+          .formatted(response.statusCode(), response.body()));
+    }
+
+    try {
+      return this.jsonMapper.readValue(response.body(), TaskStatusPollResponse.class);
+    } catch (JacksonException e) {
+      throw new RuntimeException("Failed to parse async submit response", e);
+    }
+  }
+}
diff --git a/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/DoclingDocumentMapper.java b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/DoclingDocumentMapper.java
new file mode 100644
index 00000000..a0a6987e
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/DoclingDocumentMapper.java
@@ -0,0 +1,915 @@
+package ai.docling.serve.grpc.v1.mapping;
+
+import ai.docling.core.DoclingDocument;
+import ai.docling.core.v1.BaseTextItem;
+import ai.docling.core.v1.BoundingBox;
+import ai.docling.core.v1.CodeItem;
+import ai.docling.core.v1.ContentLayer;
+import ai.docling.core.v1.DescriptionMetaField;
+import ai.docling.core.v1.DocItemLabel;
+import ai.docling.core.v1.DocumentOrigin;
+import ai.docling.core.v1.FloatingMeta;
+import ai.docling.core.v1.FormItem;
+import ai.docling.core.v1.Formatting;
+import ai.docling.core.v1.FormulaItem;
+import ai.docling.core.v1.GraphCell;
+import ai.docling.core.v1.GraphCellLabel;
+import ai.docling.core.v1.GraphData;
+import ai.docling.core.v1.GraphLink;
+import ai.docling.core.v1.GraphLinkLabel;
+import ai.docling.core.v1.GroupItem;
+import ai.docling.core.v1.GroupLabel;
+import ai.docling.core.v1.ImageRef;
+import ai.docling.core.v1.KeyValueItem;
+import ai.docling.core.v1.ListItem;
+import ai.docling.core.v1.MoleculeMetaField;
+import ai.docling.core.v1.PageItem;
+import ai.docling.core.v1.PictureClassificationMetaField;
+import ai.docling.core.v1.PictureClassificationPrediction;
+import ai.docling.core.v1.PictureItem;
+import ai.docling.core.v1.PictureMeta;
+import ai.docling.core.v1.ProvenanceItem;
+import ai.docling.core.v1.RefItem;
+import ai.docling.core.v1.Script;
+import ai.docling.core.v1.SectionHeaderItem;
+import ai.docling.core.v1.Size;
+import ai.docling.core.v1.SummaryMetaField;
+import ai.docling.core.v1.TableCell;
+import ai.docling.core.v1.TableData;
+import ai.docling.core.v1.TableItem;
+import ai.docling.core.v1.TableRow;
+import ai.docling.core.v1.TabularChartMetaField;
+import ai.docling.core.v1.TextItem;
+import ai.docling.core.v1.TextItemBase;
+import ai.docling.core.v1.TitleItem;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Maps ai.docling.core.DoclingDocument (Java) to ai.docling.core.v1.DoclingDocument (Proto).
+ *
+ * This is a proper field-by-field mapper with NO JSON.
+ * Each field is explicitly mapped to maintain strong typing throughout the gRPC stack.
+ */
+@SuppressWarnings("DataFlowIssue")
+public class DoclingDocumentMapper {
+
+ private static final Logger LOG = LoggerFactory.getLogger(DoclingDocumentMapper.class);
+
+ /**
+  * Converts a Java {@code DoclingDocument} into the equivalent proto message.
+  * Scalar fields are copied only when set; every collection and the page map are
+  * converted element by element.
+  *
+  * @param javaDoc the Java document to convert; may be null
+  * @return the proto document, or the proto default instance when {@code javaDoc} is null
+  */
+ public static ai.docling.core.v1.DoclingDocument map(DoclingDocument javaDoc) {
+     if (javaDoc == null) {
+         return ai.docling.core.v1.DoclingDocument.getDefaultInstance();
+     }
+
+     var schemaName = javaDoc.getSchemaName();
+     var version = javaDoc.getVersion();
+     var name = javaDoc.getName();
+     LOG.debug("Mapping DoclingDocument field-by-field: schema={}, version={}, name={}",
+         schemaName, version, name);
+
+     var proto = ai.docling.core.v1.DoclingDocument.newBuilder();
+
+     // Scalar and single-message fields.
+     if (schemaName != null) {
+         proto.setSchemaName(schemaName);
+     }
+     if (version != null) {
+         proto.setVersion(version);
+     }
+     if (name != null) {
+         proto.setName(name);
+     }
+     var origin = javaDoc.getOrigin();
+     if (origin != null) {
+         proto.setOrigin(mapDocumentOrigin(origin));
+     }
+     var body = javaDoc.getBody();
+     if (body != null) {
+         proto.setBody(mapGroupItem(body));
+     }
+
+     // Repeated fields.
+     var groups = javaDoc.getGroups();
+     if (groups != null) {
+         for (var group : groups) {
+             proto.addGroups(mapGroupItem(group));
+         }
+     }
+     var texts = javaDoc.getTexts();
+     if (texts != null) {
+         for (var text : texts) {
+             proto.addTexts(mapBaseTextItem(text));
+         }
+     }
+     var pictures = javaDoc.getPictures();
+     if (pictures != null) {
+         for (var picture : pictures) {
+             proto.addPictures(mapPictureItem(picture));
+         }
+     }
+     var tables = javaDoc.getTables();
+     if (tables != null) {
+         for (var table : tables) {
+             proto.addTables(mapTableItem(table));
+         }
+     }
+     var keyValueItems = javaDoc.getKeyValueItems();
+     if (keyValueItems != null) {
+         for (var kv : keyValueItems) {
+             proto.addKeyValueItems(mapKeyValueItem(kv));
+         }
+     }
+     var formItems = javaDoc.getFormItems();
+     if (formItems != null) {
+         for (var form : formItems) {
+             proto.addFormItems(mapFormItem(form));
+         }
+     }
+
+     // Page map, keyed exactly as in the Java document.
+     var pages = javaDoc.getPages();
+     if (pages != null) {
+         for (var entry : pages.entrySet()) {
+             proto.putPages(entry.getKey(), mapPageItem(entry.getValue()));
+         }
+     }
+
+     return proto.build();
+ }
+
+ /**
+  * Copies the set fields of a Java document origin into a proto {@code DocumentOrigin}.
+  * The binary hash is transferred via its {@code toString()} form.
+  *
+  * @param javaOrigin the origin to convert; dereferenced without a null check
+  * @return the proto origin
+  */
+ private static DocumentOrigin mapDocumentOrigin(DoclingDocument.DocumentOrigin javaOrigin) {
+     DocumentOrigin.Builder proto = DocumentOrigin.newBuilder();
+
+     var mimetype = javaOrigin.getMimetype();
+     if (mimetype != null) {
+         proto.setMimetype(mimetype);
+     }
+     var binaryHash = javaOrigin.getBinaryHash();
+     if (binaryHash != null) {
+         proto.setBinaryHash(binaryHash.toString());
+     }
+     var filename = javaOrigin.getFilename();
+     if (filename != null) {
+         proto.setFilename(filename);
+     }
+     var uri = javaOrigin.getUri();
+     if (uri != null) {
+         proto.setUri(uri);
+     }
+
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java group item into a proto {@code GroupItem}, copying only the fields
+  * that are set.
+  *
+  * @param javaGroup the group to convert; dereferenced without a null check
+  * @return the proto group item
+  */
+ private static GroupItem mapGroupItem(DoclingDocument.GroupItem javaGroup) {
+     GroupItem.Builder proto = GroupItem.newBuilder();
+
+     var selfRef = javaGroup.getSelfRef();
+     if (selfRef != null) {
+         proto.setSelfRef(selfRef);
+     }
+     var parent = javaGroup.getParent();
+     if (parent != null) {
+         proto.setParent(mapRefItem(parent));
+     }
+     var children = javaGroup.getChildren();
+     if (children != null) {
+         for (var child : children) {
+             proto.addChildren(mapRefItem(child));
+         }
+     }
+     var name = javaGroup.getName();
+     if (name != null) {
+         proto.setName(name);
+     }
+     var label = javaGroup.getLabel();
+     if (label != null) {
+         proto.setLabel(mapGroupLabel(label));
+     }
+
+     return proto.build();
+ }
+
+ /**
+  * Translates a Java group label into the proto {@code GroupLabel} enum.
+  *
+  * @param javaLabel the label to translate; may be null
+  * @return the matching proto constant, or {@code GROUP_LABEL_UNSPECIFIED} for null
+  *     and for labels without an explicit case
+  */
+ private static GroupLabel mapGroupLabel(DoclingDocument.GroupLabel javaLabel) {
+     // Guard null like the other enum mappers in this class; a switch on null would throw.
+     if (javaLabel == null) {
+         return GroupLabel.GROUP_LABEL_UNSPECIFIED;
+     }
+     return switch (javaLabel) {
+         case LIST -> GroupLabel.GROUP_LABEL_LIST;
+         case ORDERED_LIST -> GroupLabel.GROUP_LABEL_ORDERED_LIST;
+         case CHAPTER -> GroupLabel.GROUP_LABEL_CHAPTER;
+         case SECTION -> GroupLabel.GROUP_LABEL_SECTION;
+         case SHEET -> GroupLabel.GROUP_LABEL_SHEET;
+         case SLIDE -> GroupLabel.GROUP_LABEL_SLIDE;
+         case FORM_AREA -> GroupLabel.GROUP_LABEL_FORM_AREA;
+         case KEY_VALUE_AREA -> GroupLabel.GROUP_LABEL_KEY_VALUE_AREA;
+         case COMMENT_SECTION -> GroupLabel.GROUP_LABEL_COMMENT_SECTION;
+         case INLINE -> GroupLabel.GROUP_LABEL_INLINE;
+         case PICTURE_AREA -> GroupLabel.GROUP_LABEL_PICTURE_AREA;
+         default -> GroupLabel.GROUP_LABEL_UNSPECIFIED;
+     };
+ }
+
+ /**
+  * Converts a Java reference into a proto {@code RefItem}.
+  *
+  * @param javaRef the reference to convert; may be null
+  * @return a proto reference carrying the ref string, or an empty one when the input
+  *     or its ref string is null
+  */
+ static RefItem mapRefItem(DoclingDocument.RefItem javaRef) {
+     RefItem.Builder proto = RefItem.newBuilder();
+     if (javaRef == null || javaRef.getRef() == null) {
+         return proto.build();
+     }
+     return proto.setRef(javaRef.getRef()).build();
+ }
+
+ /**
+  * Wraps a Java text item in the proto {@code BaseTextItem} envelope, choosing the variant
+  * from the item's label. A variant is set only when the label and the runtime type agree;
+  * otherwise the envelope is returned empty.
+  *
+  * @param javaText the text item to convert; dereferenced without a null check
+  * @return the proto envelope; empty when the label is null, unhandled, or does not match
+  *     the item's runtime type
+  */
+ private static BaseTextItem mapBaseTextItem(DoclingDocument.BaseTextItem javaText) {
+     BaseTextItem.Builder envelope = BaseTextItem.newBuilder();
+     DoclingDocument.DocItemLabel label = javaText.getLabel();
+
+     if (label == null) {
+         return envelope.build();
+     }
+
+     switch (label) {
+         case TITLE -> {
+             if (javaText instanceof DoclingDocument.TitleItem title) {
+                 envelope.setTitle(mapTitleItem(title));
+             }
+         }
+         case SECTION_HEADER -> {
+             if (javaText instanceof DoclingDocument.SectionHeaderItem header) {
+                 envelope.setSectionHeader(mapSectionHeaderItem(header));
+             }
+         }
+         case LIST_ITEM -> {
+             if (javaText instanceof DoclingDocument.ListItem entry) {
+                 envelope.setListItem(mapListItem(entry));
+             }
+         }
+         case CODE -> {
+             if (javaText instanceof DoclingDocument.CodeItem code) {
+                 envelope.setCode(mapCodeItem(code));
+             }
+         }
+         case FORMULA -> {
+             if (javaText instanceof DoclingDocument.FormulaItem formula) {
+                 envelope.setFormula(mapFormulaItem(formula));
+             }
+         }
+         // All of these labels share the plain text variant.
+         case TEXT, PARAGRAPH, CAPTION, FOOTNOTE, PAGE_HEADER, PAGE_FOOTER, REFERENCE,
+              CHECKBOX_SELECTED, CHECKBOX_UNSELECTED, EMPTY_VALUE -> {
+             if (javaText instanceof DoclingDocument.TextItem plain) {
+                 envelope.setText(mapTextItem(plain));
+             }
+         }
+         default -> LOG.warn("Unknown BaseTextItem label: {}", label);
+     }
+
+     return envelope.build();
+ }
+
+ /**
+  * Converts a Java title item into a proto {@code TitleItem} holding the shared text base.
+  *
+  * @param javaTitle the title item to convert
+  * @return the proto title item
+  */
+ private static TitleItem mapTitleItem(DoclingDocument.TitleItem javaTitle) {
+     TitleItem.Builder proto = TitleItem.newBuilder();
+     proto.setBase(mapTextItemBase(javaTitle));
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java section header into a proto {@code SectionHeaderItem}: the shared text
+  * base plus the heading level when one is set.
+  *
+  * @param javaSection the section header to convert
+  * @return the proto section header
+  */
+ private static SectionHeaderItem mapSectionHeaderItem(DoclingDocument.SectionHeaderItem javaSection) {
+     SectionHeaderItem.Builder proto = SectionHeaderItem.newBuilder();
+     proto.setBase(mapTextItemBase(javaSection));
+
+     var level = javaSection.getLevel();
+     if (level != null) {
+         proto.setLevel(level);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java list item into a proto {@code ListItem}: the shared text base, the
+  * enumerated flag, and the marker when one is set.
+  *
+  * @param javaList the list item to convert
+  * @return the proto list item
+  */
+ private static ListItem mapListItem(DoclingDocument.ListItem javaList) {
+     ListItem.Builder proto = ListItem.newBuilder();
+     proto.setBase(mapTextItemBase(javaList));
+     proto.setEnumerated(javaList.isEnumerated());
+
+     var marker = javaList.getMarker();
+     if (marker != null) {
+         proto.setMarker(marker);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java code item into a proto {@code CodeItem}: the shared text base plus the
+  * code language, captions, references, footnotes and image where present.
+  *
+  * @param javaCode the code item to convert
+  * @return the proto code item
+  */
+ private static CodeItem mapCodeItem(DoclingDocument.CodeItem javaCode) {
+     CodeItem.Builder proto = CodeItem.newBuilder();
+     proto.setBase(mapTextItemBase(javaCode));
+
+     var language = javaCode.getCodeLanguage();
+     if (language != null) {
+         proto.setCodeLanguage(language);
+     }
+     var captions = javaCode.getCaptions();
+     if (captions != null) {
+         for (var caption : captions) {
+             proto.addCaptions(mapRefItem(caption));
+         }
+     }
+     var references = javaCode.getReferences();
+     if (references != null) {
+         for (var reference : references) {
+             proto.addReferences(mapRefItem(reference));
+         }
+     }
+     var footnotes = javaCode.getFootnotes();
+     if (footnotes != null) {
+         for (var footnote : footnotes) {
+             proto.addFootnotes(mapRefItem(footnote));
+         }
+     }
+     var image = javaCode.getImage();
+     if (image != null) {
+         proto.setImage(mapImageRef(image));
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java formula item into a proto {@code FormulaItem} holding the shared text base.
+  *
+  * @param javaFormula the formula item to convert
+  * @return the proto formula item
+  */
+ private static FormulaItem mapFormulaItem(DoclingDocument.FormulaItem javaFormula) {
+     FormulaItem.Builder proto = FormulaItem.newBuilder();
+     proto.setBase(mapTextItemBase(javaFormula));
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java text item into a proto {@code TextItem} holding the shared text base.
+  *
+  * @param javaText the text item to convert
+  * @return the proto text item
+  */
+ private static TextItem mapTextItem(DoclingDocument.TextItem javaText) {
+     TextItem.Builder proto = TextItem.newBuilder();
+     proto.setBase(mapTextItemBase(javaText));
+     return proto.build();
+ }
+
+ /**
+  * Builds the proto {@code TextItemBase} shared by all text item variants, copying each
+  * field of the Java item that is set.
+  *
+  * @param javaItem the text item whose common fields are copied; dereferenced without a
+  *     null check
+  * @return the proto text base
+  */
+ private static TextItemBase mapTextItemBase(DoclingDocument.BaseTextItem javaItem) {
+     TextItemBase.Builder proto = TextItemBase.newBuilder();
+
+     // Tree position.
+     var selfRef = javaItem.getSelfRef();
+     if (selfRef != null) {
+         proto.setSelfRef(selfRef);
+     }
+     var parent = javaItem.getParent();
+     if (parent != null) {
+         proto.setParent(mapRefItem(parent));
+     }
+     var children = javaItem.getChildren();
+     if (children != null) {
+         for (var child : children) {
+             proto.addChildren(mapRefItem(child));
+         }
+     }
+
+     // Classification and provenance.
+     var layer = javaItem.getContentLayer();
+     if (layer != null) {
+         proto.setContentLayer(mapContentLayer(layer));
+     }
+     var label = javaItem.getLabel();
+     if (label != null) {
+         proto.setLabel(mapDocItemLabel(label));
+     }
+     var provenance = javaItem.getProv();
+     if (provenance != null) {
+         for (var prov : provenance) {
+             proto.addProv(mapProvenanceItem(prov));
+         }
+     }
+
+     // Text content and presentation.
+     var orig = javaItem.getOrig();
+     if (orig != null) {
+         proto.setOrig(orig);
+     }
+     var text = javaItem.getText();
+     if (text != null) {
+         proto.setText(text);
+     }
+     var formatting = javaItem.getFormatting();
+     if (formatting != null) {
+         proto.setFormatting(mapFormatting(formatting));
+     }
+     var hyperlink = javaItem.getHyperlink();
+     if (hyperlink != null) {
+         proto.setHyperlink(hyperlink);
+     }
+
+     return proto.build();
+ }
+
+ /**
+  * Translates a Java content layer into the proto {@code ContentLayer} enum.
+  *
+  * @param javaLayer the layer to translate; may be null
+  * @return the matching proto constant, or {@code CONTENT_LAYER_UNSPECIFIED} for null and
+  *     for layers without an explicit case
+  */
+ private static ContentLayer mapContentLayer(DoclingDocument.ContentLayer javaLayer) {
+     return javaLayer == null
+         ? ContentLayer.CONTENT_LAYER_UNSPECIFIED
+         : switch (javaLayer) {
+             case BODY -> ContentLayer.CONTENT_LAYER_BODY;
+             case FURNITURE -> ContentLayer.CONTENT_LAYER_FURNITURE;
+             case BACKGROUND -> ContentLayer.CONTENT_LAYER_BACKGROUND;
+             case INVISIBLE -> ContentLayer.CONTENT_LAYER_INVISIBLE;
+             case NOTES -> ContentLayer.CONTENT_LAYER_NOTES;
+             default -> ContentLayer.CONTENT_LAYER_UNSPECIFIED;
+         };
+ }
+
+ private static DocItemLabel mapDocItemLabel(DoclingDocument.DocItemLabel javaLabel) {
+ if (javaLabel == null) {
+ return DocItemLabel.DOC_ITEM_LABEL_UNSPECIFIED;
+ }
+ return switch (javaLabel) {
+ case CAPTION -> DocItemLabel.DOC_ITEM_LABEL_CAPTION;
+ case CHART -> DocItemLabel.DOC_ITEM_LABEL_CHART;
+ case CHECKBOX_SELECTED -> DocItemLabel.DOC_ITEM_LABEL_CHECKBOX_SELECTED;
+ case CHECKBOX_UNSELECTED -> DocItemLabel.DOC_ITEM_LABEL_CHECKBOX_UNSELECTED;
+ case CODE -> DocItemLabel.DOC_ITEM_LABEL_CODE;
+ case DOCUMENT_INDEX -> DocItemLabel.DOC_ITEM_LABEL_DOCUMENT_INDEX;
+ case EMPTY_VALUE -> DocItemLabel.DOC_ITEM_LABEL_EMPTY_VALUE;
+ case FOOTNOTE -> DocItemLabel.DOC_ITEM_LABEL_FOOTNOTE;
+ case FORM -> DocItemLabel.DOC_ITEM_LABEL_FORM;
+ case FORMULA -> DocItemLabel.DOC_ITEM_LABEL_FORMULA;
+ case GRADING_SCALE -> DocItemLabel.DOC_ITEM_LABEL_GRADING_SCALE;
+ case HANDWRITTEN_TEXT -> DocItemLabel.DOC_ITEM_LABEL_HANDWRITTEN_TEXT;
+ case KEY_VALUE_REGION -> DocItemLabel.DOC_ITEM_LABEL_KEY_VALUE_REGION;
+ case LIST_ITEM -> DocItemLabel.DOC_ITEM_LABEL_LIST_ITEM;
+ case PAGE_FOOTER -> DocItemLabel.DOC_ITEM_LABEL_PAGE_FOOTER;
+ case PAGE_HEADER -> DocItemLabel.DOC_ITEM_LABEL_PAGE_HEADER;
+ case PARAGRAPH -> DocItemLabel.DOC_ITEM_LABEL_PARAGRAPH;
+ case PICTURE -> DocItemLabel.DOC_ITEM_LABEL_PICTURE;
+ case REFERENCE -> DocItemLabel.DOC_ITEM_LABEL_REFERENCE;
+ case SECTION_HEADER -> DocItemLabel.DOC_ITEM_LABEL_SECTION_HEADER;
+ case TABLE -> DocItemLabel.DOC_ITEM_LABEL_TABLE;
+ case TEXT -> DocItemLabel.DOC_ITEM_LABEL_TEXT;
+ case TITLE -> DocItemLabel.DOC_ITEM_LABEL_TITLE;
+ default -> DocItemLabel.DOC_ITEM_LABEL_UNSPECIFIED;
+ };
+ }
+
+ /**
+  * Converts a Java provenance item into a proto {@code ProvenanceItem}, copying the page
+  * number, bounding box and character span where present. Mapping is best-effort: an
+  * exception raised while copying is logged and whatever was copied so far is returned.
+  *
+  * @param javaProv the provenance item to convert; may be null
+  * @return the proto provenance item, or the proto default instance when
+  *     {@code javaProv} is null
+  */
+ static ProvenanceItem mapProvenanceItem(DoclingDocument.ProvenanceItem javaProv) {
+     // Without this guard a null input throws inside the try block and then again inside
+     // the catch handler (which dereferences javaProv for the log message).
+     if (javaProv == null) {
+         return ProvenanceItem.getDefaultInstance();
+     }
+
+     ProvenanceItem.Builder builder = ProvenanceItem.newBuilder();
+     try {
+         if (javaProv.getPageNo() != null) {
+             builder.setPageNo(javaProv.getPageNo());
+         }
+         if (javaProv.getBbox() != null) {
+             builder.setBbox(mapBoundingBox(javaProv.getBbox()));
+         }
+         if (javaProv.getCharspan() != null) {
+             javaProv.getCharspan().forEach(builder::addCharspan);
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping ProvenanceItem - page_no={}, bbox={}", javaProv.getPageNo(), javaProv.getBbox(), e);
+     }
+     return builder.build();
+ }
+
+ /**
+  * Converts Java text formatting into a proto {@code Formatting}: the four style flags and
+  * the script when one is set. An exception raised while copying is logged and the
+  * partially filled message is returned.
+  *
+  * @param javaFormatting the formatting to convert
+  * @return the proto formatting
+  */
+ private static Formatting mapFormatting(DoclingDocument.Formatting javaFormatting) {
+     Formatting.Builder proto = Formatting.newBuilder();
+     try {
+         proto.setBold(javaFormatting.isBold())
+             .setItalic(javaFormatting.isItalic())
+             .setUnderline(javaFormatting.isUnderline())
+             .setStrikethrough(javaFormatting.isStrikethrough());
+
+         var script = javaFormatting.getScript();
+         if (script != null) {
+             proto.setScript(mapScript(script));
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping Formatting", e);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Translates a Java script position into the proto {@code Script} enum.
+  *
+  * @param javaScript the script value to translate; may be null
+  * @return the matching proto constant, or {@code SCRIPT_UNSPECIFIED} for null and for
+  *     values without an explicit case
+  */
+ private static Script mapScript(DoclingDocument.Script javaScript) {
+     return javaScript == null
+         ? Script.SCRIPT_UNSPECIFIED
+         : switch (javaScript) {
+             case BASELINE -> Script.SCRIPT_BASELINE;
+             case SUB -> Script.SCRIPT_SUB;
+             case SUPER -> Script.SCRIPT_SUPER;
+             default -> Script.SCRIPT_UNSPECIFIED;
+         };
+ }
+
+ /**
+  * Converts a Java picture item into a proto {@code PictureItem}, copying every field that
+  * is set. Mapping is best-effort: an exception raised while copying is logged and
+  * whatever was copied so far is returned.
+  *
+  * @param javaPicture the picture to convert; may be null
+  * @return the proto picture item, or the proto default instance when
+  *     {@code javaPicture} is null
+  */
+ private static PictureItem mapPictureItem(DoclingDocument.PictureItem javaPicture) {
+     // Without this guard a null element throws inside the try block and then again inside
+     // the catch handler (which dereferences javaPicture for the log message).
+     if (javaPicture == null) {
+         return PictureItem.getDefaultInstance();
+     }
+
+     PictureItem.Builder builder = PictureItem.newBuilder();
+     try {
+         if (javaPicture.getSelfRef() != null) {
+             builder.setSelfRef(javaPicture.getSelfRef());
+         }
+         if (javaPicture.getParent() != null) {
+             builder.setParent(mapRefItem(javaPicture.getParent()));
+         }
+         if (javaPicture.getChildren() != null) {
+             javaPicture.getChildren().forEach(child -> builder.addChildren(mapRefItem(child)));
+         }
+         if (javaPicture.getContentLayer() != null) {
+             builder.setContentLayer(mapContentLayer(javaPicture.getContentLayer()));
+         }
+         if (javaPicture.getMeta() != null) {
+             builder.setMeta(mapPictureMeta(javaPicture.getMeta()));
+         }
+         if (javaPicture.getLabel() != null) {
+             builder.setLabel(javaPicture.getLabel());
+         }
+         if (javaPicture.getProv() != null) {
+             javaPicture.getProv().forEach(prov -> builder.addProv(mapProvenanceItem(prov)));
+         }
+         if (javaPicture.getCaptions() != null) {
+             javaPicture.getCaptions().forEach(caption -> builder.addCaptions(mapRefItem(caption)));
+         }
+         if (javaPicture.getReferences() != null) {
+             javaPicture.getReferences().forEach(ref -> builder.addReferences(mapRefItem(ref)));
+         }
+         if (javaPicture.getFootnotes() != null) {
+             javaPicture.getFootnotes().forEach(footnote -> builder.addFootnotes(mapRefItem(footnote)));
+         }
+         if (javaPicture.getImage() != null) {
+             builder.setImage(mapImageRef(javaPicture.getImage()));
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping PictureItem - selfRef={}", javaPicture.getSelfRef(), e);
+     }
+     return builder.build();
+ }
+
+ /**
+  * Converts Java picture metadata into a proto {@code PictureMeta}, copying each meta
+  * field that is set. An exception raised while copying is logged and the partially
+  * filled message is returned.
+  *
+  * @param javaMeta the picture metadata to convert
+  * @return the proto picture metadata
+  */
+ private static PictureMeta mapPictureMeta(DoclingDocument.PictureMeta javaMeta) {
+     PictureMeta.Builder proto = PictureMeta.newBuilder();
+     try {
+         var summary = javaMeta.getSummary();
+         if (summary != null) {
+             proto.setSummary(mapSummaryMetaField(summary));
+         }
+         var description = javaMeta.getDescription();
+         if (description != null) {
+             proto.setDescription(mapDescriptionMetaField(description));
+         }
+         var classification = javaMeta.getClassification();
+         if (classification != null) {
+             proto.setClassification(mapPictureClassificationMetaField(classification));
+         }
+         var molecule = javaMeta.getMolecule();
+         if (molecule != null) {
+             proto.setMolecule(mapMoleculeMetaField(molecule));
+         }
+         var tabularChart = javaMeta.getTabularChart();
+         if (tabularChart != null) {
+             proto.setTabularChart(mapTabularChartMetaField(tabularChart));
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping PictureMeta", e);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java summary meta field into a proto {@code SummaryMetaField}, copying the
+  * confidence, creator and text where set.
+  *
+  * @param javaSummary the summary meta field to convert
+  * @return the proto summary meta field
+  */
+ private static SummaryMetaField mapSummaryMetaField(DoclingDocument.SummaryMetaField javaSummary) {
+     SummaryMetaField.Builder proto = SummaryMetaField.newBuilder();
+
+     var confidence = javaSummary.getConfidence();
+     if (confidence != null) {
+         proto.setConfidence(confidence);
+     }
+     var createdBy = javaSummary.getCreatedBy();
+     if (createdBy != null) {
+         proto.setCreatedBy(createdBy);
+     }
+     var text = javaSummary.getText();
+     if (text != null) {
+         proto.setText(text);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java description meta field into a proto {@code DescriptionMetaField},
+  * copying the confidence, creator and text where set.
+  *
+  * @param javaDesc the description meta field to convert
+  * @return the proto description meta field
+  */
+ private static DescriptionMetaField mapDescriptionMetaField(DoclingDocument.DescriptionMetaField javaDesc) {
+     DescriptionMetaField.Builder proto = DescriptionMetaField.newBuilder();
+
+     var confidence = javaDesc.getConfidence();
+     if (confidence != null) {
+         proto.setConfidence(confidence);
+     }
+     var createdBy = javaDesc.getCreatedBy();
+     if (createdBy != null) {
+         proto.setCreatedBy(createdBy);
+     }
+     var text = javaDesc.getText();
+     if (text != null) {
+         proto.setText(text);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java picture classification meta field into its proto form by converting
+  * each prediction in turn.
+  *
+  * @param javaClassification the classification meta field to convert
+  * @return the proto classification meta field; has no predictions when the Java list
+  *     is null
+  */
+ private static PictureClassificationMetaField mapPictureClassificationMetaField(
+         DoclingDocument.PictureClassificationMetaField javaClassification) {
+     PictureClassificationMetaField.Builder proto = PictureClassificationMetaField.newBuilder();
+
+     var predictions = javaClassification.getPredictions();
+     if (predictions == null) {
+         return proto.build();
+     }
+     for (var prediction : predictions) {
+         proto.addPredictions(mapPictureClassificationPrediction(prediction));
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a single Java classification prediction into its proto form, copying the
+  * confidence, creator and class name where set.
+  *
+  * @param javaPred the prediction to convert
+  * @return the proto prediction
+  */
+ private static PictureClassificationPrediction mapPictureClassificationPrediction(
+         DoclingDocument.PictureClassificationPrediction javaPred) {
+     PictureClassificationPrediction.Builder proto = PictureClassificationPrediction.newBuilder();
+
+     var confidence = javaPred.getConfidence();
+     if (confidence != null) {
+         proto.setConfidence(confidence);
+     }
+     var createdBy = javaPred.getCreatedBy();
+     if (createdBy != null) {
+         proto.setCreatedBy(createdBy);
+     }
+     var className = javaPred.getClassName();
+     if (className != null) {
+         proto.setClassName(className);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java molecule meta field into a proto {@code MoleculeMetaField}, copying the
+  * confidence, creator and {@code smi} value where set.
+  *
+  * @param javaMol the molecule meta field to convert
+  * @return the proto molecule meta field
+  */
+ private static MoleculeMetaField mapMoleculeMetaField(DoclingDocument.MoleculeMetaField javaMol) {
+     MoleculeMetaField.Builder proto = MoleculeMetaField.newBuilder();
+
+     var confidence = javaMol.getConfidence();
+     if (confidence != null) {
+         proto.setConfidence(confidence);
+     }
+     var createdBy = javaMol.getCreatedBy();
+     if (createdBy != null) {
+         proto.setCreatedBy(createdBy);
+     }
+     var smi = javaMol.getSmi();
+     if (smi != null) {
+         proto.setSmi(smi);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java tabular chart meta field into a proto {@code TabularChartMetaField},
+  * copying the confidence, creator, title and chart data where set.
+  *
+  * @param javaChart the tabular chart meta field to convert
+  * @return the proto tabular chart meta field
+  */
+ private static TabularChartMetaField mapTabularChartMetaField(DoclingDocument.TabularChartMetaField javaChart) {
+     TabularChartMetaField.Builder proto = TabularChartMetaField.newBuilder();
+
+     var confidence = javaChart.getConfidence();
+     if (confidence != null) {
+         proto.setConfidence(confidence);
+     }
+     var createdBy = javaChart.getCreatedBy();
+     if (createdBy != null) {
+         proto.setCreatedBy(createdBy);
+     }
+     var title = javaChart.getTitle();
+     if (title != null) {
+         proto.setTitle(title);
+     }
+     var chartData = javaChart.getChartData();
+     if (chartData != null) {
+         proto.setChartData(mapTableData(chartData));
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java table item into a proto {@code TableItem}, copying every field that is
+  * set, including the table data. Mapping is best-effort: an exception raised while
+  * copying is logged and whatever was copied so far is returned.
+  *
+  * @param javaTable the table to convert; may be null
+  * @return the proto table item, or the proto default instance when {@code javaTable}
+  *     is null
+  */
+ static TableItem mapTableItem(DoclingDocument.TableItem javaTable) {
+     // Without this guard a null input throws inside the try block and then again inside
+     // the catch handler (which dereferences javaTable for the log message).
+     if (javaTable == null) {
+         return TableItem.getDefaultInstance();
+     }
+
+     TableItem.Builder builder = TableItem.newBuilder();
+     try {
+         if (javaTable.getSelfRef() != null) {
+             builder.setSelfRef(javaTable.getSelfRef());
+         }
+         if (javaTable.getParent() != null) {
+             builder.setParent(mapRefItem(javaTable.getParent()));
+         }
+         if (javaTable.getChildren() != null) {
+             javaTable.getChildren().forEach(child -> builder.addChildren(mapRefItem(child)));
+         }
+         if (javaTable.getContentLayer() != null) {
+             builder.setContentLayer(mapContentLayer(javaTable.getContentLayer()));
+         }
+         if (javaTable.getMeta() != null) {
+             builder.setMeta(mapFloatingMeta(javaTable.getMeta()));
+         }
+         if (javaTable.getLabel() != null) {
+             builder.setLabel(javaTable.getLabel());
+         }
+         if (javaTable.getProv() != null) {
+             javaTable.getProv().forEach(prov -> builder.addProv(mapProvenanceItem(prov)));
+         }
+         if (javaTable.getCaptions() != null) {
+             javaTable.getCaptions().forEach(caption -> builder.addCaptions(mapRefItem(caption)));
+         }
+         if (javaTable.getReferences() != null) {
+             javaTable.getReferences().forEach(ref -> builder.addReferences(mapRefItem(ref)));
+         }
+         if (javaTable.getFootnotes() != null) {
+             javaTable.getFootnotes().forEach(footnote -> builder.addFootnotes(mapRefItem(footnote)));
+         }
+         if (javaTable.getImage() != null) {
+             builder.setImage(mapImageRef(javaTable.getImage()));
+         }
+         if (javaTable.getData() != null) {
+             builder.setData(mapTableData(javaTable.getData()));
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping TableItem - selfRef={}", javaTable.getSelfRef(), e);
+     }
+     return builder.build();
+ }
+
+ /**
+  * Converts Java table data into a proto {@code TableData}: the row and column counts
+  * where set, plus one proto row per grid row. A null grid row becomes an empty proto row.
+  *
+  * @param javaTableData the table data to convert
+  * @return the proto table data
+  */
+ private static TableData mapTableData(DoclingDocument.TableData javaTableData) {
+     TableData.Builder proto = TableData.newBuilder();
+
+     var rowCount = javaTableData.getNumRows();
+     if (rowCount != null) {
+         proto.setNumRows(rowCount);
+     }
+     var colCount = javaTableData.getNumCols();
+     if (colCount != null) {
+         proto.setNumCols(colCount);
+     }
+
+     var grid = javaTableData.getGrid();
+     if (grid == null) {
+         return proto.build();
+     }
+     for (var cells : grid) {
+         TableRow.Builder row = TableRow.newBuilder();
+         if (cells != null) {
+             for (var cell : cells) {
+                 row.addCells(mapTableCell(cell));
+             }
+         }
+         proto.addGrid(row.build());
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java table cell into a proto {@code TableCell}. Nullable fields are copied
+  * only when set; the four boolean flags are always copied.
+  *
+  * @param javaCell the cell to convert; dereferenced without a null check
+  * @return the proto table cell
+  */
+ private static TableCell mapTableCell(DoclingDocument.TableCell javaCell) {
+     TableCell.Builder proto = TableCell.newBuilder();
+
+     var bbox = javaCell.getBbox();
+     if (bbox != null) {
+         proto.setBbox(mapBoundingBox(bbox));
+     }
+
+     // Spans and grid offsets.
+     var rowSpan = javaCell.getRowSpan();
+     if (rowSpan != null) {
+         proto.setRowSpan(rowSpan);
+     }
+     var colSpan = javaCell.getColSpan();
+     if (colSpan != null) {
+         proto.setColSpan(colSpan);
+     }
+     var startRow = javaCell.getStartRowOffsetIdx();
+     if (startRow != null) {
+         proto.setStartRowOffsetIdx(startRow);
+     }
+     var endRow = javaCell.getEndRowOffsetIdx();
+     if (endRow != null) {
+         proto.setEndRowOffsetIdx(endRow);
+     }
+     var startCol = javaCell.getStartColOffsetIdx();
+     if (startCol != null) {
+         proto.setStartColOffsetIdx(startCol);
+     }
+     var endCol = javaCell.getEndColOffsetIdx();
+     if (endCol != null) {
+         proto.setEndColOffsetIdx(endCol);
+     }
+
+     var text = javaCell.getText();
+     if (text != null) {
+         proto.setText(text);
+     }
+
+     return proto
+         .setColumnHeader(javaCell.isColumnHeader())
+         .setRowHeader(javaCell.isRowHeader())
+         .setRowSection(javaCell.isRowSection())
+         .setFillable(javaCell.isFillable())
+         .build();
+ }
+
+ /**
+  * Converts a Java bounding box into a proto {@code BoundingBox}, copying the four edges
+  * and the coordinate origin where set.
+  *
+  * @param javaBbox the bounding box to convert; dereferenced without a null check
+  * @return the proto bounding box
+  */
+ static BoundingBox mapBoundingBox(DoclingDocument.BoundingBox javaBbox) {
+     BoundingBox.Builder proto = BoundingBox.newBuilder();
+
+     var left = javaBbox.getL();
+     if (left != null) {
+         proto.setL(left);
+     }
+     var top = javaBbox.getT();
+     if (top != null) {
+         proto.setT(top);
+     }
+     var right = javaBbox.getR();
+     if (right != null) {
+         proto.setR(right);
+     }
+     var bottom = javaBbox.getB();
+     if (bottom != null) {
+         proto.setB(bottom);
+     }
+     var coordOrigin = javaBbox.getCoordOrigin();
+     if (coordOrigin != null) {
+         proto.setCoordOrigin(coordOrigin);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java key-value item into a proto {@code KeyValueItem}, copying every field
+  * that is set, including the graph. Mapping is best-effort: an exception raised while
+  * copying is logged and whatever was copied so far is returned.
+  *
+  * @param javaKv the key-value item to convert; may be null
+  * @return the proto key-value item, or the proto default instance when {@code javaKv}
+  *     is null
+  */
+ private static KeyValueItem mapKeyValueItem(DoclingDocument.KeyValueItem javaKv) {
+     // Without this guard a null element throws inside the try block and then again inside
+     // the catch handler (which dereferences javaKv for the log message).
+     if (javaKv == null) {
+         return KeyValueItem.getDefaultInstance();
+     }
+
+     KeyValueItem.Builder builder = KeyValueItem.newBuilder();
+     try {
+         if (javaKv.getSelfRef() != null) {
+             builder.setSelfRef(javaKv.getSelfRef());
+         }
+         if (javaKv.getParent() != null) {
+             builder.setParent(mapRefItem(javaKv.getParent()));
+         }
+         if (javaKv.getChildren() != null) {
+             javaKv.getChildren().forEach(child -> builder.addChildren(mapRefItem(child)));
+         }
+         if (javaKv.getContentLayer() != null) {
+             builder.setContentLayer(mapContentLayer(javaKv.getContentLayer()));
+         }
+         if (javaKv.getMeta() != null) {
+             builder.setMeta(mapFloatingMeta(javaKv.getMeta()));
+         }
+         if (javaKv.getLabel() != null) {
+             builder.setLabel(javaKv.getLabel());
+         }
+         if (javaKv.getProv() != null) {
+             javaKv.getProv().forEach(prov -> builder.addProv(mapProvenanceItem(prov)));
+         }
+         if (javaKv.getCaptions() != null) {
+             javaKv.getCaptions().forEach(caption -> builder.addCaptions(mapRefItem(caption)));
+         }
+         if (javaKv.getReferences() != null) {
+             javaKv.getReferences().forEach(ref -> builder.addReferences(mapRefItem(ref)));
+         }
+         if (javaKv.getFootnotes() != null) {
+             javaKv.getFootnotes().forEach(footnote -> builder.addFootnotes(mapRefItem(footnote)));
+         }
+         if (javaKv.getImage() != null) {
+             builder.setImage(mapImageRef(javaKv.getImage()));
+         }
+         if (javaKv.getGraph() != null) {
+             builder.setGraph(mapGraphData(javaKv.getGraph()));
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping KeyValueItem - selfRef={}", javaKv.getSelfRef(), e);
+     }
+     return builder.build();
+ }
+
+ /**
+  * Converts a Java form item into a proto {@code FormItem}, copying every field that is
+  * set, including the graph. Mapping is best-effort: an exception raised while copying is
+  * logged and whatever was copied so far is returned.
+  *
+  * @param javaForm the form item to convert; may be null
+  * @return the proto form item, or the proto default instance when {@code javaForm}
+  *     is null
+  */
+ private static FormItem mapFormItem(DoclingDocument.FormItem javaForm) {
+     // Without this guard a null element throws inside the try block and then again inside
+     // the catch handler (which dereferences javaForm for the log message).
+     if (javaForm == null) {
+         return FormItem.getDefaultInstance();
+     }
+
+     FormItem.Builder builder = FormItem.newBuilder();
+     try {
+         if (javaForm.getSelfRef() != null) {
+             builder.setSelfRef(javaForm.getSelfRef());
+         }
+         if (javaForm.getParent() != null) {
+             builder.setParent(mapRefItem(javaForm.getParent()));
+         }
+         if (javaForm.getChildren() != null) {
+             javaForm.getChildren().forEach(child -> builder.addChildren(mapRefItem(child)));
+         }
+         if (javaForm.getContentLayer() != null) {
+             builder.setContentLayer(mapContentLayer(javaForm.getContentLayer()));
+         }
+         if (javaForm.getMeta() != null) {
+             builder.setMeta(mapFloatingMeta(javaForm.getMeta()));
+         }
+         if (javaForm.getLabel() != null) {
+             builder.setLabel(javaForm.getLabel());
+         }
+         if (javaForm.getProv() != null) {
+             javaForm.getProv().forEach(prov -> builder.addProv(mapProvenanceItem(prov)));
+         }
+         if (javaForm.getCaptions() != null) {
+             javaForm.getCaptions().forEach(caption -> builder.addCaptions(mapRefItem(caption)));
+         }
+         if (javaForm.getReferences() != null) {
+             javaForm.getReferences().forEach(ref -> builder.addReferences(mapRefItem(ref)));
+         }
+         if (javaForm.getFootnotes() != null) {
+             javaForm.getFootnotes().forEach(footnote -> builder.addFootnotes(mapRefItem(footnote)));
+         }
+         if (javaForm.getImage() != null) {
+             builder.setImage(mapImageRef(javaForm.getImage()));
+         }
+         if (javaForm.getGraph() != null) {
+             builder.setGraph(mapGraphData(javaForm.getGraph()));
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping FormItem - selfRef={}", javaForm.getSelfRef(), e);
+     }
+     return builder.build();
+ }
+
+ /**
+  * Converts Java floating-item metadata into a proto {@code FloatingMeta}, copying the
+  * summary and description where set.
+  *
+  * @param javaMeta the metadata to convert
+  * @return the proto floating metadata
+  */
+ private static FloatingMeta mapFloatingMeta(DoclingDocument.FloatingMeta javaMeta) {
+     FloatingMeta.Builder proto = FloatingMeta.newBuilder();
+
+     var summary = javaMeta.getSummary();
+     if (summary != null) {
+         proto.setSummary(mapSummaryMetaField(summary));
+     }
+     var description = javaMeta.getDescription();
+     if (description != null) {
+         proto.setDescription(mapDescriptionMetaField(description));
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts Java graph data into a proto {@code GraphData} by converting each cell and
+  * each link. An exception raised while copying is logged and the partially filled
+  * message is returned.
+  *
+  * @param javaGraph the graph data to convert
+  * @return the proto graph data
+  */
+ private static GraphData mapGraphData(DoclingDocument.GraphData javaGraph) {
+     GraphData.Builder proto = GraphData.newBuilder();
+     try {
+         var cells = javaGraph.getCells();
+         if (cells != null) {
+             for (var cell : cells) {
+                 proto.addCells(mapGraphCell(cell));
+             }
+         }
+         var links = javaGraph.getLinks();
+         if (links != null) {
+             for (var link : links) {
+                 proto.addLinks(mapGraphLink(link));
+             }
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping GraphData", e);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java graph cell into a proto {@code GraphCell}, copying the label, cell id,
+  * text, original text, provenance and item reference where set.
+  *
+  * @param javaCell the graph cell to convert; dereferenced without a null check
+  * @return the proto graph cell
+  */
+ private static GraphCell mapGraphCell(DoclingDocument.GraphCell javaCell) {
+     GraphCell.Builder proto = GraphCell.newBuilder();
+
+     var label = javaCell.getLabel();
+     if (label != null) {
+         proto.setLabel(mapGraphCellLabel(label));
+     }
+     var cellId = javaCell.getCellId();
+     if (cellId != null) {
+         proto.setCellId(cellId);
+     }
+     var text = javaCell.getText();
+     if (text != null) {
+         proto.setText(text);
+     }
+     var orig = javaCell.getOrig();
+     if (orig != null) {
+         proto.setOrig(orig);
+     }
+     var prov = javaCell.getProv();
+     if (prov != null) {
+         proto.setProv(mapProvenanceItem(prov));
+     }
+     var itemRef = javaCell.getItemRef();
+     if (itemRef != null) {
+         proto.setItemRef(mapRefItem(itemRef));
+     }
+     return proto.build();
+ }
+
+ /**
+  * Translates a Java graph cell label into the proto {@code GraphCellLabel} enum.
+  *
+  * @param javaLabel the label to translate; may be null
+  * @return the matching proto constant, or {@code GRAPH_CELL_LABEL_UNSPECIFIED} for null
+  *     and for labels without an explicit case
+  */
+ private static GraphCellLabel mapGraphCellLabel(DoclingDocument.GraphCellLabel javaLabel) {
+     return javaLabel == null
+         ? GraphCellLabel.GRAPH_CELL_LABEL_UNSPECIFIED
+         : switch (javaLabel) {
+             case KEY -> GraphCellLabel.GRAPH_CELL_LABEL_KEY;
+             case VALUE -> GraphCellLabel.GRAPH_CELL_LABEL_VALUE;
+             case CHECKBOX -> GraphCellLabel.GRAPH_CELL_LABEL_CHECKBOX;
+             default -> GraphCellLabel.GRAPH_CELL_LABEL_UNSPECIFIED;
+         };
+ }
+
+ /**
+  * Converts a Java graph link into a proto {@code GraphLink}, copying the label and the
+  * source and target cell ids where set.
+  *
+  * @param javaLink the graph link to convert; dereferenced without a null check
+  * @return the proto graph link
+  */
+ private static GraphLink mapGraphLink(DoclingDocument.GraphLink javaLink) {
+     GraphLink.Builder proto = GraphLink.newBuilder();
+
+     var label = javaLink.getLabel();
+     if (label != null) {
+         proto.setLabel(mapGraphLinkLabel(label));
+     }
+     var sourceId = javaLink.getSourceCellId();
+     if (sourceId != null) {
+         proto.setSourceCellId(sourceId);
+     }
+     var targetId = javaLink.getTargetCellId();
+     if (targetId != null) {
+         proto.setTargetCellId(targetId);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Translates a Java graph link label into the proto {@code GraphLinkLabel} enum.
+  *
+  * @param javaLabel the label to translate; may be null
+  * @return the matching proto constant, or {@code GRAPH_LINK_LABEL_UNSPECIFIED} for null
+  *     and for labels without an explicit case
+  */
+ private static GraphLinkLabel mapGraphLinkLabel(DoclingDocument.GraphLinkLabel javaLabel) {
+     return javaLabel == null
+         ? GraphLinkLabel.GRAPH_LINK_LABEL_UNSPECIFIED
+         : switch (javaLabel) {
+             case TO_VALUE -> GraphLinkLabel.GRAPH_LINK_LABEL_TO_VALUE;
+             case TO_KEY -> GraphLinkLabel.GRAPH_LINK_LABEL_TO_KEY;
+             case TO_PARENT -> GraphLinkLabel.GRAPH_LINK_LABEL_TO_PARENT;
+             case TO_CHILD -> GraphLinkLabel.GRAPH_LINK_LABEL_TO_CHILD;
+             default -> GraphLinkLabel.GRAPH_LINK_LABEL_UNSPECIFIED;
+         };
+ }
+
+ /**
+  * Converts a Java page item into a proto {@code PageItem}, copying the size, image and
+  * page number where set.
+  *
+  * @param javaPage the page to convert; dereferenced without a null check
+  * @return the proto page item
+  */
+ private static PageItem mapPageItem(DoclingDocument.PageItem javaPage) {
+     PageItem.Builder proto = PageItem.newBuilder();
+
+     var size = javaPage.getSize();
+     if (size != null) {
+         proto.setSize(mapSize(size));
+     }
+     var image = javaPage.getImage();
+     if (image != null) {
+         proto.setImage(mapImageRef(image));
+     }
+     var pageNo = javaPage.getPageNo();
+     if (pageNo != null) {
+         proto.setPageNo(pageNo);
+     }
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java size into a proto {@code Size}, copying width and height unconditionally.
+  *
+  * @param javaSize the size to convert; dereferenced without a null check
+  * @return the proto size
+  */
+ private static Size mapSize(DoclingDocument.Size javaSize) {
+     // NOTE(review): unlike the other mappers, width/height are not null-checked. If the
+     // getters return boxed values, a null would fail on unboxing here - confirm.
+     Size.Builder proto = Size.newBuilder();
+     proto.setWidth(javaSize.getWidth());
+     proto.setHeight(javaSize.getHeight());
+     return proto.build();
+ }
+
+ /**
+  * Converts a Java image reference into a proto {@code ImageRef}, copying the mimetype,
+  * dpi, size and uri where set. An exception raised while copying is logged and the
+  * partially filled message is returned.
+  *
+  * @param javaImageRef the image reference to convert
+  * @return the proto image reference
+  */
+ private static ImageRef mapImageRef(DoclingDocument.ImageRef javaImageRef) {
+     ImageRef.Builder proto = ImageRef.newBuilder();
+     try {
+         var mimetype = javaImageRef.getMimetype();
+         if (mimetype != null) {
+             proto.setMimetype(mimetype);
+         }
+         var dpi = javaImageRef.getDpi();
+         if (dpi != null) {
+             proto.setDpi(dpi);
+         }
+         var size = javaImageRef.getSize();
+         if (size != null) {
+             proto.setSize(mapSize(size));
+         }
+         var uri = javaImageRef.getUri();
+         if (uri != null) {
+             proto.setUri(uri);
+         }
+     } catch (Exception e) {
+         LOG.error("Error mapping ImageRef - mimetype={}", javaImageRef.getMimetype(), e);
+     }
+     return proto.build();
+ }
+}
diff --git a/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/ProtoMapping.java b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/ProtoMapping.java
new file mode 100644
index 00000000..5f787883
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/ProtoMapping.java
@@ -0,0 +1,27 @@
+package ai.docling.serve.grpc.v1.mapping;
+
+import java.util.function.Consumer;
+
+/**
+ * Shared utilities for proto ↔ Java mapping.
+ */
+final class ProtoMapping {
+
+ private ProtoMapping() {
+ }
+
+ /**
+ * Applies the setter only if the value is non-null.
+ * Eliminates repetitive {@code if (val != null) builder.setFoo(val)} patterns
+ * across mapper classes.
+ *
+ * @param value the possibly-null value
+ * @param setter the proto builder setter (e.g. {@code builder::setFoo})
+ * @param the value type
+ */
+ static void ifNonNull(T value, Consumer setter) {
+ if (value != null) {
+ setter.accept(value);
+ }
+ }
+}
\ No newline at end of file
diff --git a/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/ServeApiMapper.java b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/ServeApiMapper.java
new file mode 100644
index 00000000..a0382609
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/java/ai/docling/serve/grpc/v1/mapping/ServeApiMapper.java
@@ -0,0 +1,723 @@
+package ai.docling.serve.grpc.v1.mapping;
+
+// Java API types (the canonical domain types)
+
+import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.options.HierarchicalChunkerOptions;
+import ai.docling.serve.api.chunk.request.options.HybridChunkerOptions;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.chunk.response.Document;
+import ai.docling.serve.api.chunk.response.ExportDocumentResponse;
+import ai.docling.serve.api.clear.request.ClearResultsRequest;
+import ai.docling.serve.api.clear.response.ClearResponse;
+import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
+import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions;
+import ai.docling.serve.api.convert.request.options.ImageRefMode;
+import ai.docling.serve.api.convert.request.options.InputFormat;
+import ai.docling.serve.api.convert.request.options.OcrEngine;
+import ai.docling.serve.api.convert.request.options.OutputFormat;
+import ai.docling.serve.api.convert.request.options.PdfBackend;
+import ai.docling.serve.api.convert.request.options.ProcessingPipeline;
+import ai.docling.serve.api.convert.request.options.TableFormerMode;
+import ai.docling.serve.api.convert.request.options.VlmModelType;
+import ai.docling.serve.api.convert.request.source.FileSource;
+import ai.docling.serve.api.convert.request.source.HttpSource;
+import ai.docling.serve.api.convert.request.source.S3Source;
+import ai.docling.serve.api.convert.request.source.Source;
+import ai.docling.serve.api.convert.request.target.InBodyTarget;
+import ai.docling.serve.api.convert.request.target.PutTarget;
+import ai.docling.serve.api.convert.request.target.Target;
+import ai.docling.serve.api.convert.request.target.ZipTarget;
+import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
+import ai.docling.serve.api.convert.response.DocumentResponse;
+import ai.docling.serve.api.convert.response.ErrorItem;
+import ai.docling.serve.api.health.HealthCheckResponse;
+import ai.docling.serve.api.task.request.TaskResultRequest;
+import ai.docling.serve.api.task.request.TaskStatusPollRequest;
+import ai.docling.serve.api.task.response.TaskStatus;
+import ai.docling.serve.api.task.response.TaskStatusMetadata;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
+
+import java.net.URI;
+import java.time.Duration;
+
+/**
+ * Bidirectional mapper between proto serve types (ai.docling.serve.v1.*)
+ * and Java API models (ai.docling.serve.api.*).
+ *
+ * Proto → Java: for incoming gRPC requests that need to call the REST client.
+ * Java → Proto: for REST client responses that need to be returned as gRPC responses.
+ */
+public class ServeApiMapper {
+
+ // ==================== Proto → Java (Request Mapping) ====================
+
+ /**
+  * Converts a gRPC convert-document request into the Java API request.
+  *
+  * <p>Sources that map to {@code null} are skipped; options and target are copied
+  * only when the proto message reports them as present.
+  *
+  * @param proto the gRPC request; {@code null} yields an empty Java request
+  * @return the equivalent Java request
+  */
+ public static ConvertDocumentRequest toJava(
+         ai.docling.serve.v1.ConvertDocumentRequest proto) {
+     var request = ConvertDocumentRequest.builder();
+     if (proto == null) {
+         return request.build();
+     }
+
+     var protoSources = proto.getSourcesList();
+     if (protoSources != null) {
+         for (ai.docling.serve.v1.Source candidate : protoSources) {
+             Source mapped = toJavaSource(candidate);
+             if (mapped == null) {
+                 continue;
+             }
+             request.source(mapped);
+         }
+     }
+
+     if (proto.hasOptions()) {
+         request.options(toJavaOptions(proto.getOptions()));
+     }
+
+     Target mappedTarget = proto.hasTarget() ? toJavaTarget(proto.getTarget()) : null;
+     if (mappedTarget != null) {
+         request.target(mappedTarget);
+     }
+
+     return request.build();
+ }
+
+ /**
+  * Converts a gRPC hierarchical-chunk request into the Java API request.
+  *
+  * <p>Sources that map to {@code null} are skipped; convert options, target and
+  * chunking options are copied only when present. The include-converted-doc flag
+  * is always copied.
+  *
+  * @param proto the gRPC request; {@code null} yields an empty Java request
+  * @return the equivalent Java request
+  */
+ public static HierarchicalChunkDocumentRequest toJava(
+         ai.docling.serve.v1.HierarchicalChunkRequest proto) {
+     var request = HierarchicalChunkDocumentRequest.builder();
+     if (proto == null) {
+         return request.build();
+     }
+
+     var protoSources = proto.getSourcesList();
+     if (protoSources != null) {
+         for (ai.docling.serve.v1.Source candidate : protoSources) {
+             Source mapped = toJavaSource(candidate);
+             if (mapped == null) {
+                 continue;
+             }
+             request.source(mapped);
+         }
+     }
+
+     if (proto.hasConvertOptions()) {
+         request.options(toJavaOptions(proto.getConvertOptions()));
+     }
+
+     Target mappedTarget = proto.hasTarget() ? toJavaTarget(proto.getTarget()) : null;
+     if (mappedTarget != null) {
+         request.target(mappedTarget);
+     }
+
+     request.includeConvertedDoc(proto.getIncludeConvertedDoc());
+
+     if (proto.hasChunkingOptions()) {
+         request.chunkingOptions(toJavaHierarchicalOptions(proto.getChunkingOptions()));
+     }
+
+     return request.build();
+ }
+
+ /**
+  * Converts a gRPC hybrid-chunk request into the Java API request.
+  *
+  * <p>Sources that map to {@code null} are skipped; convert options, target and
+  * chunking options are copied only when present. The include-converted-doc flag
+  * is always copied.
+  *
+  * @param proto the gRPC request; {@code null} yields an empty Java request
+  * @return the equivalent Java request
+  */
+ public static HybridChunkDocumentRequest toJava(
+         ai.docling.serve.v1.HybridChunkRequest proto) {
+     var request = HybridChunkDocumentRequest.builder();
+     if (proto == null) {
+         return request.build();
+     }
+
+     var protoSources = proto.getSourcesList();
+     if (protoSources != null) {
+         for (ai.docling.serve.v1.Source candidate : protoSources) {
+             Source mapped = toJavaSource(candidate);
+             if (mapped == null) {
+                 continue;
+             }
+             request.source(mapped);
+         }
+     }
+
+     if (proto.hasConvertOptions()) {
+         request.options(toJavaOptions(proto.getConvertOptions()));
+     }
+
+     Target mappedTarget = proto.hasTarget() ? toJavaTarget(proto.getTarget()) : null;
+     if (mappedTarget != null) {
+         request.target(mappedTarget);
+     }
+
+     request.includeConvertedDoc(proto.getIncludeConvertedDoc());
+
+     if (proto.hasChunkingOptions()) {
+         request.chunkingOptions(toJavaHybridOptions(proto.getChunkingOptions()));
+     }
+
+     return request.build();
+ }
+
+ /**
+  * Converts a gRPC task-status poll request into the Java API request.
+  *
+  * <p>The proto wait time is multiplied by 1000 and truncated to whole
+  * milliseconds before being wrapped in a {@link Duration}.
+  *
+  * @param proto the gRPC request; {@code null} yields an empty Java request
+  * @return the equivalent Java request
+  */
+ public static TaskStatusPollRequest toJava(
+         ai.docling.serve.v1.TaskStatusPollRequest proto) {
+     return proto == null
+             ? TaskStatusPollRequest.builder().build()
+             : TaskStatusPollRequest.builder()
+                     .taskId(proto.getTaskId())
+                     .waitTime(Duration.ofMillis((long) (proto.getWaitTime() * 1000)))
+                     .build();
+ }
+
+ /**
+  * Converts a gRPC task-result request into the Java API request by copying the task id.
+  *
+  * @param proto the gRPC request; {@code null} yields an empty Java request
+  * @return the equivalent Java request
+  */
+ public static TaskResultRequest toJava(
+         ai.docling.serve.v1.TaskResultRequest proto) {
+     return proto == null
+             ? TaskResultRequest.builder().build()
+             : TaskResultRequest.builder().taskId(proto.getTaskId()).build();
+ }
+
+ /**
+  * Converts a gRPC clear-results request into the Java API request.
+  *
+  * <p>The older-than threshold is applied only when it is present, finite and
+  * strictly positive; it is multiplied by 1000 and truncated to whole milliseconds.
+  *
+  * @param proto the gRPC request; {@code null} yields an empty Java request
+  * @return the equivalent Java request
+  */
+ public static ClearResultsRequest toJava(
+         ai.docling.serve.v1.ClearResultsRequest proto) {
+     var request = ClearResultsRequest.builder();
+     if (proto == null || !proto.hasOlderThan()) {
+         return request.build();
+     }
+
+     double threshold = proto.getOlderThan();
+     boolean usable = Double.isFinite(threshold) && threshold > 0;
+     if (usable) {
+         request.olderThen(Duration.ofMillis((long) (threshold * 1000)));
+     }
+     return request.build();
+ }
+
+ // ==================== Java → Proto (Response Mapping) ====================
+
+ /**
+  * Converts a Java health-check response into the gRPC health response.
+  *
+  * @param java the Java response; {@code null} (or a null status) yields an empty proto message
+  * @return the equivalent proto message
+  */
+ public static ai.docling.serve.v1.HealthResponse toProto(HealthCheckResponse java) {
+     var response = ai.docling.serve.v1.HealthResponse.newBuilder();
+     if (java == null) {
+         return response.build();
+     }
+     var status = java.getStatus();
+     if (status != null) {
+         response.setStatus(status);
+     }
+     return response.build();
+ }
+
+ /**
+ * Maps Java ConvertDocumentResponse → proto ConvertDocumentResponse.
+ */
+ public static ai.docling.serve.v1.ConvertDocumentResponse toProto(
+ ConvertDocumentResponse java) {
+ ai.docling.serve.v1.ConvertDocumentResponse.Builder builder =
+ ai.docling.serve.v1.ConvertDocumentResponse.newBuilder();
+
+ if (java == null) {
+ return builder.build();
+ }
+
+ if (java.getDocument() != null) {
+ builder.setDocument(toProtoDocumentResponse(java.getDocument()));
+ }
+ if (java.getErrors() != null) {
+ java.getErrors().forEach(error -> builder.addErrors(toProtoErrorItem(error)));
+ }
+ if (java.getProcessingTime() != null) {
+ builder.setProcessingTime(java.getProcessingTime());
+ }
+ if (java.getStatus() != null) {
+ builder.setStatus(java.getStatus());
+ }
+ if (java.getTimings() != null) {
+ java.getTimings().forEach((key, value) -> {
+ if (value instanceof Number) {
+ builder.putTimings(key, ((Number) value).doubleValue());
+ }
+ });
+ }
+
+ return builder.build();
+ }
+
+ /**
+  * Converts a Java chunk-document response into the gRPC response, copying the
+  * chunks, documents and processing time that are non-null.
+  *
+  * @param java the Java response; {@code null} yields an empty proto message
+  * @return the equivalent proto message
+  */
+ public static ai.docling.serve.v1.ChunkDocumentResponse toProto(
+         ChunkDocumentResponse java) {
+     var response = ai.docling.serve.v1.ChunkDocumentResponse.newBuilder();
+     if (java == null) {
+         return response.build();
+     }
+
+     var chunks = java.getChunks();
+     if (chunks != null) {
+         for (var chunk : chunks) {
+             response.addChunks(toProtoChunk(chunk));
+         }
+     }
+
+     var documents = java.getDocuments();
+     if (documents != null) {
+         for (var document : documents) {
+             response.addDocuments(toProtoDocument(document));
+         }
+     }
+
+     var processingTime = java.getProcessingTime();
+     if (processingTime != null) {
+         response.setProcessingTime(processingTime);
+     }
+
+     return response.build();
+ }
+
+ /**
+  * Converts a Java task-status poll response into the gRPC response, copying
+  * each field that is non-null.
+  *
+  * @param java the Java response; {@code null} yields an empty proto message
+  * @return the equivalent proto message
+  */
+ public static ai.docling.serve.v1.TaskStatusPollResponse toProto(
+         TaskStatusPollResponse java) {
+     var response = ai.docling.serve.v1.TaskStatusPollResponse.newBuilder();
+     if (java == null) {
+         return response.build();
+     }
+
+     var taskId = java.getTaskId();
+     if (taskId != null) {
+         response.setTaskId(taskId);
+     }
+
+     var taskType = java.getTaskType();
+     if (taskType != null) {
+         response.setTaskType(taskType);
+     }
+
+     var taskStatus = java.getTaskStatus();
+     if (taskStatus != null) {
+         response.setTaskStatus(toProtoTaskStatus(taskStatus));
+     }
+
+     var taskPosition = java.getTaskPosition();
+     if (taskPosition != null) {
+         response.setTaskPosition(taskPosition);
+     }
+
+     var metadata = java.getTaskStatusMetadata();
+     if (metadata != null) {
+         response.setTaskMeta(toProtoTaskStatusMetadata(metadata));
+     }
+
+     return response.build();
+ }
+
+ /**
+  * Converts a Java clear response into the gRPC clear response.
+  *
+  * @param java the Java response; {@code null} (or a null status) yields an empty proto message
+  * @return the equivalent proto message
+  */
+ public static ai.docling.serve.v1.ClearResponse toProto(ClearResponse java) {
+     var response = ai.docling.serve.v1.ClearResponse.newBuilder();
+     if (java == null) {
+         return response.build();
+     }
+     var status = java.getStatus();
+     if (status != null) {
+         response.setStatus(status);
+     }
+     return response.build();
+ }
+
+ // ==================== Private Helpers: Proto → Java ====================
+
+ /**
+  * Converts a proto source into the matching Java source, chosen by which
+  * variant of the proto oneof is set.
+  *
+  * @param proto the proto source; dereferenced without a null check
+  * @return a file, HTTP or S3 source, or {@code null} when no variant is set
+  * @throws IllegalArgumentException if an HTTP source carries a URL that
+  *         {@link URI#create(String)} rejects
+  */
+ private static Source toJavaSource(ai.docling.serve.v1.Source proto) {
+     return switch (proto.getSourceCase()) {
+         case SOURCE_NOT_SET -> null;
+         case FILE -> {
+             var file = proto.getFile();
+             yield FileSource.builder()
+                     .base64String(file.getBase64String())
+                     .filename(file.getFilename())
+                     .build();
+         }
+         case HTTP -> {
+             var http = proto.getHttp();
+             HttpSource.Builder source = HttpSource.builder()
+                     .url(URI.create(http.getUrl()));
+             var headers = http.getHeadersMap();
+             if (headers != null) {
+                 headers.forEach(source::header);
+             }
+             yield source.build();
+         }
+         case S3 -> {
+             var s3 = proto.getS3();
+             S3Source.Builder source = S3Source.builder()
+                     .endpoint(s3.getEndpoint())
+                     .accessKey(s3.getAccessKey())
+                     .secretKey(s3.getSecretKey())
+                     .bucket(s3.getBucket())
+                     .verifySsl(s3.getVerifySsl());
+             // The key prefix is optional in the proto; leave the Java default when absent.
+             if (s3.hasKeyPrefix()) {
+                 source.keyPrefix(s3.getKeyPrefix());
+             }
+             yield source.build();
+         }
+     };
+ }
+
+ /**
+  * Converts a proto target into the matching Java target, chosen by which
+  * variant of the proto oneof is set.
+  *
+  * @param proto the proto target; dereferenced without a null check
+  * @return an in-body, PUT, S3 or ZIP target, or {@code null} when no variant is set
+  * @throws IllegalArgumentException if a PUT target carries a URL that
+  *         {@link URI#create(String)} rejects
+  */
+ private static Target toJavaTarget(ai.docling.serve.v1.Target proto) {
+     return switch (proto.getTargetCase()) {
+         case TARGET_NOT_SET -> null;
+         case INBODY -> InBodyTarget.builder().build();
+         case ZIP -> ZipTarget.builder().build();
+         case PUT -> {
+             URI destination = URI.create(proto.getPut().getUrl());
+             yield PutTarget.builder().url(destination).build();
+         }
+         case S3 -> {
+             ai.docling.serve.v1.S3Target bucketTarget = proto.getS3();
+             ai.docling.serve.api.convert.request.target.S3Target.Builder target =
+                     ai.docling.serve.api.convert.request.target.S3Target.builder()
+                             .endpoint(bucketTarget.getEndpoint())
+                             .accessKey(bucketTarget.getAccessKey())
+                             .secretKey(bucketTarget.getSecretKey())
+                             .bucket(bucketTarget.getBucket())
+                             .verifySsl(bucketTarget.getVerifySsl());
+             // The key prefix is optional in the proto; leave the Java default when absent.
+             if (bucketTarget.hasKeyPrefix()) {
+                 target.keyPrefix(bucketTarget.getKeyPrefix());
+             }
+             yield target.build();
+         }
+     };
+ }
+
+ /**
+  * Converts proto conversion options into the Java options object.
+  *
+  * <p>Repeated fields are copied element by element (formats without a Java
+  * counterpart are skipped). Singular fields are copied only when the proto
+  * reports them as present, so unset fields keep the Java builder's defaults.
+  * The document timeout is multiplied by 1000 and truncated to whole milliseconds.
+  *
+  * @param proto the proto options; dereferenced without a null check
+  * @return the equivalent Java options
+  */
+ private static ConvertDocumentOptions toJavaOptions(
+         ai.docling.serve.v1.ConvertDocumentOptions proto) {
+     ConvertDocumentOptions.Builder options = ConvertDocumentOptions.builder();
+
+     // Input/output formats: drop entries that have no Java equivalent.
+     var fromFormats = proto.getFromFormatsList();
+     if (fromFormats != null) {
+         for (ai.docling.serve.v1.InputFormat candidate : fromFormats) {
+             InputFormat mapped = toJavaInputFormat(candidate);
+             if (mapped != null) {
+                 options.fromFormat(mapped);
+             }
+         }
+     }
+     var toFormats = proto.getToFormatsList();
+     if (toFormats != null) {
+         for (ai.docling.serve.v1.OutputFormat candidate : toFormats) {
+             OutputFormat mapped = toJavaOutputFormat(candidate);
+             if (mapped != null) {
+                 options.toFormat(mapped);
+             }
+         }
+     }
+
+     if (proto.hasImageExportMode()) {
+         options.imageExportMode(toJavaImageRefMode(proto.getImageExportMode()));
+     }
+     if (proto.hasDoOcr()) {
+         options.doOcr(proto.getDoOcr());
+     }
+     if (proto.hasForceOcr()) {
+         options.forceOcr(proto.getForceOcr());
+     }
+     if (proto.hasOcrEngine()) {
+         options.ocrEngine(toJavaOcrEngine(proto.getOcrEngine()));
+     }
+     var ocrLanguages = proto.getOcrLangList();
+     if (ocrLanguages != null) {
+         ocrLanguages.forEach(options::ocrLang);
+     }
+     if (proto.hasPdfBackend()) {
+         options.pdfBackend(toJavaPdfBackend(proto.getPdfBackend()));
+     }
+     if (proto.hasTableMode()) {
+         options.tableMode(toJavaTableFormerMode(proto.getTableMode()));
+     }
+     if (proto.hasTableCellMatching()) {
+         options.tableCellMatching(proto.getTableCellMatching());
+     }
+     if (proto.hasPipeline()) {
+         options.pipeline(toJavaPipeline(proto.getPipeline()));
+     }
+     var pageRange = proto.getPageRangeList();
+     if (pageRange != null) {
+         pageRange.forEach(options::pageRange);
+     }
+     if (proto.hasDocumentTimeout()) {
+         options.documentTimeout(Duration.ofMillis((long) (proto.getDocumentTimeout() * 1000)));
+     }
+     if (proto.hasAbortOnError()) {
+         options.abortOnError(proto.getAbortOnError());
+     }
+     if (proto.hasDoTableStructure()) {
+         options.doTableStructure(proto.getDoTableStructure());
+     }
+     if (proto.hasIncludeImages()) {
+         options.includeImages(proto.getIncludeImages());
+     }
+     if (proto.hasImagesScale()) {
+         options.imagesScale(proto.getImagesScale());
+     }
+     if (proto.hasMdPageBreakPlaceholder()) {
+         options.mdPageBreakPlaceholder(proto.getMdPageBreakPlaceholder());
+     }
+     if (proto.hasDoCodeEnrichment()) {
+         options.doCodeEnrichment(proto.getDoCodeEnrichment());
+     }
+     if (proto.hasDoFormulaEnrichment()) {
+         options.doFormulaEnrichment(proto.getDoFormulaEnrichment());
+     }
+     if (proto.hasDoPictureClassification()) {
+         options.doPictureClassification(proto.getDoPictureClassification());
+     }
+     if (proto.hasDoPictureDescription()) {
+         options.doPictureDescription(proto.getDoPictureDescription());
+     }
+     if (proto.hasPictureDescriptionAreaThreshold()) {
+         options.pictureDescriptionAreaThreshold(proto.getPictureDescriptionAreaThreshold());
+     }
+     if (proto.hasVlmPipelineModel()) {
+         options.vlmPipelineModel(toJavaVlmModelType(proto.getVlmPipelineModel()));
+     }
+     if (proto.hasVlmPipelineModelLocal()) {
+         options.vlmPipelineModelLocal(proto.getVlmPipelineModelLocal());
+     }
+     if (proto.hasVlmPipelineModelApi()) {
+         options.vlmPipelineModelApi(proto.getVlmPipelineModelApi());
+     }
+
+     return options.build();
+ }
+
+ /**
+  * Converts proto hierarchical-chunker options into the Java options, copying
+  * the markdown-tables and raw-text flags unconditionally.
+  *
+  * @param proto the proto options; dereferenced without a null check
+  * @return the equivalent Java options
+  */
+ private static HierarchicalChunkerOptions toJavaHierarchicalOptions(
+         ai.docling.serve.v1.HierarchicalChunkerOptions proto) {
+     var markdownTables = proto.getUseMarkdownTables();
+     var rawText = proto.getIncludeRawText();
+     return HierarchicalChunkerOptions.builder()
+             .useMarkdownTables(markdownTables)
+             .includeRawText(rawText)
+             .build();
+ }
+
+ /**
+  * Converts proto hybrid-chunker options into the Java options.
+  *
+  * <p>The markdown-tables and raw-text flags are always copied; max tokens,
+  * tokenizer and merge-peers are copied only when present in the proto.
+  *
+  * @param proto the proto options; dereferenced without a null check
+  * @return the equivalent Java options
+  */
+ private static HybridChunkerOptions toJavaHybridOptions(
+         ai.docling.serve.v1.HybridChunkerOptions proto) {
+     HybridChunkerOptions.Builder options = HybridChunkerOptions.builder()
+             .useMarkdownTables(proto.getUseMarkdownTables())
+             .includeRawText(proto.getIncludeRawText());
+
+     if (proto.hasMaxTokens()) {
+         options.maxTokens(proto.getMaxTokens());
+     }
+     if (proto.hasTokenizer()) {
+         options.tokenizer(proto.getTokenizer());
+     }
+     if (proto.hasMergePeers()) {
+         options.mergePeers(proto.getMergePeers());
+     }
+     return options.build();
+ }
+
+ // ==================== Private Helpers: Java → Proto ====================
+
+ /**
+  * Converts a Java document response into the proto document response, copying
+  * the filename and each exported content format that is non-null.
+  *
+  * @param java the Java document response; dereferenced without a null check
+  * @return the equivalent proto message
+  */
+ private static ai.docling.serve.v1.DocumentResponse toProtoDocumentResponse(
+         DocumentResponse java) {
+     var document = ai.docling.serve.v1.DocumentResponse.newBuilder();
+
+     var filename = java.getFilename();
+     if (filename != null) {
+         document.setFilename(filename);
+     }
+
+     var json = java.getJsonContent();
+     if (json != null) {
+         document.setJsonContent(DoclingDocumentMapper.map(json));
+     }
+
+     var markdown = java.getMarkdownContent();
+     if (markdown != null) {
+         document.setMdContent(markdown);
+     }
+
+     var html = java.getHtmlContent();
+     if (html != null) {
+         document.setHtmlContent(html);
+     }
+
+     var text = java.getTextContent();
+     if (text != null) {
+         document.setTextContent(text);
+     }
+
+     var doctags = java.getDoctagsContent();
+     if (doctags != null) {
+         document.setDoctagsContent(doctags);
+     }
+
+     return document.build();
+ }
+
+ /**
+  * Converts a Java error item into the proto error item, copying the component
+  * type, error message and module name when non-null.
+  *
+  * @param java the Java error item; dereferenced without a null check
+  * @return the equivalent proto message
+  */
+ private static ai.docling.serve.v1.ErrorItem toProtoErrorItem(ErrorItem java) {
+     var item = ai.docling.serve.v1.ErrorItem.newBuilder();
+
+     var componentType = java.getComponentType();
+     if (componentType != null) {
+         item.setComponentType(componentType);
+     }
+
+     var message = java.getErrorMessage();
+     if (message != null) {
+         item.setErrorMessage(message);
+     }
+
+     var moduleName = java.getModuleName();
+     if (moduleName != null) {
+         item.setModuleName(moduleName);
+     }
+
+     return item.build();
+ }
+
+ /**
+  * Converts a Java chunk into the proto chunk.
+  *
+  * <p>The chunk index is always copied; every other field is copied only when
+  * non-null. Metadata values are stringified with {@link String#valueOf(Object)}.
+  *
+  * @param java the Java chunk; dereferenced without a null check
+  * @return the equivalent proto message
+  */
+ private static ai.docling.serve.v1.Chunk toProtoChunk(
+         ai.docling.serve.api.chunk.response.Chunk java) {
+     var chunk = ai.docling.serve.v1.Chunk.newBuilder();
+
+     var filename = java.getFilename();
+     if (filename != null) {
+         chunk.setFilename(filename);
+     }
+
+     chunk.setChunkIndex(java.getChunkIndex());
+
+     var text = java.getText();
+     if (text != null) {
+         chunk.setText(text);
+     }
+
+     var rawText = java.getRawText();
+     if (rawText != null) {
+         chunk.setRawText(rawText);
+     }
+
+     var numTokens = java.getNumTokens();
+     if (numTokens != null) {
+         chunk.setNumTokens(numTokens);
+     }
+
+     var headings = java.getHeadings();
+     if (headings != null) {
+         chunk.addAllHeadings(headings);
+     }
+
+     var captions = java.getCaptions();
+     if (captions != null) {
+         chunk.addAllCaptions(captions);
+     }
+
+     var docItems = java.getDocItems();
+     if (docItems != null) {
+         chunk.addAllDocItems(docItems);
+     }
+
+     var pageNumbers = java.getPageNumbers();
+     if (pageNumbers != null) {
+         chunk.addAllPageNumbers(pageNumbers);
+     }
+
+     var metadata = java.getMetadata();
+     if (metadata != null) {
+         for (var attribute : metadata.entrySet()) {
+             chunk.putMetadata(attribute.getKey(), String.valueOf(attribute.getValue()));
+         }
+     }
+
+     return chunk.build();
+ }
+
+ /**
+  * Converts a Java chunk-response document into the proto document, copying
+  * kind, content, status and errors when non-null.
+  *
+  * @param java the Java document; dereferenced without a null check
+  * @return the equivalent proto message
+  */
+ private static ai.docling.serve.v1.Document toProtoDocument(Document java) {
+     var document = ai.docling.serve.v1.Document.newBuilder();
+
+     var kind = java.getKind();
+     if (kind != null) {
+         document.setKind(kind);
+     }
+
+     var content = java.getContent();
+     if (content != null) {
+         document.setContent(toProtoExportDocumentResponse(content));
+     }
+
+     var status = java.getStatus();
+     if (status != null) {
+         document.setStatus(status);
+     }
+
+     var errors = java.getErrors();
+     if (errors != null) {
+         for (var error : errors) {
+             document.addErrors(toProtoErrorItem(error));
+         }
+     }
+
+     return document.build();
+ }
+
+ /**
+  * Converts a Java export-document response into the proto message, copying
+  * the filename and each exported content format that is non-null.
+  *
+  * @param java the Java export response; dereferenced without a null check
+  * @return the equivalent proto message
+  */
+ private static ai.docling.serve.v1.ExportDocumentResponse toProtoExportDocumentResponse(
+         ExportDocumentResponse java) {
+     var export = ai.docling.serve.v1.ExportDocumentResponse.newBuilder();
+
+     var filename = java.getFilename();
+     if (filename != null) {
+         export.setFilename(filename);
+     }
+
+     var json = java.getJsonContent();
+     if (json != null) {
+         export.setJsonContent(DoclingDocumentMapper.map(json));
+     }
+
+     var markdown = java.getMarkdownContent();
+     if (markdown != null) {
+         export.setMdContent(markdown);
+     }
+
+     var html = java.getHtmlContent();
+     if (html != null) {
+         export.setHtmlContent(html);
+     }
+
+     var text = java.getTextContent();
+     if (text != null) {
+         export.setTextContent(text);
+     }
+
+     var doctags = java.getDoctagsContent();
+     if (doctags != null) {
+         export.setDoctagsContent(doctags);
+     }
+
+     return export.build();
+ }
+
+ /**
+  * Translates a Java task status into the proto task status.
+  *
+  * @param java the Java status, may be {@code null}
+  * @return the matching proto constant, or {@code TASK_STATUS_UNSPECIFIED} for {@code null}
+  */
+ private static ai.docling.serve.v1.TaskStatus toProtoTaskStatus(TaskStatus java) {
+     return java == null
+             ? ai.docling.serve.v1.TaskStatus.TASK_STATUS_UNSPECIFIED
+             : switch (java) {
+                 case PENDING -> ai.docling.serve.v1.TaskStatus.TASK_STATUS_PENDING;
+                 case STARTED -> ai.docling.serve.v1.TaskStatus.TASK_STATUS_STARTED;
+                 case SUCCESS -> ai.docling.serve.v1.TaskStatus.TASK_STATUS_SUCCESS;
+                 case FAILURE -> ai.docling.serve.v1.TaskStatus.TASK_STATUS_FAILURE;
+             };
+ }
+
+ /**
+  * Converts Java task-status metadata into the proto message, copying each
+  * counter (docs, processed, succeeded, failed) only when it is non-null.
+  *
+  * @param java the Java metadata; dereferenced without a null check
+  * @return the equivalent proto message
+  */
+ private static ai.docling.serve.v1.TaskStatusMetadata toProtoTaskStatusMetadata(
+         TaskStatusMetadata java) {
+     var counters = ai.docling.serve.v1.TaskStatusMetadata.newBuilder();
+
+     ProtoMapping.ifNonNull(java.getNumDocs(), counters::setNumDocs);
+     ProtoMapping.ifNonNull(java.getNumProcessed(), counters::setNumProcessed);
+     ProtoMapping.ifNonNull(java.getNumSucceeded(), counters::setNumSucceeded);
+     ProtoMapping.ifNonNull(java.getNumFailed(), counters::setNumFailed);
+
+     return counters.build();
+ }
+
+ // ==================== Enum Mapping: Proto → Java ====================
+
+ private static InputFormat toJavaInputFormat(ai.docling.serve.v1.InputFormat proto) {
+ return switch (proto) {
+ case INPUT_FORMAT_ASCIIDOC -> InputFormat.ASCIIDOC;
+ case INPUT_FORMAT_AUDIO -> InputFormat.AUDIO;
+ case INPUT_FORMAT_CSV -> InputFormat.CSV;
+ case INPUT_FORMAT_DOCX -> InputFormat.DOCX;
+ case INPUT_FORMAT_HTML -> InputFormat.HTML;
+ case INPUT_FORMAT_IMAGE -> InputFormat.IMAGE;
+ case INPUT_FORMAT_JSON_DOCLING -> InputFormat.JSON_DOCLING;
+ case INPUT_FORMAT_MD -> InputFormat.MARKDOWN;
+ case INPUT_FORMAT_METS_GBS -> InputFormat.METS_GBS;
+ case INPUT_FORMAT_PDF -> InputFormat.PDF;
+ case INPUT_FORMAT_PPTX -> InputFormat.PPTX;
+ case INPUT_FORMAT_XLSX -> InputFormat.XLSX;
+ case INPUT_FORMAT_XML_JATS -> InputFormat.XML_JATS;
+ case INPUT_FORMAT_XML_USPTO -> InputFormat.XML_USPTO;
+ default -> null;
+ };
+ }
+
+ private static OutputFormat toJavaOutputFormat(ai.docling.serve.v1.OutputFormat proto) {
+ return switch (proto) {
+ case OUTPUT_FORMAT_DOCTAGS -> OutputFormat.DOCTAGS;
+ case OUTPUT_FORMAT_HTML -> OutputFormat.HTML;
+ case OUTPUT_FORMAT_HTML_SPLIT_PAGE -> OutputFormat.HTML_SPLIT_PAGE;
+ case OUTPUT_FORMAT_JSON -> OutputFormat.JSON;
+ case OUTPUT_FORMAT_MD -> OutputFormat.MARKDOWN;
+ case OUTPUT_FORMAT_TEXT -> OutputFormat.TEXT;
+ default -> null;
+ };
+ }
+
+ private static ImageRefMode toJavaImageRefMode(ai.docling.serve.v1.ImageRefMode proto) {
+ return switch (proto) {
+ case IMAGE_REF_MODE_EMBEDDED -> ImageRefMode.EMBEDDED;
+ case IMAGE_REF_MODE_PLACEHOLDER -> ImageRefMode.PLACEHOLDER;
+ case IMAGE_REF_MODE_REFERENCED -> ImageRefMode.REFERENCED;
+ default -> null;
+ };
+ }
+
+ private static OcrEngine toJavaOcrEngine(ai.docling.serve.v1.OcrEngine proto) {
+ return switch (proto) {
+ case OCR_ENGINE_AUTO -> OcrEngine.AUTO;
+ case OCR_ENGINE_EASYOCR -> OcrEngine.EASYOCR;
+ case OCR_ENGINE_OCRMAC -> OcrEngine.OCRMAC;
+ case OCR_ENGINE_RAPIDOCR -> OcrEngine.RAPIDOCR;
+ case OCR_ENGINE_TESSEROCR -> OcrEngine.TESSEROCR;
+ case OCR_ENGINE_TESSERACT -> OcrEngine.TESSERACT;
+ default -> null;
+ };
+ }
+
+ private static PdfBackend toJavaPdfBackend(ai.docling.serve.v1.PdfBackend proto) {
+ return switch (proto) {
+ case PDF_BACKEND_DLPARSE_V1 -> PdfBackend.DLPARSE_V1;
+ case PDF_BACKEND_DLPARSE_V2 -> PdfBackend.DLPARSE_V2;
+ case PDF_BACKEND_DLPARSE_V4 -> PdfBackend.DLPARSE_V4;
+ case PDF_BACKEND_PYPDFIUM2 -> PdfBackend.PYPDFIUM2;
+ default -> null;
+ };
+ }
+
+ private static TableFormerMode toJavaTableFormerMode(ai.docling.serve.v1.TableFormerMode proto) {
+ return switch (proto) {
+ case TABLE_FORMER_MODE_ACCURATE -> TableFormerMode.ACCURATE;
+ case TABLE_FORMER_MODE_FAST -> TableFormerMode.FAST;
+ default -> null;
+ };
+ }
+
+ private static ProcessingPipeline toJavaPipeline(ai.docling.serve.v1.ProcessingPipeline proto) {
+ return switch (proto) {
+ case PROCESSING_PIPELINE_ASR -> ProcessingPipeline.ASR;
+ case PROCESSING_PIPELINE_STANDARD -> ProcessingPipeline.STANDARD;
+ case PROCESSING_PIPELINE_VLM -> ProcessingPipeline.VLM;
+ default -> null;
+ };
+ }
+
+ private static VlmModelType toJavaVlmModelType(ai.docling.serve.v1.VlmModelType proto) {
+ return switch (proto) {
+ case VLM_MODEL_TYPE_SMOLDOCLING -> VlmModelType.SMOLDOCLING;
+ case VLM_MODEL_TYPE_SMOLDOCLING_VLLM -> VlmModelType.SMOLDOCLING_VLLM;
+ case VLM_MODEL_TYPE_GRANITE_VISION -> VlmModelType.GRANITE_VISION;
+ case VLM_MODEL_TYPE_GRANITE_VISION_VLLM -> VlmModelType.GRANITE_VISION_VLLM;
+ case VLM_MODEL_TYPE_GRANITE_VISION_OLLAMA -> VlmModelType.GRANITE_VISION_OLLAMA;
+ case VLM_MODEL_TYPE_GOT_OCR_2 -> VlmModelType.GOT_OCR_2;
+ default -> null;
+ };
+ }
+}
diff --git a/docling-serve/docling-serve-grpc/src/main/java/module-info.java b/docling-serve/docling-serve-grpc/src/main/java/module-info.java
new file mode 100644
index 00000000..f2814488
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/java/module-info.java
@@ -0,0 +1,18 @@
+ /**
+  * Module descriptor for the Docling Serve gRPC API: mapping between the proto
+  * serve types and the Java API models.
+  */
+ module ai.docling.serve.grpc {
+     requires ai.docling.serve.api;
+     requires io.grpc;
+     requires io.grpc.stub;
+     requires io.grpc.protobuf;
+     requires com.google.protobuf;
+     requires org.slf4j;
+     requires java.net.http;
+     requires tools.jackson.databind;
+
+     // Compile-time only. javax.annotation-api is declared compileOnly in the build,
+     // so a mandatory `requires` would make the module unresolvable on a module path
+     // at run time.
+     requires static java.annotation;
+     requires static lombok;
+     requires static org.jspecify;
+     requires static com.google.errorprone.annotations;
+
+     // NOTE(review): the public mapper signatures use types from the generated proto
+     // packages, which are not exported here — confirm whether module-path consumers
+     // need those packages exported as well.
+     exports ai.docling.serve.grpc.v1;
+     exports ai.docling.serve.grpc.v1.mapping;
+ }
diff --git a/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/core/v1/docling_document.proto b/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/core/v1/docling_document.proto
new file mode 100644
index 00000000..ae725156
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/core/v1/docling_document.proto
@@ -0,0 +1,463 @@
+syntax = "proto3";
+
+package ai.docling.core.v1;
+
+option java_multiple_files = true;
+option java_outer_classname = "DoclingDocumentProto";
+option java_package = "ai.docling.core.v1";
+
+// Docling Document Structure Protocol Buffers Definition
+//
+// This proto file defines the complete structure for documents processed by Docling,
+// providing a 1:1 mapping of the Docling JSON schema to protobuf. Docling is an
+// advanced document parsing system that extracts rich semantic structure from PDFs
+// and other document formats.
+//
+// Document Structure Overview:
+// ===========================
+//
+// A DoclingDocument contains:
+// - Hierarchical structure (body, groups)
+// - Text content (titles, headers, paragraphs, lists, code, formulas)
+// - Visual elements (pictures with AI-generated descriptions)
+// - Tabular data (tables with cell-level structure)
+// - Form data (key-value pairs, form fields)
+// - Page metadata (size, images)
+// - Provenance tracking (bounding boxes, page numbers)
+
+// DoclingDocument is the root message representing a complete parsed document.
+message DoclingDocument {
+ // Schema identifier for versioning (e.g., "docling_document_v2")
+ optional string schema_name = 1;
+
+ // Version number of the Docling schema used
+ optional string version = 2;
+
+ // Human-readable name or title of the document
+ string name = 3;
+
+ // Metadata about the source document (file info, hash, etc.)
+ optional DocumentOrigin origin = 4;
+
+ // The root body group containing the main document structure.
+ GroupItem body = 5;
+
+ // Additional groups representing logical sections (chapters, sections, etc.).
+ repeated GroupItem groups = 6;
+
+ // All text items in the document (titles, paragraphs, lists, etc.).
+ repeated BaseTextItem texts = 7;
+
+ // All picture/image items in the document.
+ repeated PictureItem pictures = 8;
+
+ // All table items in the document.
+ repeated TableItem tables = 9;
+
+ // Key-value pairs extracted from forms or structured data.
+ repeated KeyValueItem key_value_items = 10;
+
+ // Form elements detected in the document.
+ repeated FormItem form_items = 11;
+
+ // Map of page numbers to page metadata.
+ map<string, PageItem> pages = 12; // NOTE(review): string keys assumed (JSON object keys) - confirm against the mapper
+}
+
+// DocumentOrigin contains metadata about the source document file.
+message DocumentOrigin {
+ // MIME type of the source file
+ string mimetype = 1;
+
+ // Binary hash of the source file for integrity verification.
+ string binary_hash = 2;
+
+ // Original filename of the source document
+ string filename = 3;
+
+ // Optional URI/URL where the document was retrieved from
+ optional string uri = 4;
+}
+
+// ContentLayer defines the semantic layer where content appears in the document.
+enum ContentLayer {
+ CONTENT_LAYER_UNSPECIFIED = 0;
+ CONTENT_LAYER_BODY = 1;
+ CONTENT_LAYER_FURNITURE = 2;
+ CONTENT_LAYER_BACKGROUND = 3;
+ CONTENT_LAYER_INVISIBLE = 4;
+ CONTENT_LAYER_NOTES = 5;
+}
+
+// GroupLabel defines the semantic type of a group in the document hierarchy.
+enum GroupLabel {
+ GROUP_LABEL_UNSPECIFIED = 0;
+ GROUP_LABEL_LIST = 1;
+ GROUP_LABEL_ORDERED_LIST = 2;
+ GROUP_LABEL_CHAPTER = 3;
+ GROUP_LABEL_SECTION = 4;
+ GROUP_LABEL_SHEET = 5;
+ GROUP_LABEL_SLIDE = 6;
+ GROUP_LABEL_FORM_AREA = 7;
+ GROUP_LABEL_KEY_VALUE_AREA = 8;
+ GROUP_LABEL_COMMENT_SECTION = 9;
+ GROUP_LABEL_INLINE = 10;
+ GROUP_LABEL_PICTURE_AREA = 11;
+}
+
+// GroupItem represents a logical grouping of document elements.
+message GroupItem {
+ string self_ref = 1;
+ optional RefItem parent = 2;
+ repeated RefItem children = 3;
+ ContentLayer content_layer = 4;
+ optional BaseMeta meta = 5;
+ optional string name = 6;
+ GroupLabel label = 7;
+}
+
+// RefItem is a JSON Pointer reference to another item in the document.
+message RefItem {
+ string ref = 1;
+}
+
+// BaseMeta contains metadata fields common to most document items.
+message BaseMeta {
+ optional SummaryMetaField summary = 1;
+}
+
+// SummaryMetaField contains an AI-generated text summary with confidence.
+message SummaryMetaField {
+ optional double confidence = 1;
+ optional string created_by = 2;
+ string text = 3;
+}
+
+// DocItemLabel defines the semantic type of document content items.
+enum DocItemLabel {
+ DOC_ITEM_LABEL_UNSPECIFIED = 0;
+ DOC_ITEM_LABEL_CAPTION = 1;
+ DOC_ITEM_LABEL_CHART = 2;
+ DOC_ITEM_LABEL_CHECKBOX_SELECTED = 3;
+ DOC_ITEM_LABEL_CHECKBOX_UNSELECTED = 4;
+ DOC_ITEM_LABEL_CODE = 5;
+ DOC_ITEM_LABEL_DOCUMENT_INDEX = 6;
+ DOC_ITEM_LABEL_EMPTY_VALUE = 7;
+ DOC_ITEM_LABEL_FOOTNOTE = 8;
+ DOC_ITEM_LABEL_FORM = 9;
+ DOC_ITEM_LABEL_FORMULA = 10;
+ DOC_ITEM_LABEL_GRADING_SCALE = 11;
+ DOC_ITEM_LABEL_HANDWRITTEN_TEXT = 12;
+ DOC_ITEM_LABEL_KEY_VALUE_REGION = 13;
+ DOC_ITEM_LABEL_LIST_ITEM = 14;
+ DOC_ITEM_LABEL_PAGE_FOOTER = 15;
+ DOC_ITEM_LABEL_PAGE_HEADER = 16;
+ DOC_ITEM_LABEL_PARAGRAPH = 17;
+ DOC_ITEM_LABEL_PICTURE = 18;
+ DOC_ITEM_LABEL_REFERENCE = 19;
+ DOC_ITEM_LABEL_SECTION_HEADER = 20;
+ DOC_ITEM_LABEL_TABLE = 21;
+ DOC_ITEM_LABEL_TEXT = 22;
+ DOC_ITEM_LABEL_TITLE = 23;
+}
+
+// Script defines the vertical positioning of text.
+enum Script {
+ SCRIPT_UNSPECIFIED = 0;
+ SCRIPT_BASELINE = 1;
+ SCRIPT_SUB = 2;
+ SCRIPT_SUPER = 3;
+}
+
+// Formatting contains text formatting/styling information.
+message Formatting {
+ bool bold = 1;
+ bool italic = 2;
+ bool underline = 3;
+ bool strikethrough = 4;
+ Script script = 5;
+}
+
+// BaseTextItem is a union type representing any text-based item in the document.
+message BaseTextItem {
+ oneof item {
+ TitleItem title = 1;
+ SectionHeaderItem section_header = 2;
+ ListItem list_item = 3;
+ CodeItem code = 4;
+ FormulaItem formula = 5;
+ TextItem text = 6;
+ }
+}
+
+// TextItemBase contains fields common to all text-based items.
+message TextItemBase {
+ string self_ref = 1;
+ optional RefItem parent = 2;
+ repeated RefItem children = 3;
+ ContentLayer content_layer = 4;
+ optional BaseMeta meta = 5;
+ DocItemLabel label = 6;
+ repeated ProvenanceItem prov = 7;
+ string orig = 8;
+ string text = 9;
+ optional Formatting formatting = 10;
+ optional string hyperlink = 11;
+}
+
+// TitleItem represents a document title or major heading.
+message TitleItem {
+ TextItemBase base = 1;
+}
+
+// SectionHeaderItem represents a section header with hierarchical level.
+message SectionHeaderItem {
+ TextItemBase base = 1;
+ int32 level = 2;
+}
+
+// ListItem represents a single item in a bulleted or numbered list.
+message ListItem {
+ TextItemBase base = 1;
+ bool enumerated = 2;
+ optional string marker = 3;
+}
+
+// CodeItem represents a code block with syntax highlighting metadata.
+message CodeItem {
+ TextItemBase base = 1;
+ optional FloatingMeta meta = 2;
+ repeated RefItem captions = 3;
+ repeated RefItem references = 4;
+ repeated RefItem footnotes = 5;
+ optional ImageRef image = 6;
+ optional string code_language = 7;
+}
+
+// FormulaItem represents a mathematical formula or equation.
+message FormulaItem {
+ TextItemBase base = 1;
+}
+
+// TextItem represents generic text content (paragraphs, captions, etc.).
+message TextItem {
+ TextItemBase base = 1;
+}
+
+// ProvenanceItem tracks the precise location of content in the source document.
+message ProvenanceItem {
+ int32 page_no = 1;
+ BoundingBox bbox = 2;
+ repeated int32 charspan = 3;
+}
+
+// BoundingBox defines a rectangular region in page coordinates.
+message BoundingBox {
+ double l = 1;
+ double t = 2;
+ double r = 3;
+ double b = 4;
+ optional string coord_origin = 5;
+}
+
+// ImageRef references an embedded image with its properties.
+message ImageRef {
+ string mimetype = 1;
+ int32 dpi = 2;
+ Size size = 3;
+ string uri = 4;
+}
+
+// Size represents 2D dimensions (width and height).
+message Size {
+ double width = 1;
+ double height = 2;
+}
+
+// PictureItem represents an image or figure in the document.
+message PictureItem {
+ optional string self_ref = 1; // NOTE(review): TableItem/KeyValueItem/FormItem/GroupItem declare this as plain `string` - confirm `optional` is intended
+ RefItem parent = 2; // NOTE(review): the sibling items mark `parent` as `optional` - confirm the two modifiers were not swapped
+ repeated RefItem children = 3;
+ ContentLayer content_layer = 4;
+ optional PictureMeta meta = 5;
+ string label = 6;
+ repeated ProvenanceItem prov = 7;
+ repeated RefItem captions = 8;
+ repeated RefItem references = 9;
+ repeated RefItem footnotes = 10;
+ optional ImageRef image = 11;
+}
+
+// PictureMeta contains rich metadata for pictures, including AI analysis.
+message PictureMeta {
+ optional SummaryMetaField summary = 1;
+ optional DescriptionMetaField description = 2;
+ optional PictureClassificationMetaField classification = 3;
+ optional MoleculeMetaField molecule = 4;
+ optional TabularChartMetaField tabular_chart = 5;
+}
+
+// DescriptionMetaField contains an AI-generated detailed description.
+message DescriptionMetaField {
+ optional double confidence = 1;
+ optional string created_by = 2;
+ string text = 3;
+}
+
+// PictureClassificationMetaField contains AI classification results.
+message PictureClassificationMetaField {
+ repeated PictureClassificationPrediction predictions = 1;
+}
+
+// PictureClassificationPrediction is a single classification result.
+message PictureClassificationPrediction {
+ optional double confidence = 1;
+ optional string created_by = 2;
+ string class_name = 3;
+}
+
+// MoleculeMetaField contains chemical structure information.
+message MoleculeMetaField {
+ optional double confidence = 1;
+ optional string created_by = 2;
+ string smi = 3;
+}
+
+// TabularChartMetaField contains chart data extracted from images.
+message TabularChartMetaField {
+ optional double confidence = 1;
+ optional string created_by = 2;
+ optional string title = 3;
+ TableData chart_data = 4;
+}
+
+// FloatingMeta contains metadata for floating elements (tables, code blocks, figures).
+message FloatingMeta {
+ optional SummaryMetaField summary = 1;
+ optional DescriptionMetaField description = 2;
+}
+
+// TableItem represents a table in the document with full structure.
+message TableItem {
+ string self_ref = 1;
+ optional RefItem parent = 2;
+ repeated RefItem children = 3;
+ ContentLayer content_layer = 4;
+ optional FloatingMeta meta = 5;
+ string label = 6;
+ repeated ProvenanceItem prov = 7;
+ repeated RefItem captions = 8;
+ repeated RefItem references = 9;
+ repeated RefItem footnotes = 10;
+ optional ImageRef image = 11;
+ TableData data = 12;
+}
+
+// TableData contains the complete table structure and content.
+message TableData {
+ repeated TableCell table_cells = 1;
+ int32 num_rows = 2;
+ int32 num_cols = 3;
+ repeated TableRow grid = 4;
+}
+
+// TableRow represents a single row in the table.
+message TableRow {
+ repeated TableCell cells = 1;
+}
+
+// TableCell represents a single cell in a table.
+message TableCell {
+ BoundingBox bbox = 1;
+ int32 row_span = 2;
+ int32 col_span = 3;
+ int32 start_row_offset_idx = 4;
+ int32 end_row_offset_idx = 5;
+ int32 start_col_offset_idx = 6;
+ int32 end_col_offset_idx = 7;
+ string text = 8;
+ bool column_header = 9;
+ bool row_header = 10;
+ bool row_section = 11;
+ bool fillable = 12;
+}
+
+// KeyValueItem represents a key-value pair extracted from forms.
+message KeyValueItem {
+ string self_ref = 1;
+ optional RefItem parent = 2;
+ repeated RefItem children = 3;
+ ContentLayer content_layer = 4;
+ optional FloatingMeta meta = 5;
+ string label = 6;
+ repeated ProvenanceItem prov = 7;
+ repeated RefItem captions = 8;
+ repeated RefItem references = 9;
+ repeated RefItem footnotes = 10;
+ optional ImageRef image = 11;
+ GraphData graph = 12;
+}
+
+// GraphData represents the structure of key-value relationships as a graph.
+message GraphData {
+ repeated GraphCell cells = 1;
+ repeated GraphLink links = 2;
+}
+
+// GraphCellLabel defines the role of a cell in a key-value graph.
+enum GraphCellLabel {
+ GRAPH_CELL_LABEL_UNSPECIFIED = 0;
+ GRAPH_CELL_LABEL_KEY = 1;
+ GRAPH_CELL_LABEL_VALUE = 2;
+ GRAPH_CELL_LABEL_CHECKBOX = 3;
+}
+
+// GraphCell is a node in the key-value graph.
+message GraphCell {
+ GraphCellLabel label = 1;
+ int32 cell_id = 2;
+ string text = 3;
+ string orig = 4;
+ optional ProvenanceItem prov = 5;
+ optional RefItem item_ref = 6;
+}
+
+// GraphLinkLabel defines the type of relationship between cells.
+enum GraphLinkLabel {
+ GRAPH_LINK_LABEL_UNSPECIFIED = 0;
+ GRAPH_LINK_LABEL_TO_VALUE = 1;
+ GRAPH_LINK_LABEL_TO_KEY = 2;
+ GRAPH_LINK_LABEL_TO_PARENT = 3;
+ GRAPH_LINK_LABEL_TO_CHILD = 4;
+}
+
+// GraphLink is an edge in the key-value graph.
+message GraphLink {
+ GraphLinkLabel label = 1;
+ int32 source_cell_id = 2;
+ int32 target_cell_id = 3;
+}
+
+// FormItem represents a form element in the document.
+message FormItem {
+ string self_ref = 1;
+ optional RefItem parent = 2;
+ repeated RefItem children = 3;
+ ContentLayer content_layer = 4;
+ optional FloatingMeta meta = 5;
+ string label = 6;
+ repeated ProvenanceItem prov = 7;
+ repeated RefItem captions = 8;
+ repeated RefItem references = 9;
+ repeated RefItem footnotes = 10;
+ optional ImageRef image = 11;
+ GraphData graph = 12;
+}
+
+// PageItem represents metadata about a single page in the document.
+message PageItem {
+ Size size = 1;
+ optional ImageRef image = 2;
+ int32 page_no = 3;
+}
diff --git a/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/serve/v1/docling_serve.proto b/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/serve/v1/docling_serve.proto
new file mode 100644
index 00000000..79a3ffc6
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/serve/v1/docling_serve.proto
@@ -0,0 +1,216 @@
+syntax = "proto3";
+
+package ai.docling.serve.v1;
+
+import "ai/docling/serve/v1/docling_serve_types.proto";
+
+option java_multiple_files = true;
+option java_outer_classname = "DoclingServeProto";
+option java_package = "ai.docling.serve.v1";
+
+// ============================================================================
+// Service Definition
+// ============================================================================
+
+// DoclingServeService is the gRPC service definition that provides a mapping
+// of the docling-serve REST API endpoints.
+//
+// Adheres to Buf linting and Google gRPC style guide by providing unique
+// request and response messages for every RPC.
+service DoclingServeService {
+
+ // Health check - mirrors GET /health
+ rpc Health(HealthRequest) returns (HealthResponse);
+
+ // Synchronous document conversion - mirrors POST /v1/convert/source
+ rpc ConvertSource(ConvertSourceRequest) returns (ConvertSourceResponse);
+
+ // Asynchronous document conversion - submits the task and returns status immediately.
+ rpc ConvertSourceAsync(ConvertSourceAsyncRequest) returns (ConvertSourceAsyncResponse);
+
+ // Synchronous hierarchical chunking - mirrors POST /v1/chunk/hierarchical/source
+ rpc ChunkHierarchicalSource(ChunkHierarchicalSourceRequest) returns (ChunkHierarchicalSourceResponse);
+
+ // Synchronous hybrid chunking - mirrors POST /v1/chunk/hybrid/source
+ rpc ChunkHybridSource(ChunkHybridSourceRequest) returns (ChunkHybridSourceResponse);
+
+ // Async hierarchical chunking - mirrors POST /v1/chunk/hierarchical/source/async
+ rpc ChunkHierarchicalSourceAsync(ChunkHierarchicalSourceAsyncRequest) returns (ChunkHierarchicalSourceAsyncResponse);
+
+ // Async hybrid chunking - mirrors POST /v1/chunk/hybrid/source/async
+ rpc ChunkHybridSourceAsync(ChunkHybridSourceAsyncRequest) returns (ChunkHybridSourceAsyncResponse);
+
+ // Poll async task status - mirrors GET /v1/status/poll/{taskId}
+ rpc PollTaskStatus(PollTaskStatusRequest) returns (PollTaskStatusResponse);
+
+ // Get convert task result - mirrors GET /v1/result/{taskId} for convert tasks
+ rpc GetConvertResult(GetConvertResultRequest) returns (GetConvertResultResponse);
+
+ // Get chunk task result - mirrors GET /v1/result/{taskId} for chunk tasks
+ rpc GetChunkResult(GetChunkResultRequest) returns (GetChunkResultResponse);
+
+ // Clear converters - mirrors GET /v1/clear/converters
+ rpc ClearConverters(ClearConvertersRequest) returns (ClearConvertersResponse);
+
+ // Clear stale results - mirrors GET /v1/clear/results
+ rpc ClearResults(ClearResultsRequest) returns (ClearResultsResponse);
+
+ // Streaming document conversion - sends results as they complete per source.
+ // This is an addition beyond the REST API that leverages gRPC's streaming.
+ rpc ConvertSourceStream(ConvertSourceStreamRequest) returns (stream ConvertSourceStreamResponse);
+
+ // --- Watch RPCs (server-managed polling via streaming) ---
+ // These RPCs submit the task, then internally poll and stream each status
+ // update until the task completes or fails. Clients just read the stream
+ // instead of managing their own poll loop.
+
+ // Watch convert task - submit and stream status updates until done.
+ rpc WatchConvertSource(WatchConvertSourceRequest) returns (stream WatchConvertSourceResponse);
+
+ // Watch hierarchical chunk task - submit and stream status updates until done.
+ rpc WatchChunkHierarchicalSource(WatchChunkHierarchicalSourceRequest) returns (stream WatchChunkHierarchicalSourceResponse);
+
+ // Watch hybrid chunk task - submit and stream status updates until done.
+ rpc WatchChunkHybridSource(WatchChunkHybridSourceRequest) returns (stream WatchChunkHybridSourceResponse);
+}
+
+// ============================================================================
+// RPC Request / Response Wrappers
+// ============================================================================
+
+// --- Health ---
+
+message HealthRequest {}
+
+message HealthResponse {
+ optional string status = 1;
+}
+
+// --- Convert ---
+
+message ConvertSourceRequest {
+ ConvertDocumentRequest request = 1;
+}
+
+message ConvertSourceResponse {
+ ConvertDocumentResponse response = 1;
+}
+
+message ConvertSourceAsyncRequest {
+ ConvertDocumentRequest request = 1;
+}
+
+message ConvertSourceAsyncResponse {
+ TaskStatusPollResponse response = 1;
+}
+
+message ConvertSourceStreamRequest {
+ ConvertDocumentRequest request = 1;
+}
+
+message ConvertSourceStreamResponse {
+ ConvertDocumentResponse response = 1;
+}
+
+// --- Chunk ---
+
+message ChunkHierarchicalSourceRequest {
+ HierarchicalChunkRequest request = 1;
+}
+
+message ChunkHierarchicalSourceResponse {
+ ChunkDocumentResponse response = 1;
+}
+
+message ChunkHybridSourceRequest {
+ HybridChunkRequest request = 1;
+}
+
+message ChunkHybridSourceResponse {
+ ChunkDocumentResponse response = 1;
+}
+
+message ChunkHierarchicalSourceAsyncRequest {
+ HierarchicalChunkRequest request = 1;
+}
+
+message ChunkHierarchicalSourceAsyncResponse {
+ TaskStatusPollResponse response = 1;
+}
+
+message ChunkHybridSourceAsyncRequest {
+ HybridChunkRequest request = 1;
+}
+
+message ChunkHybridSourceAsyncResponse {
+ TaskStatusPollResponse response = 1;
+}
+
+// --- Task ---
+
+message PollTaskStatusRequest {
+ TaskStatusPollRequest request = 1;
+}
+
+message PollTaskStatusResponse {
+ TaskStatusPollResponse response = 1;
+}
+
+message GetConvertResultRequest {
+ TaskResultRequest request = 1;
+}
+
+message GetConvertResultResponse {
+ ConvertDocumentResponse response = 1;
+}
+
+message GetChunkResultRequest {
+ TaskResultRequest request = 1;
+}
+
+message GetChunkResultResponse {
+ ChunkDocumentResponse response = 1;
+}
+
+// --- Clear ---
+
+message ClearConvertersRequest {}
+
+message ClearConvertersResponse {
+ ClearResponse response = 1;
+}
+
+message ClearResultsRequest {
+ // Clear results older than this many seconds (default: 3600)
+ optional double older_than = 1;
+}
+
+message ClearResultsResponse {
+ ClearResponse response = 1;
+}
+
+// --- Watch ---
+
+message WatchConvertSourceRequest {
+ ConvertDocumentRequest request = 1;
+}
+
+message WatchConvertSourceResponse {
+ TaskStatusPollResponse response = 1;
+}
+
+message WatchChunkHierarchicalSourceRequest {
+ HierarchicalChunkRequest request = 1;
+}
+
+message WatchChunkHierarchicalSourceResponse {
+ TaskStatusPollResponse response = 1;
+}
+
+message WatchChunkHybridSourceRequest {
+ HybridChunkRequest request = 1;
+}
+
+message WatchChunkHybridSourceResponse {
+ TaskStatusPollResponse response = 1;
+}
diff --git a/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/serve/v1/docling_serve_types.proto b/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/serve/v1/docling_serve_types.proto
new file mode 100644
index 00000000..c3a60e2a
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/main/proto/ai/docling/serve/v1/docling_serve_types.proto
@@ -0,0 +1,482 @@
+syntax = "proto3";
+
+package ai.docling.serve.v1;
+
+import "ai/docling/core/v1/docling_document.proto";
+import "google/protobuf/struct.proto";
+
+option java_multiple_files = true;
+option java_outer_classname = "DoclingServeTypesProto";
+option java_package = "ai.docling.serve.v1";
+
+// ============================================================================
+// Enumerations - 1:1 mapping of the REST API enum types
+// ============================================================================
+
+// Input document format types supported by Docling.
+enum InputFormat {
+ INPUT_FORMAT_UNSPECIFIED = 0;
+ INPUT_FORMAT_ASCIIDOC = 1;
+ INPUT_FORMAT_AUDIO = 2;
+ INPUT_FORMAT_CSV = 3;
+ INPUT_FORMAT_DOCX = 4;
+ INPUT_FORMAT_HTML = 5;
+ INPUT_FORMAT_IMAGE = 6;
+ INPUT_FORMAT_JSON_DOCLING = 7;
+ INPUT_FORMAT_MD = 8;
+ INPUT_FORMAT_METS_GBS = 9;
+ INPUT_FORMAT_PDF = 10;
+ INPUT_FORMAT_PPTX = 11;
+ INPUT_FORMAT_XLSX = 12;
+ INPUT_FORMAT_XML_JATS = 13;
+ INPUT_FORMAT_XML_USPTO = 14;
+}
+
+// Output format types for converted documents.
+enum OutputFormat {
+ OUTPUT_FORMAT_UNSPECIFIED = 0;
+ OUTPUT_FORMAT_DOCTAGS = 1;
+ OUTPUT_FORMAT_HTML = 2;
+ OUTPUT_FORMAT_HTML_SPLIT_PAGE = 3;
+ OUTPUT_FORMAT_JSON = 4;
+ OUTPUT_FORMAT_MD = 5;
+ OUTPUT_FORMAT_TEXT = 6;
+}
+
+// OCR engine selection.
+enum OcrEngine {
+ OCR_ENGINE_UNSPECIFIED = 0;
+ OCR_ENGINE_AUTO = 1;
+ OCR_ENGINE_EASYOCR = 2;
+ OCR_ENGINE_OCRMAC = 3;
+ OCR_ENGINE_RAPIDOCR = 4;
+ OCR_ENGINE_TESSEROCR = 5;
+ OCR_ENGINE_TESSERACT = 6;
+}
+
+// PDF processing backend.
+enum PdfBackend {
+ PDF_BACKEND_UNSPECIFIED = 0;
+ PDF_BACKEND_DLPARSE_V1 = 1;
+ PDF_BACKEND_DLPARSE_V2 = 2;
+ PDF_BACKEND_DLPARSE_V4 = 3;
+ PDF_BACKEND_PYPDFIUM2 = 4;
+}
+
+// Table structure extraction mode.
+enum TableFormerMode {
+ TABLE_FORMER_MODE_UNSPECIFIED = 0;
+ TABLE_FORMER_MODE_ACCURATE = 1;
+ TABLE_FORMER_MODE_FAST = 2;
+}
+
+// Document processing pipeline.
+enum ProcessingPipeline {
+ PROCESSING_PIPELINE_UNSPECIFIED = 0;
+ PROCESSING_PIPELINE_ASR = 1;
+ PROCESSING_PIPELINE_STANDARD = 2;
+ PROCESSING_PIPELINE_VLM = 3;
+}
+
+// Image reference mode for export.
+enum ImageRefMode {
+ IMAGE_REF_MODE_UNSPECIFIED = 0;
+ IMAGE_REF_MODE_EMBEDDED = 1;
+ IMAGE_REF_MODE_PLACEHOLDER = 2;
+ IMAGE_REF_MODE_REFERENCED = 3;
+}
+
+// Preset VLM model types.
+enum VlmModelType {
+ VLM_MODEL_TYPE_UNSPECIFIED = 0;
+ VLM_MODEL_TYPE_SMOLDOCLING = 1;
+ VLM_MODEL_TYPE_SMOLDOCLING_VLLM = 2;
+ VLM_MODEL_TYPE_GRANITE_VISION = 3;
+ VLM_MODEL_TYPE_GRANITE_VISION_VLLM = 4;
+ VLM_MODEL_TYPE_GRANITE_VISION_OLLAMA = 5;
+ VLM_MODEL_TYPE_GOT_OCR_2 = 6;
+}
+
+// Response format for VLM model output.
+enum ResponseFormat {
+ RESPONSE_FORMAT_UNSPECIFIED = 0;
+ RESPONSE_FORMAT_DOCTAGS = 1;
+ RESPONSE_FORMAT_MARKDOWN = 2;
+ RESPONSE_FORMAT_HTML = 3;
+ RESPONSE_FORMAT_OTSL = 4;
+ RESPONSE_FORMAT_PLAINTEXT = 5;
+}
+
+// Inference framework for local VLM models.
+enum InferenceFramework {
+ INFERENCE_FRAMEWORK_UNSPECIFIED = 0;
+ INFERENCE_FRAMEWORK_MLX = 1;
+ INFERENCE_FRAMEWORK_TRANSFORMERS = 2;
+ INFERENCE_FRAMEWORK_VLLM = 3;
+}
+
+// Type of transformers auto-model to use.
+enum TransformersModelType {
+ TRANSFORMERS_MODEL_TYPE_UNSPECIFIED = 0;
+ TRANSFORMERS_MODEL_TYPE_AUTOMODEL = 1;
+ TRANSFORMERS_MODEL_TYPE_AUTOMODEL_VISION2SEQ = 2;
+ TRANSFORMERS_MODEL_TYPE_AUTOMODEL_CAUSALLM = 3;
+ TRANSFORMERS_MODEL_TYPE_AUTOMODEL_IMAGETEXTTOTEXT = 4;
+}
+
+// Async task status.
+enum TaskStatus {
+ TASK_STATUS_UNSPECIFIED = 0;
+ TASK_STATUS_PENDING = 1;
+ TASK_STATUS_STARTED = 2;
+ TASK_STATUS_SUCCESS = 3;
+ TASK_STATUS_FAILURE = 4;
+}
+
+// ============================================================================
+// Source Types - where documents come from
+// ============================================================================
+
+// Source represents a document input source (polymorphic via oneof).
+message Source {
+ oneof source {
+ FileSource file = 1;
+ HttpSource http = 2;
+ S3Source s3 = 3;
+ }
+}
+
+// FileSource provides a document as base64-encoded content.
+message FileSource {
+ // Base64-encoded file content
+ string base64_string = 1;
+ // Original filename
+ string filename = 2;
+}
+
+// HttpSource provides a document via HTTP URL.
+message HttpSource {
+ // URL to fetch the document from
+ string url = 1;
+ // Optional HTTP headers (e.g., authentication)
+ map<string, string> headers = 2;
+}
+
+// S3Source provides a document from an S3-compatible store.
+message S3Source {
+ string endpoint = 1;
+ string access_key = 2;
+ string secret_key = 3;
+ string bucket = 4;
+ optional string key_prefix = 5;
+ bool verify_ssl = 6;
+}
+
+// ============================================================================
+// Target Types - where results go
+// ============================================================================
+
+// Target represents a result destination (polymorphic via oneof).
+message Target {
+ oneof target {
+ InBodyTarget inbody = 1;
+ PutTarget put = 2;
+ S3Target s3 = 3;
+ ZipTarget zip = 4;
+ }
+}
+
+// InBodyTarget returns results in the response body (default).
+message InBodyTarget {}
+
+// PutTarget sends results via HTTP PUT.
+message PutTarget {
+ string url = 1;
+}
+
+// S3Target sends results to an S3-compatible store.
+message S3Target {
+ string endpoint = 1;
+ string access_key = 2;
+ string secret_key = 3;
+ string bucket = 4;
+ optional string key_prefix = 5;
+ bool verify_ssl = 6;
+}
+
+// ZipTarget returns results as a ZIP archive.
+message ZipTarget {}
+
+// ============================================================================
+// Convert Options
+// ============================================================================
+
+// PictureDescriptionLocal configures a local VLM for picture descriptions.
+message PictureDescriptionLocal {
+ // Hugging Face repository ID
+ string repo_id = 1;
+ // Optional prompt for the model
+ optional string prompt = 2;
+ // Optional generation config parameters
+ map<string, google.protobuf.Value> generation_config = 3; // NOTE(review): value type assumed from the struct.proto import - confirm
+}
+
+// PictureDescriptionApi configures an API-based VLM for picture descriptions.
+message PictureDescriptionApi {
+ // API endpoint URL
+ string url = 1;
+ // Optional HTTP headers
+ map<string, string> headers = 2;
+ // Optional model parameters
+ map<string, google.protobuf.Value> params = 3; // NOTE(review): value type assumed from the struct.proto import - confirm
+ // Timeout in seconds
+ optional double timeout = 4;
+ // Max concurrent requests
+ optional int32 concurrency = 5;
+ // Optional prompt
+ optional string prompt = 6;
+}
+
+// VlmModelLocal configures a local vision-language model for the VLM pipeline.
+message VlmModelLocal {
+ optional string repo_id = 1;
+ optional string prompt = 2;
+ optional int32 scale = 3;
+ optional ResponseFormat response_format = 4;
+ optional InferenceFramework inference_framework = 5;
+ optional TransformersModelType transformers_model_type = 6;
+ map<string, google.protobuf.Value> extra_generation_config = 7; // NOTE(review): value type assumed from the struct.proto import - confirm
+}
+
+// VlmModelApi configures an API-based vision-language model for the VLM pipeline.
+message VlmModelApi {
+ optional string url = 1;
+ map<string, string> headers = 2;
+ map<string, google.protobuf.Value> params = 3; // NOTE(review): value type assumed from the struct.proto import - confirm
+ optional double timeout = 4;
+ optional int32 concurrency = 5;
+ optional string prompt = 6;
+ optional int32 scale = 7;
+ optional ResponseFormat response_format = 8;
+}
+
+// ConvertDocumentOptions mirrors all conversion settings from the REST API.
+message ConvertDocumentOptions {
+ // Input format(s) to convert from
+ repeated InputFormat from_formats = 1;
+ // Output format(s) to convert to
+ repeated OutputFormat to_formats = 2;
+ // Image export mode
+ optional ImageRefMode image_export_mode = 3;
+ // Enable OCR processing
+ optional bool do_ocr = 4;
+ // Replace text with OCR output
+ optional bool force_ocr = 5;
+ // OCR engine selection
+ optional OcrEngine ocr_engine = 6;
+ // OCR language codes
+ repeated string ocr_lang = 7;
+ // PDF processing backend
+ optional PdfBackend pdf_backend = 8;
+ // Table structure mode
+ optional TableFormerMode table_mode = 9;
+ // Match table cells to PDF cells
+ optional bool table_cell_matching = 10;
+ // Processing pipeline
+ optional ProcessingPipeline pipeline = 11;
+ // Page range to process (1-indexed)
+ repeated int32 page_range = 12;
+ // Per-document timeout in seconds
+ optional double document_timeout = 13;
+ // Abort on first error
+ optional bool abort_on_error = 14;
+ // Extract table structure
+ optional bool do_table_structure = 15;
+ // Extract images
+ optional bool include_images = 16;
+ // Image scale factor
+ optional double images_scale = 17;
+ // Markdown page break placeholder
+ optional string md_page_break_placeholder = 18;
+ // Enable code OCR enrichment
+ optional bool do_code_enrichment = 19;
+ // Enable formula OCR enrichment
+ optional bool do_formula_enrichment = 20;
+ // Enable picture classification
+ optional bool do_picture_classification = 21;
+ // Enable picture description
+ optional bool do_picture_description = 22;
+ // Min area percentage for picture processing
+ optional double picture_description_area_threshold = 23;
+ // Local VLM for picture description (mutually exclusive with api)
+ optional PictureDescriptionLocal picture_description_local = 24;
+ // API VLM for picture description (mutually exclusive with local)
+ optional PictureDescriptionApi picture_description_api = 25;
+ // Preset VLM model (mutually exclusive with local/api)
+ optional VlmModelType vlm_pipeline_model = 26;
+ // Local VLM model string (mutually exclusive with api/preset)
+ optional string vlm_pipeline_model_local = 27;
+ // API VLM model string (mutually exclusive with local/preset)
+ optional string vlm_pipeline_model_api = 28;
+}
+
+// ============================================================================
+// Error types
+// ============================================================================
+
+// ErrorItem represents a processing error from a specific component.
+message ErrorItem {
+ string component_type = 1;
+ string error_message = 2;
+ string module_name = 3;
+}
+
+// ============================================================================
+// Document response types
+// ============================================================================
+
+// DocumentResponse contains the converted document in multiple output formats.
+message DocumentResponse {
+ // Filename of the source document
+ string filename = 1;
+ // Full DoclingDocument JSON structure
+ optional ai.docling.core.v1.DoclingDocument json_content = 2;
+ // Markdown representation
+ optional string md_content = 3;
+ // HTML representation
+ optional string html_content = 4;
+ // Plain text representation
+ optional string text_content = 5;
+ // DocTags representation
+ optional string doctags_content = 6;
+}
+
+// ExportDocumentResponse is the document format used within chunk responses.
+message ExportDocumentResponse {
+ string filename = 1;
+ optional ai.docling.core.v1.DoclingDocument json_content = 2;
+ optional string md_content = 3;
+ optional string html_content = 4;
+ optional string text_content = 5;
+ optional string doctags_content = 6;
+}
+
+// Document wraps an exported document with status and error info.
+message Document {
+ optional string kind = 1;
+ ExportDocumentResponse content = 2;
+ string status = 3;
+ repeated ErrorItem errors = 4;
+}
+
+// ============================================================================
+// Task types
+// ============================================================================
+
+// TaskStatusMetadata provides progress information for async tasks.
+message TaskStatusMetadata {
+ int64 num_docs = 1;
+ int64 num_processed = 2;
+ int64 num_succeeded = 3;
+ int64 num_failed = 4;
+}
+
+// ============================================================================
+// Request / Response Domain Models (Moved from docling_serve.proto)
+// ============================================================================
+
+// ConvertDocumentRequest mirrors POST /v1/convert/source
+message ConvertDocumentRequest {
+ repeated Source sources = 1;
+ ConvertDocumentOptions options = 2;
+ optional Target target = 3;
+}
+
+// ConvertDocumentResponse mirrors the REST convert response body.
+message ConvertDocumentResponse {
+ DocumentResponse document = 1;
+ repeated ErrorItem errors = 2;
+ double processing_time = 3;
+ string status = 4;
+ map<string, google.protobuf.Value> timings = 5; // NOTE(review): value type assumed from the struct.proto import - confirm
+}
+
+// HierarchicalChunkerOptions configures the hierarchical chunker.
+message HierarchicalChunkerOptions {
+ bool use_markdown_tables = 1;
+ bool include_raw_text = 2;
+}
+
+// HybridChunkerOptions configures the hybrid chunker.
+message HybridChunkerOptions {
+ bool use_markdown_tables = 1;
+ bool include_raw_text = 2;
+ optional int32 max_tokens = 3;
+ optional string tokenizer = 4;
+ optional bool merge_peers = 5;
+}
+
+// HierarchicalChunkRequest mirrors POST /v1/chunk/hierarchical/source
+message HierarchicalChunkRequest {
+ repeated Source sources = 1;
+ ConvertDocumentOptions convert_options = 2;
+ optional Target target = 3;
+ bool include_converted_doc = 4;
+ HierarchicalChunkerOptions chunking_options = 5;
+}
+
+// HybridChunkRequest mirrors POST /v1/chunk/hybrid/source
+message HybridChunkRequest {
+ repeated Source sources = 1;
+ ConvertDocumentOptions convert_options = 2;
+ optional Target target = 3;
+ bool include_converted_doc = 4;
+ HybridChunkerOptions chunking_options = 5;
+}
+
+// Chunk represents a single document chunk in the response.
+message Chunk {
+ string filename = 1;
+ int32 chunk_index = 2;
+ string text = 3;
+ optional string raw_text = 4;
+ optional int32 num_tokens = 5;
+ repeated string headings = 6;
+ repeated string captions = 7;
+ repeated string doc_items = 8;
+ repeated int32 page_numbers = 9;
+ map<string, google.protobuf.Value> metadata = 10; // NOTE(review): value type assumed from the struct.proto import - confirm
+}
+
+// ChunkDocumentResponse mirrors the REST chunk response body.
+message ChunkDocumentResponse {
+ repeated Chunk chunks = 1;
+ repeated Document documents = 2;
+ double processing_time = 3;
+}
+
+// TaskStatusPollRequest mirrors GET /v1/status/poll/{taskId}
+message TaskStatusPollRequest {
+ string task_id = 1;
+ // Wait time in seconds (0 = no wait / immediate poll)
+ double wait_time = 2;
+}
+
+// TaskStatusPollResponse mirrors the REST task status response.
+message TaskStatusPollResponse {
+ string task_id = 1;
+ optional string task_type = 2;
+ TaskStatus task_status = 3;
+ optional int64 task_position = 4;
+ optional TaskStatusMetadata task_meta = 5;
+}
+
+// TaskResultRequest mirrors GET /v1/result/{taskId}
+message TaskResultRequest {
+ string task_id = 1;
+}
+
+// ClearResponse mirrors the REST clear response body.
+message ClearResponse {
+ optional string status = 1;
+}
\ No newline at end of file
diff --git a/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/DoclingServeGrpcIntegrationTest.java b/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/DoclingServeGrpcIntegrationTest.java
new file mode 100644
index 00000000..fd9bf6eb
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/DoclingServeGrpcIntegrationTest.java
@@ -0,0 +1,286 @@
+package ai.docling.serve.grpc.v1;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.lang.reflect.Method;
+import java.net.URI;
+import java.time.Duration;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+import org.jspecify.annotations.Nullable;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtensionContext;
+import org.junit.jupiter.api.extension.RegisterExtension;
+import org.junit.jupiter.api.extension.TestWatcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import ai.docling.serve.api.DoclingServeApi;
+import ai.docling.serve.v1.Chunk;
+import ai.docling.serve.v1.ChunkHierarchicalSourceRequest;
+import ai.docling.serve.v1.ChunkHybridSourceRequest;
+import ai.docling.serve.v1.ClearConvertersRequest;
+import ai.docling.serve.v1.ClearResultsRequest;
+import ai.docling.serve.v1.ConvertDocumentOptions;
+import ai.docling.serve.v1.ConvertDocumentRequest;
+import ai.docling.serve.v1.ConvertSourceRequest;
+import ai.docling.serve.v1.DoclingServeServiceGrpc;
+import ai.docling.serve.v1.HealthRequest;
+import ai.docling.serve.v1.HierarchicalChunkRequest;
+import ai.docling.serve.v1.HierarchicalChunkerOptions;
+import ai.docling.serve.v1.HttpSource;
+import ai.docling.serve.v1.HybridChunkRequest;
+import ai.docling.serve.v1.HybridChunkerOptions;
+import ai.docling.serve.v1.OutputFormat;
+import ai.docling.serve.v1.Source;
+import ai.docling.testcontainers.serve.DoclingServeContainer;
+import ai.docling.testcontainers.serve.config.DoclingServeContainerConfig;
+
+import io.grpc.ManagedChannel;
+import io.grpc.Server;
+import io.grpc.inprocess.InProcessChannelBuilder;
+import io.grpc.inprocess.InProcessServerBuilder;
+
+/**
+ * Integration tests for the gRPC service backed by a real docling-serve container.
+ *
+ * Stack: gRPC BlockingStub → DoclingServeGrpcService → DoclingServeJackson2Client (REST) → Docker Container (docling-serve)
+ *
+ * These tests validate that proto ↔ Java mapping produces correct results against a real server.
+ */
+class DoclingServeGrpcIntegrationTest {
+
+  private static final Logger LOG = LoggerFactory.getLogger(DoclingServeGrpcIntegrationTest.class);
+
+  /** Page fetched by every conversion and chunking test. */
+  private static final String SAMPLE_URL =
+      "https://docs.arconia.io/arconia-cli/latest/development/dev/";
+
+  /** Deadline applied to each individual gRPC call, in minutes. */
+  private static final long CALL_DEADLINE_MINUTES = 5;
+
+  static final DoclingServeContainer container = new DoclingServeContainer(
+      DoclingServeContainerConfig.builder()
+          .image(DoclingServeContainerConfig.DOCLING_IMAGE)
+          .build()
+  );
+
+  static {
+    container.start();
+  }
+
+  // Log container output on test failure for debugging
+  @RegisterExtension
+  TestWatcher watcher = new TestWatcher() {
+    @Override
+    public void testFailed(ExtensionContext context, @Nullable Throwable cause) {
+      LOG.error("""
+          Test {}.{} failed with message: {}
+          Container logs:
+          {}
+          """,
+          // getClass() would name this anonymous TestWatcher, so ask the context instead.
+          context.getRequiredTestClass().getName(),
+          context.getTestMethod().map(Method::getName).orElse(""),
+          Optional.ofNullable(cause).map(Throwable::getMessage).orElse(""),
+          container.getLogs());
+    }
+  };
+
+  private static DoclingServeApi restClient;
+  private static Server server;
+  private static ManagedChannel channel;
+  private static DoclingServeServiceGrpc.DoclingServeServiceBlockingStub stub;
+
+  @BeforeAll
+  static void setUp() throws Exception {
+    // Create REST client pointing at the container
+    restClient = DoclingServeApi.builder()
+        .logRequests()
+        .logResponses()
+        .prettyPrint()
+        .baseUrl(container.getApiUrl())
+        .connectTimeout(Duration.ofSeconds(10))
+        .readTimeout(Duration.ofMinutes(5))
+        .build();
+
+    // Start in-process gRPC server wrapping the REST client; keep the handle for tearDown
+    String serverName = InProcessServerBuilder.generateName();
+    server = InProcessServerBuilder.forName(serverName)
+        .directExecutor()
+        .addService(new DoclingServeGrpcService(restClient, URI.create(container.getApiUrl())))
+        .build()
+        .start();
+
+    channel = InProcessChannelBuilder.forName(serverName)
+        .directExecutor()
+        .build();
+
+    // Deliberately no deadline here: withDeadlineAfter fixes an absolute deadline at the
+    // moment it is called, so setting it once would make all tests share one 5-minute budget.
+    stub = DoclingServeServiceGrpc.newBlockingStub(channel);
+  }
+
+  @AfterAll
+  static void tearDown() {
+    if (channel != null) {
+      channel.shutdownNow();
+    }
+    if (server != null) {
+      server.shutdownNow();
+    }
+  }
+
+  /** Returns the shared stub with a deadline that starts counting now. */
+  private static DoclingServeServiceGrpc.DoclingServeServiceBlockingStub stubWithDeadline() {
+    return stub.withDeadlineAfter(CALL_DEADLINE_MINUTES, TimeUnit.MINUTES);
+  }
+
+  /** Builds the HTTP source pointing at {@link #SAMPLE_URL}. */
+  private static Source sampleSource() {
+    return Source.newBuilder()
+        .setHttp(HttpSource.newBuilder()
+            .setUrl(SAMPLE_URL)
+            .build())
+        .build();
+  }
+
+  /** Builds conversion options requesting JSON output only. */
+  private static ConvertDocumentOptions jsonOutputOptions() {
+    return ConvertDocumentOptions.newBuilder()
+        .addToFormats(OutputFormat.OUTPUT_FORMAT_JSON)
+        .build();
+  }
+
+  // ==================== Health ====================
+
+  @Nested
+  class HealthTests {
+
+    @Test
+    void health() {
+      var response = stubWithDeadline().health(HealthRequest.getDefaultInstance());
+
+      assertThat(response.getStatus()).isEqualTo("ok");
+    }
+  }
+
+  // ==================== Convert ====================
+
+  @Nested
+  class ConvertTests {
+
+    @Test
+    void convertSourceWithHttpUrl() {
+      var request = ConvertSourceRequest.newBuilder()
+          .setRequest(ConvertDocumentRequest.newBuilder()
+              .addSources(sampleSource())
+              .build())
+          .build();
+
+      var result = stubWithDeadline().convertSource(request).getResponse();
+
+      assertThat(result.getStatus()).isNotEmpty();
+      assertThat(result.getDocument().getFilename()).isNotEmpty();
+      assertThat(result.getDocument().getMdContent()).isNotEmpty();
+      // A processing time of 0 is tolerated, as before; a negative one never makes sense.
+      assertThat(result.getProcessingTime()).isNotNegative();
+    }
+
+    @Test
+    void convertSourceWithJsonOutput() {
+      var request = ConvertSourceRequest.newBuilder()
+          .setRequest(ConvertDocumentRequest.newBuilder()
+              .addSources(sampleSource())
+              .setOptions(jsonOutputOptions())
+              .build())
+          .build();
+
+      var result = stubWithDeadline().convertSource(request).getResponse();
+
+      assertThat(result.getStatus()).isNotEmpty();
+      assertThat(result.getDocument().getFilename()).isNotEmpty();
+      // JSON output means the DoclingDocument proto should be populated
+      assertThat(result.getDocument().hasJsonContent()).isTrue();
+      assertThat(result.getDocument().getJsonContent().getName()).isNotEmpty();
+    }
+  }
+
+  // ==================== Chunk ====================
+
+  @Nested
+  class ChunkTests {
+
+    @Test
+    void chunkHierarchicalSource() {
+      var request = ChunkHierarchicalSourceRequest.newBuilder()
+          .setRequest(HierarchicalChunkRequest.newBuilder()
+              .addSources(sampleSource())
+              .setConvertOptions(jsonOutputOptions())
+              .setIncludeConvertedDoc(true)
+              .setChunkingOptions(HierarchicalChunkerOptions.newBuilder()
+                  .setIncludeRawText(true)
+                  .setUseMarkdownTables(true)
+                  .build())
+              .build())
+          .build();
+
+      var result = stubWithDeadline().chunkHierarchicalSource(request).getResponse();
+
+      assertThat(result.getChunksList()).isNotEmpty();
+      assertThat(result.getChunksList())
+          .allMatch(chunk -> !chunk.getText().isEmpty());
+      assertThat(result.getProcessingTime()).isPositive();
+    }
+
+    @Test
+    void chunkHybridSource() {
+      var request = ChunkHybridSourceRequest.newBuilder()
+          .setRequest(HybridChunkRequest.newBuilder()
+              .addSources(sampleSource())
+              .setConvertOptions(jsonOutputOptions())
+              .setIncludeConvertedDoc(true)
+              .setChunkingOptions(HybridChunkerOptions.newBuilder()
+                  .setIncludeRawText(true)
+                  .setUseMarkdownTables(true)
+                  .setMaxTokens(10000)
+                  .setTokenizer("sentence-transformers/all-MiniLM-L6-v2")
+                  .build())
+              .build())
+          .build();
+
+      var result = stubWithDeadline().chunkHybridSource(request).getResponse();
+
+      assertThat(result.getChunksList()).isNotEmpty();
+      assertThat(result.getChunksList())
+          .allMatch(chunk -> !chunk.getText().isEmpty());
+      assertThat(result.getProcessingTime()).isPositive();
+    }
+  }
+
+  // ==================== Clear ====================
+
+  @Nested
+  class ClearTests {
+
+    @Test
+    void clearConverters() {
+      var response = stubWithDeadline().clearConverters(
+          ClearConvertersRequest.getDefaultInstance());
+
+      assertThat(response.getResponse().getStatus()).isEqualTo("ok");
+    }
+
+    @Test
+    void clearResults() {
+      var response = stubWithDeadline().clearResults(
+          ClearResultsRequest.getDefaultInstance());
+
+      assertThat(response.getResponse().getStatus()).isEqualTo("ok");
+    }
+  }
+}
diff --git a/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/DoclingServeGrpcServiceTest.java b/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/DoclingServeGrpcServiceTest.java
new file mode 100644
index 00000000..6522ac82
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/DoclingServeGrpcServiceTest.java
@@ -0,0 +1,589 @@
+package ai.docling.serve.grpc.v1;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.time.Duration;
+import java.util.ArrayList;
+
+import ai.docling.serve.api.DoclingServeApi;
+import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.clear.request.ClearResultsRequest;
+import ai.docling.serve.api.clear.response.ClearResponse;
+import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
+import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
+import ai.docling.serve.api.convert.response.DocumentResponse;
+import ai.docling.serve.api.health.HealthCheckResponse;
+import ai.docling.serve.api.task.request.TaskResultRequest;
+import ai.docling.serve.api.task.request.TaskStatusPollRequest;
+import ai.docling.serve.api.task.response.TaskStatus;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
+import ai.docling.serve.grpc.v1.mapping.ServeApiMapper;
+import ai.docling.serve.v1.ChunkHierarchicalSourceAsyncRequest;
+import ai.docling.serve.v1.ChunkHierarchicalSourceRequest;
+import ai.docling.serve.v1.ChunkHybridSourceAsyncRequest;
+import ai.docling.serve.v1.ChunkHybridSourceRequest;
+import ai.docling.serve.v1.ClearConvertersRequest;
+import ai.docling.serve.v1.ConvertSourceAsyncRequest;
+import ai.docling.serve.v1.ConvertSourceRequest;
+import ai.docling.serve.v1.ConvertSourceStreamRequest;
+import ai.docling.serve.v1.DoclingServeServiceGrpc;
+import ai.docling.serve.v1.FileSource;
+import ai.docling.serve.v1.GetChunkResultRequest;
+import ai.docling.serve.v1.GetConvertResultRequest;
+import ai.docling.serve.v1.HealthRequest;
+import ai.docling.serve.v1.HierarchicalChunkRequest;
+import ai.docling.serve.v1.PollTaskStatusRequest;
+import ai.docling.serve.v1.Source;
+import ai.docling.serve.v1.WatchChunkHierarchicalSourceRequest;
+import ai.docling.serve.v1.WatchChunkHierarchicalSourceResponse;
+import ai.docling.serve.v1.WatchChunkHybridSourceRequest;
+import ai.docling.serve.v1.WatchChunkHybridSourceResponse;
+import ai.docling.serve.v1.WatchConvertSourceRequest;
+import ai.docling.serve.v1.WatchConvertSourceResponse;
+
+import io.grpc.ManagedChannel;
+import io.grpc.Server;
+import io.grpc.StatusRuntimeException;
+import io.grpc.inprocess.InProcessChannelBuilder;
+import io.grpc.inprocess.InProcessServerBuilder;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+/**
+ * Unit tests for {@link DoclingServeGrpcService}, run against an in-process gRPC server whose
+ * {@link DoclingServeApi} and {@link AsyncTaskSubmitter} collaborators are Mockito mocks.
+ */
+@ExtendWith(MockitoExtension.class)
+class DoclingServeGrpcServiceTest {
+
+  @Mock
+  private DoclingServeApi api;
+
+  @Mock
+  private AsyncTaskSubmitter asyncSubmitter;
+
+  private ManagedChannel channel;
+  private DoclingServeServiceGrpc.DoclingServeServiceBlockingStub blockingStub;
+  private Server server;
+
+  @BeforeEach
+  void setUp() throws Exception {
+    var serverName = InProcessServerBuilder.generateName();
+    // NOTE(review): the two durations are presumably the poll interval and the watch timeout - confirm.
+    server = InProcessServerBuilder.forName(serverName)
+        .directExecutor()
+        .addService(new DoclingServeGrpcService(api, asyncSubmitter,
+            Duration.ofMillis(50), Duration.ofSeconds(5)))
+        .build()
+        .start();
+    channel = InProcessChannelBuilder.forName(serverName)
+        .directExecutor()
+        .build();
+    blockingStub = DoclingServeServiceGrpc.newBlockingStub(channel);
+  }
+
+  @AfterEach
+  void tearDown() {
+    if (channel != null) {
+      channel.shutdownNow();
+    }
+    if (server != null) {
+      server.shutdownNow();
+    }
+  }
+
+  @Nested
+  class HealthTests {
+
+    @Test
+    void healthReturnsStatus() {
+      when(api.health()).thenReturn(
+          HealthCheckResponse.builder().status("ok").build());
+
+      var response = blockingStub.health(
+          HealthRequest.getDefaultInstance());
+
+      assertThat(response.getStatus()).isEqualTo("ok");
+      verify(api).health();
+    }
+
+    @Test
+    void healthPropagatesErrorAsInternal() {
+      when(api.health()).thenThrow(new RuntimeException("Connection refused"));
+
+      assertThatThrownBy(() -> blockingStub.health(
+          HealthRequest.getDefaultInstance()))
+          .isInstanceOf(StatusRuntimeException.class)
+          .hasMessageContaining("Connection refused");
+    }
+  }
+
+  @Nested
+  class ConvertTests {
+
+    @Test
+    void convertSourceDelegatesToApi() {
+      var apiResponse = ConvertDocumentResponse.builder()
+          .document(DocumentResponse.builder()
+              .filename("test.pdf")
+              .markdownContent("# Test")
+              .build())
+          .status("success")
+          .build();
+
+      when(api.convertSource(any(ConvertDocumentRequest.class)))
+          .thenReturn(apiResponse);
+
+      var request = ConvertSourceRequest.newBuilder()
+          .setRequest(ai.docling.serve.v1.ConvertDocumentRequest.newBuilder()
+              .addSources(Source.newBuilder()
+                  .setFile(FileSource.newBuilder()
+                      .setBase64String("dGVzdA==")
+                      .setFilename("test.pdf")
+                      .build())
+                  .build())
+              .build())
+          .build();
+
+      var response = blockingStub.convertSource(request);
+
+      assertThat(response.getResponse().getDocument().getFilename()).isEqualTo("test.pdf");
+      assertThat(response.getResponse().getDocument().getMdContent()).isEqualTo("# Test");
+      assertThat(response.getResponse().getStatus()).isEqualTo("success");
+      verify(api).convertSource(any(ConvertDocumentRequest.class));
+    }
+
+    @Test
+    void convertSourcePropagatesErrors() {
+      when(api.convertSource(any(ConvertDocumentRequest.class)))
+          .thenThrow(new RuntimeException("Conversion failed"));
+
+      assertThatThrownBy(() -> blockingStub.convertSource(
+          ConvertSourceRequest.getDefaultInstance()))
+          .isInstanceOf(StatusRuntimeException.class)
+          .hasMessageContaining("Conversion failed");
+    }
+
+    @Test
+    void convertSourceAsyncReturnsTaskStatus() {
+      var taskResponse = TaskStatusPollResponse.builder()
+          .taskId("task-123")
+          .taskStatus(TaskStatus.STARTED)
+          .taskPosition(0L)
+          .build();
+
+      when(asyncSubmitter.submitConvertSource(any(ConvertDocumentRequest.class)))
+          .thenReturn(taskResponse);
+
+      var response = blockingStub.convertSourceAsync(
+          ConvertSourceAsyncRequest.getDefaultInstance());
+
+      assertThat(response.getResponse().getTaskId()).isEqualTo("task-123");
+      assertThat(response.getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_STARTED);
+      verify(asyncSubmitter).submitConvertSource(any(ConvertDocumentRequest.class));
+    }
+
+    @Test
+    void convertSourceStreamReturnsResponse() {
+      var apiResponse = ConvertDocumentResponse.builder()
+          .document(DocumentResponse.builder()
+              .filename("test.pdf")
+              .markdownContent("# Stream")
+              .build())
+          .build();
+
+      when(api.convertSource(any(ConvertDocumentRequest.class)))
+          .thenReturn(apiResponse);
+
+      var responses = blockingStub.convertSourceStream(
+          ConvertSourceStreamRequest.getDefaultInstance());
+
+      assertThat(responses.hasNext()).isTrue();
+      var response = responses.next();
+      assertThat(response.getResponse().getDocument().getMdContent()).isEqualTo("# Stream");
+      assertThat(responses.hasNext()).isFalse();
+    }
+  }
+
+  @Nested
+  class ChunkTests {
+
+    @Test
+    void chunkHierarchicalSourceDelegatesToApi() {
+      var apiResponse = ChunkDocumentResponse.builder()
+          .processingTime(1.0)
+          .build();
+
+      when(api.chunkSourceWithHierarchicalChunker(
+          any(HierarchicalChunkDocumentRequest.class)))
+          .thenReturn(apiResponse);
+
+      var request = ChunkHierarchicalSourceRequest.newBuilder()
+          .setRequest(HierarchicalChunkRequest.newBuilder()
+              .addSources(Source.newBuilder()
+                  .setFile(FileSource.newBuilder()
+                      .setBase64String("dGVzdA==")
+                      .setFilename("test.pdf")
+                      .build())
+                  .build())
+              .build())
+          .build();
+
+      var response = blockingStub.chunkHierarchicalSource(request);
+
+      assertThat(response.getResponse().getProcessingTime()).isEqualTo(1.0);
+      verify(api).chunkSourceWithHierarchicalChunker(
+          any(HierarchicalChunkDocumentRequest.class));
+    }
+
+    @Test
+    void chunkHybridSourceDelegatesToApi() {
+      var apiResponse = ChunkDocumentResponse.builder()
+          .processingTime(0.5)
+          .build();
+
+      when(api.chunkSourceWithHybridChunker(any(HybridChunkDocumentRequest.class)))
+          .thenReturn(apiResponse);
+
+      var response = blockingStub.chunkHybridSource(
+          ChunkHybridSourceRequest.getDefaultInstance());
+
+      assertThat(response.getResponse().getProcessingTime()).isEqualTo(0.5);
+      verify(api).chunkSourceWithHybridChunker(
+          any(HybridChunkDocumentRequest.class));
+    }
+
+    @Test
+    void chunkHierarchicalSourceAsyncReturnsTaskStatus() {
+      var taskResponse = TaskStatusPollResponse.builder()
+          .taskId("chunk-task-1")
+          .taskStatus(TaskStatus.PENDING)
+          .taskPosition(1L)
+          .build();
+
+      when(asyncSubmitter.submitChunkHierarchicalSource(
+          any(HierarchicalChunkDocumentRequest.class)))
+          .thenReturn(taskResponse);
+
+      var response = blockingStub.chunkHierarchicalSourceAsync(
+          ChunkHierarchicalSourceAsyncRequest.getDefaultInstance());
+
+      assertThat(response.getResponse().getTaskId()).isEqualTo("chunk-task-1");
+      assertThat(response.getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_PENDING);
+      verify(asyncSubmitter).submitChunkHierarchicalSource(
+          any(HierarchicalChunkDocumentRequest.class));
+    }
+
+    @Test
+    void chunkHybridSourceAsyncReturnsTaskStatus() {
+      var taskResponse = TaskStatusPollResponse.builder()
+          .taskId("chunk-task-2")
+          .taskStatus(TaskStatus.STARTED)
+          .taskPosition(0L)
+          .build();
+
+      when(asyncSubmitter.submitChunkHybridSource(any(HybridChunkDocumentRequest.class)))
+          .thenReturn(taskResponse);
+
+      var response = blockingStub.chunkHybridSourceAsync(
+          ChunkHybridSourceAsyncRequest.getDefaultInstance());
+
+      assertThat(response.getResponse().getTaskId()).isEqualTo("chunk-task-2");
+      assertThat(response.getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_STARTED);
+      verify(asyncSubmitter).submitChunkHybridSource(
+          any(HybridChunkDocumentRequest.class));
+    }
+  }
+
+  @Nested
+  class TaskTests {
+
+    @Test
+    void pollTaskStatusDelegatesToApi() {
+      var apiResponse = TaskStatusPollResponse.builder()
+          .taskId("task-123")
+          .taskStatus(TaskStatus.STARTED)
+          .taskPosition(2L)
+          .build();
+
+      when(api.pollTaskStatus(any(TaskStatusPollRequest.class)))
+          .thenReturn(apiResponse);
+
+      var response = blockingStub.pollTaskStatus(
+          PollTaskStatusRequest.newBuilder()
+              .setRequest(ai.docling.serve.v1.TaskStatusPollRequest.newBuilder()
+                  .setTaskId("task-123")
+                  .build())
+              .build());
+
+      assertThat(response.getResponse().getTaskId()).isEqualTo("task-123");
+      assertThat(response.getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_STARTED);
+      assertThat(response.getResponse().getTaskPosition()).isEqualTo(2);
+    }
+
+    @Test
+    void getConvertResultDelegatesToApi() {
+      var apiResponse = ConvertDocumentResponse.builder()
+          .document(DocumentResponse.builder()
+              .filename("result.pdf")
+              .build())
+          .build();
+
+      when(api.convertTaskResult(any(TaskResultRequest.class)))
+          .thenReturn(apiResponse);
+
+      var response = blockingStub.getConvertResult(
+          GetConvertResultRequest.newBuilder()
+              .setRequest(ai.docling.serve.v1.TaskResultRequest.newBuilder()
+                  .setTaskId("task-456")
+                  .build())
+              .build());
+
+      assertThat(response.getResponse().getDocument().getFilename()).isEqualTo("result.pdf");
+    }
+
+    @Test
+    void getChunkResultDelegatesToApi() {
+      var apiResponse = ChunkDocumentResponse.builder()
+          .processingTime(2.0)
+          .build();
+
+      when(api.chunkTaskResult(any(TaskResultRequest.class)))
+          .thenReturn(apiResponse);
+
+      var response = blockingStub.getChunkResult(
+          GetChunkResultRequest.newBuilder()
+              .setRequest(ai.docling.serve.v1.TaskResultRequest.newBuilder()
+                  .setTaskId("task-789")
+                  .build())
+              .build());
+
+      assertThat(response.getResponse().getProcessingTime()).isEqualTo(2.0);
+    }
+  }
+
+  @Nested
+  class WatchTests {
+
+    @Test
+    void watchConvertSourceStreamsStatusUpdates() {
+      var submitResponse = TaskStatusPollResponse.builder()
+          .taskId("watch-1")
+          .taskStatus(TaskStatus.PENDING)
+          .taskPosition(2L)
+          .build();
+      var polledStarted = TaskStatusPollResponse.builder()
+          .taskId("watch-1")
+          .taskStatus(TaskStatus.STARTED)
+          .taskPosition(1L)
+          .build();
+      var polledSuccess = TaskStatusPollResponse.builder()
+          .taskId("watch-1")
+          .taskStatus(TaskStatus.SUCCESS)
+          .taskPosition(0L)
+          .build();
+
+      when(asyncSubmitter.submitConvertSource(any(ConvertDocumentRequest.class)))
+          .thenReturn(submitResponse);
+      when(api.pollTaskStatus(any(TaskStatusPollRequest.class)))
+          .thenReturn(polledStarted, polledSuccess);
+
+      var responses = new ArrayList<WatchConvertSourceResponse>();
+      var iterator = blockingStub.watchConvertSource(
+          WatchConvertSourceRequest.getDefaultInstance());
+      iterator.forEachRemaining(responses::add);
+
+      assertThat(responses).hasSize(3);
+      assertThat(responses.get(0).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_PENDING);
+      assertThat(responses.get(1).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_STARTED);
+      assertThat(responses.get(2).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_SUCCESS);
+      verify(asyncSubmitter).submitConvertSource(any(ConvertDocumentRequest.class));
+      verify(api, times(2)).pollTaskStatus(any(TaskStatusPollRequest.class));
+    }
+
+    @Test
+    void watchChunkHierarchicalSourceStreamsStatusUpdates() {
+      var submitResponse = TaskStatusPollResponse.builder()
+          .taskId("watch-h-1")
+          .taskStatus(TaskStatus.PENDING)
+          .build();
+      var polledSuccess = TaskStatusPollResponse.builder()
+          .taskId("watch-h-1")
+          .taskStatus(TaskStatus.SUCCESS)
+          .build();
+
+      when(asyncSubmitter.submitChunkHierarchicalSource(
+          any(HierarchicalChunkDocumentRequest.class)))
+          .thenReturn(submitResponse);
+      when(api.pollTaskStatus(any(TaskStatusPollRequest.class)))
+          .thenReturn(polledSuccess);
+
+      var responses = new ArrayList<WatchChunkHierarchicalSourceResponse>();
+      blockingStub.watchChunkHierarchicalSource(
+          WatchChunkHierarchicalSourceRequest.getDefaultInstance())
+          .forEachRemaining(responses::add);
+
+      assertThat(responses).hasSize(2);
+      assertThat(responses.get(0).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_PENDING);
+      assertThat(responses.get(1).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_SUCCESS);
+      verify(asyncSubmitter).submitChunkHierarchicalSource(
+          any(HierarchicalChunkDocumentRequest.class));
+      verify(api, times(1)).pollTaskStatus(any(TaskStatusPollRequest.class));
+    }
+
+    @Test
+    void watchChunkHybridSourceStreamsStatusUpdates() {
+      var submitResponse = TaskStatusPollResponse.builder()
+          .taskId("watch-y-1")
+          .taskStatus(TaskStatus.STARTED)
+          .build();
+      var polledSuccess = TaskStatusPollResponse.builder()
+          .taskId("watch-y-1")
+          .taskStatus(TaskStatus.SUCCESS)
+          .build();
+
+      when(asyncSubmitter.submitChunkHybridSource(any(HybridChunkDocumentRequest.class)))
+          .thenReturn(submitResponse);
+      when(api.pollTaskStatus(any(TaskStatusPollRequest.class)))
+          .thenReturn(polledSuccess);
+
+      var responses = new ArrayList<WatchChunkHybridSourceResponse>();
+      blockingStub.watchChunkHybridSource(
+          WatchChunkHybridSourceRequest.getDefaultInstance())
+          .forEachRemaining(responses::add);
+
+      assertThat(responses).hasSize(2);
+      assertThat(responses.get(0).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_STARTED);
+      assertThat(responses.get(1).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_SUCCESS);
+      verify(asyncSubmitter).submitChunkHybridSource(any(HybridChunkDocumentRequest.class));
+    }
+
+    @Test
+    void watchCompletesImmediatelyWhenSubmitReturnsSuccess() {
+      var submitResponse = TaskStatusPollResponse.builder()
+          .taskId("watch-imm")
+          .taskStatus(TaskStatus.SUCCESS)
+          .build();
+
+      when(asyncSubmitter.submitConvertSource(any(ConvertDocumentRequest.class)))
+          .thenReturn(submitResponse);
+
+      var responses = new ArrayList<WatchConvertSourceResponse>();
+      blockingStub.watchConvertSource(
+          WatchConvertSourceRequest.getDefaultInstance())
+          .forEachRemaining(responses::add);
+
+      assertThat(responses).hasSize(1);
+      assertThat(responses.get(0).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_SUCCESS);
+      verify(asyncSubmitter).submitConvertSource(any(ConvertDocumentRequest.class));
+      verify(api, times(0)).pollTaskStatus(any(TaskStatusPollRequest.class));
+    }
+
+    @Test
+    void watchCompletesOnFailureStatus() {
+      var submitResponse = TaskStatusPollResponse.builder()
+          .taskId("watch-fail")
+          .taskStatus(TaskStatus.PENDING)
+          .build();
+      var polledFailure = TaskStatusPollResponse.builder()
+          .taskId("watch-fail")
+          .taskStatus(TaskStatus.FAILURE)
+          .build();
+
+      when(asyncSubmitter.submitConvertSource(any(ConvertDocumentRequest.class)))
+          .thenReturn(submitResponse);
+      when(api.pollTaskStatus(any(TaskStatusPollRequest.class)))
+          .thenReturn(polledFailure);
+
+      var responses = new ArrayList<WatchConvertSourceResponse>();
+      blockingStub.watchConvertSource(
+          WatchConvertSourceRequest.getDefaultInstance())
+          .forEachRemaining(responses::add);
+
+      assertThat(responses).hasSize(2);
+      assertThat(responses.get(0).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_PENDING);
+      assertThat(responses.get(1).getResponse().getTaskStatus())
+          .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_FAILURE);
+    }
+
+    @Test
+    void watchPropagatesPollError() {
+      var submitResponse = TaskStatusPollResponse.builder()
+          .taskId("watch-err")
+          .taskStatus(TaskStatus.PENDING)
+          .build();
+
+      when(asyncSubmitter.submitConvertSource(any(ConvertDocumentRequest.class)))
+          .thenReturn(submitResponse);
+      when(api.pollTaskStatus(any(TaskStatusPollRequest.class)))
+          .thenThrow(new RuntimeException("Poll connection refused"));
+
+      assertThatThrownBy(() -> {
+        blockingStub.watchConvertSource(
+            WatchConvertSourceRequest.getDefaultInstance())
+            .forEachRemaining(r -> {});
+      })
+          .isInstanceOf(StatusRuntimeException.class)
+          .hasMessageContaining("Poll connection refused");
+    }
+  }
+
+  @Nested
+  class ClearTests {
+
+    @Test
+    void clearConvertersDelegatesToApi() {
+      var apiResponse = ClearResponse.builder().status("cleared").build();
+
+      when(api.clearConverters(
+          any(ai.docling.serve.api.clear.request.ClearConvertersRequest.class)))
+          .thenReturn(apiResponse);
+
+      var response = blockingStub.clearConverters(
+          ClearConvertersRequest.getDefaultInstance());
+
+      assertThat(response.getResponse().getStatus()).isEqualTo("cleared");
+    }
+
+    @Test
+    void clearResultsDelegatesToApi() {
+      var apiResponse = ClearResponse.builder().status("cleared").build();
+
+      when(api.clearResults(any(ClearResultsRequest.class)))
+          .thenReturn(apiResponse);
+
+      var response = blockingStub.clearResults(
+          ai.docling.serve.v1.ClearResultsRequest.newBuilder()
+              .setOlderThan(60.0f)
+              .build());
+
+      assertThat(response.getResponse().getStatus()).isEqualTo("cleared");
+    }
+  }
+}
diff --git a/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/mapping/DoclingDocumentMapperTest.java b/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/mapping/DoclingDocumentMapperTest.java
new file mode 100644
index 00000000..93c8ae5e
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/mapping/DoclingDocumentMapperTest.java
@@ -0,0 +1,489 @@
+package ai.docling.serve.grpc.v1.mapping;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatNoException;
+
+import java.math.BigInteger;
+import java.util.List;
+
+import ai.docling.core.DoclingDocument;
+import ai.docling.core.v1.BoundingBox;
+import ai.docling.core.v1.ContentLayer;
+import ai.docling.core.v1.DocItemLabel;
+import ai.docling.core.v1.DocumentOrigin;
+import ai.docling.core.v1.Formatting;
+import ai.docling.core.v1.GraphCell;
+import ai.docling.core.v1.GraphCellLabel;
+import ai.docling.core.v1.GraphData;
+import ai.docling.core.v1.GraphLink;
+import ai.docling.core.v1.GraphLinkLabel;
+import ai.docling.core.v1.ImageRef;
+import ai.docling.core.v1.KeyValueItem;
+import ai.docling.core.v1.PageItem;
+import ai.docling.core.v1.ProvenanceItem;
+import ai.docling.core.v1.Script;
+import ai.docling.core.v1.SectionHeaderItem;
+import ai.docling.core.v1.Size;
+import ai.docling.core.v1.TableCell;
+import ai.docling.core.v1.TableData;
+import ai.docling.core.v1.TableItem;
+import ai.docling.core.v1.TextItem;
+
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for DoclingDocumentMapper null-safety.
+ * Every nullable field that we guard must have a regression test here
+ * so removing a null check causes a test failure (NPE).
+ */
+class DoclingDocumentMapperTest {
+
+ @Test
+ void mapNullDocumentReturnsDefault() {
+     var emptyProto = ai.docling.core.v1.DoclingDocument.getDefaultInstance();
+     assertThat(DoclingDocumentMapper.map(null)).isEqualTo(emptyProto);
+ }
+
+ /** Origin metadata: unset fields must map to empty proto values, set fields must be copied. */
+ @Nested
+ class DocumentOriginNullSafety {
+     @Test
+     void handlesAllNullFields() {
+         var emptyOrigin = DoclingDocument.DocumentOrigin.builder().build();
+         var document = DoclingDocument.builder().name("test").origin(emptyOrigin).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mapped = DoclingDocumentMapper.map(document);
+         assertThat(mapped.hasOrigin()).isTrue();
+         assertThat(mapped.getOrigin().getMimetype()).isEmpty();
+         assertThat(mapped.getOrigin().getBinaryHash()).isEmpty();
+         assertThat(mapped.getOrigin().getFilename()).isEmpty();
+     }
+
+     @Test
+     void mapsPopulatedFields() {
+         var pdfOrigin = DoclingDocument.DocumentOrigin.builder()
+                 .mimetype("application/pdf")
+                 .binaryHash(BigInteger.valueOf(12345))
+                 .filename("test.pdf")
+                 .uri("file:///test.pdf")
+                 .build();
+         var document = DoclingDocument.builder().name("test").origin(pdfOrigin).build();
+
+         var mappedOrigin = DoclingDocumentMapper.map(document).getOrigin();
+
+         assertThat(mappedOrigin.getMimetype()).isEqualTo("application/pdf");
+         assertThat(mappedOrigin.getBinaryHash()).isEqualTo("12345");
+         assertThat(mappedOrigin.getFilename()).isEqualTo("test.pdf");
+         assertThat(mappedOrigin.getUri()).isEqualTo("file:///test.pdf");
+     }
+ }
+
+ /** Section headers: an unset level must map to 0, a set level must be carried through. */
+ @Nested
+ class SectionHeaderNullSafety {
+     @Test
+     void handlesNullLevel() {
+         var header = DoclingDocument.SectionHeaderItem.builder()
+                 .selfRef("#/texts/0")
+                 .label(DoclingDocument.DocItemLabel.SECTION_HEADER)
+                 .text("Header")
+                 .orig("Header")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 // level deliberately left unset (null)
+                 .build();
+         var document = DoclingDocument.builder().name("test").text(header).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mappedHeader = DoclingDocumentMapper.map(document).getTexts(0).getSectionHeader();
+         assertThat(mappedHeader.getLevel()).isZero();
+     }
+
+     @Test
+     void mapsPopulatedLevel() {
+         var header = DoclingDocument.SectionHeaderItem.builder()
+                 .selfRef("#/texts/0")
+                 .label(DoclingDocument.DocItemLabel.SECTION_HEADER)
+                 .text("Header")
+                 .orig("Header")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .level(3)
+                 .build();
+         var document = DoclingDocument.builder().name("test").text(header).build();
+
+         var mappedHeader = DoclingDocumentMapper.map(document).getTexts(0).getSectionHeader();
+         assertThat(mappedHeader.getLevel()).isEqualTo(3);
+     }
+ }
+
+ /** Text formatting: an unset script must map to SCRIPT_UNSPECIFIED without throwing. */
+ @Nested
+ class FormattingNullSafety {
+     @Test
+     void handlesNullScript() {
+         var boldOnly = DoclingDocument.Formatting.builder()
+                 .bold(true)
+                 .italic(false)
+                 .underline(false)
+                 .strikethrough(false)
+                 // script deliberately left unset (null)
+                 .build();
+         var paragraph = DoclingDocument.TextItem.builder()
+                 .selfRef("#/texts/0")
+                 .label(DoclingDocument.DocItemLabel.PARAGRAPH)
+                 .text("text").orig("text")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .formatting(boldOnly)
+                 .build();
+         var document = DoclingDocument.builder().name("test").text(paragraph).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mappedFormatting = DoclingDocumentMapper.map(document)
+                 .getTexts(0).getText().getBase().getFormatting();
+         assertThat(mappedFormatting.getBold()).isTrue();
+         assertThat(mappedFormatting.getScript()).isEqualTo(Script.SCRIPT_UNSPECIFIED);
+     }
+ }
+
+ /** Provenance items: a missing bbox stays absent in the proto, a present one is mapped. */
+ @Nested
+ class ProvenanceNullSafety {
+     @Test
+     void handlesNullBbox() {
+         var pageOnly = DoclingDocument.ProvenanceItem.builder()
+                 .pageNo(1)
+                 // bbox deliberately left unset (null)
+                 .build();
+
+         assertThatNoException()
+                 .isThrownBy(() -> DoclingDocumentMapper.mapProvenanceItem(pageOnly));
+
+         var mapped = DoclingDocumentMapper.mapProvenanceItem(pageOnly);
+         assertThat(mapped.getPageNo()).isEqualTo(1);
+         assertThat(mapped.hasBbox()).isFalse();
+     }
+
+     @Test
+     void mapsPopulatedBbox() {
+         var box = DoclingDocument.BoundingBox.builder()
+                 .l(10.0).t(20.0).r(100.0).b(50.0).build();
+         var located = DoclingDocument.ProvenanceItem.builder()
+                 .pageNo(1).bbox(box)
+                 .charspan(List.of(0, 10))
+                 .build();
+
+         var mapped = DoclingDocumentMapper.mapProvenanceItem(located);
+         assertThat(mapped.hasBbox()).isTrue();
+         assertThat(mapped.getBbox().getL()).isEqualTo(10.0);
+         assertThat(mapped.getCharspanCount()).isEqualTo(2);
+     }
+ }
+
+ /** Bounding boxes: unset coordinates must map to 0, set coordinates must be copied. */
+ @Nested
+ class BoundingBoxNullSafety {
+     @Test
+     void handlesAllNullCoordinates() {
+         var emptyBox = DoclingDocument.BoundingBox.builder().build();
+
+         assertThatNoException()
+                 .isThrownBy(() -> DoclingDocumentMapper.mapBoundingBox(emptyBox));
+
+         var mapped = DoclingDocumentMapper.mapBoundingBox(emptyBox);
+         assertThat(mapped.getL()).isZero();
+         assertThat(mapped.getT()).isZero();
+         assertThat(mapped.getR()).isZero();
+         assertThat(mapped.getB()).isZero();
+     }
+
+     @Test
+     void mapsPopulatedCoordinates() {
+         var box = DoclingDocument.BoundingBox.builder()
+                 .l(1.0).t(2.0).r(3.0).b(4.0).build();
+
+         var mapped = DoclingDocumentMapper.mapBoundingBox(box);
+         assertThat(mapped.getL()).isEqualTo(1.0);
+         assertThat(mapped.getT()).isEqualTo(2.0);
+         assertThat(mapped.getR()).isEqualTo(3.0);
+         assertThat(mapped.getB()).isEqualTo(4.0);
+     }
+ }
+
+ /** Table data: unset row/column counts must map to 0, set counts must be copied. */
+ @Nested
+ class TableDataNullSafety {
+     @Test
+     void handlesNullNumRowsAndNumCols() {
+         var emptyData = DoclingDocument.TableData.builder()
+                 // numRows and numCols stay null; grid defaults to empty
+                 .build();
+         var table = DoclingDocument.TableItem.builder()
+                 .selfRef("#/tables/0")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .data(emptyData)
+                 .build();
+         var document = DoclingDocument.builder().name("test").table(table).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mappedData = DoclingDocumentMapper.map(document).getTables(0).getData();
+         assertThat(mappedData.getNumRows()).isZero();
+         assertThat(mappedData.getNumCols()).isZero();
+     }
+
+     @Test
+     void mapsPopulatedNumRowsAndNumCols() {
+         var sizedData = DoclingDocument.TableData.builder()
+                 .numRows(5).numCols(3).build();
+         var table = DoclingDocument.TableItem.builder()
+                 .selfRef("#/tables/0")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .data(sizedData).build();
+         var document = DoclingDocument.builder().name("test").table(table).build();
+
+         var mappedData = DoclingDocumentMapper.map(document).getTables(0).getData();
+         assertThat(mappedData.getNumRows()).isEqualTo(5);
+         assertThat(mappedData.getNumCols()).isEqualTo(3);
+     }
+ }
+
+ /** Table cells: unset spans, offsets, bbox and text must map to proto defaults. */
+ @Nested
+ class TableCellNullSafety {
+     @Test
+     void handlesAllNullFields() {
+         // Only the boolean flags are set; the boxed Integer fields, bbox and text stay null
+         var bareCell = DoclingDocument.TableCell.builder()
+                 .columnHeader(false).rowHeader(false)
+                 .rowSection(false).fillable(false)
+                 .build();
+         var gridData = DoclingDocument.TableData.builder()
+                 .grid(List.of(List.of(bareCell))).build();
+         var table = DoclingDocument.TableItem.builder()
+                 .selfRef("#/tables/0")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .data(gridData).build();
+         var document = DoclingDocument.builder().name("test").table(table).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mappedCell = DoclingDocumentMapper.map(document)
+                 .getTables(0).getData().getGrid(0).getCells(0);
+         assertThat(mappedCell.hasBbox()).isFalse();
+         assertThat(mappedCell.getRowSpan()).isZero();
+         assertThat(mappedCell.getColSpan()).isZero();
+         assertThat(mappedCell.getStartRowOffsetIdx()).isZero();
+         assertThat(mappedCell.getEndRowOffsetIdx()).isZero();
+         assertThat(mappedCell.getStartColOffsetIdx()).isZero();
+         assertThat(mappedCell.getEndColOffsetIdx()).isZero();
+         assertThat(mappedCell.getText()).isEmpty();
+     }
+
+     @Test
+     void mapsPopulatedFields() {
+         var filledCell = DoclingDocument.TableCell.builder()
+                 .bbox(DoclingDocument.BoundingBox.builder()
+                         .l(1.0).t(2.0).r(3.0).b(4.0).build())
+                 .text("Cell")
+                 .rowSpan(2).colSpan(3)
+                 .startRowOffsetIdx(0).endRowOffsetIdx(2)
+                 .startColOffsetIdx(1).endColOffsetIdx(4)
+                 .columnHeader(true).rowHeader(false)
+                 .rowSection(false).fillable(false)
+                 .build();
+         var gridData = DoclingDocument.TableData.builder()
+                 .grid(List.of(List.of(filledCell))).build();
+         var table = DoclingDocument.TableItem.builder()
+                 .selfRef("#/tables/0")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .data(gridData).build();
+         var document = DoclingDocument.builder().name("test").table(table).build();
+
+         var mappedCell = DoclingDocumentMapper.map(document)
+                 .getTables(0).getData().getGrid(0).getCells(0);
+         assertThat(mappedCell.hasBbox()).isTrue();
+         assertThat(mappedCell.getBbox().getL()).isEqualTo(1.0);
+         assertThat(mappedCell.getText()).isEqualTo("Cell");
+         assertThat(mappedCell.getRowSpan()).isEqualTo(2);
+         assertThat(mappedCell.getColSpan()).isEqualTo(3);
+         assertThat(mappedCell.getStartRowOffsetIdx()).isZero();
+         assertThat(mappedCell.getEndRowOffsetIdx()).isEqualTo(2);
+         assertThat(mappedCell.getStartColOffsetIdx()).isEqualTo(1);
+         assertThat(mappedCell.getEndColOffsetIdx()).isEqualTo(4);
+         assertThat(mappedCell.getColumnHeader()).isTrue();
+     }
+ }
+
+ /** Graph cells: unset fields must map to proto defaults, set fields must be copied. */
+ @Nested
+ class GraphCellNullSafety {
+     @Test
+     void handlesAllNullFields() {
+         var bareCell = DoclingDocument.GraphCell.builder().build();
+         var graph = DoclingDocument.GraphData.builder().cell(bareCell).build();
+         var keyValue = DoclingDocument.KeyValueItem.builder()
+                 .selfRef("#/kv/0")
+                 .label("kv_region")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .graph(graph)
+                 .build();
+         var document = DoclingDocument.builder().name("test").keyValueItem(keyValue).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mappedCell = DoclingDocumentMapper.map(document)
+                 .getKeyValueItems(0).getGraph().getCells(0);
+         assertThat(mappedCell.getLabel()).isEqualTo(ai.docling.core.v1.GraphCellLabel.GRAPH_CELL_LABEL_UNSPECIFIED);
+         assertThat(mappedCell.getCellId()).isZero();
+         assertThat(mappedCell.getText()).isEmpty();
+         assertThat(mappedCell.getOrig()).isEmpty();
+     }
+
+     @Test
+     void mapsPopulatedFields() {
+         var keyCell = DoclingDocument.GraphCell.builder()
+                 .label(DoclingDocument.GraphCellLabel.KEY)
+                 .cellId(42)
+                 .text("Name:")
+                 .orig("Name:")
+                 .build();
+         var graph = DoclingDocument.GraphData.builder().cell(keyCell).build();
+         var keyValue = DoclingDocument.KeyValueItem.builder()
+                 .selfRef("#/kv/0").label("kv_region")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .graph(graph).build();
+         var document = DoclingDocument.builder().name("test").keyValueItem(keyValue).build();
+
+         var mappedCell = DoclingDocumentMapper.map(document)
+                 .getKeyValueItems(0).getGraph().getCells(0);
+         assertThat(mappedCell.getLabel()).isEqualTo(ai.docling.core.v1.GraphCellLabel.GRAPH_CELL_LABEL_KEY);
+         assertThat(mappedCell.getCellId()).isEqualTo(42);
+         assertThat(mappedCell.getText()).isEqualTo("Name:");
+         assertThat(mappedCell.getOrig()).isEqualTo("Name:");
+     }
+ }
+
+ /** Graph links: unset fields must map to proto defaults, set fields must be copied. */
+ @Nested
+ class GraphLinkNullSafety {
+     @Test
+     void handlesAllNullFields() {
+         var bareLink = DoclingDocument.GraphLink.builder().build();
+         var graph = DoclingDocument.GraphData.builder().link(bareLink).build();
+         var keyValue = DoclingDocument.KeyValueItem.builder()
+                 .selfRef("#/kv/0").label("kv_region")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .graph(graph).build();
+         var document = DoclingDocument.builder().name("test").keyValueItem(keyValue).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mappedLink = DoclingDocumentMapper.map(document)
+                 .getKeyValueItems(0).getGraph().getLinks(0);
+         assertThat(mappedLink.getLabel()).isEqualTo(ai.docling.core.v1.GraphLinkLabel.GRAPH_LINK_LABEL_UNSPECIFIED);
+         assertThat(mappedLink.getSourceCellId()).isZero();
+         assertThat(mappedLink.getTargetCellId()).isZero();
+     }
+
+     @Test
+     void mapsPopulatedFields() {
+         var valueLink = DoclingDocument.GraphLink.builder()
+                 .label(DoclingDocument.GraphLinkLabel.TO_VALUE)
+                 .sourceCellId(1).targetCellId(2).build();
+         var graph = DoclingDocument.GraphData.builder().link(valueLink).build();
+         var keyValue = DoclingDocument.KeyValueItem.builder()
+                 .selfRef("#/kv/0").label("kv_region")
+                 .contentLayer(DoclingDocument.ContentLayer.BODY)
+                 .graph(graph).build();
+         var document = DoclingDocument.builder().name("test").keyValueItem(keyValue).build();
+
+         var mappedLink = DoclingDocumentMapper.map(document)
+                 .getKeyValueItems(0).getGraph().getLinks(0);
+         assertThat(mappedLink.getLabel()).isEqualTo(ai.docling.core.v1.GraphLinkLabel.GRAPH_LINK_LABEL_TO_VALUE);
+         assertThat(mappedLink.getSourceCellId()).isEqualTo(1);
+         assertThat(mappedLink.getTargetCellId()).isEqualTo(2);
+     }
+ }
+
+ /** Page items: a missing size stays absent and a missing page number maps to 0. */
+ @Nested
+ class PageItemNullSafety {
+     @Test
+     void handlesNullSizeAndPageNo() {
+         var barePage = DoclingDocument.PageItem.builder().build();
+         var document = DoclingDocument.builder().name("test").page("0", barePage).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mappedPage = DoclingDocumentMapper.map(document)
+                 .getPagesMap().get("0");
+         assertThat(mappedPage.hasSize()).isFalse();
+         assertThat(mappedPage.getPageNo()).isZero();
+     }
+
+     @Test
+     void mapsPopulatedFields() {
+         var sizedPage = DoclingDocument.PageItem.builder()
+                 .pageNo(3)
+                 .size(DoclingDocument.Size.builder()
+                         .width(800.0).height(600.0).build())
+                 .build();
+         var document = DoclingDocument.builder().name("test").page("3", sizedPage).build();
+
+         var mappedPage = DoclingDocumentMapper.map(document)
+                 .getPagesMap().get("3");
+         assertThat(mappedPage.getPageNo()).isEqualTo(3);
+         assertThat(mappedPage.hasSize()).isTrue();
+         assertThat(mappedPage.getSize().getWidth()).isEqualTo(800.0);
+         assertThat(mappedPage.getSize().getHeight()).isEqualTo(600.0);
+     }
+ }
+
+ /** Image references: unset fields must map to proto defaults, set fields must be copied. */
+ @Nested
+ class ImageRefNullSafety {
+     @Test
+     void handlesAllNullFields() {
+         var bareImage = DoclingDocument.ImageRef.builder().build();
+         var page = DoclingDocument.PageItem.builder()
+                 .pageNo(1).image(bareImage).build();
+         var document = DoclingDocument.builder().name("test").page("1", page).build();
+
+         assertThatNoException().isThrownBy(() -> DoclingDocumentMapper.map(document));
+
+         var mappedPage = DoclingDocumentMapper.map(document)
+                 .getPagesMap().get("1");
+         assertThat(mappedPage.hasImage()).isTrue();
+         var mappedImage = mappedPage.getImage();
+         assertThat(mappedImage.getMimetype()).isEmpty();
+         assertThat(mappedImage.getDpi()).isZero();
+         assertThat(mappedImage.hasSize()).isFalse();
+         assertThat(mappedImage.getUri()).isEmpty();
+     }
+
+     @Test
+     void mapsPopulatedFields() {
+         var pngImage = DoclingDocument.ImageRef.builder()
+                 .mimetype("image/png")
+                 .dpi(300)
+                 .size(DoclingDocument.Size.builder()
+                         .width(800.0).height(600.0).build())
+                 .uri("file:///image.png")
+                 .build();
+         var page = DoclingDocument.PageItem.builder()
+                 .pageNo(1).image(pngImage).build();
+         var document = DoclingDocument.builder().name("test").page("1", page).build();
+
+         var mappedImage = DoclingDocumentMapper.map(document)
+                 .getPagesMap().get("1").getImage();
+         assertThat(mappedImage.getMimetype()).isEqualTo("image/png");
+         assertThat(mappedImage.getDpi()).isEqualTo(300);
+         assertThat(mappedImage.hasSize()).isTrue();
+         assertThat(mappedImage.getUri()).isEqualTo("file:///image.png");
+     }
+ }
+}
diff --git a/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/mapping/ServeApiMapperTest.java b/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/mapping/ServeApiMapperTest.java
new file mode 100644
index 00000000..7d467e26
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/test/java/ai/docling/serve/grpc/v1/mapping/ServeApiMapperTest.java
@@ -0,0 +1,723 @@
+package ai.docling.serve.grpc.v1.mapping;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.net.URI;
+import java.time.Duration;
+import java.util.List;
+
+import ai.docling.core.DoclingDocument;
+import ai.docling.core.v1.GroupItem;
+import ai.docling.core.v1.GroupLabel;
+import ai.docling.serve.api.chunk.response.Chunk;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import ai.docling.serve.api.chunk.response.Document;
+import ai.docling.serve.api.chunk.response.ExportDocumentResponse;
+import ai.docling.serve.api.convert.request.options.InputFormat;
+import ai.docling.serve.api.convert.request.options.OutputFormat;
+import ai.docling.serve.api.convert.request.options.OcrEngine;
+import ai.docling.serve.api.convert.request.options.PdfBackend;
+import ai.docling.serve.api.convert.request.options.TableFormerMode;
+import ai.docling.serve.api.convert.request.options.ProcessingPipeline;
+import ai.docling.serve.api.convert.request.options.ImageRefMode;
+import ai.docling.serve.api.convert.request.options.VlmModelType;
+import ai.docling.serve.api.convert.request.source.FileSource;
+import ai.docling.serve.api.convert.request.source.HttpSource;
+import ai.docling.serve.api.convert.request.source.S3Source;
+import ai.docling.serve.api.convert.request.target.InBodyTarget;
+import ai.docling.serve.api.convert.request.target.PutTarget;
+import ai.docling.serve.api.convert.request.target.ZipTarget;
+import ai.docling.serve.api.convert.response.ConvertDocumentResponse;
+import ai.docling.serve.api.convert.response.DocumentResponse;
+import ai.docling.serve.api.convert.response.ErrorItem;
+import ai.docling.serve.api.health.HealthCheckResponse;
+import ai.docling.serve.api.clear.response.ClearResponse;
+import ai.docling.serve.api.task.response.TaskStatus;
+import ai.docling.serve.api.task.response.TaskStatusMetadata;
+import ai.docling.serve.api.task.response.TaskStatusPollResponse;
+import ai.docling.serve.v1.ClearResultsRequest;
+import ai.docling.serve.v1.ConvertDocumentOptions;
+import ai.docling.serve.v1.ConvertDocumentRequest;
+import ai.docling.serve.v1.HierarchicalChunkRequest;
+import ai.docling.serve.v1.HierarchicalChunkerOptions;
+import ai.docling.serve.v1.HybridChunkRequest;
+import ai.docling.serve.v1.HybridChunkerOptions;
+import ai.docling.serve.v1.Source;
+import ai.docling.serve.v1.Target;
+import ai.docling.serve.v1.TaskResultRequest;
+import ai.docling.serve.v1.TaskStatusPollRequest;
+
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+
+class ServeApiMapperTest {
+
+ // ==================== Proto → Java (Request Mapping) ====================
+
+ @Nested
+ class ConvertDocumentRequestMapping {
+
+ @Test
+ void mapsFileSource() {
+     // A proto file source should come back as a serve-api FileSource with the same payload.
+     var protoFile = ai.docling.serve.v1.FileSource.newBuilder()
+             .setBase64String("dGVzdA==")
+             .setFilename("test.pdf")
+             .build();
+     var grpcRequest = ConvertDocumentRequest.newBuilder()
+             .addSources(Source.newBuilder().setFile(protoFile).build())
+             .build();
+
+     var sources = ServeApiMapper.toJava(grpcRequest).getSources();
+
+     assertThat(sources).hasSize(1);
+     assertThat(sources.get(0)).isInstanceOf(FileSource.class);
+     var fileSource = (FileSource) sources.get(0);
+     assertThat(fileSource.getBase64String()).isEqualTo("dGVzdA==");
+     assertThat(fileSource.getFilename()).isEqualTo("test.pdf");
+ }
+
+ @Test
+ void mapsHttpSource() {
+     // The URL string is expected to surface as a URI and the headers to be carried over.
+     var protoHttp = ai.docling.serve.v1.HttpSource.newBuilder()
+             .setUrl("https://example.com/doc.pdf")
+             .putHeaders("Authorization", "Bearer token")
+             .build();
+     var grpcRequest = ConvertDocumentRequest.newBuilder()
+             .addSources(Source.newBuilder().setHttp(protoHttp).build())
+             .build();
+
+     var sources = ServeApiMapper.toJava(grpcRequest).getSources();
+
+     assertThat(sources).hasSize(1);
+     assertThat(sources.get(0)).isInstanceOf(HttpSource.class);
+     var httpSource = (HttpSource) sources.get(0);
+     assertThat(httpSource.getUrl()).isEqualTo(URI.create("https://example.com/doc.pdf"));
+     assertThat(httpSource.getHeaders()).containsEntry("Authorization", "Bearer token");
+ }
+
+ @Test
+ void mapsS3Source() {
+     // Every S3 connection field set on the proto must be readable from the mapped source.
+     var protoS3 = ai.docling.serve.v1.S3Source.newBuilder()
+             .setEndpoint("https://s3.amazonaws.com")
+             .setAccessKey("AKID")
+             .setSecretKey("secret")
+             .setBucket("my-bucket")
+             .setKeyPrefix("docs/")
+             .setVerifySsl(true)
+             .build();
+     var grpcRequest = ConvertDocumentRequest.newBuilder()
+             .addSources(Source.newBuilder().setS3(protoS3).build())
+             .build();
+
+     var sources = ServeApiMapper.toJava(grpcRequest).getSources();
+
+     assertThat(sources).hasSize(1);
+     assertThat(sources.get(0)).isInstanceOf(S3Source.class);
+     var s3Source = (S3Source) sources.get(0);
+     assertThat(s3Source.getEndpoint()).isEqualTo("https://s3.amazonaws.com");
+     assertThat(s3Source.getAccessKey()).isEqualTo("AKID");
+     assertThat(s3Source.getSecretKey()).isEqualTo("secret");
+     assertThat(s3Source.getBucket()).isEqualTo("my-bucket");
+     assertThat(s3Source.getKeyPrefix()).isEqualTo("docs/");
+     assertThat(s3Source.isVerifySsl()).isTrue();
+ }
+
+ @Test
+ void mapsInBodyTarget() {
+     var inBody = ai.docling.serve.v1.Target.newBuilder()
+             .setInbody(ai.docling.serve.v1.InBodyTarget.newBuilder().build())
+             .build();
+     var grpcRequest = ConvertDocumentRequest.newBuilder()
+             .setTarget(inBody).build();
+
+     var mappedTarget = ServeApiMapper.toJava(grpcRequest).getTarget();
+     assertThat(mappedTarget).isInstanceOf(InBodyTarget.class);
+ }
+
+ @Test
+ void mapsPutTarget() {
+     var protoPut = ai.docling.serve.v1.PutTarget.newBuilder()
+             .setUrl("https://example.com/upload")
+             .build();
+     var grpcRequest = ConvertDocumentRequest.newBuilder()
+             .setTarget(ai.docling.serve.v1.Target.newBuilder().setPut(protoPut).build())
+             .build();
+
+     var mappedTarget = ServeApiMapper.toJava(grpcRequest).getTarget();
+     assertThat(mappedTarget).isInstanceOf(PutTarget.class);
+     var putTarget = (PutTarget) mappedTarget;
+     assertThat(putTarget.getUrl())
+             .isEqualTo(URI.create("https://example.com/upload"));
+ }
+
+ @Test
+ void mapsZipTarget() {
+     var zip = ai.docling.serve.v1.Target.newBuilder()
+             .setZip(ai.docling.serve.v1.ZipTarget.newBuilder().build())
+             .build();
+     var grpcRequest = ConvertDocumentRequest.newBuilder()
+             .setTarget(zip).build();
+
+     var mappedTarget = ServeApiMapper.toJava(grpcRequest).getTarget();
+     assertThat(mappedTarget).isInstanceOf(ZipTarget.class);
+ }
+
+ @Test
+ void mapsConvertOptions() {
+     // Populate every option checked below so each one is verified after mapping.
+     var protoOptions = ConvertDocumentOptions.newBuilder()
+             .addFromFormats(ai.docling.serve.v1.InputFormat.INPUT_FORMAT_PDF)
+             .addFromFormats(ai.docling.serve.v1.InputFormat.INPUT_FORMAT_DOCX)
+             .addToFormats(ai.docling.serve.v1.OutputFormat.OUTPUT_FORMAT_JSON)
+             .addToFormats(ai.docling.serve.v1.OutputFormat.OUTPUT_FORMAT_MD)
+             .setDoOcr(true)
+             .setForceOcr(false)
+             .setOcrEngine(ai.docling.serve.v1.OcrEngine.OCR_ENGINE_EASYOCR)
+             .addOcrLang("en")
+             .addOcrLang("de")
+             .setPdfBackend(ai.docling.serve.v1.PdfBackend.PDF_BACKEND_DLPARSE_V2)
+             .setTableMode(ai.docling.serve.v1.TableFormerMode.TABLE_FORMER_MODE_ACCURATE)
+             .setPipeline(ai.docling.serve.v1.ProcessingPipeline.PROCESSING_PIPELINE_STANDARD)
+             .setImageExportMode(ai.docling.serve.v1.ImageRefMode.IMAGE_REF_MODE_EMBEDDED)
+             .setAbortOnError(true)
+             .setDoTableStructure(true)
+             .setIncludeImages(false)
+             .setImagesScale(2.0)
+             .setDocumentTimeout(30.0)
+             .setDoCodeEnrichment(true)
+             .setDoFormulaEnrichment(false)
+             .setDoPictureClassification(true)
+             .setDoPictureDescription(false)
+             .setPictureDescriptionAreaThreshold(0.5)
+             .setVlmPipelineModel(ai.docling.serve.v1.VlmModelType.VLM_MODEL_TYPE_SMOLDOCLING)
+             .build();
+     var grpcRequest = ConvertDocumentRequest.newBuilder().setOptions(protoOptions).build();
+
+     var options = ServeApiMapper.toJava(grpcRequest).getOptions();
+
+     // Note: the numeric documentTimeout (30.0) is expected to surface as Duration.ofSeconds(30).
+     assertThat(options).isNotNull();
+     assertThat(options.getFromFormats()).containsExactly(InputFormat.PDF, InputFormat.DOCX);
+     assertThat(options.getToFormats()).containsExactly(OutputFormat.JSON, OutputFormat.MARKDOWN);
+     assertThat(options.getDoOcr()).isTrue();
+     assertThat(options.getForceOcr()).isFalse();
+     assertThat(options.getOcrEngine()).isEqualTo(OcrEngine.EASYOCR);
+     assertThat(options.getOcrLang()).containsExactly("en", "de");
+     assertThat(options.getPdfBackend()).isEqualTo(PdfBackend.DLPARSE_V2);
+     assertThat(options.getTableMode()).isEqualTo(TableFormerMode.ACCURATE);
+     assertThat(options.getPipeline()).isEqualTo(ProcessingPipeline.STANDARD);
+     assertThat(options.getImageExportMode()).isEqualTo(ImageRefMode.EMBEDDED);
+     assertThat(options.getAbortOnError()).isTrue();
+     assertThat(options.getDoTableStructure()).isTrue();
+     assertThat(options.getIncludeImages()).isFalse();
+     assertThat(options.getImagesScale()).isEqualTo(2.0);
+     assertThat(options.getDocumentTimeout()).isEqualTo(Duration.ofSeconds(30));
+     assertThat(options.getDoCodeEnrichment()).isTrue();
+     assertThat(options.getDoFormulaEnrichment()).isFalse();
+     assertThat(options.getDoPictureClassification()).isTrue();
+     assertThat(options.getDoPictureDescription()).isFalse();
+     assertThat(options.getPictureDescriptionAreaThreshold()).isEqualTo(0.5);
+     assertThat(options.getVlmPipelineModel()).isEqualTo(VlmModelType.SMOLDOCLING);
+ }
+
+ @Test
+ void mapsMultipleSources() {
+     var fileEntry = Source.newBuilder()
+             .setFile(ai.docling.serve.v1.FileSource.newBuilder()
+                     .setBase64String("YQ==")
+                     .setFilename("a.pdf")
+                     .build())
+             .build();
+     var httpEntry = Source.newBuilder()
+             .setHttp(ai.docling.serve.v1.HttpSource.newBuilder()
+                     .setUrl("https://example.com/b.pdf")
+                     .build())
+             .build();
+     var grpcRequest = ConvertDocumentRequest.newBuilder()
+             .addSources(fileEntry).addSources(httpEntry).build();
+
+     var sources = ServeApiMapper.toJava(grpcRequest).getSources();
+     assertThat(sources).hasSize(2);
+     assertThat(sources.get(0)).isInstanceOf(FileSource.class);
+     assertThat(sources.get(1)).isInstanceOf(HttpSource.class);
+ }
+
+ @Test
+ void mapsEmptyRequest() {
+     var mapped = ServeApiMapper.toJava(ConvertDocumentRequest.getDefaultInstance());
+
+     assertThat(mapped.getSources()).isEmpty();
+     // Options comes back as an empty instance rather than null (due to @Builder.Default)
+     assertThat(mapped.getOptions()).isNotNull();
+     assertThat(mapped.getOptions().getFromFormats()).isEmpty();
+     assertThat(mapped.getTarget()).isNull();
+ }
+ }
+
+ /** Proto chunk requests must keep their sources, flags and chunker options when mapped. */
+ @Nested
+ class ChunkRequestMapping {
+     @Test
+     void mapsHierarchicalChunkRequest() {
+         var fileEntry = Source.newBuilder()
+                 .setFile(ai.docling.serve.v1.FileSource.newBuilder()
+                         .setBase64String("dGVzdA==")
+                         .setFilename("test.pdf").build())
+                 .build();
+         var grpcRequest = HierarchicalChunkRequest.newBuilder()
+                 .addSources(fileEntry)
+                 .setIncludeConvertedDoc(true)
+                 .setChunkingOptions(ai.docling.serve.v1.HierarchicalChunkerOptions.newBuilder()
+                         .setUseMarkdownTables(true)
+                         .setIncludeRawText(false)
+                         .build())
+                 .build();
+
+         var mapped = ServeApiMapper.toJava(grpcRequest);
+
+         assertThat(mapped.getSources()).hasSize(1);
+         assertThat(mapped.isIncludeConvertedDoc()).isTrue();
+         assertThat(mapped.getChunkingOptions()).isNotNull();
+         assertThat(mapped.getChunkingOptions().isUseMarkdownTables()).isTrue();
+         assertThat(mapped.getChunkingOptions().isIncludeRawText()).isFalse();
+     }
+
+     @Test
+     void mapsHybridChunkRequest() {
+         var fileEntry = Source.newBuilder()
+                 .setFile(ai.docling.serve.v1.FileSource.newBuilder()
+                         .setBase64String("dGVzdA==")
+                         .setFilename("test.pdf").build())
+                 .build();
+         var grpcRequest = HybridChunkRequest.newBuilder()
+                 .addSources(fileEntry)
+                 .setIncludeConvertedDoc(false)
+                 .setChunkingOptions(ai.docling.serve.v1.HybridChunkerOptions.newBuilder()
+                         .setUseMarkdownTables(true)
+                         .setIncludeRawText(true)
+                         .setMaxTokens(512)
+                         .setTokenizer("sentence-transformers/all-MiniLM-L6-v2")
+                         .setMergePeers(true)
+                         .build())
+                 .build();
+
+         var mapped = ServeApiMapper.toJava(grpcRequest);
+
+         assertThat(mapped.getSources()).hasSize(1);
+         assertThat(mapped.isIncludeConvertedDoc()).isFalse();
+         assertThat(mapped.getChunkingOptions()).isNotNull();
+         assertThat(mapped.getChunkingOptions().isUseMarkdownTables()).isTrue();
+         assertThat(mapped.getChunkingOptions().isIncludeRawText()).isTrue();
+         assertThat(mapped.getChunkingOptions().getMaxTokens()).isEqualTo(512);
+         assertThat(mapped.getChunkingOptions().getTokenizer())
+                 .isEqualTo("sentence-transformers/all-MiniLM-L6-v2");
+         assertThat(mapped.getChunkingOptions().getMergePeers()).isTrue();
+     }
+ }
+
+ /** Task and clear requests: ids are copied and float durations become Duration values. */
+ @Nested
+ class TaskRequestMapping {
+     @Test
+     void mapsTaskStatusPollRequest() {
+         var pollRequest = TaskStatusPollRequest.newBuilder()
+                 .setTaskId("task-123")
+                 .setWaitTime(5.0f)
+                 .build();
+
+         var mapped = ServeApiMapper.toJava(pollRequest);
+         // A wait time of 5.0f is expected to surface as 5000 ms.
+         assertThat(mapped.getTaskId()).isEqualTo("task-123");
+         assertThat(mapped.getWaitTime()).isEqualTo(Duration.ofMillis(5000));
+     }
+
+     @Test
+     void mapsTaskResultRequest() {
+         var resultRequest = TaskResultRequest.newBuilder()
+                 .setTaskId("task-456")
+                 .build();
+
+         var mapped = ServeApiMapper.toJava(resultRequest);
+
+         assertThat(mapped.getTaskId()).isEqualTo("task-456");
+     }
+
+     @Test
+     void mapsClearResultsRequest() {
+         var clearRequest = ClearResultsRequest.newBuilder()
+                 .setOlderThan(60.0f)
+                 .build();
+
+         var mapped = ServeApiMapper.toJava(clearRequest);
+         // 60.0f is expected to surface as 60000 ms (getOlderThen is the accessor's actual spelling).
+         assertThat(mapped.getOlderThen()).isEqualTo(Duration.ofMillis(60000));
+     }
+ }
+
+ // ==================== Java → Proto (Response Mapping) ====================
+
+ /** Health responses: the status is copied and a null response maps to an empty status. */
+ @Nested
+ class HealthResponseMapping {
+     @Test
+     void mapsHealthCheckResponse() {
+         var healthy = HealthCheckResponse.builder()
+                 .status("ok").build();
+
+         var mapped = ServeApiMapper.toProto(healthy);
+         assertThat(mapped.getStatus()).isEqualTo("ok");
+     }
+
+     @Test
+     void handlesNullHealthCheckResponse() {
+         HealthCheckResponse missing = null;
+         var mapped = ServeApiMapper.toProto(missing);
+         assertThat(mapped.getStatus()).isEmpty();
+     }
+ }
+
+ @Nested
+ class ConvertDocumentResponseMapping {
+
+ @Test
+ void mapsConvertDocumentResponse() {
+     // Response carrying every document content flavour, one error, status and two timings.
+     var response = ConvertDocumentResponse.builder()
+             .document(DocumentResponse.builder()
+                     .filename("test.pdf")
+                     .markdownContent("# Hello")
+                     .htmlContent("<h1>Hello</h1>") // NOTE(review): literal was line-broken; markup restored by best guess
+                     .textContent("Hello")
+                     .doctagsContent("Hello")
+                     .build())
+             .error(ErrorItem.builder()
+                     .componentType("ocr")
+                     .errorMessage("OCR failed")
+                     .moduleName("tesseract")
+                     .build())
+             .processingTime(1.5)
+             .status("partial")
+             .timing("parse", 0.5).timing("ocr", 1.0)
+             .build();
+
+     var mapped = ServeApiMapper.toProto(response);
+
+     assertThat(mapped.getDocument().getFilename()).isEqualTo("test.pdf");
+     assertThat(mapped.getDocument().getMdContent()).isEqualTo("# Hello");
+     assertThat(mapped.getDocument().getHtmlContent()).isEqualTo("<h1>Hello</h1>");
+     assertThat(mapped.getDocument().getTextContent()).isEqualTo("Hello");
+     assertThat(mapped.getDocument().getDoctagsContent()).isEqualTo("Hello");
+     assertThat(mapped.getErrorsCount()).isEqualTo(1);
+     assertThat(mapped.getErrors(0).getComponentType()).isEqualTo("ocr");
+     assertThat(mapped.getErrors(0).getErrorMessage()).isEqualTo("OCR failed");
+     assertThat(mapped.getErrors(0).getModuleName()).isEqualTo("tesseract");
+     assertThat(mapped.getProcessingTime()).isEqualTo(1.5);
+     assertThat(mapped.getStatus()).isEqualTo("partial");
+     assertThat(mapped.getTimingsMap()).containsEntry("parse", 0.5);
+     assertThat(mapped.getTimingsMap()).containsEntry("ocr", 1.0);
+ }
+
+ @Test
+ void mapsResponseWithJsonContent() {
+     // A DoclingDocument attached as JSON content must be mapped into the proto document.
+     var body = DoclingDocument.GroupItem.builder()
+             .selfRef("#/body")
+             .name("body")
+             .label(DoclingDocument.GroupLabel.UNSPECIFIED)
+             .build();
+     var jsonDocument = DoclingDocument.builder()
+             .name("test-doc").body(body).build();
+
+     var response = ConvertDocumentResponse.builder()
+             .document(DocumentResponse.builder()
+                     .filename("test.pdf")
+                     .jsonContent(jsonDocument)
+                     .build())
+             .build();
+
+     var mappedDocument = ServeApiMapper.toProto(response).getDocument();
+
+     assertThat(mappedDocument.hasJsonContent()).isTrue();
+     assertThat(mappedDocument.getJsonContent().getName()).isEqualTo("test-doc");
+ }
+ }
+
+    // Tests for mapping the Java ChunkDocumentResponse onto its proto counterpart.
+    @Nested
+    class ChunkDocumentResponseMapping {
+
+        // Builds a response with one chunk and one document and verifies every field
+        // after mapping; singular builder values show up as one-element proto lists.
+        @Test
+        void mapsChunkDocumentResponse() {
+            var sourceChunk = Chunk.builder()
+                .filename("test.pdf")
+                .chunkIndex(0)
+                .text("## Heading\nThis is chunk text")
+                .rawText("This is chunk text")
+                .numTokens(5)
+                .heading("Heading")
+                .caption("Figure 1")
+                .docItem("item-1")
+                .pageNumber(1)
+                .metadata("key1", "value1")
+                .build();
+            var exported = ExportDocumentResponse.builder()
+                .filename("test.pdf")
+                .markdownContent("# Test")
+                .build();
+            var sourceDocument = Document.builder()
+                .kind("pdf")
+                .content(exported)
+                .status("success")
+                .build();
+            var response = ChunkDocumentResponse.builder()
+                .chunk(sourceChunk)
+                .document(sourceDocument)
+                .processingTime(0.8)
+                .build();
+
+            var mapped = ServeApiMapper.toProto(response);
+
+            // Chunk section.
+            assertThat(mapped.getChunksCount()).isEqualTo(1);
+            var mappedChunk = mapped.getChunks(0);
+            assertThat(mappedChunk.getFilename()).isEqualTo("test.pdf");
+            assertThat(mappedChunk.getChunkIndex()).isZero();
+            assertThat(mappedChunk.getText()).isEqualTo("## Heading\nThis is chunk text");
+            assertThat(mappedChunk.getRawText()).isEqualTo("This is chunk text");
+            assertThat(mappedChunk.getNumTokens()).isEqualTo(5);
+            assertThat(mappedChunk.getHeadingsList()).containsExactly("Heading");
+            assertThat(mappedChunk.getCaptionsList()).containsExactly("Figure 1");
+            assertThat(mappedChunk.getDocItemsList()).containsExactly("item-1");
+            assertThat(mappedChunk.getPageNumbersList()).containsExactly(1);
+            assertThat(mappedChunk.getMetadataMap()).containsEntry("key1", "value1");
+
+            // Document section.
+            assertThat(mapped.getDocumentsCount()).isEqualTo(1);
+            var mappedDocument = mapped.getDocuments(0);
+            assertThat(mappedDocument.getKind()).isEqualTo("pdf");
+            assertThat(mappedDocument.getContent().getFilename()).isEqualTo("test.pdf");
+            assertThat(mappedDocument.getContent().getMdContent()).isEqualTo("# Test");
+            assertThat(mappedDocument.getStatus()).isEqualTo("success");
+
+            assertThat(mapped.getProcessingTime()).isEqualTo(0.8);
+        }
+    }
+
+    // Tests for mapping the Java TaskStatusPollResponse onto its proto counterpart.
+    @Nested
+    class TaskStatusPollResponseMapping {
+
+        // A fully populated poll response keeps its id, type, status, position and counters.
+        @Test
+        void mapsTaskStatusPollResponse() {
+            var counters = TaskStatusMetadata.builder()
+                .numDocs(10L)
+                .numProcessed(10L)
+                .numSucceeded(8L)
+                .numFailed(2L)
+                .build();
+            var response = TaskStatusPollResponse.builder()
+                .taskId("task-789")
+                .taskType("convert")
+                .taskStatus(TaskStatus.SUCCESS)
+                .taskPosition(0L)
+                .taskStatusMetadata(counters)
+                .build();
+
+            var mapped = ServeApiMapper.toProto(response);
+
+            assertThat(mapped.getTaskId()).isEqualTo("task-789");
+            assertThat(mapped.getTaskType()).isEqualTo("convert");
+            assertThat(mapped.getTaskStatus())
+                .isEqualTo(ai.docling.serve.v1.TaskStatus.TASK_STATUS_SUCCESS);
+            assertThat(mapped.getTaskPosition()).isZero();
+
+            var mappedCounters = mapped.getTaskMeta();
+            assertThat(mappedCounters.getNumDocs()).isEqualTo(10L);
+            assertThat(mappedCounters.getNumProcessed()).isEqualTo(10L);
+            assertThat(mappedCounters.getNumSucceeded()).isEqualTo(8L);
+            assertThat(mappedCounters.getNumFailed()).isEqualTo(2L);
+        }
+
+        // Each Java TaskStatus constant must map to something other than UNRECOGNIZED.
+        @Test
+        void mapsAllTaskStatuses() {
+            for (var status : TaskStatus.values()) {
+                var response = TaskStatusPollResponse.builder()
+                    .taskStatus(status)
+                    .build();
+                assertThat(ServeApiMapper.toProto(response).getTaskStatus())
+                    .isNotEqualTo(ai.docling.serve.v1.TaskStatus.UNRECOGNIZED);
+            }
+        }
+    }
+
+    // Tests for mapping the Java ClearResponse onto its proto counterpart.
+    @Nested
+    class ClearResponseMapping {
+
+        // The status string is copied verbatim.
+        @Test
+        void mapsClearResponse() {
+            var response = ClearResponse.builder().status("cleared").build();
+
+            var mapped = ServeApiMapper.toProto(response);
+
+            assertThat(mapped.getStatus()).isEqualTo("cleared");
+        }
+
+        // A null response still produces a proto, with an empty status.
+        @Test
+        void handlesNullClearResponse() {
+            ClearResponse absent = null;
+            var mapped = ServeApiMapper.toProto(absent);
+            assertThat(mapped.getStatus()).isEmpty();
+        }
+    }
+
+ // ==================== Enum Mapping Tests ====================
+
+    // Checks that the listed proto enum constants are translated to their Java API
+    // counterparts when a proto request is converted with ServeApiMapper.toJava.
+    @Nested
+    class EnumMappingTests {
+
+        // Adds the proto input formats in order and expects the Java list in the same order.
+        // Note the naming difference: proto MD corresponds to Java MARKDOWN.
+        @Test
+        void mapsAllInputFormats() {
+            var expectedJava = List.of(
+                InputFormat.ASCIIDOC,
+                InputFormat.AUDIO,
+                InputFormat.CSV,
+                InputFormat.DOCX,
+                InputFormat.HTML,
+                InputFormat.IMAGE,
+                InputFormat.JSON_DOCLING,
+                InputFormat.MARKDOWN,
+                InputFormat.METS_GBS,
+                InputFormat.PDF,
+                InputFormat.PPTX,
+                InputFormat.XLSX,
+                InputFormat.XML_JATS,
+                InputFormat.XML_USPTO
+            );
+            var protoFormats = List.of(
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_ASCIIDOC,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_AUDIO,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_CSV,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_DOCX,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_HTML,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_IMAGE,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_JSON_DOCLING,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_MD,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_METS_GBS,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_PDF,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_PPTX,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_XLSX,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_XML_JATS,
+                ai.docling.serve.v1.InputFormat.INPUT_FORMAT_XML_USPTO
+            );
+
+            var optionsBuilder = ConvertDocumentOptions.newBuilder();
+            for (var format : protoFormats) {
+                optionsBuilder.addFromFormats(format);
+            }
+            var request = ConvertDocumentRequest.newBuilder()
+                .setOptions(optionsBuilder.build())
+                .build();
+
+            var converted = ServeApiMapper.toJava(request);
+
+            assertThat(converted.getOptions().getFromFormats())
+                .containsExactlyElementsOf(expectedJava);
+        }
+
+        // Same idea for output formats: order is preserved and proto MD becomes Java MARKDOWN.
+        @Test
+        void mapsAllOutputFormats() {
+            var options = ConvertDocumentOptions.newBuilder()
+                .addToFormats(ai.docling.serve.v1.OutputFormat.OUTPUT_FORMAT_DOCTAGS)
+                .addToFormats(ai.docling.serve.v1.OutputFormat.OUTPUT_FORMAT_HTML)
+                .addToFormats(ai.docling.serve.v1.OutputFormat.OUTPUT_FORMAT_HTML_SPLIT_PAGE)
+                .addToFormats(ai.docling.serve.v1.OutputFormat.OUTPUT_FORMAT_JSON)
+                .addToFormats(ai.docling.serve.v1.OutputFormat.OUTPUT_FORMAT_MD)
+                .addToFormats(ai.docling.serve.v1.OutputFormat.OUTPUT_FORMAT_TEXT)
+                .build();
+            var request = ConvertDocumentRequest.newBuilder()
+                .setOptions(options)
+                .build();
+
+            var converted = ServeApiMapper.toJava(request);
+
+            assertThat(converted.getOptions().getToFormats()).containsExactly(
+                OutputFormat.DOCTAGS,
+                OutputFormat.HTML,
+                OutputFormat.HTML_SPLIT_PAGE,
+                OutputFormat.JSON,
+                OutputFormat.MARKDOWN,
+                OutputFormat.TEXT
+            );
+        }
+
+        // Converts one request per OCR engine; the two lists below are positionally paired.
+        @Test
+        void mapsAllOcrEngines() {
+            var expected = List.of(
+                OcrEngine.AUTO, OcrEngine.EASYOCR, OcrEngine.OCRMAC,
+                OcrEngine.RAPIDOCR, OcrEngine.TESSEROCR, OcrEngine.TESSERACT
+            );
+            var engines = List.of(
+                ai.docling.serve.v1.OcrEngine.OCR_ENGINE_AUTO,
+                ai.docling.serve.v1.OcrEngine.OCR_ENGINE_EASYOCR,
+                ai.docling.serve.v1.OcrEngine.OCR_ENGINE_OCRMAC,
+                ai.docling.serve.v1.OcrEngine.OCR_ENGINE_RAPIDOCR,
+                ai.docling.serve.v1.OcrEngine.OCR_ENGINE_TESSEROCR,
+                ai.docling.serve.v1.OcrEngine.OCR_ENGINE_TESSERACT
+            );
+
+            var expectedEngine = expected.iterator();
+            for (var engine : engines) {
+                var options = ConvertDocumentOptions.newBuilder()
+                    .setOcrEngine(engine)
+                    .build();
+                var request = ConvertDocumentRequest.newBuilder()
+                    .setOptions(options)
+                    .build();
+                var converted = ServeApiMapper.toJava(request);
+                assertThat(converted.getOptions().getOcrEngine()).isEqualTo(expectedEngine.next());
+            }
+        }
+
+        // Converts one request per PDF backend; the two lists below are positionally paired.
+        @Test
+        void mapsAllPdfBackends() {
+            var expected = List.of(
+                PdfBackend.DLPARSE_V1, PdfBackend.DLPARSE_V2,
+                PdfBackend.DLPARSE_V4, PdfBackend.PYPDFIUM2
+            );
+            var backends = List.of(
+                ai.docling.serve.v1.PdfBackend.PDF_BACKEND_DLPARSE_V1,
+                ai.docling.serve.v1.PdfBackend.PDF_BACKEND_DLPARSE_V2,
+                ai.docling.serve.v1.PdfBackend.PDF_BACKEND_DLPARSE_V4,
+                ai.docling.serve.v1.PdfBackend.PDF_BACKEND_PYPDFIUM2
+            );
+
+            var expectedBackend = expected.iterator();
+            for (var backend : backends) {
+                var options = ConvertDocumentOptions.newBuilder()
+                    .setPdfBackend(backend)
+                    .build();
+                var request = ConvertDocumentRequest.newBuilder()
+                    .setOptions(options)
+                    .build();
+                var converted = ServeApiMapper.toJava(request);
+                assertThat(converted.getOptions().getPdfBackend()).isEqualTo(expectedBackend.next());
+            }
+        }
+
+        // Converts one request per VLM model type; the two lists below are positionally paired.
+        @Test
+        void mapsAllVlmModelTypes() {
+            var expected = List.of(
+                VlmModelType.SMOLDOCLING, VlmModelType.SMOLDOCLING_VLLM,
+                VlmModelType.GRANITE_VISION, VlmModelType.GRANITE_VISION_VLLM,
+                VlmModelType.GRANITE_VISION_OLLAMA, VlmModelType.GOT_OCR_2
+            );
+            var models = List.of(
+                ai.docling.serve.v1.VlmModelType.VLM_MODEL_TYPE_SMOLDOCLING,
+                ai.docling.serve.v1.VlmModelType.VLM_MODEL_TYPE_SMOLDOCLING_VLLM,
+                ai.docling.serve.v1.VlmModelType.VLM_MODEL_TYPE_GRANITE_VISION,
+                ai.docling.serve.v1.VlmModelType.VLM_MODEL_TYPE_GRANITE_VISION_VLLM,
+                ai.docling.serve.v1.VlmModelType.VLM_MODEL_TYPE_GRANITE_VISION_OLLAMA,
+                ai.docling.serve.v1.VlmModelType.VLM_MODEL_TYPE_GOT_OCR_2
+            );
+
+            var expectedModel = expected.iterator();
+            for (var model : models) {
+                var options = ConvertDocumentOptions.newBuilder()
+                    .setVlmPipelineModel(model)
+                    .build();
+                var request = ConvertDocumentRequest.newBuilder()
+                    .setOptions(options)
+                    .build();
+                var converted = ServeApiMapper.toJava(request);
+                assertThat(converted.getOptions().getVlmPipelineModel()).isEqualTo(expectedModel.next());
+            }
+        }
+    }
+}
diff --git a/docling-serve/docling-serve-grpc/src/test/resources/simplelogger.properties b/docling-serve/docling-serve-grpc/src/test/resources/simplelogger.properties
new file mode 100644
index 00000000..835e2735
--- /dev/null
+++ b/docling-serve/docling-serve-grpc/src/test/resources/simplelogger.properties
@@ -0,0 +1,3 @@
+# Suppress noisy stack traces from expected error-propagation tests
+org.slf4j.simpleLogger.defaultLogLevel=warn
+org.slf4j.simpleLogger.log.ai.docling.serve.grpc.DoclingServeGrpcService=off
diff --git a/docs/build.gradle.kts b/docs/build.gradle.kts
index 91b54678..23a1619d 100644
--- a/docs/build.gradle.kts
+++ b/docs/build.gradle.kts
@@ -25,6 +25,7 @@ mkdocs {
"project-artifactId" to "${rootProject.name}",
"serve-api-artifactId" to project(":docling-serve-api").name,
"serve-client-artifactId" to project(":docling-serve-client").name,
+ "serve-grpc-artifactId" to project(":docling-serve-grpc").name,
"testcontainers-artifactId" to project(":docling-testcontainers").name,
"core-artifactId" to project(":docling-core").name
)
diff --git a/docs/src/doc/docs/docling-serve/serve-grpc-client.md b/docs/src/doc/docs/docling-serve/serve-grpc-client.md
new file mode 100644
index 00000000..3849e9df
--- /dev/null
+++ b/docs/src/doc/docs/docling-serve/serve-grpc-client.md
@@ -0,0 +1,165 @@
+# Docling Serve gRPC Client
+
+[View this page in the published documentation](https://docling-project.github.io/docling-java/{{ gradle.project_version }}/docling-serve/serve-grpc-client)
+
+The `docling-serve-grpc` module provides a gRPC interface for communicating with a
+[Docling Serve](https://github.com/docling-project/docling-serve) backend.
+
+It wraps the framework‑agnostic `DoclingServeApi` from [`docling-serve-api`](serve-api.md) and exposes it via gRPC, providing strongly‑typed Protobuf definitions for the complete Docling document structure.
+
+If you prefer a standard HTTP implementation, see the reference client:
+
+- Docling Serve Client: [`docling-serve-client`](serve-client.md)
+
+## When to use this module
+
+- You want to leverage gRPC for communication, benefiting from binary serialization and efficient streaming.
+- You want to store the output as a protocol buffer record for archival purposes.
+- You want to manage the protobuf in a schema manager to ensure non-breaking changes.
+- You need a strongly‑typed contract (Protobuf) that is shared across different languages and services.
+- You want to use the "Watch" pattern where the server manages the polling loop for asynchronous tasks.
+
+## Installation
+
+Add the gRPC client dependency to your project.
+
+=== "Gradle"
+
+ ``` kotlin
+ dependencies {
+ implementation("{{ gradle.project_groupId }}:{{ gradle.serve_grpc_artifactId }}:{{ gradle.project_version }}")
+ }
+ ```
+
+=== "Maven"
+
+ ``` xml
+    <dependency>
+      <groupId>{{ gradle.project_groupId }}</groupId>
+      <artifactId>{{ gradle.serve_grpc_artifactId }}</artifactId>
+      <version>{{ gradle.project_version }}</version>
+    </dependency>
+ ```
+
+## Core concepts
+
+### gRPC Service Definition
+
+The service is defined in `ai.docling.serve.v1.DoclingServeService`. It mirrors the REST API but adds gRPC-specific capabilities like server-streaming for status monitoring.
+
+### Strong Typing with Protobuf
+
+Unlike the REST API which relies on JSON, this module provides a 1:1 Protobuf mapping of the `DoclingDocument` schema. This ensures zero data loss and maximum type safety across the gRPC boundary.
+
+### Watch RPCs (Server-Side Polling)
+
+One of the key advantages of the gRPC implementation is the "Watch" pattern. Instead of the client manually polling the status of an asynchronous task, the client can call a "Watch" RPC:
+
+- `WatchConvertSource`
+- `WatchChunkHierarchicalSource`
+- `WatchChunkHybridSource`
+
+The gRPC server will submit the task, manage the internal poll loop, and stream status updates back to the client until the task reaches a terminal state (Success or Failure).
+
+### A Quick Note on Versioned Packages
+
+gRPC services are defined in separate packages (e.g., `ai.docling.serve.v1`) to allow for independent versioning and evolution of the gRPC contract. This means that while the underlying `DoclingServeApi` may evolve, the gRPC service can maintain backward compatibility or introduce breaking changes in a controlled manner.
+
+This is a common practice in gRPC development. It allows you to retain your previous records and avoid breaking changes when you upgrade the gRPC client while still maintaining a 1:1 mapping of the REST API.
+
+## Design Philosophy: REST vs. gRPC Best Practices
+
+While this module provides a functional 1:1 mapping of the Docling Serve REST API, the gRPC implementation intentionally follows the [Protobuf Definition Guide](https://protobuf.dev/reference/protobuf/google.protobuf/) and [Buf](https://buf.build/docs/lint/overview/) linting standards.
+
+### Logical Mapping - gRPC for gRPC and REST for REST
+Many gRPC implementations overlook the long-term maintenance costs of using domain entities or shared models directly as RPC request/response types. While this seems convenient initially, it can lead to significant technical debt and confusion:
+
+- **Breaking Changes:** If a shared domain model changes, every RPC using it potentially breaks.
+- **Strict Linting:** Industry-standard tools would flag request/response messages that are reused across different RPCs as a violation of best practices.
+- **Future-Proofing:** Distinct messages allow you to add fields to one request without cluttering every other unrelated call.
+- **Protocol Conventions:** gRPC and REST have different serialization and versioning paradigms; a 1:1 binary-to-JSON bridge rarely honors both equally.
+
+In large-scale pipeline processors, Protobuf messages are frequently used to represent the domain model and are often committed to long-term storage or S3 archives. Adhering to these gRPC standards ensures that your archived records remain readable and that downstream integrations are insulated from breaking changes, avoiding the massive overhead of data conversion projects.
+
+As long as the gRPC API still covers everything the REST API offers, so that the two remain functionally equivalent, it is best to avoid the trap of a "pure" structural 1:1 mapping that reuses the same messages for every call.
+
+### Unique Request/Response Wrappers
+Every RPC in `DoclingServeService` has its own dedicated message pair (e.g., `ConvertSourceRequest` and `ConvertSourceResponse`). These messages *wrap* the underlying domain types (like `ConvertDocumentRequest`). This aligns with gRPC architectural patterns which favor encapsulation and independence of service methods.
+
+This "wrapper" pattern allows the gRPC contract to remain stable and lint-clean even if the underlying Java models or REST payloads undergo minor changes. It ensures that this gRPC client is a first-class citizen in a professional service-oriented architecture, rather than a "lazy" bridge.
+
+## Usage Examples
+
+### Server-Side Setup
+Below is a conceptual example of how to host the gRPC service. This service acts as a bridge, accepting gRPC requests and delegating them to the underlying REST client.
+
+```java
+import ai.docling.serve.api.DoclingServeApi;
+import ai.docling.serve.grpc.DoclingServeGrpcService;
+import io.grpc.ServerBuilder;
+import java.net.URI;
+
+// 1. Create the underlying REST client (bridge to Docling Serve)
+DoclingServeApi restClient = DoclingServeApi.builder()
+    .baseUrl("http://localhost:8000")
+    .build();
+
+// 2. Instantiate the gRPC service implementation
+DoclingServeGrpcService grpcService = new DoclingServeGrpcService(
+    restClient,
+    URI.create("http://localhost:8000")
+);
+
+// 3. Start the gRPC server
+var server = ServerBuilder.forPort(9000)
+    .addService(grpcService)
+    .build()
+    .start();
+```
+
+### Client-Side Call
+When calling the service from a gRPC client, notice how the domain request (`ConvertDocumentRequest`) is wrapped in a specific RPC request (`ConvertSourceRequest`). This follows the design philosophy of method independence.
+
+```java
+import ai.docling.serve.v1.ConvertDocumentRequest;
+import ai.docling.serve.v1.ConvertDocumentResponse;
+import ai.docling.serve.v1.ConvertSourceRequest;
+import ai.docling.serve.v1.ConvertSourceResponse;
+import ai.docling.serve.v1.DoclingServeServiceGrpc;
+import ai.docling.serve.v1.HttpSource;
+import ai.docling.serve.v1.Source;
+import io.grpc.ManagedChannelBuilder;
+import java.net.URI;
+
+// 1. Create a channel and a blocking stub
+var channel = ManagedChannelBuilder.forAddress("localhost", 9000)
+    .usePlaintext()
+    .build();
+var stub = DoclingServeServiceGrpc.newBlockingStub(channel);
+
+// 2. Build the domain-level request
+var docRequest = ConvertDocumentRequest.newBuilder()
+    .addSources(Source.newBuilder()
+        .setHttp(HttpSource.newBuilder()
+            .setUrl("https://arxiv.org/pdf/2408.09869")
+            .build())
+        .build())
+    .build();
+
+// 3. Wrap it in the RPC-specific request message
+var rpcRequest = ConvertSourceRequest.newBuilder()
+    .setRequest(docRequest)
+    .build();
+
+// 4. Execute the call
+ConvertSourceResponse rpcResponse = stub.convertSource(rpcRequest);
+
+// 5. Access the wrapped domain-level response
+ConvertDocumentResponse docResponse = rpcResponse.getResponse();
+System.out.println("Status: " + docResponse.getStatus());
+
+// 6. Shut the channel down once it is no longer needed
+channel.shutdown();
+```
+
+## Streaming Placeholder
+
+The `ConvertSourceStream` RPC is currently implemented as a **logical stream**.
+
+While the underlying Docling backend is currently synchronous (processing all sources in a single batch), the gRPC interface is designed to be "true-streaming" ready. This means that as soon as the Docling backend supports per-document event emission, this method will be updated to emit responses as each document completes, without requiring any changes to the gRPC service contract.
+
+This design future-proofs your integrations and allows for a more responsive user experience as the Docling ecosystem evolves.
diff --git a/docs/src/doc/mkdocs.yml b/docs/src/doc/mkdocs.yml
index b76f7bd2..0994286c 100644
--- a/docs/src/doc/mkdocs.yml
+++ b/docs/src/doc/mkdocs.yml
@@ -62,6 +62,7 @@ nav:
- Docling Serve:
- API: docling-serve/serve-api.md
- Client: docling-serve/serve-client.md
+ - gRPC Client: docling-serve/serve-grpc-client.md
- Testing: testing.md
- Testcontainers: testcontainers.md
diff --git a/settings.gradle.kts b/settings.gradle.kts
index ee5752bd..d1008b8f 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -4,8 +4,9 @@ plugins {
}
rootProject.name = "docling-java"
-include("docling-core", "docling-serve-api", "docling-serve-client", "docs", "docling-testcontainers", "docling-version-tests", "test-report-aggregation")
+include("docling-core", "docling-serve-api", "docling-serve-client", "docling-serve-grpc", "docs", "docling-testcontainers", "docling-version-tests", "test-report-aggregation")
project(":docling-serve-api").projectDir = file("docling-serve/docling-serve-api")
project(":docling-serve-client").projectDir = file("docling-serve/docling-serve-client")
+project(":docling-serve-grpc").projectDir = file("docling-serve/docling-serve-grpc")
project(":docling-version-tests").projectDir = file("docling-testing/docling-version-tests")
diff --git a/test-report-aggregation/build.gradle.kts b/test-report-aggregation/build.gradle.kts
index 52a2b47e..571f3700 100644
--- a/test-report-aggregation/build.gradle.kts
+++ b/test-report-aggregation/build.gradle.kts
@@ -13,12 +13,14 @@ dependencies {
testReportAggregation(project(":docling-core"))
testReportAggregation(project(":docling-serve-api"))
testReportAggregation(project(":docling-serve-client"))
+ testReportAggregation(project(":docling-serve-grpc"))
testReportAggregation(project(":docling-testcontainers"))
testReportAggregation(project(":docling-version-tests"))
jacocoAggregation(project(":docling-core"))
jacocoAggregation(project(":docling-serve-api"))
jacocoAggregation(project(":docling-serve-client"))
+ jacocoAggregation(project(":docling-serve-grpc"))
jacocoAggregation(project(":docling-testcontainers"))
jacocoAggregation(project(":docling-version-tests"))