Skip to content

Commit dcc2145

Browse files
DeveloperTinyZzh
authored and committed
feat: Add Protobuf protocol support for structured binary file parsing
- Implement ProtobufStructHandler for parsing .protobuf files
- Add protobuf dependency (com.google.protobuf:protobuf-java:4.29.2)
- Register handler in SPI configuration
- Add FILE_PROTOBUF extension constant to FileExtensionMatcher
- Update module-info.java with protobuf module requirement
- Generate PersonProto test class for descriptor testing
- Add comprehensive test suite with 33 test cases
- Achieve 100% instruction and branch coverage

Features:
- Support for startOrder and endOrder configuration
- Dynamic message parsing using protobuf descriptors
- Handle empty messages and parse errors gracefully
- Convert protobuf messages to StructImpl with proper field mapping
- Support for repeated fields (comma-separated values)
1 parent a42aa92 commit dcc2145

9 files changed

Lines changed: 1788 additions & 2 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
distributionUrl=https\://services.gradle.org/distributions/gradle-9.2.0-bin.zip
1+
distributionUrl=https\://services.gradle.org/distributions/gradle-9.2.1-bin.zip
22
rl=https\://services.gradle.org/distributions/gradle-9.2.0-all.zip
33
zipStoreBase=GRADLE_USER_HOME
44
zipStorePath=wrapper/dists

struct-core/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,6 @@ dependencies {
2121
implementation("org.apache.poi:poi-ooxml:${version_options.POI_VERSION}")
2222
// gson
2323
implementation("com.google.code.gson:gson:${version_options.GSON_VERSION}")
24+
// protobuf
25+
implementation("com.google.protobuf:protobuf-java:4.29.2")
2426
}

struct-core/src/main/java/module-info.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.struct.core.handler.CsvStructHandler;
2222
import org.struct.core.handler.ExcelUMStructHandler;
2323
import org.struct.core.handler.JsonStructHandler;
24+
import org.struct.core.handler.ProtobufStructHandler;
2425
import org.struct.core.handler.XlsEventStructHandler;
2526
import org.struct.core.handler.XlsxSaxStructHandler;
2627

@@ -44,6 +45,7 @@
4445
requires java.xml;
4546
requires org.slf4j;
4647
requires com.google.gson;
48+
requires com.google.protobuf;
4749
requires org.apache.poi.poi;
4850
requires org.apache.poi.ooxml;
4951

@@ -54,7 +56,8 @@
5456
CsvStructHandler,
5557
XlsEventStructHandler,
5658
XlsxSaxStructHandler,
57-
ExcelUMStructHandler
59+
ExcelUMStructHandler,
60+
ProtobufStructHandler
5861
;
5962
provides org.struct.core.converter.Converters with
6063
EmbeddedConverters;
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
/*
2+
*
3+
*
4+
* Copyright (c) 2024. - TinyZ.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.struct.core.handler;
20+
21+
import com.google.protobuf.Descriptors;
22+
import com.google.protobuf.DynamicMessage;
23+
import org.slf4j.Logger;
24+
import org.slf4j.LoggerFactory;
25+
import org.struct.core.StructDescriptor;
26+
import org.struct.core.StructImpl;
27+
import org.struct.core.StructWorker;
28+
import org.struct.core.matcher.FileExtensionMatcher;
29+
import org.struct.core.matcher.WorkerMatcher;
30+
import org.struct.exception.StructTransformException;
31+
import org.struct.spi.SPI;
32+
33+
import java.io.File;
34+
import java.io.FileInputStream;
35+
import java.io.IOException;
36+
import java.util.function.Consumer;
37+
38+
@SPI(name = "protobuf", order = 0)
39+
public class ProtobufStructHandler implements StructHandler {
40+
41+
private static final Logger LOGGER = LoggerFactory.getLogger(ProtobufStructHandler.class);
42+
private static final WorkerMatcher MATCHER = new FileExtensionMatcher(FileExtensionMatcher.FILE_PROTOBUF);
43+
44+
private final DescriptorPool descriptorPool;
45+
46+
public ProtobufStructHandler() {
47+
this(new DescriptorPool());
48+
}
49+
50+
public ProtobufStructHandler(DescriptorPool descriptorPool) {
51+
this.descriptorPool = descriptorPool;
52+
}
53+
54+
@Override
55+
public WorkerMatcher matcher() {
56+
return MATCHER;
57+
}
58+
59+
@Override
60+
public <T> void handle(StructWorker<T> worker, Class<T> clzOfStruct, Consumer<T> cellHandler, File file) {
61+
StructDescriptor descriptor = worker.getDescriptor();
62+
int line = 0;
63+
try {
64+
Descriptors.Descriptor messageDescriptor = descriptorPool.getDescriptor(clzOfStruct);
65+
if (messageDescriptor == null) {
66+
throw new StructTransformException("Failed to get protobuf descriptor for class: " + clzOfStruct.getName());
67+
}
68+
69+
int startOrder = descriptor.getStartOrder();
70+
int endOrder = descriptor.getEndOrder();
71+
72+
try (FileInputStream fis = new FileInputStream(file)) {
73+
while (true) {
74+
try {
75+
byte[] data = readDelimitedFrom(fis);
76+
if (data == null) {
77+
break;
78+
}
79+
if (data.length == 0) {
80+
break;
81+
}
82+
line++;
83+
if (startOrder > 0) {
84+
if (line < startOrder) {
85+
continue;
86+
}
87+
}
88+
if (endOrder > 0) {
89+
if (line > endOrder) {
90+
break;
91+
}
92+
}
93+
94+
DynamicMessage message = DynamicMessage.parseFrom(messageDescriptor, data);
95+
StructImpl struct = convertToStructImpl(message);
96+
worker.createInstance(struct).ifPresent(cellHandler);
97+
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
98+
LOGGER.warn("protobuf parse failure. struct:{}, file:{}, line:{}", clzOfStruct, file.getName(), line, e);
99+
break;
100+
}
101+
}
102+
}
103+
} catch (IOException e) {
104+
LOGGER.warn("protobuf deserialize failure. struct:{}, file:{}, line:{}", clzOfStruct, file.getName(), line, e);
105+
throw new StructTransformException(e.getMessage(), e);
106+
}
107+
}
108+
109+
public byte[] readDelimitedFrom(FileInputStream fis) throws IOException {
110+
int length = 0;
111+
int shift = 0;
112+
while (true) {
113+
int b = fis.read();
114+
if (b == -1) {
115+
return null;
116+
}
117+
length |= (b & 0x7F) << shift;
118+
if ((b & 0x80) == 0) {
119+
break;
120+
}
121+
shift += 7;
122+
}
123+
124+
byte[] data = new byte[length];
125+
int totalRead = 0;
126+
while (totalRead < length) {
127+
int bytesRead = fis.read(data, totalRead, length - totalRead);
128+
if (bytesRead == -1) {
129+
break;
130+
}
131+
totalRead += bytesRead;
132+
}
133+
if (totalRead < length) {
134+
return null;
135+
}
136+
return data;
137+
}
138+
139+
public StructImpl convertToStructImpl(DynamicMessage message) {
140+
StructImpl struct = new StructImpl();
141+
for (Descriptors.FieldDescriptor field : message.getDescriptorForType().getFields()) {
142+
Object value = message.getField(field);
143+
if (value == null) {
144+
continue;
145+
}
146+
if (field.isRepeated()) {
147+
StringBuilder sb = new StringBuilder();
148+
for (Object item : (Iterable<?>) value) {
149+
if (sb.length() > 0) {
150+
sb.append(",");
151+
}
152+
sb.append(item.toString());
153+
}
154+
struct.add(field.getName(), sb.toString());
155+
} else {
156+
struct.add(field.getName(), value.toString());
157+
}
158+
}
159+
return struct;
160+
}
161+
162+
public static class DescriptorPool {
163+
private Descriptors.Descriptor cachedDescriptor;
164+
165+
public Descriptors.Descriptor getDescriptor(Class<?> clazz) {
166+
if (cachedDescriptor != null) {
167+
return cachedDescriptor;
168+
}
169+
try {
170+
Class<?> parserClass = Class.forName(clazz.getName() + "OrBuilder");
171+
java.lang.reflect.Method method = parserClass.getMethod("getDescriptor");
172+
cachedDescriptor = (Descriptors.Descriptor) method.invoke(null);
173+
return cachedDescriptor;
174+
} catch (Exception e) {
175+
LOGGER.warn("Failed to get descriptor from OrBuilder for class: {}", clazz.getName());
176+
}
177+
return null;
178+
}
179+
180+
public void clearCache() {
181+
this.cachedDescriptor = null;
182+
}
183+
}
184+
}

struct-core/src/main/java/org/struct/core/matcher/FileExtensionMatcher.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public class FileExtensionMatcher implements WorkerMatcher {
3636
public static final String FILE_XML = ".xml";
3737
public static final String FILE_BINARY = ".binary";
3838
public static final String FILE_CSV = ".csv";
39+
public static final String FILE_PROTOBUF = ".protobuf";
3940

4041
/**
4142
* 可处理的文件大小的阀值

struct-core/src/main/resources/META-INF/struct/org.struct.core.handler.StructHandler

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ org.struct.core.handler.JsonStructHandler
33
org.struct.core.handler.XlsEventStructHandler
44
org.struct.core.handler.XlsxSaxStructHandler
55
org.struct.core.handler.CsvStructHandler
6+
org.struct.core.handler.ProtobufStructHandler

0 commit comments

Comments
 (0)