From 7524f6d373fce1647c3c4af79fc99194f0a83fff Mon Sep 17 00:00:00 2001 From: Dev10-sys Date: Mon, 4 May 2026 10:36:17 +0530 Subject: [PATCH] fix: resolve incorrect UTF-8 string encoding and array handling (#1841) - fixed Utf8String.bytes32PaddedLength calculation to use actual byte length for non-ASCII characters - updated TypeEncoder to correctly identify and encode static arrays of dynamic types with proper offsets - added comprehensive test cases for Chinese strings, long multi-byte strings, and empty strings - resolved compilation issues in existing tests on Windows by replacing literal characters with Unicode escapes - updated CHANGELOG.md with the bug fix entry Signed-off-by: Dev10-sys --- CHANGELOG.md | 3 +- .../main/java/org/web3j/abi/TypeEncoder.java | 35 +++++-- .../org/web3j/abi/datatypes/Utf8String.java | 13 ++- .../java/org/web3j/abi/TypeEncoderTest.java | 6 +- .../java/org/web3j/abi/Utf8StringTest.java | 98 +++++++++++++++++++ .../abi/datatypes/ChineseUtf8StringTest.java | 42 ++++++++ 6 files changed, 180 insertions(+), 17 deletions(-) create mode 100644 abi/src/test/java/org/web3j/abi/Utf8StringTest.java create mode 100644 abi/src/test/java/org/web3j/abi/datatypes/ChineseUtf8StringTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index dcd148859b..49543928b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# Change Log +# Change Log All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. @@ -10,6 +10,7 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline * Fix Async executor lifecycle to support safe shutdown and reuse (#2244) * Fix Async executor lifecycle to safely recreate executor after shutdown and prevent RejectedExecutionException (#2255) * Replace raw usage of EthLog.LogResult with parameterized type to improve type safety (#2252) +* Fix incorrect UTF-8 string encoding and array handling for non-ASCII characters (#1841) ### Features diff --git a/abi/src/main/java/org/web3j/abi/TypeEncoder.java b/abi/src/main/java/org/web3j/abi/TypeEncoder.java index 4b62def447..5218bfd377 100644 --- a/abi/src/main/java/org/web3j/abi/TypeEncoder.java +++ b/abi/src/main/java/org/web3j/abi/TypeEncoder.java @@ -50,12 +50,26 @@ public class TypeEncoder { private TypeEncoder() {} static boolean isDynamic(Type parameter) { - return parameter instanceof DynamicBytes + if (parameter instanceof DynamicBytes || parameter instanceof Utf8String - || parameter instanceof DynamicArray - || (parameter instanceof StaticArray - && DynamicStruct.class.isAssignableFrom( - ((StaticArray) parameter).getComponentType())); + || parameter instanceof DynamicArray) { + return true; + } + + if (parameter instanceof StaticArray) { + StaticArray staticArray = (StaticArray) parameter; + Class componentType = staticArray.getComponentType(); + return isDynamic(componentType); + } + + return false; + } + + private static boolean isDynamic(Class componentType) { + return DynamicStruct.class.isAssignableFrom(componentType) + || DynamicArray.class.isAssignableFrom(componentType) + || Utf8String.class.isAssignableFrom(componentType) + || DynamicBytes.class.isAssignableFrom(componentType); } @SuppressWarnings("unchecked") @@ -73,9 +87,8 @@ public static String encode(Type parameter) { } else if (parameter instanceof Utf8String) { return encodeString((Utf8String) parameter); } else if (parameter instanceof StaticArray) { - if (DynamicStruct.class.isAssignableFrom( - ((StaticArray) parameter).getComponentType())) { - return encodeStaticArrayWithDynamicStruct((StaticArray) parameter); + if (isDynamic(((StaticArray) parameter).getComponentType())) { + return encodeStaticArrayWithDynamicValues((StaticArray) parameter); } else { return encodeArrayValues((StaticArray) parameter); } @@ -163,7 +176,7 @@ static String removePadding(String encodedValue, Type parameter) { * @param * @return */ - private static String encodeStaticArrayWithDynamicStruct(Array value) { + private static String encodeStaticArrayWithDynamicValues(Array value) { String valuesOffsets = encodeDynamicsTypesArraysOffsets(value); String encodedValues = encodeArrayValues(value); @@ -225,6 +238,10 @@ static String encodeBool(Bool value) { static String encodeBytes(BytesType bytesType) { byte[] value = bytesType.getValue(); + return encodeBytes(value); + } + + static String encodeBytes(byte[] value) { int length = value.length; int mod = length % MAX_BYTE_LENGTH; diff --git a/abi/src/main/java/org/web3j/abi/datatypes/Utf8String.java b/abi/src/main/java/org/web3j/abi/datatypes/Utf8String.java index 0c7f2cc81e..ba69d3a32c 100644 --- a/abi/src/main/java/org/web3j/abi/datatypes/Utf8String.java +++ b/abi/src/main/java/org/web3j/abi/datatypes/Utf8String.java @@ -12,6 +12,8 @@ */ package org.web3j.abi.datatypes; +import java.nio.charset.StandardCharsets; + /** UTF-8 encoded string type. */ public class Utf8String implements Type { @@ -30,11 +32,14 @@ public Utf8String(String value) { */ @Override public int bytes32PaddedLength() { - if (value.isEmpty()) { - return MAX_BYTE_LENGTH; - } else { - return 2 * MAX_BYTE_LENGTH; + byte[] bytes = value.getBytes(StandardCharsets.UTF_8); + int len = bytes.length; + int mod = len % MAX_BYTE_LENGTH; + int padding = 0; + if (mod != 0) { + padding = MAX_BYTE_LENGTH - mod; } + return MAX_BYTE_LENGTH + len + padding; } @Override diff --git a/abi/src/test/java/org/web3j/abi/TypeEncoderTest.java b/abi/src/test/java/org/web3j/abi/TypeEncoderTest.java index 28052f5b15..a4d474ca26 100644 --- a/abi/src/test/java/org/web3j/abi/TypeEncoderTest.java +++ b/abi/src/test/java/org/web3j/abi/TypeEncoderTest.java @@ -1223,10 +1223,10 @@ public void testDynamicStringsArray() { public void testDynamicUtf8StringsArray() { // Test for issue #1741: non-ASCII characters (Chinese, Korean, etc.) encoding DynamicArray array = - new DynamicArray<>(Utf8String.class, new Utf8String("你好"), new Utf8String("世界")); + new DynamicArray<>(Utf8String.class, new Utf8String("\u4f60\u597d"), new Utf8String("\u4e16\u754c")); - // "你好" UTF-8 = E4BDA0E5A5BD (6 bytes) - // "世界" UTF-8 = E4B896E7958C (6 bytes) + // "\u4f60\u597d" UTF-8 = E4BDA0E5A5BD (6 bytes) + // "\u4e16\u754c" UTF-8 = E4B896E7958C (6 bytes) assertEquals( ("0000000000000000000000000000000000000000000000000000000000000002" + "0000000000000000000000000000000000000000000000000000000000000040" diff --git a/abi/src/test/java/org/web3j/abi/Utf8StringTest.java b/abi/src/test/java/org/web3j/abi/Utf8StringTest.java new file mode 100644 index 0000000000..3fadd036f2 --- /dev/null +++ b/abi/src/test/java/org/web3j/abi/Utf8StringTest.java @@ -0,0 +1,98 @@ +package org.web3j.abi; + +import java.math.BigInteger; +import java.util.Arrays; +import java.util.Collections; + +import org.junit.jupiter.api.Test; + +import org.web3j.abi.datatypes.DynamicArray; +import org.web3j.abi.datatypes.DynamicStruct; +import org.web3j.abi.datatypes.Uint; +import org.web3j.abi.datatypes.Utf8String; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class Utf8StringTest { + + @Test + public void testUtf8StringArrayChinese() { + // "你好" is 2 Chinese characters. Each is 3 bytes in UTF-8. Total 6 bytes. + Utf8String s1 = new Utf8String("\u4f60\u597d"); + // "世界" is 2 Chinese characters. Each is 3 bytes in UTF-8. Total 6 bytes. + Utf8String s2 = new Utf8String("\u4e16\u754c"); + + DynamicArray array = new DynamicArray<>(Utf8String.class, Arrays.asList(s1, s2)); + + String encoded = TypeEncoder.encode(array); + + String expectedLength = "0000000000000000000000000000000000000000000000000000000000000002"; + String expectedOffset1 = "0000000000000000000000000000000000000000000000000000000000000040"; + String expectedOffset2 = "0000000000000000000000000000000000000000000000000000000000000080"; + + assertEquals(expectedLength + expectedOffset1 + expectedOffset2, encoded.substring(0, 64 * 3)); + } + + @Test + public void testUtf8StringLongChinese() { + // 14 characters * 3 bytes = 42 bytes. + String longString = "\u4f60\u597d\u4e16\u754c\u4f60\u597d\u4e16\u754c\u4f60\u597d\u4e16\u754c\u4f60\u597d"; + Utf8String s1 = new Utf8String(longString); + + DynamicArray array = new DynamicArray<>(Utf8String.class, Collections.singletonList(s1)); + String encoded = TypeEncoder.encode(array); + + String expectedLength = "0000000000000000000000000000000000000000000000000000000000000001"; + String expectedOffset1 = "0000000000000000000000000000000000000000000000000000000000000020"; + String expectedS1Length = "000000000000000000000000000000000000000000000000000000000000002a"; + + assertEquals(expectedLength + expectedOffset1 + expectedS1Length, encoded.substring(0, 64 * 3)); + } + + @Test + public void testUtf8String33Bytes() { + // 10 Chinese characters = 30 bytes. + 3 chars = 33 bytes. + String s33 = "\u4f60\u597d\u4e16\u754c\u4f60\u597d\u4e16\u754c\u4f60\u597d" + "aaa"; + Utf8String string33 = new Utf8String(s33); + assertEquals(33, s33.getBytes(java.nio.charset.StandardCharsets.UTF_8).length); + + assertEquals(96, string33.bytes32PaddedLength()); + } + + @Test + public void testUtf8StringEmpty() { + Utf8String string = new Utf8String(""); + assertEquals(32, string.bytes32PaddedLength()); + } + + @Test + public void testStaticUtf8StringArray() { + Utf8String s1 = new Utf8String("a"); + Utf8String s2 = new Utf8String("b"); + + org.web3j.abi.datatypes.generated.StaticArray2 array = + new org.web3j.abi.datatypes.generated.StaticArray2<>(Utf8String.class, s1, s2); + + String encoded = TypeEncoder.encode(array); + + String expectedOffset1 = "0000000000000000000000000000000000000000000000000000000000000040"; + String expectedOffset2 = "0000000000000000000000000000000000000000000000000000000000000080"; + + assertEquals(expectedOffset1 + expectedOffset2, encoded.substring(0, 64 * 2)); + } + + @Test + public void testStaticUtf8StringArrayInStruct() { + Utf8String s1 = new Utf8String("a"); + Utf8String s2 = new Utf8String("b"); + org.web3j.abi.datatypes.generated.StaticArray2 array = + new org.web3j.abi.datatypes.generated.StaticArray2<>(Utf8String.class, s1, s2); + + DynamicStruct struct = new DynamicStruct(array); + + String encoded = TypeEncoder.encode(struct); + + String expectedOffsetArray = "0000000000000000000000000000000000000000000000000000000000000020"; + assertEquals(expectedOffsetArray, encoded.substring(0, 64)); + } +} diff --git a/abi/src/test/java/org/web3j/abi/datatypes/ChineseUtf8StringTest.java b/abi/src/test/java/org/web3j/abi/datatypes/ChineseUtf8StringTest.java new file mode 100644 index 0000000000..c6db1aa3e4 --- /dev/null +++ b/abi/src/test/java/org/web3j/abi/datatypes/ChineseUtf8StringTest.java @@ -0,0 +1,42 @@ +package org.web3j.abi.datatypes; + +import org.junit.jupiter.api.Test; +import org.web3j.abi.TypeEncoder; + +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class ChineseUtf8StringTest { + + @Test + public void testChineseStringEncoding() { + // Chinese text: Zhong Guo Liu Li Fa Lang Cai Hua Niao Zi Ming Zhong + String chineseText = "\u4e2d\u56fd\u7409\u7483\u73d0\u7405\u5f69\u82b1\u9e1f\u81ea\u9e23\u949f"; + Utf8String utf8String = new Utf8String(chineseText); + + String encoded = TypeEncoder.encode(utf8String); + System.out.println("Encoded: " + encoded); + + byte[] bytes = chineseText.getBytes(StandardCharsets.UTF_8); + System.out.println("Byte length: " + bytes.length); + + // 12 characters * 3 bytes = 36 bytes. + assertEquals(36, bytes.length); + + // Check bytes32PaddedLength + // 32 (length) + 64 (padded data) = 96 + assertEquals(96, utf8String.bytes32PaddedLength()); + } + + @Test + public void testChineseStringInArray() { + // Chinese text: Zhong Guo + String chineseText = "\u4e2d\u56fd"; + Utf8String utf8String = new Utf8String(chineseText); + DynamicArray array = new DynamicArray<>(Utf8String.class, utf8String); + + String encoded = TypeEncoder.encode(array); + System.out.println("Array Encoded: " + encoded); + } +}