Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Change Log
# Change Log

All notable changes to this project will be documented in this file.
See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
Expand All @@ -10,6 +10,7 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline
* Fix Async executor lifecycle to support safe shutdown and reuse (#2244)
* Fix Async executor lifecycle to safely recreate executor after shutdown and prevent RejectedExecutionException (#2255)
* Replace raw usage of EthLog.LogResult with parameterized type to improve type safety (#2252)
* Fix incorrect UTF-8 string encoding and array handling for non-ASCII characters (#1841)

### Features

Expand Down
35 changes: 26 additions & 9 deletions abi/src/main/java/org/web3j/abi/TypeEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,26 @@ public class TypeEncoder {
private TypeEncoder() {}

static boolean isDynamic(Type parameter) {
return parameter instanceof DynamicBytes
if (parameter instanceof DynamicBytes
|| parameter instanceof Utf8String
|| parameter instanceof DynamicArray
|| (parameter instanceof StaticArray
&& DynamicStruct.class.isAssignableFrom(
((StaticArray) parameter).getComponentType()));
|| parameter instanceof DynamicArray) {
return true;
}

if (parameter instanceof StaticArray) {
StaticArray staticArray = (StaticArray) parameter;
Class componentType = staticArray.getComponentType();
return isDynamic(componentType);
}

return false;
}

/**
 * Reports whether a static array's component class is one of the dynamically sized ABI types.
 *
 * <p>The decision is made purely on the class: it is {@code true} when the class is, or
 * extends, {@code DynamicStruct}, {@code DynamicArray}, {@code Utf8String} or {@code
 * DynamicBytes}, and {@code false} for every other class.
 *
 * <p>NOTE(review): a component class that is itself a {@code StaticArray} always yields
 * {@code false} here, because its own element type cannot be seen from the class alone.
 * Confirm whether nested static arrays of dynamic elements need to be supported.
 *
 * @param componentType the element class of the array being inspected
 * @return {@code true} if {@code componentType} is assignable to one of the dynamic types
 */
private static boolean isDynamic(Class<?> componentType) {
    // Class<?> instead of the raw Class keeps call sites source-compatible while
    // removing the raw-type usage from the signature.
    return DynamicStruct.class.isAssignableFrom(componentType)
            || DynamicArray.class.isAssignableFrom(componentType)
            || Utf8String.class.isAssignableFrom(componentType)
            || DynamicBytes.class.isAssignableFrom(componentType);
}

@SuppressWarnings("unchecked")
Expand All @@ -73,9 +87,8 @@ public static String encode(Type parameter) {
} else if (parameter instanceof Utf8String) {
return encodeString((Utf8String) parameter);
} else if (parameter instanceof StaticArray) {
if (DynamicStruct.class.isAssignableFrom(
((StaticArray) parameter).getComponentType())) {
return encodeStaticArrayWithDynamicStruct((StaticArray) parameter);
if (isDynamic(((StaticArray) parameter).getComponentType())) {
return encodeStaticArrayWithDynamicValues((StaticArray) parameter);
} else {
return encodeArrayValues((StaticArray) parameter);
}
Expand Down Expand Up @@ -163,7 +176,7 @@ static String removePadding(String encodedValue, Type parameter) {
* @param
* @return
*/
private static <T extends Type> String encodeStaticArrayWithDynamicStruct(Array<T> value) {
private static <T extends Type> String encodeStaticArrayWithDynamicValues(Array<T> value) {
String valuesOffsets = encodeDynamicsTypesArraysOffsets(value);
String encodedValues = encodeArrayValues(value);

Expand Down Expand Up @@ -225,6 +238,10 @@ static String encodeBool(Bool value) {

/**
 * Encodes the byte content of an ABI bytes value.
 *
 * <p>Simply unwraps the value and delegates to {@link #encodeBytes(byte[])}.
 *
 * @param bytesType the bytes value whose content is encoded
 * @return the result of {@link #encodeBytes(byte[])} for the wrapped bytes
 */
static String encodeBytes(BytesType bytesType) {
    return encodeBytes(bytesType.getValue());
}

static String encodeBytes(byte[] value) {
int length = value.length;
int mod = length % MAX_BYTE_LENGTH;

Expand Down
13 changes: 9 additions & 4 deletions abi/src/main/java/org/web3j/abi/datatypes/Utf8String.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
*/
package org.web3j.abi.datatypes;

import java.nio.charset.StandardCharsets;

/** UTF-8 encoded string type. */
public class Utf8String implements Type<String> {

Expand All @@ -30,11 +32,14 @@ public Utf8String(String value) {
*/
@Override
public int bytes32PaddedLength() {
if (value.isEmpty()) {
return MAX_BYTE_LENGTH;
} else {
return 2 * MAX_BYTE_LENGTH;
byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
int len = bytes.length;
int mod = len % MAX_BYTE_LENGTH;
int padding = 0;
if (mod != 0) {
padding = MAX_BYTE_LENGTH - mod;
}
return MAX_BYTE_LENGTH + len + padding;
}

@Override
Expand Down
6 changes: 3 additions & 3 deletions abi/src/test/java/org/web3j/abi/TypeEncoderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1223,10 +1223,10 @@ public void testDynamicStringsArray() {
public void testDynamicUtf8StringsArray() {
// Test for issue #1741: non-ASCII characters (Chinese, Korean, etc.) encoding
DynamicArray<Utf8String> array =
new DynamicArray<>(Utf8String.class, new Utf8String("你好"), new Utf8String("世界"));
new DynamicArray<>(Utf8String.class, new Utf8String("\u4f60\u597d"), new Utf8String("\u4e16\u754c"));

// "你好" UTF-8 = E4BDA0E5A5BD (6 bytes)
// "世界" UTF-8 = E4B896E7958C (6 bytes)
// "\u4f60\u597d" UTF-8 = E4BDA0E5A5BD (6 bytes)
// "\u4e16\u754c" UTF-8 = E4B896E7958C (6 bytes)
assertEquals(
("0000000000000000000000000000000000000000000000000000000000000002"
+ "0000000000000000000000000000000000000000000000000000000000000040"
Expand Down
98 changes: 98 additions & 0 deletions abi/src/test/java/org/web3j/abi/Utf8StringTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package org.web3j.abi;

import java.math.BigInteger;
import java.util.Arrays;
import java.util.Collections;

import org.junit.jupiter.api.Test;

import org.web3j.abi.datatypes.DynamicArray;
import org.web3j.abi.datatypes.DynamicStruct;
import org.web3j.abi.datatypes.Uint;
import org.web3j.abi.datatypes.Utf8String;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Checks the head words and padded lengths produced when encoding {@link Utf8String} values,
 * on their own and as elements of dynamic arrays, static arrays and structs.
 */
public class Utf8StringTest {

    /** Number of hex characters used to print one 32-byte ABI word. */
    private static final int WORD_HEX_CHARS = 64;

    /** Renders {@code value} as one zero-padded, lowercase-hex 32-byte word. */
    private static String word(long value) {
        return String.format(java.util.Locale.ROOT, "%064x", value);
    }

    @Test
    public void testUtf8StringArrayChinese() {
        // Two strings of two Chinese characters each; every character is 3 bytes in UTF-8,
        // so each string occupies 6 bytes.
        Utf8String first = new Utf8String("\u4f60\u597d");
        Utf8String second = new Utf8String("\u4e16\u754c");
        DynamicArray<Utf8String> strings =
                new DynamicArray<>(Utf8String.class, Arrays.asList(first, second));

        String head = TypeEncoder.encode(strings).substring(0, 3 * WORD_HEX_CHARS);

        // Array length, then the offsets of the first and second element.
        assertEquals(word(2) + word(0x40) + word(0x80), head);
    }

    @Test
    public void testUtf8StringLongChinese() {
        // 14 characters * 3 bytes = 42 bytes.
        Utf8String longValue =
                new Utf8String(
                        "\u4f60\u597d\u4e16\u754c\u4f60\u597d\u4e16\u754c\u4f60\u597d\u4e16\u754c\u4f60\u597d");
        DynamicArray<Utf8String> strings =
                new DynamicArray<>(Utf8String.class, Collections.singletonList(longValue));

        String head = TypeEncoder.encode(strings).substring(0, 3 * WORD_HEX_CHARS);

        // Array length, offset of the only element, then that element's byte length (42).
        assertEquals(word(1) + word(0x20) + word(0x2a), head);
    }

    @Test
    public void testUtf8String33Bytes() {
        // 10 Chinese characters = 30 bytes, plus 3 ASCII characters = 33 bytes.
        String text = "\u4f60\u597d\u4e16\u754c\u4f60\u597d\u4e16\u754c\u4f60\u597d" + "aaa";
        Utf8String wrapped = new Utf8String(text);

        assertEquals(33, text.getBytes(java.nio.charset.StandardCharsets.UTF_8).length);
        assertEquals(96, wrapped.bytes32PaddedLength());
    }

    @Test
    public void testUtf8StringEmpty() {
        assertEquals(32, new Utf8String("").bytes32PaddedLength());
    }

    @Test
    public void testStaticUtf8StringArray() {
        org.web3j.abi.datatypes.generated.StaticArray2<Utf8String> pair =
                new org.web3j.abi.datatypes.generated.StaticArray2<>(
                        Utf8String.class, new Utf8String("a"), new Utf8String("b"));

        String head = TypeEncoder.encode(pair).substring(0, 2 * WORD_HEX_CHARS);

        // Offsets of the first and second element.
        assertEquals(word(0x40) + word(0x80), head);
    }

    @Test
    public void testStaticUtf8StringArrayInStruct() {
        org.web3j.abi.datatypes.generated.StaticArray2<Utf8String> pair =
                new org.web3j.abi.datatypes.generated.StaticArray2<>(
                        Utf8String.class, new Utf8String("a"), new Utf8String("b"));
        DynamicStruct wrapper = new DynamicStruct(pair);

        String head = TypeEncoder.encode(wrapper).substring(0, WORD_HEX_CHARS);

        // The struct's single member is referenced through an offset word.
        assertEquals(word(0x20), head);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.web3j.abi.datatypes;

import org.junit.jupiter.api.Test;
import org.web3j.abi.TypeEncoder;

import java.nio.charset.StandardCharsets;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Verifies the encoding of {@link Utf8String} values made of multi-byte Chinese characters,
 * both as a standalone value and as the element of a {@link DynamicArray}.
 */
public class ChineseUtf8StringTest {

    @Test
    public void testChineseStringEncoding() {
        // Chinese text: Zhong Guo Liu Li Fa Lang Cai Hua Niao Zi Ming Zhong
        String chineseText = "\u4e2d\u56fd\u7409\u7483\u73d0\u7405\u5f69\u82b1\u9e1f\u81ea\u9e23\u949f";
        Utf8String utf8String = new Utf8String(chineseText);

        // 12 characters * 3 bytes = 36 bytes.
        byte[] bytes = chineseText.getBytes(StandardCharsets.UTF_8);
        assertEquals(36, bytes.length);

        // Check bytes32PaddedLength
        // 32 (length) + 64 (padded data) = 96
        assertEquals(96, utf8String.bytes32PaddedLength());

        // The encoded form is hex, two characters per byte: one length word holding the
        // UTF-8 byte count followed by the data padded to two words.
        String encoded = TypeEncoder.encode(utf8String);
        assertEquals(2 * 96, encoded.length());
        assertEquals(36, Integer.parseInt(encoded.substring(0, 64), 16));
    }

    @Test
    public void testChineseStringInArray() {
        // Chinese text: Zhong Guo
        String chineseText = "\u4e2d\u56fd";
        Utf8String utf8String = new Utf8String(chineseText);
        DynamicArray<Utf8String> array = new DynamicArray<>(Utf8String.class, utf8String);

        String encoded = TypeEncoder.encode(array);

        // Four 32-byte words: array length, offset of the only element, the element's
        // byte length, and the element's data padded to one word.
        assertEquals(4 * 64, encoded.length());
        assertEquals(1, Integer.parseInt(encoded.substring(0, 64), 16));
        assertEquals(0x20, Integer.parseInt(encoded.substring(64, 128), 16));
        assertEquals(6, Integer.parseInt(encoded.substring(128, 192), 16));

        // The two characters are E4 B8 AD and E5 9B BD in UTF-8; the rest is zero padding.
        assertEquals("e4b8ade59bbd", encoded.substring(192, 204));
        assertEquals("", encoded.substring(204).replace("0", ""));
    }
}
Loading