From 59a42e637a80fb6ea235c5368f1a45ee020c3e88 Mon Sep 17 00:00:00 2001 From: ChangRui-Ryan Date: Thu, 4 Dec 2025 10:57:09 +0800 Subject: [PATCH] Fix crash by using alignedAlloc rather than alloc --- .../AggregateFunctionGroupArray.h | 6 +- dbms/src/Columns/ColumnAggregateFunction.cpp | 6 +- .../DataTypes/DataTypeAggregateFunction.cpp | 19 ++-- dbms/src/Interpreters/AggregationCommon.h | 94 ------------------- dbms/src/Interpreters/JoinPartition.cpp | 4 +- dbms/src/Interpreters/SpecializedAggregator.h | 3 +- 6 files changed, 19 insertions(+), 113 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h index c80babc0502..67e5ed1f99c 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -182,7 +182,7 @@ struct GroupArrayListNodeBase UInt64 size; readVarUInt(size, buf); - Node * node = reinterpret_cast(arena->alloc(sizeof(Node) + size)); + Node * node = reinterpret_cast(arena->alignedAlloc(sizeof(Node) + size, alignof(Node))); node->size = size; buf.read(node->data(), size); return node; @@ -198,7 +198,7 @@ struct GroupArrayListNodeString : public GroupArrayListNodeBase(column).getDataAt(row_num); - Node * node = reinterpret_cast(arena->alloc(sizeof(Node) + string.size)); + Node * node = reinterpret_cast(arena->alignedAlloc(sizeof(Node) + string.size, alignof(Node))); node->next = nullptr; node->size = string.size; memcpy(node->data(), string.data, string.size); @@ -215,7 +215,7 @@ struct GroupArrayListNodeGeneral : public GroupArrayListNodeBasealloc(sizeof(Node)); + const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node)); StringRef value = column.serializeValueIntoArena(row_num, *arena, begin); Node * node = reinterpret_cast(const_cast(begin)); diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 3d14252f8c7..b126d85b271 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -297,7 +297,7 @@ void ColumnAggregateFunction::insert(const Field & x) Arena & arena = createOrGetArena(); - getData().push_back(arena.alloc(function->sizeOfData())); + getData().push_back(arena.alignedAlloc(function->sizeOfData(), function->alignOfData())); function->create(getData().back()); ReadBufferFromString read_buffer(x.get()); function->deserialize(getData().back(), read_buffer, &arena); @@ -309,7 +309,7 @@ void ColumnAggregateFunction::insertDefault() Arena & arena = createOrGetArena(); - getData().push_back(arena.alloc(function->sizeOfData())); + getData().push_back(arena.alignedAlloc(function->sizeOfData(), function->alignOfData())); function->create(getData().back()); } @@ -337,7 +337,7 @@ const char * ColumnAggregateFunction::deserializeAndInsertFromArena( */ Arena & dst_arena = createOrGetArena(); - getData().push_back(dst_arena.alloc(function->sizeOfData())); + getData().push_back(dst_arena.alignedAlloc(function->sizeOfData(), function->alignOfData())); function->create(getData().back()); /** We will read from src_arena. diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 98c578c5c78..53695bb25b9 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -63,7 +63,7 @@ std::string DataTypeAggregateFunction::getName() const void DataTypeAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr) const { - const String & s = get(field); + const auto & s = get(field); writeVarUInt(s.size(), ostr); writeString(s, ostr); } @@ -73,7 +73,7 @@ void DataTypeAggregateFunction::deserializeBinary(Field & field, ReadBuffer & is UInt64 size; readVarUInt(size, istr); field = String(); - String & s = get(field); + auto & s = get(field); s.resize(size); istr.readStrict(&s[0], size); } @@ -85,11 +85,11 @@ void DataTypeAggregateFunction::serializeBinary(const IColumn & column, size_t r void DataTypeAggregateFunction::deserializeBinary(IColumn & column, ReadBuffer & istr) const { - ColumnAggregateFunction & column_concrete = static_cast(column); + auto & column_concrete = static_cast(column); Arena & arena = column_concrete.createOrGetArena(); size_t size_of_state = function->sizeOfData(); - AggregateDataPtr place = arena.alloc(size_of_state); + AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData()); function->create(place); try @@ -143,8 +143,7 @@ void DataTypeAggregateFunction::deserializeBinaryBulk( { if (istr.eof()) break; - - AggregateDataPtr place = arena.alloc(size_of_state); + AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData()); function->create(place); @@ -171,11 +170,11 @@ static String serializeToString(const AggregateFunctionPtr & function, const ICo static void deserializeFromString(const AggregateFunctionPtr & function, IColumn & column, const String & s) { - ColumnAggregateFunction & column_concrete = static_cast(column); + auto & column_concrete = static_cast(column); Arena & arena = column_concrete.createOrGetArena(); size_t size_of_state = function->sizeOfData(); - AggregateDataPtr place = arena.alloc(size_of_state); + AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData()); function->create(place); @@ -317,7 +316,7 @@ static DataTypePtr create(const ASTPtr & arguments) "name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (const ASTFunction * parametric = typeid_cast(arguments->children[0].get())) + if (const auto * parametric = typeid_cast(arguments->children[0].get())) { if (parametric->parameters) throw Exception("Unexpected level of parameters to aggregate function", ErrorCodes::SYNTAX_ERROR); @@ -328,7 +327,7 @@ static DataTypePtr create(const ASTPtr & arguments) for (size_t i = 0; i < parameters.size(); ++i) { - const ASTLiteral * lit = typeid_cast(parameters[i].get()); + const auto * lit = typeid_cast(parameters[i].get()); if (!lit) throw Exception( "Parameters to aggregate functions must be literals", diff --git a/dbms/src/Interpreters/AggregationCommon.h b/dbms/src/Interpreters/AggregationCommon.h index 039c4564baa..e090cc5bc21 100644 --- a/dbms/src/Interpreters/AggregationCommon.h +++ b/dbms/src/Interpreters/AggregationCommon.h @@ -235,100 +235,6 @@ static inline UInt128 ALWAYS_INLINE hash128( return key; } - -/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. -static inline StringRef * ALWAYS_INLINE placeKeysInPool(size_t keys_size, StringRefs & keys, Arena & pool) -{ - for (size_t j = 0; j < keys_size; ++j) - { - char * place = pool.alloc(keys[j].size); - memcpy(place, keys[j].data, keys[j].size); /// TODO padding in Arena and memcpySmall - keys[j].data = place; - } - - /// Place the StringRefs on the newly copied keys in the pool. - char * res = pool.alloc(keys_size * sizeof(StringRef)); - memcpy(res, keys.data(), keys_size * sizeof(StringRef)); - - return reinterpret_cast(res); -} - -/* -/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. -static inline StringRef * ALWAYS_INLINE extractKeysAndPlaceInPool( - size_t i, - size_t keys_size, - const ColumnRawPtrs & key_columns, - StringRefs & keys, - Arena & pool) -{ - for (size_t j = 0; j < keys_size; ++j) - { - keys[j] = key_columns[j]->getDataAtWithTerminatingZero(i); - char * place = pool.alloc(keys[j].size); - memcpy(place, keys[j].data, keys[j].size); - keys[j].data = place; - } - - /// Place the StringRefs on the newly copied keys in the pool. - char * res = pool.alloc(keys_size * sizeof(StringRef)); - memcpy(res, keys.data(), keys_size * sizeof(StringRef)); - - return reinterpret_cast(res); -} - - -/// Copy the specified keys to a continuous memory chunk of a pool. -/// Subsequently append StringRef objects referring to each key. -/// -/// [key1][key2]...[keyN][ref1][ref2]...[refN] -/// ^ ^ : | | -/// +-----|--------:-----+ | -/// : +--------:-----------+ -/// : : -/// <--------------> -/// (1) -/// -/// Return a StringRef object, referring to the area (1) of the memory -/// chunk that contains the keys. In other words, we ignore their StringRefs. -inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous( - size_t i, - size_t keys_size, - const ColumnRawPtrs & key_columns, - StringRefs & keys, - const TiDB::TiDBCollators & collators, - std::vector & sort_key_containers, - Arena & pool) -{ - size_t sum_keys_size = 0; - for (size_t j = 0; j < keys_size; ++j) - { - keys[j] = key_columns[j]->getDataAtWithTerminatingZero(i); - if (!collators.empty() && collators[j] != nullptr) - { - // todo check if need to handle the terminating zero - keys[j] = collators[j]->sortKey(keys[j].data, keys[j].size - 1, sort_key_containers[j]); - } - sum_keys_size += keys[j].size; - } - - char * res = pool.alloc(sum_keys_size + keys_size * sizeof(StringRef)); - char * place = res; - - for (size_t j = 0; j < keys_size; ++j) - { - memcpy(place, keys[j].data, keys[j].size); - keys[j].data = place; - place += keys[j].size; - } - - /// Place the StringRefs on the newly copied keys in the pool. - memcpy(place, keys.data(), keys_size * sizeof(StringRef)); - - return {res, sum_keys_size}; -} -*/ - /** Serialize keys into a continuous chunk of memory. */ static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( diff --git a/dbms/src/Interpreters/JoinPartition.cpp b/dbms/src/Interpreters/JoinPartition.cpp index e0c554a58ff..af113fa3cb3 100644 --- a/dbms/src/Interpreters/JoinPartition.cpp +++ b/dbms/src/Interpreters/JoinPartition.cpp @@ -66,7 +66,7 @@ void RowsNotInsertToMap::insertRow(Block * stored_block, size_t index, bool need } else { - auto * elem = reinterpret_cast(pool.alloc(sizeof(RowRefList))); + auto * elem = reinterpret_cast(pool.alignedAlloc(sizeof(RowRefList), alignof(RowRefList))); new (elem) RowRefList(stored_block, index); insertRowToList(&head, elem); } @@ -496,7 +496,7 @@ struct Inserter * We will insert each time the element into the second place. * That is, the former second element, if it was, will be the third, and so on. */ - auto elem = reinterpret_cast(pool.alloc(sizeof(MappedType))); + auto elem = reinterpret_cast(pool.alignedAlloc(sizeof(MappedType), alignof(MappedType))); new (elem) typename Map::mapped_type(stored_block, i); insertRowToList(&emplace_result.getMapped(), elem); } diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h index 29455d95d79..8e191926373 100644 --- a/dbms/src/Interpreters/SpecializedAggregator.h +++ b/dbms/src/Interpreters/SpecializedAggregator.h @@ -224,7 +224,8 @@ void NO_INLINE Aggregator::executeSpecializedCase( method.onNewKey(*it, params.keys_size, keys, *aggregates_pool); - AggregateDataPtr place = aggregates_pool->alloc(total_size_of_aggregate_states); + AggregateDataPtr place + = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); AggregateFunctionsList::forEach( AggregateFunctionsCreator(aggregate_functions, offsets_of_aggregate_states, place));