diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp index 4e636a0c653..aca95b2cc0a 100644 --- a/src/backend/gpopt/gpdbwrappers.cpp +++ b/src/backend/gpopt/gpdbwrappers.cpp @@ -1613,7 +1613,24 @@ gpdb::IsOpHashJoinable(Oid opno, Oid inputtype) GP_WRAP_START; { /* catalog tables: pg_operator */ - return op_hashjoinable(opno, inputtype); + if (!op_hashjoinable(opno, inputtype)) + return false; + + /* + * Even if oprcanhash is true, we need to verify that hash functions + * actually exist for this operator. This is because oprcanhash can be + * set to true while the operator is only registered in a btree opfamily + * and not in a hash opfamily, which would cause execution-time errors + * when trying to build hash tables. + * + * See get_op_hash_functions() in lsyscache.c which requires operators + * to be registered in a hash opfamily (amopmethod == HASH_AM_OID). + */ + RegProcedure hash_proc; + if (!get_op_hash_functions(opno, &hash_proc, NULL)) + return false; + + return true; } GP_WRAP_END; return false; diff --git a/src/test/regress/expected/gp_hashagg.out b/src/test/regress/expected/gp_hashagg.out index 210ec705398..513aeb09966 100644 --- a/src/test/regress/expected/gp_hashagg.out +++ b/src/test/regress/expected/gp_hashagg.out @@ -274,3 +274,169 @@ EXPLAIN (COSTS OFF, VERBOSE) :qry; (1 row) +drop schema if exists tinc_base_types cascade; +NOTICE: schema "tinc_base_types" does not exist, skipping +create schema tinc_base_types; +set search_path=tinc_base_types; +CREATE FUNCTION int42_in(cstring) + RETURNS int42 + AS 'int4in' + LANGUAGE internal IMMUTABLE STRICT; +NOTICE: type "int42" is not yet defined +DETAIL: Creating a shell type definition. +CREATE FUNCTION int42_out(int42) + RETURNS cstring + AS 'int4out' + LANGUAGE internal IMMUTABLE STRICT; +NOTICE: argument type int42 is only a shell +CREATE TYPE int42 ( + internallength = 4, + input = int42_in, + output = int42_out, + alignment = int4, + default = 42, + passedbyvalue +); +CREATE TABLE aTABLE(k int, a int42); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'k' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO aTABLE VALUES(1, '21'); +INSERT INTO aTABLE VALUES(2, '22'); +INSERT INTO aTABLE VALUES(3, '23'); +INSERT INTO aTABLE VALUES(4, '24'); +SELECT * FROM aTABLE; + k | a +---+---- + 2 | 22 + 3 | 23 + 4 | 24 + 1 | 21 +(4 rows) + +CREATE OR REPLACE FUNCTION my_lt(int42, int42) +RETURNS boolean AS +'int4lt' +language internal; +CREATE OR REPLACE FUNCTION my_lteq(int42, int42) +RETURNS boolean AS +'int4le' +language internal; +CREATE OR REPLACE FUNCTION my_gt(int42, int42) +RETURNS boolean AS +'int4gt' +language internal; +CREATE OR REPLACE FUNCTION my_gteq(int42, int42) +RETURNS boolean AS +'int4ge' +language internal; +CREATE OR REPLACE FUNCTION my_eq(int42, int42) +RETURNS boolean AS +'int4eq' +language internal; +CREATE OPERATOR < ( + leftarg = int42, + rightarg = int42, + procedure = my_lt, + hashes +); +CREATE OPERATOR <= ( + leftarg = int42, + rightarg = int42, + procedure = my_lteq, + hashes +); +CREATE OPERATOR = ( + leftarg = int42, + rightarg = int42, + procedure = my_eq, + hashes +); +CREATE OPERATOR > ( + leftarg = int42, + rightarg = int42, + procedure = my_gt, + hashes +); +CREATE OPERATOR >= ( + leftarg = int42, + rightarg = int42, + procedure = my_gteq, + hashes +); +CREATE OR REPLACE FUNCTION my_comp_func(int42, int42) +RETURNS int AS +$$ +BEGIN +IF $1 < $2 THEN + return -1; +ELSIF $1 = $2 THEN + return 0; +ELSE + return 1; +END IF; +END; +$$ LANGUAGE PLPGSQL; +CREATE OPERATOR CLASS my_operator_class_comp_type +DEFAULT +FOR TYPE int42 USING btree +AS +OPERATOR 1 <, +OPERATOR 2 <=, +OPERATOR 3 =, +OPERATOR 4 >=, +OPERATOR 5 >, +FUNCTION 1 my_comp_func(int42, int42); +CREATE OPERATOR CLASS my_operator_class_comp_type_hash_lt +DEFAULT +FOR TYPE int42 USING hash +AS +OPERATOR 1 <, +FUNCTION 1 my_comp_func(int42, int42); +ERROR: hash function 1 must have one argument +CREATE OPERATOR CLASS my_operator_class_comp_type_hash_eq +FOR TYPE int42 USING hash +AS +OPERATOR 1 =, +FUNCTION 1 my_comp_func(int42, int42); +ERROR: hash function 1 must have one argument +select * from atable group by a,k; + k | a +---+---- + 1 | 21 + 2 | 22 + 3 | 23 + 4 | 24 +(4 rows) + +-- Before fix: This would fail at runtime with: +-- ERROR: could not find hash function for hash operator XXXXX (execGrouping.c:118) +-- After fix: ORCA should detect missing hash function at planning time +-- and either use GroupAgg or report a clear planning error +SET optimizer_enable_groupagg=off; +select * from atable group by a,k; + k | a +---+---- + 1 | 21 + 2 | 22 + 3 | 23 + 4 | 24 +(4 rows) + +drop schema if exists tinc_base_types cascade; +NOTICE: drop cascades to 16 other objects +DETAIL: drop cascades to function int42_out(int42) +drop cascades to type int42 +drop cascades to function int42_in(cstring) +drop cascades to table atable +drop cascades to function my_lt(int42,int42) +drop cascades to function my_lteq(int42,int42) +drop cascades to function my_gt(int42,int42) +drop cascades to function my_gteq(int42,int42) +drop cascades to function my_eq(int42,int42) +drop cascades to operator <(int42,int42) +drop cascades to operator <=(int42,int42) +drop cascades to operator =(int42,int42) +drop cascades to operator >(int42,int42) +drop cascades to operator >=(int42,int42) +drop cascades to function my_comp_func(int42,int42) +drop cascades to operator family my_operator_class_comp_type for access method btree diff --git a/src/test/regress/sql/gp_hashagg.sql b/src/test/regress/sql/gp_hashagg.sql index 36000de8af8..c9b05c1936a 100644 --- a/src/test/regress/sql/gp_hashagg.sql +++ b/src/test/regress/sql/gp_hashagg.sql @@ -140,3 +140,141 @@ HAVING max(c) = '31' $$ AS qry \gset EXPLAIN (COSTS OFF, VERBOSE) :qry; :qry; + +drop schema if exists tinc_base_types cascade; + +create schema tinc_base_types; +set search_path=tinc_base_types; + + +CREATE FUNCTION int42_in(cstring) + RETURNS int42 + AS 'int4in' + LANGUAGE internal IMMUTABLE STRICT; +CREATE FUNCTION int42_out(int42) + RETURNS cstring + AS 'int4out' + LANGUAGE internal IMMUTABLE STRICT; +CREATE TYPE int42 ( + internallength = 4, + input = int42_in, + output = int42_out, + alignment = int4, + default = 42, + passedbyvalue +); + +CREATE TABLE aTABLE(k int, a int42); +INSERT INTO aTABLE VALUES(1, '21'); +INSERT INTO aTABLE VALUES(2, '22'); +INSERT INTO aTABLE VALUES(3, '23'); +INSERT INTO aTABLE VALUES(4, '24'); +SELECT * FROM aTABLE; + +CREATE OR REPLACE FUNCTION my_lt(int42, int42) +RETURNS boolean AS +'int4lt' +language internal; + +CREATE OR REPLACE FUNCTION my_lteq(int42, int42) +RETURNS boolean AS +'int4le' +language internal; + +CREATE OR REPLACE FUNCTION my_gt(int42, int42) +RETURNS boolean AS +'int4gt' +language internal; + +CREATE OR REPLACE FUNCTION my_gteq(int42, int42) +RETURNS boolean AS +'int4ge' +language internal; + +CREATE OR REPLACE FUNCTION my_eq(int42, int42) +RETURNS boolean AS +'int4eq' +language internal; + +CREATE OPERATOR < ( + leftarg = int42, + rightarg = int42, + procedure = my_lt, + hashes +); + +CREATE OPERATOR <= ( + leftarg = int42, + rightarg = int42, + procedure = my_lteq, + hashes +); + +CREATE OPERATOR = ( + leftarg = int42, + rightarg = int42, + procedure = my_eq, + hashes +); + +CREATE OPERATOR > ( + leftarg = int42, + rightarg = int42, + procedure = my_gt, + hashes +); + +CREATE OPERATOR >= ( + leftarg = int42, + rightarg = int42, + procedure = my_gteq, + hashes +); + +CREATE OR REPLACE FUNCTION my_comp_func(int42, int42) +RETURNS int AS +$$ +BEGIN +IF $1 < $2 THEN + return -1; +ELSIF $1 = $2 THEN + return 0; +ELSE + return 1; +END IF; +END; +$$ LANGUAGE PLPGSQL; + +CREATE OPERATOR CLASS my_operator_class_comp_type +DEFAULT +FOR TYPE int42 USING btree +AS +OPERATOR 1 <, +OPERATOR 2 <=, +OPERATOR 3 =, +OPERATOR 4 >=, +OPERATOR 5 >, +FUNCTION 1 my_comp_func(int42, int42); + +CREATE OPERATOR CLASS my_operator_class_comp_type_hash_lt +DEFAULT +FOR TYPE int42 USING hash +AS +OPERATOR 1 <, +FUNCTION 1 my_comp_func(int42, int42); + +CREATE OPERATOR CLASS my_operator_class_comp_type_hash_eq +FOR TYPE int42 USING hash +AS +OPERATOR 1 =, +FUNCTION 1 my_comp_func(int42, int42); + +select * from atable group by a,k; +-- Before fix: This would fail at runtime with: +-- ERROR: could not find hash function for hash operator XXXXX (execGrouping.c:118) +-- After fix: ORCA should detect missing hash function at planning time +-- and either use GroupAgg or report a clear planning error +SET optimizer_enable_groupagg=off; +select * from atable group by a,k; + +drop schema if exists tinc_base_types cascade;