From f989449293fce731b5a0e12793cbe97fea4a5a7e Mon Sep 17 00:00:00 2001 From: rturrado Date: Sat, 9 May 2026 19:47:35 +0200 Subject: [PATCH 01/15] Implement parse for `jeff.for` SCF op Implement ForOp::parse based on upstream scf::ForOp::parse, including ensureTerminator handling for the implicit `jeff.yield`. Add `SingleBlockImplicitTerminator<"jeff::YieldOp">` to all four SCF ops (for, while, doWhile, switch) so the printer's elision of empty yields round-trips correctly through parse. Make the induction-variable type annotation required (`: T`) at parse time rather than defaulting to index, which jeff does not accept, and simplify the matching print branch. Add unittests/IR with string-based parse tests for ForOp covering valid forms, parse-level errors, verify-level errors, and a print/parse round-trip idempotency check. Signed-off-by: rturrado --- include/jeff/IR/JeffOps.td | 8 +- lib/IR/JeffOps.cpp | 89 ++++++++++- unittests/CMakeLists.txt | 1 + unittests/IR/CMakeLists.txt | 20 +++ unittests/IR/test_parse_for_op.cpp | 233 +++++++++++++++++++++++++++++ 5 files changed, 339 insertions(+), 12 deletions(-) create mode 100644 unittests/IR/CMakeLists.txt create mode 100644 unittests/IR/test_parse_for_op.cpp diff --git a/include/jeff/IR/JeffOps.td b/include/jeff/IR/JeffOps.td index f5045d1..eb2693f 100644 --- a/include/jeff/IR/JeffOps.td +++ b/include/jeff/IR/JeffOps.td @@ -1882,7 +1882,7 @@ def FloatArrayCreateOp : FloatArray_Op<"float_array_create", []> { class SCF_Op traits = []> : Jeff_Op; -def SwitchOp : SCF_Op<"switch", []> { +def SwitchOp : SCF_Op<"switch", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { let summary = ""; let description = [{}]; @@ -1905,7 +1905,7 @@ def SwitchOp : SCF_Op<"switch", []> { let hasRegionVerifier = 1; } -def ForOp : SCF_Op<"for", []> { +def ForOp : SCF_Op<"for", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { let summary = ""; let description = [{}]; @@ -1927,7 +1927,7 @@ def ForOp : SCF_Op<"for", []> { let hasRegionVerifier = 1; } -def WhileOp : SCF_Op<"while", []> { +def WhileOp : SCF_Op<"while", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { let summary = ""; let description = [{}]; @@ -1949,7 +1949,7 @@ def WhileOp : SCF_Op<"while", []> { let hasRegionVerifier = 1; } -def DoWhileOp : SCF_Op<"doWhile", []> { +def DoWhileOp : SCF_Op<"doWhile", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { let summary = ""; let description = [{}]; diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 63ecbb8..488b2ae 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -239,11 +239,7 @@ void ForOp::print(OpAsmPrinter& p) { p << " -> (" << inValues.getTypes() << ')'; } - if (Type t = inductionVar.getType(); !t.isIndex()) { - p << " : " << t << ' '; - } else { - p << ' '; - } + p << " : " << inductionVar.getType() << ' '; p.printRegion(getRegion(), /*printEntryBlockArgs=*/false, @@ -251,9 +247,86 @@ void ForOp::print(OpAsmPrinter& p) { p.printOptionalAttrDict((*this)->getAttrs()); } -ParseResult ForOp::parse(OpAsmParser& /*parser*/, OperationState& /*result*/) { - // TODO: Implement this - llvm::report_fatal_error("ForOp::parse is not implemented yet"); +// Adapted from +// https://github.com/llvm/llvm-project/blob/a58268a77cdbfeb0b71f3e76d169ddd7edf7a4df/mlir/lib/Dialect/SCF/IR/SCF.cpp#L516 +ParseResult ForOp::parse(OpAsmParser& parser, OperationState& result) { + auto& builder = parser.getBuilder(); + Type type; + + OpAsmParser::Argument inductionVar; + OpAsmParser::UnresolvedOperand start; + OpAsmParser::UnresolvedOperand stop; + OpAsmParser::UnresolvedOperand step; + + // Parse the induction variable followed by '='. + if (parser.parseOperand(inductionVar.ssaName) || parser.parseEqual() || + // Parse loop bounds. + parser.parseOperand(start) || parser.parseKeyword("to") || parser.parseOperand(stop) || + parser.parseKeyword("step") || parser.parseOperand(step)) { + return failure(); + } + + // Parse the optional initial iteration arguments. + llvm::SmallVector regionArgs; + llvm::SmallVector operands; + regionArgs.push_back(inductionVar); + + bool hasArgs = succeeded(parser.parseOptionalKeyword("args")); + if (hasArgs) { + // Parse assignment list and result types list. + if (parser.parseAssignmentList(regionArgs, operands) || + parser.parseArrowTypeList(result.types)) { + return failure(); + } + } + + if (regionArgs.size() != result.types.size() + 1) { + return parser.emitError(parser.getNameLoc(), + "mismatch in number of loop-carried values and defined values"); + } + + // Parse type. + if (parser.parseColon() || parser.parseType(type)) { + return failure(); + } + + // Set block argument types so that they are known when parsing the region. + regionArgs.front().type = type; + for (auto [arg, argType] : llvm::zip_equal(llvm::drop_begin(regionArgs), result.types)) { + arg.type = argType; + } + + // Parse the body region. + Region* body = result.addRegion(); + if (parser.parseRegion(*body, regionArgs)) { + return failure(); + } + ForOp::ensureTerminator(*body, builder, result.location); + + // Resolve input operands. This should be done after parsing the region to + // catch invalid IR where operands were defined inside the region. + if (parser.resolveOperand(start, type, result.operands) || + parser.resolveOperand(stop, type, result.operands) || + parser.resolveOperand(step, type, result.operands)) { + return failure(); + } + if (hasArgs) { + for (auto argOperandType : + llvm::zip_equal(llvm::drop_begin(regionArgs), operands, result.types)) { + Type argOpType = std::get<2>(argOperandType); + std::get<0>(argOperandType).type = argOpType; + if (parser.resolveOperand(std::get<1>(argOperandType), argOpType, result.operands)) { + return failure(); + } + } + } + + // Parse the optional attribute list. + if (parser.parseOptionalAttrDict(result.attributes)) { + return failure(); + } + + return success(); } // Adapted from diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index b4f031f..b1986d0 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(Conversion) +add_subdirectory(IR) add_subdirectory(Translation) diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt new file mode 100644 index 0000000..b850485 --- /dev/null +++ b/unittests/IR/CMakeLists.txt @@ -0,0 +1,20 @@ +set(test_name "jeff-mlir-parse-test") + +if(NOT TARGET ${test_name}) + add_executable(${test_name} + test_parse_for_op.cpp + ) + + target_link_libraries(${test_name} PRIVATE + GTest::gtest_main + MLIRJeff + MLIRIR + MLIRParser + MLIRFuncDialect + MLIRSupport + ) + + set_target_properties(${test_name} PROPERTIES FOLDER unittests) + + gtest_discover_tests(${test_name} DISCOVERY_TIMEOUT 60) +endif() diff --git a/unittests/IR/test_parse_for_op.cpp b/unittests/IR/test_parse_for_op.cpp new file mode 100644 index 0000000..14d7998 --- /dev/null +++ b/unittests/IR/test_parse_for_op.cpp @@ -0,0 +1,233 @@ +#include "jeff/IR/JeffDialect.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace mlir; + +class ForOpTest : public ::testing::Test { + protected: + MLIRContext ctx; + ScopedDiagnosticHandler handler{&ctx, [](Diagnostic&) { return success(); }}; + + void SetUp() override { ctx.loadDialect(); } +}; + +// === Valid tests === + +TEST_F(ForOpTest, BasicFormI32) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32) { + jeff.for %i = %lo to %hi step %s : i32 {} + return + } + )MLIR", + &ctx); + ASSERT_TRUE(module); +} + +TEST_F(ForOpTest, BasicFormI64) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i64, %hi: i64, %s: i64) { + jeff.for %i = %lo to %hi step %s : i64 {} + return + } + )MLIR", + &ctx); + ASSERT_TRUE(module); +} + +// Body has no explicit `jeff.yield`. +// `SingleBlockImplicitTerminator` should auto-insert one +// via `ForOp::ensureTerminator`. +TEST_F(ForOpTest, ImplicitYield) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32) { + jeff.for %i = %lo to %hi step %s : i32 {} + return + } + )MLIR", + &ctx); + ASSERT_TRUE(module); +} + +TEST_F(ForOpTest, WithArgsSingle) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32, %init: i32) -> i32 { + %r = jeff.for %i = %lo to %hi step %s args(%acc = %init) -> (i32) : i32 { + jeff.yield %acc : i32 + } + return %r : i32 + } + )MLIR", + &ctx); + ASSERT_TRUE(module); +} + +TEST_F(ForOpTest, WithArgsMultiple) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32, %a: i32, %b: i64) -> (i32, i64) { + %r1, %r2 = jeff.for %i = %lo to %hi step %s args(%x = %a, %y = %b) -> (i32, i64) : i32 { + jeff.yield %x, %y : i32, i64 + } + return %r1, %r2 : i32, i64 + } + )MLIR", + &ctx); + ASSERT_TRUE(module); +} + +TEST_F(ForOpTest, Nested) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32) { + jeff.for %i = %lo to %hi step %s : i32 { + jeff.for %j = %lo to %hi step %s : i32 {} + } + return + } + )MLIR", + &ctx); + ASSERT_TRUE(module); +} + +// `ForOp::print` elides the empty yield, +// but bare `jeff.yield` is still valid input. +// It can come from `YieldOp`'s own printer, +// generic-form output (`-mlir-print-op-generic`), or +// handwritten MLIR. +// The parser must accept this shape. +TEST_F(ForOpTest, ExplicitEmptyYield) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32) { + jeff.for %i = %lo to %hi step %s : i32 { + jeff.yield + } + return + } + )MLIR", + &ctx); + ASSERT_TRUE(module); +} + +// Parse → print → parse → print, then assert idempotent. +// The first round normalizes (whitespace, SSA names). +// The second round must be a no-op. +TEST_F(ForOpTest, RoundTripIdempotent) { + const std::string src = R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32, %init: i32) -> i32 { + %r = jeff.for %i = %lo to %hi step %s args(%acc = %init) -> (i32) : i32 { + jeff.yield %acc : i32 + } + return %r : i32 + } + )MLIR"; + + const auto module1 = parseSourceString(src, &ctx); + ASSERT_TRUE(module1); + std::string printed1; + llvm::raw_string_ostream(printed1) << *module1; + + const auto module2 = parseSourceString(printed1, &ctx); + ASSERT_TRUE(module2); + std::string printed2; + llvm::raw_string_ostream(printed2) << *module2; + + EXPECT_EQ(printed1, printed2); +} + +// === Invalid syntax tests (parse-level) === + +TEST_F(ForOpTest, InvalidMissingEquals) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32) { + jeff.for %i %lo to %hi step %s : i32 {} + return + } + )MLIR", + &ctx); + ASSERT_FALSE(module); +} + +TEST_F(ForOpTest, InvalidMissingTo) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32) { + jeff.for %i = %lo %hi step %s : i32 {} + return + } + )MLIR", + &ctx); + ASSERT_FALSE(module); +} + +TEST_F(ForOpTest, InvalidMissingType) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32) { + jeff.for %i = %lo to %hi step %s {} + return + } + )MLIR", + &ctx); + ASSERT_FALSE(module); +} + +TEST_F(ForOpTest, InvalidArgsWithoutArrow) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32, %init: i32) { + jeff.for %i = %lo to %hi step %s args(%acc = %init) : i32 { + jeff.yield %acc : i32 + } + return + } + )MLIR", + &ctx); + ASSERT_FALSE(module); +} + +// 2 region args, 1 result type. +// Caught by the explicit size check in `parse`. +TEST_F(ForOpTest, InvalidArgCountMismatch) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i32, %x: i32, %y: i32) { + jeff.for %i = %lo to %hi step %s args(%a = %x, %b = %y) -> (i32) : i32 { + jeff.yield %a : i32 + } + return + } + )MLIR", + &ctx); + ASSERT_FALSE(module); +} + +// === Invalid semantics tests (verify-level) === + +// `index` is rejected by SupportedIntType. +TEST_F(ForOpTest, InvalidIndexType) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: index, %hi: index, %s: index) { + jeff.for %i = %lo to %hi step %s : index {} + return + } + )MLIR", + &ctx); + ASSERT_FALSE(module); +} + +// Floating-point types are rejected by SupportedIntType. +TEST_F(ForOpTest, InvalidFloatType) { + const auto module = parseSourceString(R"MLIR( + func.func @f(%lo: f32, %hi: f32, %s: f32) { + jeff.for %i = %lo to %hi step %s : f32 {} + return + } + )MLIR", + &ctx); + ASSERT_FALSE(module); +} From 07c9d89cca1ff51e679c929befb3522889c92abd Mon Sep 17 00:00:00 2001 From: rturrado Date: Tue, 12 May 2026 20:56:54 +0200 Subject: [PATCH 02/15] Implement parse for `jeff.while` SCF op Implement WhileOp::parse and rewrite WhileOp::print to use a new syntax `[: (types)] args(...) { cond } args(...) { body }`, where the leading function-type annotation is present only when the op has loop-carried values, and the per-region `args(...)` clauses are always emitted (possibly empty) so they serve as the structural separator between the two regions. The condition region's implicit `i1` yield type is no longer printed. Verify at parse time that both `args(...)` clauses bind the same operand names, reflecting that a Jeff while has a single op-level operand list shared between the condition and body regions. Add ODS descriptions for ForOp, WhileOp, and SwitchOp documenting their semantics, syntax, verifier invariants, and differences from the corresponding scf ops. Add a TODO in the stub SwitchOp::parse flagging that the verifier's `in_values.size() == out_values.size()` constraint should be relaxed when the parser is implemented; the constraint is a carry-over from loop iter-args and is not part of switch semantics. Add unittests/IR/test_parse_while_op.cpp covering valid forms and invalid forms. Refactor test_parse_for_op.cpp to a `const std::string src` style for consistency with the new while-op tests. Signed-off-by: rturrado --- include/jeff/IR/JeffOps.td | 233 ++++++++++++++++++++++++++- lib/IR/JeffOps.cpp | 114 +++++++++++-- unittests/IR/CMakeLists.txt | 1 + unittests/IR/test_parse_for_op.cpp | 100 +++++------- unittests/IR/test_parse_while_op.cpp | 206 +++++++++++++++++++++++ 5 files changed, 581 insertions(+), 73 deletions(-) create mode 100644 unittests/IR/test_parse_while_op.cpp diff --git a/include/jeff/IR/JeffOps.td b/include/jeff/IR/JeffOps.td index eb2693f..82cdc8c 100644 --- a/include/jeff/IR/JeffOps.td +++ b/include/jeff/IR/JeffOps.td @@ -1883,8 +1883,78 @@ def FloatArrayCreateOp : FloatArray_Op<"float_array_create", []> { class SCF_Op traits = []> : Jeff_Op; def SwitchOp : SCF_Op<"switch", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { - let summary = ""; - let description = [{}]; + let summary = "Multi-way branch on an integer selector"; + let description = [{ + The `jeff.switch` operation dispatches on an integer selector to one of + several branch regions or, if no case matches, an optional default + region. Branch labels are positional: the i-th branch region runs when + `selection` equals `i`. If `selection` does not match any branch label, + control flows to the `default` region. + + The selector operand `selection` must be a `SupportedIntType` (one of + `i1`, `i8`, `i16`, `i32`, `i64`). + + The op carries a list of in-values (`in_values`) shared across all + branches and the default region: each region declares its own local + block arguments, bound to the corresponding in-values on entry. Each + region must terminate with a `jeff.yield` whose operands provide the + op's results for the selected branch. + + The default region is optional. When present, it covers any selector + value not matched by an explicit branch. + + Unlike a `for` or `while` loop, a `switch` has no iteration semantics: + it dispatches once. The op's result count and types are therefore not + required to match the in-value count and types — branches may yield + any number of values of any type, provided every branch (and the + default, if present) yields the same shape. + + Schematic syntax: + ``` + jeff.switch ( $selection ) : $selection_type -> ( $result_types ) + ( case $i args ( $assignments ) { $branch_body } )* + ( default args ( $assignments ) { $default_body } )? + ``` + + Example: + ```mlir + %r1, %r2 = jeff.switch (%sel) : i32 -> (i32, i64) + case 0 args(%x = %a, %y = %b) { + jeff.yield %x, %y : i32, i64 + } + case 1 args(%x = %a, %y = %b) { + // ... compute %next_x, %next_y ... + jeff.yield %next_x, %next_y : i32, i64 + } + default args(%x = %a, %y = %b) { + jeff.yield %x, %y : i32, i64 + } + ``` + + Invariants: + - `selection` is a `SupportedIntType`. + - For each branch region and the default region (if present), each + block argument has the same type as the corresponding in-value. + - Every region's yielded values match the op's result types in + count and order. + + Underspecified: + - The semantics when `selection` does not match any branch label and + the default region is absent. + - Structurally, the op admits zero branches and an empty default; + such a form is legal but meaningless and is not constrained out. + - Whether branch labels form a contiguous range starting at 0: the + printer emits them positionally, so this is currently implicit in + the in-memory representation rather than enforced. + + Differences from `scf.index_switch`: + - Selector type is restricted to `SupportedIntType` (no `index`). + - Case labels are implicit and positional (`case 0`, `case 1`, ...) + rather than arbitrary integer literals. + - The default region is optional rather than mandatory. + - Supports loop-carried-style in-values shared across regions + (`scf.index_switch` has no iter-args mechanism). + }]; let arguments = (ins SupportedIntType:$selection, @@ -1906,8 +1976,86 @@ def SwitchOp : SCF_Op<"switch", [SingleBlockImplicitTerminator<"jeff::YieldOp">] } def ForOp : SCF_Op<"for", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { - let summary = ""; - let description = [{}]; + let summary = "Counted for loop with iteration arguments"; + let description = [{ + The `jeff.for` operation represents a counted loop over a half-open + integer range `[start, stop)`, advancing by `step` on each iteration. + The body executes once for every value the induction variable takes + while it remains less than `stop`, starting at `start`. + + The op takes three SSA integer operands — `start`, `stop`, and + `step`, each of which must be a `SupportedIntType` (one of `i1`, + `i8`, `i16`, `i32`, `i64`). The induction variable has the same + type as `start`. + + The op may optionally carry a list of loop-carried values + (`in_values`). These are bound to the corresponding body block + arguments on entry to each iteration and replaced at the end of + each iteration by the operands of the terminating `jeff.yield`. + The op's results (`out_values`) hold the final values of the + carried variables after the last iteration; their types must match + the types of `in_values`. + + The body is a single block whose arguments are the induction + variable followed by one argument per loop-carried value. It must + terminate with a `jeff.yield` whose operand types and count match + `in_values`. When no values are carried, the terminator is inserted + implicitly by the parser. + + Schematic syntax: + ``` + jeff.for $induction_variable = $start to $stop step $step + ( args ( $assignments ) -> ( $result_types ) )? + : $induction_variable_type + { $body } + ``` + + The body's block-argument types are not printed explicitly: they + are recovered from `-> ( $result_types )`, which by the verifier + must equal the iter-arg types. + + Example without carried values: + ```mlir + jeff.for %i = %lo to %hi step %s : i32 { + // body + } + ``` + + Example with carried values: + ```mlir + %r1, %r2 = jeff.for %i = %lo to %hi step %s + args(%x = %a, %y = %b) -> (i32, i64) : i32 { + // ... compute %next_x, %next_y ... + jeff.yield %next_x, %next_y : i32, i64 + } + ``` + + Invariants enforced by the verifier: + - `start`, `stop`, and `step` are each a `SupportedIntType`. + - The induction variable has the same type as `start`. + - The number of `in_values` equals the number of results. + - Each `in_value`, the corresponding body block argument, and the + corresponding result have matching types. + + Underspecified (not currently enforced): + - Whether `start`, `stop`, and `step` must share a common type + (only `start` is checked against the induction variable). + - Comparison signedness (the operand types are signless MLIR + integers). + - That `step` is non-zero and positive. + + Differences from `scf.for`: + - Bounds and step are restricted to `SupportedIntType` + (no `index`, no widths outside `{1, 8, 16, 32, 64}`). + - Uses the keyword `args` instead of `iter_args`, with a trailing + `-> ( result_types )` instead of the SCF arrow placement. + - The induction-variable type annotation `:` is always printed, + even when no values are carried. + - Does not implement `LoopLikeOpInterface`, + `RegionBranchOpInterface`, `RecursiveMemoryEffects`, or + `AutomaticAllocationScope`, so upstream loop-aware passes + (LICM, peeling, generic unrolling) do not apply. + }]; let arguments = (ins SupportedIntType:$start, @@ -1928,8 +2076,81 @@ def ForOp : SCF_Op<"for", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { } def WhileOp : SCF_Op<"while", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { - let summary = ""; - let description = [{}]; + let summary = "While loop with iteration arguments"; + let description = [{ + The `jeff.while` operation represents a loop that repeats while a + condition holds. It has two regions: + + - The `condition` region computes the loop's continuation predicate. + It must terminate with a `jeff.yield` whose only operand is an + `i1`. By the Jeff language spec, the condition region may not + modify quantum state (qubits or qubit registers); it may only + inspect classical metadata. + - The `body` region is executed when the condition yields `true`. + It must terminate with a `jeff.yield` whose operands become the + next iteration's input values, or — on the last iteration — the + op's results. + + The op carries a list of `in_values` shared between both regions. + Each region declares its own local block arguments, bound to those + `in_values` on entry. Because the condition region does not modify + state, the same iteration values are visible to the body region + (formally, they are forwarded from the condition's inputs). + + The types of `in_values`, the body region's yielded values, and the + op's results all match. There is a single signature for the loop. + + Syntax: + ``` + [ $results = ] jeff.while [ : ( $types ) ] + args ( $cond_assignments ) $cond_body + args ( $body_assignments ) $body_body + ``` + + The `: ( $types )` annotation is present iff `$types` is non-empty. + Both `args(...)` clauses are always emitted (possibly empty), + serving as a structural separator between the regions. The RHS of + both `args(...)` clauses must reference the same op operands in the + same order. + + Example with loop-carried values: + ```mlir + %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { + // condition + jeff.yield %pred : i1 + } args(%b_x = %a, %b_y = %b) { + // body + jeff.yield %next_x, %next_y : i32, i64 + } + ``` + + Example with no loop-carried values: + ```mlir + jeff.while args() { + jeff.yield %pred : i1 + } args() { + // body (no values to yield) + } + ``` + + Invariants enforced by the verifier: + - The number of `in_values` equals the number of results. + - Each `in_value`, the corresponding block argument of each region, + and the corresponding result have matching types. + - Both regions terminate with a `jeff.yield` (per + `SingleBlockImplicitTerminator`). + + Differences from `scf.while`: + - No `do` keyword between the regions; `args(...)` is the + structural separator. + - The condition region yields only `i1`; it does not also yield + values to the body region (in SCF, `scf.condition` yields an + `i1` plus the carried values). + - Both regions share a single op-level operand list; in SCF the + operand list feeds only the `before` region. + - Input types and result types are required to match (single + signature), reflecting the Jeff language design choice of T1 = T2. + }]; let arguments = (ins Variadic:$in_values diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 488b2ae..3edf9ba 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -191,7 +191,11 @@ void SwitchOp::print(OpAsmPrinter& p) { } ParseResult SwitchOp::parse(OpAsmParser& /*parser*/, OperationState& /*result*/) { - // TODO: Implement this + // TODO: Implement this. + // TODO: When implementing the parser, also relax the verifier — the + // current `in_values.size() == out_values.size()` requirement (enforced + // via `verifyRegionArgs`) is a carry-over from loop iter-args and is + // not part of the intended semantics for a switch. llvm::report_fatal_error("SwitchOp::parse is not implemented yet"); } @@ -271,9 +275,9 @@ ParseResult ForOp::parse(OpAsmParser& parser, OperationState& result) { llvm::SmallVector operands; regionArgs.push_back(inductionVar); + // Parse assignment list and result types list. bool hasArgs = succeeded(parser.parseOptionalKeyword("args")); if (hasArgs) { - // Parse assignment list and result types list. if (parser.parseAssignmentList(regionArgs, operands) || parser.parseArrowTypeList(result.types)) { return failure(); @@ -303,8 +307,7 @@ ParseResult ForOp::parse(OpAsmParser& parser, OperationState& result) { } ForOp::ensureTerminator(*body, builder, result.location); - // Resolve input operands. This should be done after parsing the region to - // catch invalid IR where operands were defined inside the region. + // Resolve input operands. if (parser.resolveOperand(start, type, result.operands) || parser.resolveOperand(stop, type, result.operands) || parser.resolveOperand(step, type, result.operands)) { @@ -357,29 +360,120 @@ LogicalResult ForOp::verifyRegions() { return success(); } +// Adapted from +// https://github.com/llvm/llvm-project/blob/a58268a77cdbfeb0b71f3e76d169ddd7edf7a4df/mlir/lib/Dialect/SCF/IR/SCF.cpp#L3343 void WhileOp::print(OpAsmPrinter& p) { auto inValues = getInValues(); + // Emit `: ( types )` only when there are in-values. + if (!inValues.empty()) { + p << " : (" << inValues.getTypes() << ")"; + } + + // Condition region: `args ( $assignments )` then the region. auto& condition = getCondition(); auto conditionArgs = condition.getArguments(); printInitializationList(p, conditionArgs, inValues, " args"); - p << " -> (" << IntegerType::get(getContext(), 1) << ") "; + p << ' '; p.printRegion(condition, /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/!inValues.empty()); + /*printBlockTerminators=*/true); + // Body region: `args ( $assignments )` then the region. auto& body = getBody(); auto bodyArgs = body.getArguments(); printInitializationList(p, bodyArgs, inValues, " args"); - p << " -> (" << inValues.getTypes() << ") "; + p << ' '; p.printRegion(body, /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/!inValues.empty()); p.printOptionalAttrDict((*this)->getAttrs()); } -ParseResult WhileOp::parse(OpAsmParser& /*parser*/, OperationState& /*result*/) { - // TODO: Implement this - llvm::report_fatal_error("WhileOp::parse is not implemented yet"); +// Adapted from +// https://github.com/llvm/llvm-project/blob/a58268a77cdbfeb0b71f3e76d169ddd7edf7a4df/mlir/lib/Dialect/SCF/IR/SCF.cpp#L3303 +ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { + auto& builder = parser.getBuilder(); + + Region* condition = result.addRegion(); + Region* body = result.addRegion(); + + // Parse optional `: ( types )`. Omitted when there are no in-values. + llvm::SmallVector types; + if (succeeded(parser.parseOptionalColon())) { + if (parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { + return parser.parseType(types.emplace_back()); + })) { + return failure(); + } + } + + // Parse the condition region's `args ( $assignments )`. + llvm::SmallVector condRegionArgs; + llvm::SmallVector condOperands; + if (parser.parseKeyword("args") || parser.parseAssignmentList(condRegionArgs, condOperands)) { + return failure(); + } + + if (condRegionArgs.size() != types.size()) { + return parser.emitError(parser.getNameLoc()) + << "expected " << types.size() << " condition arguments but got " + << condRegionArgs.size(); + } + + for (auto [arg, ty] : llvm::zip_equal(condRegionArgs, types)) { + arg.type = ty; + } + + if (parser.parseRegion(*condition, condRegionArgs)) { + return failure(); + } + WhileOp::ensureTerminator(*condition, builder, result.location); + + // Parse the body region's `args ( $assignments )`. + llvm::SmallVector bodyRegionArgs; + llvm::SmallVector bodyOperands; + if (parser.parseKeyword("args") || parser.parseAssignmentList(bodyRegionArgs, bodyOperands)) { + return failure(); + } + + if (bodyRegionArgs.size() != types.size()) { + return parser.emitError(parser.getNameLoc()) + << "expected " << types.size() << " body arguments but got " + << bodyRegionArgs.size(); + } + + // Both args clauses must reference the same operands. + // Sizes are already equal at this point (both equal types.size()), so just compare names. + for (auto [c, b] : llvm::zip_equal(condOperands, bodyOperands)) { + if (c.name != b.name || c.number != b.number) { + return parser.emitError(parser.getNameLoc()) + << "condition and body args must bind the same operands " + << "(got " << b.name << ", expected " << c.name << ")"; + } + } + + for (auto [arg, ty] : llvm::zip_equal(bodyRegionArgs, types)) { + arg.type = ty; + } + + if (parser.parseRegion(*body, bodyRegionArgs)) { + return failure(); + } + WhileOp::ensureTerminator(*body, builder, result.location); + + // Resolve operands (condition and body operand lists are equal). + if (parser.resolveOperands(bodyOperands, types, parser.getCurrentLocation(), result.operands)) { + return failure(); + } + + // Op results have the same types as in-values. + result.addTypes(types); + + if (parser.parseOptionalAttrDict(result.attributes)) { + return failure(); + } + + return success(); } LogicalResult WhileOp::verify() { diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt index b850485..7ea7ce7 100644 --- a/unittests/IR/CMakeLists.txt +++ b/unittests/IR/CMakeLists.txt @@ -3,6 +3,7 @@ set(test_name "jeff-mlir-parse-test") if(NOT TARGET ${test_name}) add_executable(${test_name} test_parse_for_op.cpp + test_parse_while_op.cpp ) target_link_libraries(${test_name} PRIVATE diff --git a/unittests/IR/test_parse_for_op.cpp b/unittests/IR/test_parse_for_op.cpp index 14d7998..17d5214 100644 --- a/unittests/IR/test_parse_for_op.cpp +++ b/unittests/IR/test_parse_for_op.cpp @@ -24,78 +24,72 @@ class ForOpTest : public ::testing::Test { // === Valid tests === TEST_F(ForOpTest, BasicFormI32) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32) { jeff.for %i = %lo to %hi step %s : i32 {} return } - )MLIR", - &ctx); - ASSERT_TRUE(module); + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); } TEST_F(ForOpTest, BasicFormI64) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i64, %hi: i64, %s: i64) { jeff.for %i = %lo to %hi step %s : i64 {} return } - )MLIR", - &ctx); - ASSERT_TRUE(module); + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); } // Body has no explicit `jeff.yield`. // `SingleBlockImplicitTerminator` should auto-insert one // via `ForOp::ensureTerminator`. TEST_F(ForOpTest, ImplicitYield) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32) { jeff.for %i = %lo to %hi step %s : i32 {} return } - )MLIR", - &ctx); - ASSERT_TRUE(module); + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); } TEST_F(ForOpTest, WithArgsSingle) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32, %init: i32) -> i32 { %r = jeff.for %i = %lo to %hi step %s args(%acc = %init) -> (i32) : i32 { jeff.yield %acc : i32 } return %r : i32 } - )MLIR", - &ctx); - ASSERT_TRUE(module); + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); } TEST_F(ForOpTest, WithArgsMultiple) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32, %a: i32, %b: i64) -> (i32, i64) { %r1, %r2 = jeff.for %i = %lo to %hi step %s args(%x = %a, %y = %b) -> (i32, i64) : i32 { jeff.yield %x, %y : i32, i64 } return %r1, %r2 : i32, i64 } - )MLIR", - &ctx); - ASSERT_TRUE(module); + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); } TEST_F(ForOpTest, Nested) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32) { jeff.for %i = %lo to %hi step %s : i32 { jeff.for %j = %lo to %hi step %s : i32 {} } return } - )MLIR", - &ctx); - ASSERT_TRUE(module); + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); } // `ForOp::print` elides the empty yield, @@ -105,16 +99,15 @@ TEST_F(ForOpTest, Nested) { // handwritten MLIR. // The parser must accept this shape. TEST_F(ForOpTest, ExplicitEmptyYield) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32) { jeff.for %i = %lo to %hi step %s : i32 { jeff.yield } return } - )MLIR", - &ctx); - ASSERT_TRUE(module); + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); } // Parse → print → parse → print, then assert idempotent. @@ -128,7 +121,7 @@ TEST_F(ForOpTest, RoundTripIdempotent) { } return %r : i32 } - )MLIR"; + )MLIR"; const auto module1 = parseSourceString(src, &ctx); ASSERT_TRUE(module1); @@ -146,88 +139,81 @@ TEST_F(ForOpTest, RoundTripIdempotent) { // === Invalid syntax tests (parse-level) === TEST_F(ForOpTest, InvalidMissingEquals) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32) { jeff.for %i %lo to %hi step %s : i32 {} return } - )MLIR", - &ctx); - ASSERT_FALSE(module); + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); } TEST_F(ForOpTest, InvalidMissingTo) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32) { jeff.for %i = %lo %hi step %s : i32 {} return } - )MLIR", - &ctx); - ASSERT_FALSE(module); + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); } TEST_F(ForOpTest, InvalidMissingType) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32) { jeff.for %i = %lo to %hi step %s {} return } - )MLIR", - &ctx); - ASSERT_FALSE(module); + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); } TEST_F(ForOpTest, InvalidArgsWithoutArrow) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32, %init: i32) { jeff.for %i = %lo to %hi step %s args(%acc = %init) : i32 { jeff.yield %acc : i32 } return } - )MLIR", - &ctx); - ASSERT_FALSE(module); + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); } // 2 region args, 1 result type. // Caught by the explicit size check in `parse`. TEST_F(ForOpTest, InvalidArgCountMismatch) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32, %x: i32, %y: i32) { jeff.for %i = %lo to %hi step %s args(%a = %x, %b = %y) -> (i32) : i32 { jeff.yield %a : i32 } return } - )MLIR", - &ctx); - ASSERT_FALSE(module); + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); } // === Invalid semantics tests (verify-level) === // `index` is rejected by SupportedIntType. TEST_F(ForOpTest, InvalidIndexType) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: index, %hi: index, %s: index) { jeff.for %i = %lo to %hi step %s : index {} return } - )MLIR", - &ctx); - ASSERT_FALSE(module); + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); } // Floating-point types are rejected by SupportedIntType. TEST_F(ForOpTest, InvalidFloatType) { - const auto module = parseSourceString(R"MLIR( + const std::string src = R"MLIR( func.func @f(%lo: f32, %hi: f32, %s: f32) { jeff.for %i = %lo to %hi step %s : f32 {} return } - )MLIR", - &ctx); - ASSERT_FALSE(module); + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); } diff --git a/unittests/IR/test_parse_while_op.cpp b/unittests/IR/test_parse_while_op.cpp new file mode 100644 index 0000000..4af5154 --- /dev/null +++ b/unittests/IR/test_parse_while_op.cpp @@ -0,0 +1,206 @@ +#include "jeff/IR/JeffDialect.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace mlir; + +class WhileOpTest : public ::testing::Test { + protected: + MLIRContext ctx; + ScopedDiagnosticHandler handler{&ctx, [](Diagnostic&) { return success(); }}; + + void SetUp() override { ctx.loadDialect(); } +}; + +// === Valid tests === + +TEST_F(WhileOpTest, NoArgs) { + const std::string src = R"MLIR( + func.func @f(%pred: i1) { + jeff.while args() { + jeff.yield %pred : i1 + } args() { + } + return + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(WhileOpTest, WithArgsSingle) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) -> i32 { + %r = jeff.while : (i32) args(%c_x = %a) { + jeff.yield %pred : i1 + } args(%b_x = %a) { + jeff.yield %b_x : i32 + } + return %r : i32 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(WhileOpTest, WithArgsMultiple) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { + %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { + jeff.yield %pred : i1 + } args(%b_x = %a, %b_y = %b) { + jeff.yield %b_x, %b_y : i32, i64 + } + return %r1, %r2 : i32, i64 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(WhileOpTest, Nested) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) -> i32 { + %r = jeff.while : (i32) args(%c_x = %a) { + jeff.yield %pred : i1 + } args(%b_x = %a) { + %s = jeff.while : (i32) args(%cc = %b_x) { + jeff.yield %pred : i1 + } args(%bb = %b_x) { + jeff.yield %bb : i32 + } + jeff.yield %s : i32 + } + return %r : i32 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +// `WhileOp::print` elides the empty yield for the body when there are no +// in-values, but bare `jeff.yield` is still valid input. It can come from +// `YieldOp`'s own printer, generic-form output (`-mlir-print-op-generic`), +// or handwritten MLIR. The parser must accept this shape. +TEST_F(WhileOpTest, ExplicitEmptyBodyYield) { + const std::string src = R"MLIR( + func.func @f(%pred: i1) { + jeff.while args() { + jeff.yield %pred : i1 + } args() { + jeff.yield + } + return + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +// Parse → print → parse → print, then assert idempotent. +// The first round normalizes (whitespace, SSA names). +// The second round must be a no-op. +TEST_F(WhileOpTest, RoundTripIdempotent) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { + %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { + jeff.yield %pred : i1 + } args(%b_x = %a, %b_y = %b) { + jeff.yield %b_x, %b_y : i32, i64 + } + return %r1, %r2 : i32, i64 + } + )MLIR"; + + const auto module1 = parseSourceString(src, &ctx); + ASSERT_TRUE(module1); + std::string printed1; + llvm::raw_string_ostream(printed1) << *module1; + + const auto module2 = parseSourceString(printed1, &ctx); + ASSERT_TRUE(module2); + std::string printed2; + llvm::raw_string_ostream(printed2) << *module2; + + EXPECT_EQ(printed1, printed2); +} + +// === Invalid syntax tests (parse-level) === + +TEST_F(WhileOpTest, InvalidMissingArgsKeyword) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) { + jeff.while : (i32) (%c_x = %a) { + jeff.yield %pred : i1 + } args(%b_x = %a) { + jeff.yield %b_x : i32 + } + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// `: (...)` is present but `args(...)` is empty (or vice versa). +TEST_F(WhileOpTest, InvalidArgCountMismatchWithTypes) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) { + jeff.while : (i32, i64) args(%c_x = %a) { + jeff.yield %pred : i1 + } args(%b_x = %a) { + jeff.yield %b_x : i32 + } + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// Condition has 2 args but body has 1. +TEST_F(WhileOpTest, InvalidCondBodyArgCountMismatch) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %b: i64, %pred: i1) { + jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { + jeff.yield %pred : i1 + } args(%b_x = %a) { + jeff.yield %b_x : i32 + } + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// Condition and body bind to different operand SSA values. +TEST_F(WhileOpTest, InvalidCondBodyOperandsDiffer) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %b: i32, %pred: i1) -> i32 { + %r = jeff.while : (i32) args(%c_x = %a) { + jeff.yield %pred : i1 + } args(%b_x = %b) { + jeff.yield %b_x : i32 + } + return %r : i32 + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// `args(...)` is non-empty but no `: (...)` is provided. +TEST_F(WhileOpTest, InvalidMissingTypeAnnotation) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) { + jeff.while args(%c_x = %a) { + jeff.yield %pred : i1 + } args(%b_x = %a) { + jeff.yield %b_x : i32 + } + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} From a7c3dcc6f3fd1fe226302bc055461263028cfacc Mon Sep 17 00:00:00 2001 From: rturrado Date: Thu, 14 May 2026 16:54:46 +0200 Subject: [PATCH 03/15] Implement parse for `jeff.doWhile` SCF op Implement DoWhileOp::parse and rewrite DoWhileOp::print to use the same syntax as `jeff.while`, with the body region declared first and the condition region second to reflect the do-while semantic that the body runs unconditionally on the first iteration before the predicate is evaluated. The leading function-type annotation is present only when the op has loop-carried values, and the per-region `args(...)` clauses are always emitted (possibly empty) so they serve as the structural separator between the two regions. The condition region's implicit `i1` yield type is no longer printed. Verify at parse time that both `args(...)` clauses bind the same operand names, reflecting that a Jeff doWhile has a single op-level operand list shared between the body and condition regions. Add an ODS description for DoWhileOp documenting its semantics, syntax, verifier invariants, and the body-first difference from `jeff.while`, cross-referencing `jeff.while` for the differences from `scf.while`. Add unittests/IR/test_parse_do_while_op.cpp covering valid forms, parse-level errors, and a print/parse round-trip idempotency check. Signed-off-by: rturrado --- include/jeff/IR/JeffOps.td | 79 ++++++++- lib/IR/JeffOps.cpp | 110 ++++++++++++- unittests/IR/CMakeLists.txt | 1 + unittests/IR/test_parse_do_while_op.cpp | 206 ++++++++++++++++++++++++ 4 files changed, 387 insertions(+), 9 deletions(-) create mode 100644 unittests/IR/test_parse_do_while_op.cpp diff --git a/include/jeff/IR/JeffOps.td b/include/jeff/IR/JeffOps.td index 82cdc8c..c8a99da 100644 --- a/include/jeff/IR/JeffOps.td +++ b/include/jeff/IR/JeffOps.td @@ -2171,8 +2171,83 @@ def WhileOp : SCF_Op<"while", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> } def DoWhileOp : SCF_Op<"doWhile", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { - let summary = ""; - let description = [{}]; + let summary = "Do-while loop with iteration arguments"; + let description = [{ + The `jeff.doWhile` operation represents a loop that repeats while a + condition holds. Unlike the `jeff.while` operation, an initial + iteration of the loop occurs before the condition is checked for the + first time. It has two regions: + + - The `condition` region computes the loop's continuation predicate. + It must terminate with a `jeff.yield` whose only operand is an + `i1`. By the Jeff language spec, the condition region may not + modify quantum state (qubits or qubit registers); it may only + inspect classical metadata. + - The `body` region executes unconditionally on the first iteration, + then repeats while the condition yields `true`. + It must terminate with a `jeff.yield` whose operands become the + next iteration's input values, or — on the last iteration — the + op's results. + + The op carries a list of `in_values` shared between both regions. + Each region declares its own local block arguments, bound to those + `in_values` on entry. On the first iteration the body's block + arguments are bound directly from the op's operands; on subsequent + iterations they are bound from the body's previous yield. Because + the condition region does not modify state, the same iteration + values are visible to both regions. + + The types of `in_values`, the body region's yielded values, and the + op's results all match. There is a single signature for the loop. + + Syntax: + ``` + [ $results = ] jeff.doWhile [ : ( $types ) ] + args ( $body_assignments ) $body_body + args ( $cond_assignments ) $cond_body + ``` + + The `: ( $types )` annotation is present iff `$types` is non-empty. + Both `args(...)` clauses are always emitted (possibly empty), + serving as a structural separator between the regions. The RHS of + both `args(...)` clauses must reference the same op operands in the + same order. + + Example with loop-carried values: + ```mlir + %r1, %r2 = jeff.doWhile : (i32, i64) args(%c_x = %a, %c_y = %b) { + // body + jeff.yield %next_x, %next_y : i32, i64 + } args(%b_x = %a, %b_y = %b) { + // condition + jeff.yield %pred : i1 + } + ``` + + Example with no loop-carried values: + ```mlir + jeff.doWhile args() { + jeff.yield %pred : i1 + } args() { + // body (no values to yield) + } + ``` + + Invariants enforced by the verifier: + - The number of `in_values` equals the number of results. + - Each `in_value`, the corresponding block argument of each region, + and the corresponding result have matching types. + - Both regions terminate with a `jeff.yield` (per + `SingleBlockImplicitTerminator`). + + Differences from `jeff.while`: + - The body runs unconditionally on the first iteration, before the + condition is ever evaluated. The body region is therefore + declared before the condition region in the op's region list and + in the printed form. + + The differences from `scf.while` are the same as for `jeff.while`. + }]; let arguments = (ins Variadic:$in_values diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 3edf9ba..52fdf21 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -446,9 +447,13 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { // Sizes are already equal at this point (both equal types.size()), so just compare names. for (auto [c, b] : llvm::zip_equal(condOperands, bodyOperands)) { if (c.name != b.name || c.number != b.number) { + auto cOperand = llvm::formatv("%{0}{1}", c.name, + (c.number > 0) ? llvm::formatv("#{0}", c.number).str() : ""); + auto bOperand = llvm::formatv("%{0}{1}", b.name, + (b.number > 0) ? llvm::formatv("#{0}", b.number).str() : ""); return parser.emitError(parser.getNameLoc()) << "condition and body args must bind the same operands " - << "(got " << b.name << ", expected " << c.name << ")"; + << "(got " << bOperand << ", expected " << cOperand << ")"; } } @@ -504,26 +509,117 @@ LogicalResult WhileOp::verifyRegions() { void DoWhileOp::print(OpAsmPrinter& p) { auto inValues = getInValues(); + // Emit `: ( types )` only when there are in-values. + if (!inValues.empty()) { + p << " : (" << inValues.getTypes() << ")"; + } + + // Body region: `args ( $assignments )` then the region. auto& body = getBody(); auto bodyArgs = body.getArguments(); printInitializationList(p, bodyArgs, inValues, " args"); - p << " -> (" << inValues.getTypes() << ") "; + p << ' '; p.printRegion(body, /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/!inValues.empty()); + // Condition region: `args ( $assignments )` then the region. auto& condition = getCondition(); auto conditionArgs = condition.getArguments(); printInitializationList(p, conditionArgs, inValues, " args"); - p << " -> (" << IntegerType::get(getContext(), 1) << ") "; + p << ' '; p.printRegion(condition, /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/!inValues.empty()); + /*printBlockTerminators=*/true); p.printOptionalAttrDict((*this)->getAttrs()); } -ParseResult DoWhileOp::parse(OpAsmParser& /*parser*/, OperationState& /*result*/) { - // TODO: Implement this - llvm::report_fatal_error("DoWhileOp::parse is not implemented yet"); +ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { + auto& builder = parser.getBuilder(); + + Region* body = result.addRegion(); + Region* condition = result.addRegion(); + + // Parse optional `: ( types )`. Omitted when there are no in-values. + llvm::SmallVector types; + if (succeeded(parser.parseOptionalColon())) { + if (parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { + return parser.parseType(types.emplace_back()); + })) { + return failure(); + } + } + + // Parse the body region's `args ( $assignments )`. + llvm::SmallVector bodyRegionArgs; + llvm::SmallVector bodyOperands; + if (parser.parseKeyword("args") || parser.parseAssignmentList(bodyRegionArgs, bodyOperands)) { + return failure(); + } + + if (bodyRegionArgs.size() != types.size()) { + return parser.emitError(parser.getNameLoc()) + << "expected " << types.size() << " body arguments but got " + << bodyRegionArgs.size(); + } + + for (auto [arg, ty] : llvm::zip_equal(bodyRegionArgs, types)) { + arg.type = ty; + } + + if (parser.parseRegion(*body, bodyRegionArgs)) { + return failure(); + } + WhileOp::ensureTerminator(*body, builder, result.location); + + // Parse the condition region's `args ( $assignments )`. + llvm::SmallVector condRegionArgs; + llvm::SmallVector condOperands; + if (parser.parseKeyword("args") || parser.parseAssignmentList(condRegionArgs, condOperands)) { + return failure(); + } + + if (condRegionArgs.size() != types.size()) { + return parser.emitError(parser.getNameLoc()) + << "expected " << types.size() << " condition arguments but got " + << condRegionArgs.size(); + } + + // Both args clauses must reference the same operands. + // Sizes are already equal at this point (both equal types.size()), so just compare names. + for (auto [b, c] : llvm::zip_equal(bodyOperands, condOperands)) { + if (b.name != c.name || b.number != c.number) { + auto bOperand = llvm::formatv("%{0}{1}", b.name, + (b.number > 0) ? llvm::formatv("#{0}", b.number).str() : ""); + auto cOperand = llvm::formatv("%{0}{1}", c.name, + (c.number > 0) ? llvm::formatv("#{0}", c.number).str() : ""); + return parser.emitError(parser.getNameLoc()) + << "body and condition args must bind the same operands " + << "(got " << cOperand << ", expected " << bOperand << ")"; + } + } + + for (auto [arg, ty] : llvm::zip_equal(condRegionArgs, types)) { + arg.type = ty; + } + + if (parser.parseRegion(*condition, condRegionArgs)) { + return failure(); + } + WhileOp::ensureTerminator(*condition, builder, result.location); + + // Resolve operands (condition and body operand lists are equal). + if (parser.resolveOperands(condOperands, types, parser.getCurrentLocation(), result.operands)) { + return failure(); + } + + // Op results have the same types as in-values. + result.addTypes(types); + + if (parser.parseOptionalAttrDict(result.attributes)) { + return failure(); + } + + return success(); } LogicalResult DoWhileOp::verify() { diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt index 7ea7ce7..645c3d7 100644 --- a/unittests/IR/CMakeLists.txt +++ b/unittests/IR/CMakeLists.txt @@ -2,6 +2,7 @@ set(test_name "jeff-mlir-parse-test") if(NOT TARGET ${test_name}) add_executable(${test_name} + test_parse_do_while_op.cpp test_parse_for_op.cpp test_parse_while_op.cpp ) diff --git a/unittests/IR/test_parse_do_while_op.cpp b/unittests/IR/test_parse_do_while_op.cpp new file mode 100644 index 0000000..15d4f0a --- /dev/null +++ b/unittests/IR/test_parse_do_while_op.cpp @@ -0,0 +1,206 @@ +#include "jeff/IR/JeffDialect.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace mlir; + +class DoWhileOpTest : public ::testing::Test { + protected: + MLIRContext ctx; + ScopedDiagnosticHandler handler{&ctx, [](Diagnostic&) { return success(); }}; + + void SetUp() override { ctx.loadDialect(); } +}; + +// === Valid tests === + +TEST_F(DoWhileOpTest, NoArgs) { + const std::string src = R"MLIR( + func.func @f(%pred: i1) { + jeff.doWhile args() { + } args() { + jeff.yield %pred : i1 + } + return + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(DoWhileOpTest, WithArgsSingle) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) -> i32 { + %r = jeff.doWhile : (i32) args(%b_x = %a) { + jeff.yield %b_x : i32 + } args(%c_x = %a) { + jeff.yield %pred : i1 + } + return %r : i32 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(DoWhileOpTest, WithArgsMultiple) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { + %r1, %r2 = jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { + jeff.yield %b_x, %b_y : i32, i64 + } args(%c_x = %a, %c_y = %b) { + jeff.yield %pred : i1 + } + return %r1, %r2 : i32, i64 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(DoWhileOpTest, Nested) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) -> i32 { + %r = jeff.doWhile : (i32) args(%b_x = %a) { + %s = jeff.doWhile : (i32) args(%bb = %b_x) { + jeff.yield %bb : i32 + } args(%cc = %b_x) { + jeff.yield %pred : i1 + } + jeff.yield %s : i32 + } args(%c_x = %a) { + jeff.yield %pred : i1 + } + return %r : i32 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +// `DoWhileOp::print` elides the empty yield for the body when there are no +// in-values, but bare `jeff.yield` is still valid input. It can come from +// `YieldOp`'s own printer, generic-form output (`-mlir-print-op-generic`), +// or handwritten MLIR. The parser must accept this shape. +TEST_F(DoWhileOpTest, ExplicitEmptyBodyYield) { + const std::string src = R"MLIR( + func.func @f(%pred: i1) { + jeff.doWhile args() { + jeff.yield + } args() { + jeff.yield %pred : i1 + } + return + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +// Parse → print → parse → print, then assert idempotent. +// The first round normalizes (whitespace, SSA names). +// The second round must be a no-op. +TEST_F(DoWhileOpTest, RoundTripIdempotent) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { + %r1, %r2 = jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { + jeff.yield %b_x, %b_y : i32, i64 + } args(%c_x = %a, %c_y = %b) { + jeff.yield %pred : i1 + } + return %r1, %r2 : i32, i64 + } + )MLIR"; + + const auto module1 = parseSourceString(src, &ctx); + ASSERT_TRUE(module1); + std::string printed1; + llvm::raw_string_ostream(printed1) << *module1; + + const auto module2 = parseSourceString(printed1, &ctx); + ASSERT_TRUE(module2); + std::string printed2; + llvm::raw_string_ostream(printed2) << *module2; + + EXPECT_EQ(printed1, printed2); +} + +// === Invalid syntax tests (parse-level) === + +TEST_F(DoWhileOpTest, InvalidMissingArgsKeyword) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) { + jeff.doWhile : (i32) (%c_x = %a) { + jeff.yield %b_x : i32 + } args(%b_x = %a) { + jeff.yield %pred : i1 + } + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// `: (...)` is present but `args(...)` is empty (or vice versa). +TEST_F(DoWhileOpTest, InvalidArgCountMismatchWithTypes) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) { + jeff.doWhile : (i32, i64) args(%c_x = %a) { + jeff.yield %b_x : i32 + } args(%b_x = %a) { + jeff.yield %pred : i1 + } + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// Condition has 2 args but body has 1. +TEST_F(DoWhileOpTest, InvalidCondBodyArgCountMismatch) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %b: i64, %pred: i1) { + jeff.doWhile : (i32, i64) args(%c_x = %a, %c_y = %b) { + jeff.yield %b_x : i32 + } args(%b_x = %a) { + jeff.yield %pred : i1 + } + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// Condition and body bind to different operand SSA values. +TEST_F(DoWhileOpTest, InvalidCondBodyOperandsDiffer) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %b: i32, %pred: i1) -> i32 { + %r = jeff.doWhile : (i32) args(%c_x = %a) { + jeff.yield %b_x : i32 + } args(%b_x = %b) { + jeff.yield %pred : i1 + } + return %r : i32 + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// `args(...)` is non-empty but no `: (...)` is provided. +TEST_F(DoWhileOpTest, InvalidMissingTypeAnnotation) { + const std::string src = R"MLIR( + func.func @f(%a: i32, %pred: i1) { + jeff.doWhile args(%c_x = %a) { + jeff.yield %b_x : i32 + } args(%b_x = %a) { + jeff.yield %pred : i1 + } + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} From 5b4038c99ca3f07f1510cc73ed23a8dc252aa274 Mon Sep 17 00:00:00 2001 From: rturrado Date: Thu, 14 May 2026 19:52:15 +0200 Subject: [PATCH 04/15] Implement parse for `jeff.switch` SCF op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement SwitchOp::parse and rewrite SwitchOp::print to use a folded syntax that states the op's operands and their types together as a single function-type, decoupled from the result types: ``` %r1, %r2 = jeff.switch (%sel, %a, %b) : (i32, T_in...) -> (T_out...) case 0 args(%x, %y) { ... } case 1 args(%x, %y) { ... } default args(%x, %y) { ... } ``` Each region's `args(...)` lists only local block-argument names, not assignments — the operands are already named once in the op's header, so the block arguments are bound positionally to `in_values` and their types are inferred. Case labels are explicit but contiguous starting at 0; the parser enforces this so that print-then-parse is faithful. Decouple `in_values` from `out_values` in the verifier. They were previously forced to match in count and type as a carry-over from loop iter-args, but that constraint is not part of switch semantics. Each region's block arguments now match `in_values` and each region's `jeff.yield` matches the op's result types as two separate checks. Drop `hasVerifier` from the ODS since the op-level `verify` is now trivial. Update the ODS description for `jeff.switch` to reflect the new syntax, spelling out the implicit inferences (positional binding, inferred arg types). Add implicit-inference notes to the `jeff.while` and `jeff.doWhile` descriptions. Fix a stale local-name swap in the `jeff.doWhile` example. Add unittests/IR/test_parse_switch_op.cpp covering valid forms, parse-level errors, unsupported selector types, and verifier-level errors. Signed-off-by: rturrado --- include/jeff/IR/JeffOps.td | 65 ++++-- lib/IR/JeffOps.cpp | 244 +++++++++++++++++----- unittests/IR/CMakeLists.txt | 1 + unittests/IR/test_parse_switch_op.cpp | 288 ++++++++++++++++++++++++++ 4 files changed, 523 insertions(+), 75 deletions(-) create mode 100644 unittests/IR/test_parse_switch_op.cpp diff --git a/include/jeff/IR/JeffOps.td b/include/jeff/IR/JeffOps.td index c8a99da..8513691 100644 --- a/include/jeff/IR/JeffOps.td +++ b/include/jeff/IR/JeffOps.td @@ -1896,7 +1896,7 @@ def SwitchOp : SCF_Op<"switch", [SingleBlockImplicitTerminator<"jeff::YieldOp">] The op carries a list of in-values (`in_values`) shared across all branches and the default region: each region declares its own local - block arguments, bound to the corresponding in-values on entry. Each + block arguments, bound positionally to the in-values on entry. Each region must terminate with a `jeff.yield` whose operands provide the op's results for the selected branch. @@ -1905,55 +1905,63 @@ def SwitchOp : SCF_Op<"switch", [SingleBlockImplicitTerminator<"jeff::YieldOp">] Unlike a `for` or `while` loop, a `switch` has no iteration semantics: it dispatches once. The op's result count and types are therefore not - required to match the in-value count and types — branches may yield - any number of values of any type, provided every branch (and the - default, if present) yields the same shape. + related to the in-value count and types — branches may yield any + number of values of any type, provided every branch (and the default, + if present) yields the same shape. Schematic syntax: ``` - jeff.switch ( $selection ) : $selection_type -> ( $result_types ) - ( case $i args ( $assignments ) { $branch_body } )* - ( default args ( $assignments ) { $default_body } )? + jeff.switch ( $selection, $in_values ) : ( $selection_type, $in_value_types ) + -> ( $result_types ) + ( case $i args ( $local_names ) { $branch_body } )* + ( default args ( $local_names ) { $default_body } )? ``` Example: ```mlir - %r1, %r2 = jeff.switch (%sel) : i32 -> (i32, i64) - case 0 args(%x = %a, %y = %b) { + %r1, %r2 = jeff.switch (%sel, %a, %b) : (i32, i32, i64) -> (i32, i64) + case 0 args(%x, %y) { jeff.yield %x, %y : i32, i64 } - case 1 args(%x = %a, %y = %b) { + case 1 args(%x, %y) { // ... compute %next_x, %next_y ... jeff.yield %next_x, %next_y : i32, i64 } - default args(%x = %a, %y = %b) { + default args(%x, %y) { jeff.yield %x, %y : i32, i64 } ``` + Implicit inferences in the syntax: + - Each region's `args(%x, %y)` lists *only the local block-argument + names*, not assignments. The block arguments are bound positionally + to the op's `in_values` (in the example above, `%x` to `%a`, `%y` + to `%b`) — the operands are already named in the op's header. + - Block-argument types are not written: they are inferred from the + in-value types in the header. + Invariants: - `selection` is a `SupportedIntType`. - - For each branch region and the default region (if present), each - block argument has the same type as the corresponding in-value. - - Every region's yielded values match the op's result types in - count and order. + - Case labels are contiguous integers starting at 0: the i-th + branch region is labeled `case i`. The parser enforces this so + that print-then-parse is faithful. + - Every region's block argument count and types match `in_values`. + - Every region's yielded value count and types match the op's + result types. Underspecified: - The semantics when `selection` does not match any branch label and the default region is absent. - Structurally, the op admits zero branches and an empty default; such a form is legal but meaningless and is not constrained out. - - Whether branch labels form a contiguous range starting at 0: the - printer emits them positionally, so this is currently implicit in - the in-memory representation rather than enforced. Differences from `scf.index_switch`: - Selector type is restricted to `SupportedIntType` (no `index`). - Case labels are implicit and positional (`case 0`, `case 1`, ...) rather than arbitrary integer literals. - The default region is optional rather than mandatory. - - Supports loop-carried-style in-values shared across regions - (`scf.index_switch` has no iter-args mechanism). + - Supports in-values shared across regions, decoupled from the + op's results (`scf.index_switch` has no shared-value mechanism). }]; let arguments = (ins @@ -1971,7 +1979,6 @@ def SwitchOp : SCF_Op<"switch", [SingleBlockImplicitTerminator<"jeff::YieldOp">] ); let hasCustomAssemblyFormat = 1; - let hasVerifier = 1; let hasRegionVerifier = 1; } @@ -2113,6 +2120,12 @@ def WhileOp : SCF_Op<"while", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> both `args(...)` clauses must reference the same op operands in the same order. + Implicit inferences in the syntax: + - Result types are not written: they are the same as the `: (...)` + types (Jeff's single-signature design choice, T1 = T2). + - Block-argument types inside the `args(...)` clauses are not + written: they are inferred from the same `: (...)` annotation. + Example with loop-carried values: ```mlir %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { @@ -2213,12 +2226,18 @@ def DoWhileOp : SCF_Op<"doWhile", [SingleBlockImplicitTerminator<"jeff::YieldOp" both `args(...)` clauses must reference the same op operands in the same order. + Implicit inferences in the syntax: + - Result types are not written: they are the same as the `: (...)` + types (Jeff's single-signature design choice, T1 = T2). + - Block-argument types inside the `args(...)` clauses are not + written: they are inferred from the same `: (...)` annotation. + Example with loop-carried values: ```mlir - %r1, %r2 = jeff.doWhile : (i32, i64) args(%c_x = %a, %c_y = %b) { + %r1, %r2 = jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { // body jeff.yield %next_x, %next_y : i32, i64 - } args(%b_x = %a, %b_y = %b) { + } args(%c_x = %a, %c_y = %b) { // condition jeff.yield %pred : i1 } diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 52fdf21..5b24bba 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -32,6 +32,7 @@ #include #include +#include using namespace mlir; using namespace mlir::jeff; @@ -159,73 +160,212 @@ void IntBinaryOp::getCanonicalizationPatterns(RewritePatternSet& results, MLIRCo } void SwitchOp::print(OpAsmPrinter& p) { - auto inValues = getInValues(); - - p << '(' << getSelection() << ")"; - p << " : " << getSelection().getType(); - p << " -> (" << inValues.getTypes() << ") "; - - auto branches = getBranches(); - for (size_t i = 0; i < branches.size(); ++i) { + // The op's operand list is `selection` followed by `in_values`, in that + // order, so we can stream both together. + auto operands = (*this)->getOperands(); + auto resultTypes = getResultTypes(); + + // Header: `(%sel, %a, %b) : (i32, T_in...) -> (T_out...)`. + p << " ("; + llvm::interleaveComma(operands, p); + p << ") : ("; + llvm::interleaveComma(operands.getTypes(), p); + p << ") -> (" << resultTypes << ")"; + + auto printRegionWithArgs = [&](Region& region) { + p << " args("; + llvm::interleaveComma(region.getArguments(), p); + p << ") "; + p.printRegion(region, /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/!resultTypes.empty()); + }; + + for (auto [i, branch] : llvm::enumerate(getBranches())) { p.printNewline(); - p << "case " << i << ' '; - auto& branch = branches[i]; - auto regionArgs = branch.getArguments(); - printInitializationList(p, regionArgs, inValues, "args"); - p << ' '; - p.printRegion(branch, /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/!inValues.empty()); + p << "case " << i; + printRegionWithArgs(branch); } auto& defaultRegion = getDefault(); if (!defaultRegion.empty()) { p.printNewline(); - p << "default "; - auto regionArgs = defaultRegion.getArguments(); - printInitializationList(p, regionArgs, inValues, "args"); - p << ' '; - p.printRegion(defaultRegion, /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/!inValues.empty()); + p << "default"; + printRegionWithArgs(defaultRegion); } p.printOptionalAttrDict((*this)->getAttrs()); } -ParseResult SwitchOp::parse(OpAsmParser& /*parser*/, OperationState& /*result*/) { - // TODO: Implement this. - // TODO: When implementing the parser, also relax the verifier — the - // current `in_values.size() == out_values.size()` requirement (enforced - // via `verifyRegionArgs`) is a carry-over from loop iter-args and is - // not part of the intended semantics for a switch. - llvm::report_fatal_error("SwitchOp::parse is not implemented yet"); -} +ParseResult SwitchOp::parse(OpAsmParser& parser, OperationState& result) { + auto& builder = parser.getBuilder(); -LogicalResult SwitchOp::verify() { - if (getInValues().size() != getNumResults()) { - return emitOpError("mismatch in number of input and output values"); + // The op declares `$default` first and `$branches` after, + // so the default region must occupy index 0. + // Pre-allocate it; populate later if present. + Region* defaultRegion = result.addRegion(); + + // Parse `(%sel, %a, %b)` — selector first, then in-values. + llvm::SmallVector operands; + if (parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { + return parser.parseOperand(operands.emplace_back()); + })) { + return failure(); + } + if (operands.empty()) { + return parser.emitError(parser.getNameLoc(), "expected at least the selector operand"); + } + + // Parse `: (T_sel, T_in...)` — operand types, in the same order. + llvm::SmallVector operandTypes; + if (parser.parseColon() || parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { + return parser.parseType(operandTypes.emplace_back()); + })) { + return failure(); + } + if (operandTypes.size() != operands.size()) { + return parser.emitError(parser.getNameLoc()) + << "expected " << operands.size() << " operand types but got " + << operandTypes.size(); + } + + // Parse `-> (T_out...)` — result types, independent of operand types. + llvm::SmallVector resultTypes; + if (parser.parseArrow() || parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { + return parser.parseType(resultTypes.emplace_back()); + })) { + return failure(); + } + + if (parser.resolveOperands(operands, operandTypes, parser.getCurrentLocation(), + result.operands)) { + return failure(); + } + + // In-value types are everything after the selector. + auto inValueTypes = llvm::ArrayRef(operandTypes).drop_front(1); + + // Helper that parses `args(%x, %y) { ... }` into a region. + auto parseRegionWithArgs = [&](Region& region) -> ParseResult { + llvm::SmallVector regionArgs; + if (parser.parseKeyword("args") || + parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { + return parser.parseArgument(regionArgs.emplace_back()); + })) { + return failure(); + } + if (regionArgs.size() != inValueTypes.size()) { + return parser.emitError(parser.getNameLoc()) + << "expected " << inValueTypes.size() << " region arguments but got " + << regionArgs.size(); + } + for (auto [arg, ty] : llvm::zip_equal(regionArgs, inValueTypes)) { + arg.type = ty; + } + if (parser.parseRegion(region, regionArgs)) { + return failure(); + } + SwitchOp::ensureTerminator(region, builder, result.location); + return success(); + }; + + // Parse `case N args(...) { ... }` while the keyword is present. + // Case labels are positional; require them to be 0, 1, 2, ... + // so that print-then-parse round-trips faithfully. + int64_t expectedCase = 0; + while (succeeded(parser.parseOptionalKeyword("case"))) { + int64_t caseValue = 0; + if (parser.parseInteger(caseValue)) { + return failure(); + } + if (caseValue != expectedCase) { + return parser.emitError(parser.getNameLoc()) + << "expected `case " << expectedCase << "` but got `case " << caseValue << "`"; + } + ++expectedCase; + + Region* branch = result.addRegion(); + if (parseRegionWithArgs(*branch)) { + return failure(); + } + } + + // Optional `default args(...) { ... }`. + if (succeeded(parser.parseOptionalKeyword("default"))) { + if (parseRegionWithArgs(*defaultRegion)) { + return failure(); + } + } + + result.addTypes(resultTypes); + + if (parser.parseOptionalAttrDict(result.attributes)) { + return failure(); } return success(); } +// `in_values` and `out_values` are independent for switch — no count or type +// relationship between them. Each region's block args mirror `in_values`, and +// each region's yield mirrors the op's results, but those are region-level +// checks done in `verifyRegions`. No op-level `verify` is needed. LogicalResult SwitchOp::verifyRegions() { - llvm::SmallVector regions; - auto branches = getBranches(); - regions.reserve(1 + branches.size()); - regions.push_back(&getDefault()); - for (auto& branch : branches) { - regions.push_back(&branch); - } + auto inValueTypes = getInValues().getTypes(); + auto resultTypes = getResultTypes(); + + // Helper that verifies one region (a `case` body or the `default`). + auto verifyRegion = [&](Region& region, const llvm::Twine& name) -> LogicalResult { + // The parser always pre-allocates the default region, + // leaving it empty when the source has no `default { ... }` clause. + if (region.empty()) { + return success(); + } - auto inValues = getInValues(); - auto outValues = getOutValues(); + // Block-argument count match the `in_values` count. + auto regionArgs = region.getArguments(); + if (regionArgs.size() != inValueTypes.size()) { + return emitOpError() << name << " region has " << regionArgs.size() + << " block arguments but op has " << inValueTypes.size() + << " in-values"; + } + // Block-argument types match the `in_values` types. + for (auto [i, regionArg, inTy] : llvm::enumerate(regionArgs, inValueTypes)) { + if (regionArg.getType() != inTy) { + return emitOpError() << name << " region block argument " << i + << " type does not match the corresponding in-value type"; + } + } - for (auto& region : regions) { - auto regionArgs = region->getArguments(); - if (verifyRegionArgs(*this, inValues, outValues, regionArgs).failed()) { + // `jeff.yield` is present. + auto yield = dyn_cast(region.front().back()); + if (!yield) { + return emitOpError() << name << " region must terminate with `jeff.yield`"; + } + // `jeff.yield` operand count match the op's result count. + if (yield.getNumOperands() != resultTypes.size()) { + return emitOpError() << name << " region yields " << yield.getNumOperands() + << " values but op has " << resultTypes.size() << " results"; + } + // `jeff.yield` operand types match the op's result types. + for (auto [i, yieldOp, resTy] : llvm::enumerate(yield.getOperands(), resultTypes)) { + if (yieldOp.getType() != resTy) { + return emitOpError() << name << " region yield operand " << i + << " type does not match the corresponding result type"; + } + } + return success(); + }; + + // Verify `case` regions. + for (auto [i, branch] : llvm::enumerate(getBranches())) { + if (verifyRegion(branch, "case " + llvm::Twine(i)).failed()) { return failure(); } } + // Verify `default` region. + if (verifyRegion(getDefault(), "default").failed()) { + return failure(); + } return success(); } @@ -447,10 +587,10 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { // Sizes are already equal at this point (both equal types.size()), so just compare names. for (auto [c, b] : llvm::zip_equal(condOperands, bodyOperands)) { if (c.name != b.name || c.number != b.number) { - auto cOperand = llvm::formatv("%{0}{1}", c.name, - (c.number > 0) ? llvm::formatv("#{0}", c.number).str() : ""); - auto bOperand = llvm::formatv("%{0}{1}", b.name, - (b.number > 0) ? llvm::formatv("#{0}", b.number).str() : ""); + auto cOperand = llvm::formatv( + "%{0}{1}", c.name, (c.number > 0) ? llvm::formatv("#{0}", c.number).str() : ""); + auto bOperand = llvm::formatv( + "%{0}{1}", b.name, (b.number > 0) ? llvm::formatv("#{0}", b.number).str() : ""); return parser.emitError(parser.getNameLoc()) << "condition and body args must bind the same operands " << "(got " << bOperand << ", expected " << cOperand << ")"; @@ -588,10 +728,10 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { // Sizes are already equal at this point (both equal types.size()), so just compare names. for (auto [b, c] : llvm::zip_equal(bodyOperands, condOperands)) { if (b.name != c.name || b.number != c.number) { - auto bOperand = llvm::formatv("%{0}{1}", b.name, - (b.number > 0) ? llvm::formatv("#{0}", b.number).str() : ""); - auto cOperand = llvm::formatv("%{0}{1}", c.name, - (c.number > 0) ? llvm::formatv("#{0}", c.number).str() : ""); + auto bOperand = llvm::formatv( + "%{0}{1}", b.name, (b.number > 0) ? llvm::formatv("#{0}", b.number).str() : ""); + auto cOperand = llvm::formatv( + "%{0}{1}", c.name, (c.number > 0) ? llvm::formatv("#{0}", c.number).str() : ""); return parser.emitError(parser.getNameLoc()) << "body and condition args must bind the same operands " << "(got " << cOperand << ", expected " << bOperand << ")"; diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt index 645c3d7..714631e 100644 --- a/unittests/IR/CMakeLists.txt +++ b/unittests/IR/CMakeLists.txt @@ -4,6 +4,7 @@ if(NOT TARGET ${test_name}) add_executable(${test_name} test_parse_do_while_op.cpp test_parse_for_op.cpp + test_parse_switch_op.cpp test_parse_while_op.cpp ) diff --git a/unittests/IR/test_parse_switch_op.cpp b/unittests/IR/test_parse_switch_op.cpp new file mode 100644 index 0000000..21a34d2 --- /dev/null +++ b/unittests/IR/test_parse_switch_op.cpp @@ -0,0 +1,288 @@ +#include "jeff/IR/JeffDialect.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace mlir; + +class SwitchOpTest : public ::testing::Test { + protected: + MLIRContext ctx; + ScopedDiagnosticHandler handler{&ctx, [](Diagnostic&) { return success(); }}; + + void SetUp() override { ctx.loadDialect(); } +}; + +// === Valid tests === + +// Structurally legal: a switch with zero branches and no default. +// (See "Underspecified" in the op description.) +TEST_F(SwitchOpTest, ZeroCasesAndNoDefault) { + const std::string src = R"MLIR( + func.func @f(%sel: i32) { + jeff.switch (%sel) : (i32) -> () + return + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(SwitchOpTest, NoInValuesNoResults) { + const std::string src = R"MLIR( + func.func @f(%sel: i32) { + jeff.switch (%sel) : (i32) -> () + case 0 args() {} + case 1 args() {} + default args() {} + return + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(SwitchOpTest, WithInValuesAndResults) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32, %b: i64) -> (i32, i64) { + %r1, %r2 = jeff.switch (%sel, %a, %b) : (i32, i32, i64) -> (i32, i64) + case 0 args(%x, %y) { + jeff.yield %x, %y : i32, i64 + } + case 1 args(%x, %y) { + jeff.yield %x, %y : i32, i64 + } + default args(%x, %y) { + jeff.yield %x, %y : i32, i64 + } + return %r1, %r2 : i32, i64 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(SwitchOpTest, NoDefault) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32) -> i32 { + %r = jeff.switch (%sel, %a) : (i32, i32) -> (i32) + case 0 args(%x) { + jeff.yield %x : i32 + } + case 1 args(%x) { + jeff.yield %x : i32 + } + return %r : i32 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(SwitchOpTest, OnlyDefault) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32) -> i32 { + %r = jeff.switch (%sel, %a) : (i32, i32) -> (i32) + default args(%x) { + jeff.yield %x : i32 + } + return %r : i32 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +// In-value types and result types are independent: +// the in-values are (i32, i64), but the op yields a single i1. +TEST_F(SwitchOpTest, DecoupledInValueAndResultTypes) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32, %b: i64, %p: i1) -> i1 { + %r = jeff.switch (%sel, %a, %b) : (i32, i32, i64) -> (i1) + case 0 args(%x, %y) { + jeff.yield %p : i1 + } + default args(%x, %y) { + jeff.yield %p : i1 + } + return %r : i1 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +TEST_F(SwitchOpTest, Nested) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32) -> i32 { + %r = jeff.switch (%sel, %a) : (i32, i32) -> (i32) + case 0 args(%x) { + %s = jeff.switch (%sel, %x) : (i32, i32) -> (i32) + case 0 args(%xx) { + jeff.yield %xx : i32 + } + default args(%xx) { + jeff.yield %xx : i32 + } + jeff.yield %s : i32 + } + default args(%x) { + jeff.yield %x : i32 + } + return %r : i32 + } + )MLIR"; + ASSERT_TRUE(parseSourceString(src, &ctx)); +} + +// Parse → print → parse → print, then assert idempotent. +TEST_F(SwitchOpTest, RoundTripIdempotent) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32, %b: i64) -> (i32, i64) { + %r1, %r2 = jeff.switch (%sel, %a, %b) : (i32, i32, i64) -> (i32, i64) + case 0 args(%x, %y) { + jeff.yield %x, %y : i32, i64 + } + case 1 args(%x, %y) { + jeff.yield %x, %y : i32, i64 + } + default args(%x, %y) { + jeff.yield %x, %y : i32, i64 + } + return %r1, %r2 : i32, i64 + } + )MLIR"; + + const auto module1 = parseSourceString(src, &ctx); + ASSERT_TRUE(module1); + std::string printed1; + llvm::raw_string_ostream(printed1) << *module1; + + const auto module2 = parseSourceString(printed1, &ctx); + ASSERT_TRUE(module2); + std::string printed2; + llvm::raw_string_ostream(printed2) << *module2; + + EXPECT_EQ(printed1, printed2); +} + +// === Invalid syntax tests (parse-level) === + +// Case labels must start at 0 and increase by 1. +TEST_F(SwitchOpTest, InvalidNonContiguousCaseLabels) { + const std::string src = R"MLIR( + func.func @f(%sel: i32) { + jeff.switch (%sel) : (i32) -> () + case 0 args() {} + case 2 args() {} + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +TEST_F(SwitchOpTest, InvalidCaseLabelNotStartingAtZero) { + const std::string src = R"MLIR( + func.func @f(%sel: i32) { + jeff.switch (%sel) : (i32) -> () + case 1 args() {} + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +TEST_F(SwitchOpTest, InvalidMissingArgsKeyword) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32) { + jeff.switch (%sel, %a) : (i32, i32) -> () + case 0 (%x) {} + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// Region arg count must match the number of in-values. +TEST_F(SwitchOpTest, InvalidRegionArgCountMismatch) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32, %b: i64) { + jeff.switch (%sel, %a, %b) : (i32, i32, i64) -> () + case 0 args(%x) {} + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// Operand-type count must match operand count. +TEST_F(SwitchOpTest, InvalidOperandTypeCountMismatch) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32) { + jeff.switch (%sel, %a) : (i32) -> () + case 0 args(%x) {} + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// The selector must be a `SupportedIntType` (i1/i8/i16/i32/i64). +// `index` is rejected by the ODS-generated verifier. +TEST_F(SwitchOpTest, InvalidSelectorTypeIndex) { + const std::string src = R"MLIR( + func.func @f(%sel: index) { + jeff.switch (%sel) : (index) -> () + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +TEST_F(SwitchOpTest, InvalidSelectorTypeFloat) { + const std::string src = R"MLIR( + func.func @f(%sel: f32) { + jeff.switch (%sel) : (f32) -> () + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// === Invalid verifier tests === + +// Yield operand count must match the op's result count. +TEST_F(SwitchOpTest, InvalidYieldCountMismatch) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32) -> i32 { + %r = jeff.switch (%sel, %a) : (i32, i32) -> (i32) + case 0 args(%x) { + jeff.yield + } + default args(%x) { + jeff.yield %x : i32 + } + return %r : i32 + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// Yield operand type must match the op's result type. +TEST_F(SwitchOpTest, InvalidYieldTypeMismatch) { + const std::string src = R"MLIR( + func.func @f(%sel: i32, %a: i32, %b: i64) -> i32 { + %r = jeff.switch (%sel, %a, %b) : (i32, i32, i64) -> (i32) + case 0 args(%x, %y) { + jeff.yield %y : i64 + } + default args(%x, %y) { + jeff.yield %x : i32 + } + return %r : i32 + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} From 10a794e82b7d06335bceccbb4a4206201ad574e7 Mon Sep 17 00:00:00 2001 From: rturrado Date: Thu, 14 May 2026 22:48:59 +0200 Subject: [PATCH 05/15] Drop assignments from the second region in `jeff.while` and `jeff.doWhile` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The operands of a Jeff while/doWhile loop are stated once at the op level. Repeating them in the second region's `args(...)` clause was redundant — and the verifier had to check that both clauses agreed on the operands. Following the design discussion on issue #7, the second region now lists only local block-argument names; the operands are inherited positionally from the first region. `jeff.while` now prints as `: (types) args(%c_x = %a, ...) { cond } args(%b_x, ...) { body }`, and `jeff.doWhile` symmetrically as `: (types) args(%b_x = %a, ...) { body } args(%c_x, ...) { cond }`. Drop the "both args clauses must bind the same operands" check from both parsers (and the `llvm::formatv`-based diagnostic helper that supported it), since the constraint can no longer be expressed in the syntax. Drop the `` include that became unused. Update the ODS descriptions for both ops: drop the "RHS of both `args(...)` clauses must reference the same op operands" sentence, and add an "Implicit inferences" bullet documenting that the second region's `args(...)` lists names only. Update tests in test_parse_while_op.cpp and test_parse_do_while_op.cpp to use the new syntax in the valid and round-trip cases, drop the `InvalidCondBodyOperandsDiffer` test (no longer expressible — and that's the point), and rename `InvalidCondBodyArgCountMismatch` to `InvalidBodyCondArgCountMismatch` in the doWhile suite to reflect the body-first region order. Signed-off-by: rturrado --- include/jeff/IR/JeffOps.td | 29 +++++----- lib/IR/JeffOps.cpp | 77 ++++++++++--------------- unittests/IR/test_parse_do_while_op.cpp | 47 +++++---------- unittests/IR/test_parse_while_op.cpp | 33 +++-------- 4 files changed, 71 insertions(+), 115 deletions(-) diff --git a/include/jeff/IR/JeffOps.td b/include/jeff/IR/JeffOps.td index 8513691..56113be 100644 --- a/include/jeff/IR/JeffOps.td +++ b/include/jeff/IR/JeffOps.td @@ -2111,27 +2111,29 @@ def WhileOp : SCF_Op<"while", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> ``` [ $results = ] jeff.while [ : ( $types ) ] args ( $cond_assignments ) $cond_body - args ( $body_assignments ) $body_body + args ( $body_names ) $body_body ``` The `: ( $types )` annotation is present iff `$types` is non-empty. Both `args(...)` clauses are always emitted (possibly empty), - serving as a structural separator between the regions. The RHS of - both `args(...)` clauses must reference the same op operands in the - same order. + serving as a structural separator between the regions. Implicit inferences in the syntax: - Result types are not written: they are the same as the `: (...)` types (Jeff's single-signature design choice, T1 = T2). - Block-argument types inside the `args(...)` clauses are not written: they are inferred from the same `: (...)` annotation. + - The body region's `args(...)` lists only local block-argument + names, not assignments. The operands are stated once in the + condition's `args(...)`; the body's block arguments are bound + positionally to those same operands. Example with loop-carried values: ```mlir %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { // condition jeff.yield %pred : i1 - } args(%b_x = %a, %b_y = %b) { + } args(%b_x, %b_y) { // body jeff.yield %next_x, %next_y : i32, i64 } @@ -2216,28 +2218,30 @@ def DoWhileOp : SCF_Op<"doWhile", [SingleBlockImplicitTerminator<"jeff::YieldOp" Syntax: ``` [ $results = ] jeff.doWhile [ : ( $types ) ] - args ( $body_assignments ) $body_body - args ( $cond_assignments ) $cond_body + args ( $body_assignments ) $body_body + args ( $cond_names ) $cond_body ``` The `: ( $types )` annotation is present iff `$types` is non-empty. Both `args(...)` clauses are always emitted (possibly empty), - serving as a structural separator between the regions. The RHS of - both `args(...)` clauses must reference the same op operands in the - same order. + serving as a structural separator between the regions. Implicit inferences in the syntax: - Result types are not written: they are the same as the `: (...)` types (Jeff's single-signature design choice, T1 = T2). - Block-argument types inside the `args(...)` clauses are not written: they are inferred from the same `: (...)` annotation. + - The condition region's `args(...)` lists only local + block-argument names, not assignments. The operands are stated + once in the body's `args(...)`; the condition's block arguments + are bound positionally to those same operands. Example with loop-carried values: ```mlir %r1, %r2 = jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { // body jeff.yield %next_x, %next_y : i32, i64 - } args(%c_x = %a, %c_y = %b) { + } args(%c_x, %c_y) { // condition jeff.yield %pred : i1 } @@ -2246,9 +2250,8 @@ def DoWhileOp : SCF_Op<"doWhile", [SingleBlockImplicitTerminator<"jeff::YieldOp" Example with no loop-carried values: ```mlir jeff.doWhile args() { - jeff.yield %pred : i1 } args() { - // body (no values to yield) + jeff.yield %pred : i1 } ``` diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 5b24bba..2a46611 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -511,7 +510,8 @@ void WhileOp::print(OpAsmPrinter& p) { p << " : (" << inValues.getTypes() << ")"; } - // Condition region: `args ( $assignments )` then the region. + // Condition region: `args ( $assignments )`. + // Full assignments, since this is where the op's operands are introduced. auto& condition = getCondition(); auto conditionArgs = condition.getArguments(); printInitializationList(p, conditionArgs, inValues, " args"); @@ -519,11 +519,13 @@ void WhileOp::print(OpAsmPrinter& p) { p.printRegion(condition, /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/true); - // Body region: `args ( $assignments )` then the region. + // Body region: `args ( $names )`. + // Names only. The operands are already stated in the condition's `args(...)`. auto& body = getBody(); auto bodyArgs = body.getArguments(); - printInitializationList(p, bodyArgs, inValues, " args"); - p << ' '; + p << " args("; + llvm::interleaveComma(bodyArgs, p); + p << ") "; p.printRegion(body, /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/!inValues.empty()); @@ -570,10 +572,13 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { } WhileOp::ensureTerminator(*condition, builder, result.location); - // Parse the body region's `args ( $assignments )`. + // Parse the body region's `args ( $names )`. + // Names only. The operands are inherited from the condition's `args(...)`. llvm::SmallVector bodyRegionArgs; - llvm::SmallVector bodyOperands; - if (parser.parseKeyword("args") || parser.parseAssignmentList(bodyRegionArgs, bodyOperands)) { + if (parser.parseKeyword("args") || + parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { + return parser.parseArgument(bodyRegionArgs.emplace_back()); + })) { return failure(); } @@ -583,20 +588,6 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { << bodyRegionArgs.size(); } - // Both args clauses must reference the same operands. - // Sizes are already equal at this point (both equal types.size()), so just compare names. - for (auto [c, b] : llvm::zip_equal(condOperands, bodyOperands)) { - if (c.name != b.name || c.number != b.number) { - auto cOperand = llvm::formatv( - "%{0}{1}", c.name, (c.number > 0) ? llvm::formatv("#{0}", c.number).str() : ""); - auto bOperand = llvm::formatv( - "%{0}{1}", b.name, (b.number > 0) ? llvm::formatv("#{0}", b.number).str() : ""); - return parser.emitError(parser.getNameLoc()) - << "condition and body args must bind the same operands " - << "(got " << bOperand << ", expected " << cOperand << ")"; - } - } - for (auto [arg, ty] : llvm::zip_equal(bodyRegionArgs, types)) { arg.type = ty; } @@ -606,8 +597,8 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { } WhileOp::ensureTerminator(*body, builder, result.location); - // Resolve operands (condition and body operand lists are equal). - if (parser.resolveOperands(bodyOperands, types, parser.getCurrentLocation(), result.operands)) { + // Resolve operands from the condition's `args(...)`. + if (parser.resolveOperands(condOperands, types, parser.getCurrentLocation(), result.operands)) { return failure(); } @@ -654,7 +645,8 @@ void DoWhileOp::print(OpAsmPrinter& p) { p << " : (" << inValues.getTypes() << ")"; } - // Body region: `args ( $assignments )` then the region. + // Body region: `args ( $assignments )`. + // Fll assignments, since this is where the op's operands are introduced. auto& body = getBody(); auto bodyArgs = body.getArguments(); printInitializationList(p, bodyArgs, inValues, " args"); @@ -662,11 +654,13 @@ void DoWhileOp::print(OpAsmPrinter& p) { p.printRegion(body, /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/!inValues.empty()); - // Condition region: `args ( $assignments )` then the region. + // Condition region: `args ( $names )`. + // Names only. The operands are already stated in the body's `args(...)`. auto& condition = getCondition(); auto conditionArgs = condition.getArguments(); - printInitializationList(p, conditionArgs, inValues, " args"); - p << ' '; + p << " args("; + llvm::interleaveComma(conditionArgs, p); + p << ") "; p.printRegion(condition, /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/true); @@ -711,10 +705,13 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { } WhileOp::ensureTerminator(*body, builder, result.location); - // Parse the condition region's `args ( $assignments )`. + // Parse the condition region's `args ( $names )`. + // Names only. The operands are inherited from the body's `args(...)`. llvm::SmallVector condRegionArgs; - llvm::SmallVector condOperands; - if (parser.parseKeyword("args") || parser.parseAssignmentList(condRegionArgs, condOperands)) { + if (parser.parseKeyword("args") || + parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { + return parser.parseArgument(condRegionArgs.emplace_back()); + })) { return failure(); } @@ -724,20 +721,6 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { << condRegionArgs.size(); } - // Both args clauses must reference the same operands. - // Sizes are already equal at this point (both equal types.size()), so just compare names. - for (auto [b, c] : llvm::zip_equal(bodyOperands, condOperands)) { - if (b.name != c.name || b.number != c.number) { - auto bOperand = llvm::formatv( - "%{0}{1}", b.name, (b.number > 0) ? llvm::formatv("#{0}", b.number).str() : ""); - auto cOperand = llvm::formatv( - "%{0}{1}", c.name, (c.number > 0) ? llvm::formatv("#{0}", c.number).str() : ""); - return parser.emitError(parser.getNameLoc()) - << "body and condition args must bind the same operands " - << "(got " << cOperand << ", expected " << bOperand << ")"; - } - } - for (auto [arg, ty] : llvm::zip_equal(condRegionArgs, types)) { arg.type = ty; } @@ -747,8 +730,8 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { } WhileOp::ensureTerminator(*condition, builder, result.location); - // Resolve operands (condition and body operand lists are equal). - if (parser.resolveOperands(condOperands, types, parser.getCurrentLocation(), result.operands)) { + // Resolve operands from the body's `args(...)`. + if (parser.resolveOperands(bodyOperands, types, parser.getCurrentLocation(), result.operands)) { return failure(); } diff --git a/unittests/IR/test_parse_do_while_op.cpp b/unittests/IR/test_parse_do_while_op.cpp index 15d4f0a..8eb6b8b 100644 --- a/unittests/IR/test_parse_do_while_op.cpp +++ b/unittests/IR/test_parse_do_while_op.cpp @@ -41,7 +41,7 @@ TEST_F(DoWhileOpTest, WithArgsSingle) { func.func @f(%a: i32, %pred: i1) -> i32 { %r = jeff.doWhile : (i32) args(%b_x = %a) { jeff.yield %b_x : i32 - } args(%c_x = %a) { + } args(%c_x) { jeff.yield %pred : i1 } return %r : i32 @@ -55,7 +55,7 @@ TEST_F(DoWhileOpTest, WithArgsMultiple) { func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { %r1, %r2 = jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { jeff.yield %b_x, %b_y : i32, i64 - } args(%c_x = %a, %c_y = %b) { + } args(%c_x, %c_y) { jeff.yield %pred : i1 } return %r1, %r2 : i32, i64 @@ -70,11 +70,11 @@ TEST_F(DoWhileOpTest, Nested) { %r = jeff.doWhile : (i32) args(%b_x = %a) { %s = jeff.doWhile : (i32) args(%bb = %b_x) { jeff.yield %bb : i32 - } args(%cc = %b_x) { + } args(%cc) { jeff.yield %pred : i1 } jeff.yield %s : i32 - } args(%c_x = %a) { + } args(%c_x) { jeff.yield %pred : i1 } return %r : i32 @@ -109,7 +109,7 @@ TEST_F(DoWhileOpTest, RoundTripIdempotent) { func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { %r1, %r2 = jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { jeff.yield %b_x, %b_y : i32, i64 - } args(%c_x = %a, %c_y = %b) { + } args(%c_x, %c_y) { jeff.yield %pred : i1 } return %r1, %r2 : i32, i64 @@ -134,9 +134,9 @@ TEST_F(DoWhileOpTest, RoundTripIdempotent) { TEST_F(DoWhileOpTest, InvalidMissingArgsKeyword) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.doWhile : (i32) (%c_x = %a) { + jeff.doWhile : (i32) (%b_x = %a) { jeff.yield %b_x : i32 - } args(%b_x = %a) { + } args(%c_x) { jeff.yield %pred : i1 } return @@ -149,9 +149,9 @@ TEST_F(DoWhileOpTest, InvalidMissingArgsKeyword) { TEST_F(DoWhileOpTest, InvalidArgCountMismatchWithTypes) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.doWhile : (i32, i64) args(%c_x = %a) { + jeff.doWhile : (i32, i64) args(%b_x = %a) { jeff.yield %b_x : i32 - } args(%b_x = %a) { + } args(%c_x) { jeff.yield %pred : i1 } return @@ -160,13 +160,13 @@ TEST_F(DoWhileOpTest, InvalidArgCountMismatchWithTypes) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// Condition has 2 args but body has 1. -TEST_F(DoWhileOpTest, InvalidCondBodyArgCountMismatch) { +// Body has 2 args but condition has 1. +TEST_F(DoWhileOpTest, InvalidBodyCondArgCountMismatch) { const std::string src = R"MLIR( func.func @f(%a: i32, %b: i64, %pred: i1) { - jeff.doWhile : (i32, i64) args(%c_x = %a, %c_y = %b) { - jeff.yield %b_x : i32 - } args(%b_x = %a) { + jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { + jeff.yield %b_x, %b_y : i32, i64 + } args(%c_x) { jeff.yield %pred : i1 } return @@ -175,28 +175,13 @@ TEST_F(DoWhileOpTest, InvalidCondBodyArgCountMismatch) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// Condition and body bind to different operand SSA values. -TEST_F(DoWhileOpTest, InvalidCondBodyOperandsDiffer) { - const std::string src = R"MLIR( - func.func @f(%a: i32, %b: i32, %pred: i1) -> i32 { - %r = jeff.doWhile : (i32) args(%c_x = %a) { - jeff.yield %b_x : i32 - } args(%b_x = %b) { - jeff.yield %pred : i1 - } - return %r : i32 - } - )MLIR"; - ASSERT_FALSE(parseSourceString(src, &ctx)); -} - // `args(...)` is non-empty but no `: (...)` is provided. TEST_F(DoWhileOpTest, InvalidMissingTypeAnnotation) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.doWhile args(%c_x = %a) { + jeff.doWhile args(%b_x = %a) { jeff.yield %b_x : i32 - } args(%b_x = %a) { + } args(%c_x) { jeff.yield %pred : i1 } return diff --git a/unittests/IR/test_parse_while_op.cpp b/unittests/IR/test_parse_while_op.cpp index 4af5154..538bf1f 100644 --- a/unittests/IR/test_parse_while_op.cpp +++ b/unittests/IR/test_parse_while_op.cpp @@ -41,7 +41,7 @@ TEST_F(WhileOpTest, WithArgsSingle) { func.func @f(%a: i32, %pred: i1) -> i32 { %r = jeff.while : (i32) args(%c_x = %a) { jeff.yield %pred : i1 - } args(%b_x = %a) { + } args(%b_x) { jeff.yield %b_x : i32 } return %r : i32 @@ -55,7 +55,7 @@ TEST_F(WhileOpTest, WithArgsMultiple) { func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { jeff.yield %pred : i1 - } args(%b_x = %a, %b_y = %b) { + } args(%b_x, %b_y) { jeff.yield %b_x, %b_y : i32, i64 } return %r1, %r2 : i32, i64 @@ -69,10 +69,10 @@ TEST_F(WhileOpTest, Nested) { func.func @f(%a: i32, %pred: i1) -> i32 { %r = jeff.while : (i32) args(%c_x = %a) { jeff.yield %pred : i1 - } args(%b_x = %a) { + } args(%b_x) { %s = jeff.while : (i32) args(%cc = %b_x) { jeff.yield %pred : i1 - } args(%bb = %b_x) { + } args(%bb) { jeff.yield %bb : i32 } jeff.yield %s : i32 @@ -109,7 +109,7 @@ TEST_F(WhileOpTest, RoundTripIdempotent) { func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { jeff.yield %pred : i1 - } args(%b_x = %a, %b_y = %b) { + } args(%b_x, %b_y) { jeff.yield %b_x, %b_y : i32, i64 } return %r1, %r2 : i32, i64 @@ -136,7 +136,7 @@ TEST_F(WhileOpTest, InvalidMissingArgsKeyword) { func.func @f(%a: i32, %pred: i1) { jeff.while : (i32) (%c_x = %a) { jeff.yield %pred : i1 - } args(%b_x = %a) { + } args(%b_x) { jeff.yield %b_x : i32 } return @@ -151,7 +151,7 @@ TEST_F(WhileOpTest, InvalidArgCountMismatchWithTypes) { func.func @f(%a: i32, %pred: i1) { jeff.while : (i32, i64) args(%c_x = %a) { jeff.yield %pred : i1 - } args(%b_x = %a) { + } args(%b_x) { jeff.yield %b_x : i32 } return @@ -166,7 +166,7 @@ TEST_F(WhileOpTest, InvalidCondBodyArgCountMismatch) { func.func @f(%a: i32, %b: i64, %pred: i1) { jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { jeff.yield %pred : i1 - } args(%b_x = %a) { + } args(%b_x) { jeff.yield %b_x : i32 } return @@ -175,28 +175,13 @@ TEST_F(WhileOpTest, InvalidCondBodyArgCountMismatch) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// Condition and body bind to different operand SSA values. -TEST_F(WhileOpTest, InvalidCondBodyOperandsDiffer) { - const std::string src = R"MLIR( - func.func @f(%a: i32, %b: i32, %pred: i1) -> i32 { - %r = jeff.while : (i32) args(%c_x = %a) { - jeff.yield %pred : i1 - } args(%b_x = %b) { - jeff.yield %b_x : i32 - } - return %r : i32 - } - )MLIR"; - ASSERT_FALSE(parseSourceString(src, &ctx)); -} - // `args(...)` is non-empty but no `: (...)` is provided. TEST_F(WhileOpTest, InvalidMissingTypeAnnotation) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { jeff.while args(%c_x = %a) { jeff.yield %pred : i1 - } args(%b_x = %a) { + } args(%b_x) { jeff.yield %b_x : i32 } return From 12c7cbc97cd1be3dc32b3a246321416fe2275518 Mon Sep 17 00:00:00 2001 From: rturrado Date: Fri, 15 May 2026 23:28:26 +0200 Subject: [PATCH 06/15] Tidy includes in JeffOps.cpp Fixes the two `misc-include-cleaner` warnings flagged by CI lint on PR #20 by dropping unused `` and adding `` (so `mlir::dyn_cast` resolves to a directly included header). Also tidies a couple of comments. Signed-off-by: rturrado --- lib/IR/JeffOps.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 2a46611..cf227af 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -27,10 +27,10 @@ #include #include #include +#include #include #include -#include #include using namespace mlir; @@ -304,10 +304,11 @@ ParseResult SwitchOp::parse(OpAsmParser& parser, OperationState& result) { return success(); } -// `in_values` and `out_values` are independent for switch — no count or type -// relationship between them. Each region's block args mirror `in_values`, and -// each region's yield mirrors the op's results, but those are region-level -// checks done in `verifyRegions`. No op-level `verify` is needed. +// `in_values` and `out_values` are independent for switch. There is no count or type relationship +// between them. +// Each region's block args mirror `in_values`, and each region's yield mirrors the op's results, +// but those are region-level checks done in `verifyRegions`. +// No op-level `verify` is needed. LogicalResult SwitchOp::verifyRegions() { auto inValueTypes = getInValues().getTypes(); auto resultTypes = getResultTypes(); @@ -320,7 +321,7 @@ LogicalResult SwitchOp::verifyRegions() { return success(); } - // Block-argument count match the `in_values` count. + // Block-argument count matches the `in_values` count. auto regionArgs = region.getArguments(); if (regionArgs.size() != inValueTypes.size()) { return emitOpError() << name << " region has " << regionArgs.size() @@ -340,7 +341,7 @@ LogicalResult SwitchOp::verifyRegions() { if (!yield) { return emitOpError() << name << " region must terminate with `jeff.yield`"; } - // `jeff.yield` operand count match the op's result count. + // `jeff.yield` operand count matches the op's result count. if (yield.getNumOperands() != resultTypes.size()) { return emitOpError() << name << " region yields " << yield.getNumOperands() << " values but op has " << resultTypes.size() << " results"; @@ -540,7 +541,8 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { Region* condition = result.addRegion(); Region* body = result.addRegion(); - // Parse optional `: ( types )`. Omitted when there are no in-values. + // Parse optional `: ( types )`. + // Omitted when there are no in-values. llvm::SmallVector types; if (succeeded(parser.parseOptionalColon())) { if (parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { @@ -673,7 +675,8 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { Region* body = result.addRegion(); Region* condition = result.addRegion(); - // Parse optional `: ( types )`. Omitted when there are no in-values. + // Parse optional `: ( types )`. + // Omitted when there are no in-values. llvm::SmallVector types; if (succeeded(parser.parseOptionalColon())) { if (parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { From 70acb20123272b57e10aaa6ef7a714a6629b55a4 Mon Sep 17 00:00:00 2001 From: Roberto Turrado Camblor Date: Wed, 20 May 2026 18:08:20 +0200 Subject: [PATCH 07/15] Apply suggestions from code review Co-authored-by: Daniel Haag <121057143+denialhaag@users.noreply.github.com> --- include/jeff/IR/JeffOps.td | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/jeff/IR/JeffOps.td b/include/jeff/IR/JeffOps.td index 56113be..44e6677 100644 --- a/include/jeff/IR/JeffOps.td +++ b/include/jeff/IR/JeffOps.td @@ -2018,8 +2018,7 @@ def ForOp : SCF_Op<"for", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { ``` The body's block-argument types are not printed explicitly: they - are recovered from `-> ( $result_types )`, which by the verifier - must equal the iter-arg types. + are recovered from `-> ( $result_types )` and must equal the iter-arg types. Example without carried values: ```mlir @@ -2090,7 +2089,7 @@ def WhileOp : SCF_Op<"while", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> - The `condition` region computes the loop's continuation predicate. It must terminate with a `jeff.yield` whose only operand is an - `i1`. By the Jeff language spec, the condition region may not + `i1`. By the `jeff` language spec, the condition region may not modify quantum state (qubits or qubit registers); it may only inspect classical metadata. - The `body` region is executed when the condition yields `true`. From 4bcd858c4e465049dd651324f2ba64f85b8528c6 Mon Sep 17 00:00:00 2001 From: Roberto Turrado Camblor Date: Wed, 20 May 2026 18:13:35 +0200 Subject: [PATCH 08/15] Apply suggestions from code review Co-authored-by: Daniel Haag <121057143+denialhaag@users.noreply.github.com> --- lib/IR/JeffOps.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index cf227af..7df8117 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -204,7 +204,7 @@ ParseResult SwitchOp::parse(OpAsmParser& parser, OperationState& result) { Region* defaultRegion = result.addRegion(); // Parse `(%sel, %a, %b)` — selector first, then in-values. - llvm::SmallVector operands; + llvm::SmallVector operands; if (parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { return parser.parseOperand(operands.emplace_back()); })) { @@ -706,7 +706,7 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { if (parser.parseRegion(*body, bodyRegionArgs)) { return failure(); } - WhileOp::ensureTerminator(*body, builder, result.location); + DoWhileOp::ensureTerminator(*body, builder, result.location); // Parse the condition region's `args ( $names )`. // Names only. The operands are inherited from the body's `args(...)`. @@ -731,7 +731,7 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { if (parser.parseRegion(*condition, condRegionArgs)) { return failure(); } - WhileOp::ensureTerminator(*condition, builder, result.location); + DoWhileOp::ensureTerminator(*condition, builder, result.location); // Resolve operands from the body's `args(...)`. if (parser.resolveOperands(bodyOperands, types, parser.getCurrentLocation(), result.operands)) { From af7d244e9dfa8e77ee2f715882932b656e699465 Mon Sep 17 00:00:00 2001 From: rturrado Date: Tue, 26 May 2026 16:41:01 +0200 Subject: [PATCH 09/15] Drop explicit inline capacity from llvm::SmallVector declarations in JeffOps.cpp Signed-off-by: rturrado --- lib/IR/JeffOps.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 7df8117..6a521ec 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -215,7 +215,7 @@ ParseResult SwitchOp::parse(OpAsmParser& parser, OperationState& result) { } // Parse `: (T_sel, T_in...)` — operand types, in the same order. - llvm::SmallVector operandTypes; + llvm::SmallVector operandTypes; if (parser.parseColon() || parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { return parser.parseType(operandTypes.emplace_back()); })) { @@ -228,7 +228,7 @@ ParseResult SwitchOp::parse(OpAsmParser& parser, OperationState& result) { } // Parse `-> (T_out...)` — result types, independent of operand types. - llvm::SmallVector resultTypes; + llvm::SmallVector resultTypes; if (parser.parseArrow() || parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { return parser.parseType(resultTypes.emplace_back()); })) { @@ -245,7 +245,7 @@ ParseResult SwitchOp::parse(OpAsmParser& parser, OperationState& result) { // Helper that parses `args(%x, %y) { ... }` into a region. auto parseRegionWithArgs = [&](Region& region) -> ParseResult { - llvm::SmallVector regionArgs; + llvm::SmallVector regionArgs; if (parser.parseKeyword("args") || parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { return parser.parseArgument(regionArgs.emplace_back()); @@ -412,8 +412,8 @@ ParseResult ForOp::parse(OpAsmParser& parser, OperationState& result) { } // Parse the optional initial iteration arguments. - llvm::SmallVector regionArgs; - llvm::SmallVector operands; + llvm::SmallVector regionArgs; + llvm::SmallVector operands; regionArgs.push_back(inductionVar); // Parse assignment list and result types list. @@ -543,7 +543,7 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { // Parse optional `: ( types )`. // Omitted when there are no in-values. - llvm::SmallVector types; + llvm::SmallVector types; if (succeeded(parser.parseOptionalColon())) { if (parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { return parser.parseType(types.emplace_back()); @@ -553,8 +553,8 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { } // Parse the condition region's `args ( $assignments )`. - llvm::SmallVector condRegionArgs; - llvm::SmallVector condOperands; + llvm::SmallVector condRegionArgs; + llvm::SmallVector condOperands; if (parser.parseKeyword("args") || parser.parseAssignmentList(condRegionArgs, condOperands)) { return failure(); } @@ -576,7 +576,7 @@ ParseResult WhileOp::parse(OpAsmParser& parser, OperationState& result) { // Parse the body region's `args ( $names )`. // Names only. The operands are inherited from the condition's `args(...)`. - llvm::SmallVector bodyRegionArgs; + llvm::SmallVector bodyRegionArgs; if (parser.parseKeyword("args") || parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { return parser.parseArgument(bodyRegionArgs.emplace_back()); @@ -677,7 +677,7 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { // Parse optional `: ( types )`. // Omitted when there are no in-values. - llvm::SmallVector types; + llvm::SmallVector types; if (succeeded(parser.parseOptionalColon())) { if (parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { return parser.parseType(types.emplace_back()); @@ -687,8 +687,8 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { } // Parse the body region's `args ( $assignments )`. - llvm::SmallVector bodyRegionArgs; - llvm::SmallVector bodyOperands; + llvm::SmallVector bodyRegionArgs; + llvm::SmallVector bodyOperands; if (parser.parseKeyword("args") || parser.parseAssignmentList(bodyRegionArgs, bodyOperands)) { return failure(); } @@ -710,7 +710,7 @@ ParseResult DoWhileOp::parse(OpAsmParser& parser, OperationState& result) { // Parse the condition region's `args ( $names )`. // Names only. The operands are inherited from the body's `args(...)`. - llvm::SmallVector condRegionArgs; + llvm::SmallVector condRegionArgs; if (parser.parseKeyword("args") || parser.parseCommaSeparatedList(OpAsmParser::Delimiter::Paren, [&]() { return parser.parseArgument(condRegionArgs.emplace_back()); From 4b19e55bada79a8fb0cd9529a148a36a7c3ce8bf Mon Sep 17 00:00:00 2001 From: rturrado Date: Tue, 26 May 2026 16:47:14 +0200 Subject: [PATCH 10/15] Rewrite 'Jeff language' as '`jeff` language' in JeffOps.td Signed-off-by: rturrado --- include/jeff/IR/JeffOps.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jeff/IR/JeffOps.td b/include/jeff/IR/JeffOps.td index 44e6677..61df2df 100644 --- a/include/jeff/IR/JeffOps.td +++ b/include/jeff/IR/JeffOps.td @@ -2163,7 +2163,7 @@ def WhileOp : SCF_Op<"while", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> - Both regions share a single op-level operand list; in SCF the operand list feeds only the `before` region. - Input types and result types are required to match (single - signature), reflecting the Jeff language design choice of T1 = T2. + signature), reflecting the `jeff` language design choice of T1 = T2. }]; let arguments = (ins @@ -2194,7 +2194,7 @@ def DoWhileOp : SCF_Op<"doWhile", [SingleBlockImplicitTerminator<"jeff::YieldOp" - The `condition` region computes the loop's continuation predicate. It must terminate with a `jeff.yield` whose only operand is an - `i1`. By the Jeff language spec, the condition region may not + `i1`. By the `jeff` language spec, the condition region may not modify quantum state (qubits or qubit registers); it may only inspect classical metadata. - The `body` region executes unconditionally on the first iteration, From c1ed62d029119448009a9995fe6a94f5c6425831 Mon Sep 17 00:00:00 2001 From: rturrado Date: Tue, 26 May 2026 16:57:09 +0200 Subject: [PATCH 11/15] Rewrite SwitchOp::verifyRegions header comment as a Doxygen comment Signed-off-by: rturrado --- lib/IR/JeffOps.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 6a521ec..313ee9e 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -304,11 +304,13 @@ ParseResult SwitchOp::parse(OpAsmParser& parser, OperationState& result) { return success(); } -// `in_values` and `out_values` are independent for switch. There is no count or type relationship -// between them. -// Each region's block args mirror `in_values`, and each region's yield mirrors the op's results, -// but those are region-level checks done in `verifyRegions`. -// No op-level `verify` is needed. +/** + * @brief Verifies the 'case' and 'default' regions of a `jeff.switch`. + * + * `in_values` and `out_values` are independent for switch: there is no count or type relationship + * between them. Each region's block arguments mirror `in_values`, and each region's `jeff.yield` + * mirrors the op's results - two separate checks. + */ LogicalResult SwitchOp::verifyRegions() { auto inValueTypes = getInValues().getTypes(); auto resultTypes = getResultTypes(); From 4f8480c27d3e29b4891f434a909bd7c222c946d1 Mon Sep 17 00:00:00 2001 From: rturrado Date: Tue, 26 May 2026 18:02:51 +0200 Subject: [PATCH 12/15] ForOp induction variable has to have the same type as start, stop and step Update ForOP description at JeffOps.td. Enforce check at ForOp::verifyRegions at JeffOps.cpp. Add tests to check the constraint at test_parse_for_op.cpp. Signed-off-by: rturrado --- include/jeff/IR/JeffOps.td | 6 ++-- lib/IR/JeffOps.cpp | 11 ++++-- unittests/IR/test_parse_for_op.cpp | 54 ++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/include/jeff/IR/JeffOps.td b/include/jeff/IR/JeffOps.td index 61df2df..f76626b 100644 --- a/include/jeff/IR/JeffOps.td +++ b/include/jeff/IR/JeffOps.td @@ -1993,7 +1993,7 @@ def ForOp : SCF_Op<"for", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { The op takes three SSA integer operands — `start`, `stop`, and `step`, each of which must be a `SupportedIntType` (one of `i1`, `i8`, `i16`, `i32`, `i64`). The induction variable has the same - type as `start`. + type as `start`, `stop`, and `step`. The op may optionally carry a list of loop-carried values (`in_values`). These are bound to the corresponding body block @@ -2038,14 +2038,12 @@ def ForOp : SCF_Op<"for", [SingleBlockImplicitTerminator<"jeff::YieldOp">]> { Invariants enforced by the verifier: - `start`, `stop`, and `step` are each a `SupportedIntType`. - - The induction variable has the same type as `start`. + - The induction variable has the same type as `start`, `stop`, and `step`. - The number of `in_values` equals the number of results. - Each `in_value`, the corresponding body block argument, and the corresponding result have matching types. Underspecified (not currently enforced): - - Whether `start`, `stop`, and `step` must share a common type - (only `start` is checked against the induction variable). - Comparison signedness (the operand types are signless MLIR integers). - That `step` is non-zero and positive. diff --git a/lib/IR/JeffOps.cpp b/lib/IR/JeffOps.cpp index 313ee9e..8b1f9b4 100644 --- a/lib/IR/JeffOps.cpp +++ b/lib/IR/JeffOps.cpp @@ -489,8 +489,15 @@ LogicalResult ForOp::verify() { // https://github.com/llvm/llvm-project/blob/a58268a77cdbfeb0b71f3e76d169ddd7edf7a4df/mlir/lib/Dialect/SCF/IR/SCF.cpp#L359 LogicalResult ForOp::verifyRegions() { auto inductionVar = getBody().getArgument(0); - if (inductionVar.getType() != getStart().getType()) { - return emitOpError("expected induction variable to be same type as bounds and step"); + auto inductionVarType = inductionVar.getType(); + if (inductionVarType != getStart().getType()) { + return emitOpError("expected induction variable to be same type as start"); + } + if (inductionVarType != getStop().getType()) { + return emitOpError("expected induction variable to be same type as stop"); + } + if (inductionVarType != getStep().getType()) { + return emitOpError("expected induction variable to be same type as step"); } auto inValues = getInValues(); diff --git a/unittests/IR/test_parse_for_op.cpp b/unittests/IR/test_parse_for_op.cpp index 17d5214..19a2e0d 100644 --- a/unittests/IR/test_parse_for_op.cpp +++ b/unittests/IR/test_parse_for_op.cpp @@ -194,6 +194,32 @@ TEST_F(ForOpTest, InvalidArgCountMismatch) { ASSERT_FALSE(parseSourceString(src, &ctx)); } +// === Invalid semantics tests (parse-level) === + +// `stop` is declared with a different type than the loop's type annotation. +// The parser's `resolveOperand` catches the mismatch. +TEST_F(ForOpTest, InvalidStopTypeMismatch) { + const std::string src = R"MLIR( + func.func @f(%lo: i32, %hi: i64, %s: i32) { + jeff.for %i = %lo to %hi step %s : i32 {} + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +// `step` is declared with a different type than the loop's type annotation. +// The parser's `resolveOperand` catches the mismatch. +TEST_F(ForOpTest, InvalidStepTypeMismatch) { + const std::string src = R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i64) { + jeff.for %i = %lo to %hi step %s : i32 {} + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + // === Invalid semantics tests (verify-level) === // `index` is rejected by SupportedIntType. @@ -217,3 +243,31 @@ TEST_F(ForOpTest, InvalidFloatType) { )MLIR"; ASSERT_FALSE(parseSourceString(src, &ctx)); } + +// Generic form bypasses the custom parser, letting start/stop/step have distinct types. +// The verifier check in `ForOp::verifyRegions` is the only thing that catches the mismatch. +TEST_F(ForOpTest, InvalidStopTypeMismatchGenericForm) { + const std::string src = R"MLIR( + func.func @f(%lo: i32, %hi: i64, %s: i32) { + "jeff.for"(%lo, %hi, %s) ({ + ^bb0(%i: i32): + "jeff.yield"() : () -> () + }) : (i32, i64, i32) -> () + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} + +TEST_F(ForOpTest, InvalidStepTypeMismatchGenericForm) { + const std::string src = R"MLIR( + func.func @f(%lo: i32, %hi: i32, %s: i64) { + "jeff.for"(%lo, %hi, %s) ({ + ^bb0(%i: i32): + "jeff.yield"() : () -> () + }) : (i32, i32, i64) -> () + return + } + )MLIR"; + ASSERT_FALSE(parseSourceString(src, &ctx)); +} From 714d4b168144d817130a3018ca0c56f38d4b218b Mon Sep 17 00:00:00 2001 From: rturrado Date: Wed, 27 May 2026 00:19:40 +0200 Subject: [PATCH 13/15] Rewrite while/doWhile/switch parse tests for isolated-region compliance Signed-off-by: rturrado --- unittests/IR/test_parse_do_while_op.cpp | 108 +++++++++++++----------- unittests/IR/test_parse_switch_op.cpp | 17 ++-- unittests/IR/test_parse_while_op.cpp | 108 +++++++++++++----------- 3 files changed, 128 insertions(+), 105 deletions(-) diff --git a/unittests/IR/test_parse_do_while_op.cpp b/unittests/IR/test_parse_do_while_op.cpp index 8eb6b8b..71c8371 100644 --- a/unittests/IR/test_parse_do_while_op.cpp +++ b/unittests/IR/test_parse_do_while_op.cpp @@ -21,14 +21,23 @@ class DoWhileOpTest : public ::testing::Test { void SetUp() override { ctx.loadDialect(); } }; +// Jeff SCF regions are isolated from above: every value used inside a region must come +// from a block argument or be computed locally. +// Tests with carried values pass the loop predicate through as an additional in-value +// (idiomatic Jeff). +// The `NoArgs` and `ExplicitEmptyBodyYield` tests exercise the no-in-values parser path +// and therefore compute the predicate inside the condition via `jeff.int_const1`, +// since they have no operands to inherit one from. + // === Valid tests === TEST_F(DoWhileOpTest, NoArgs) { const std::string src = R"MLIR( - func.func @f(%pred: i1) { + func.func @f() { jeff.doWhile args() { } args() { - jeff.yield %pred : i1 + %c_pred = jeff.int_const1(true) : i1 + jeff.yield %c_pred : i1 } return } @@ -39,12 +48,12 @@ TEST_F(DoWhileOpTest, NoArgs) { TEST_F(DoWhileOpTest, WithArgsSingle) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) -> i32 { - %r = jeff.doWhile : (i32) args(%b_x = %a) { - jeff.yield %b_x : i32 - } args(%c_x) { - jeff.yield %pred : i1 + %r1, %r2 = jeff.doWhile : (i32, i1) args(%b_x = %a, %b_pred = %pred) { + jeff.yield %b_x, %b_pred : i32, i1 + } args(%c_x, %c_pred) { + jeff.yield %c_pred : i1 } - return %r : i32 + return %r1 : i32 } )MLIR"; ASSERT_TRUE(parseSourceString(src, &ctx)); @@ -53,10 +62,10 @@ TEST_F(DoWhileOpTest, WithArgsSingle) { TEST_F(DoWhileOpTest, WithArgsMultiple) { const std::string src = R"MLIR( func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { - %r1, %r2 = jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { - jeff.yield %b_x, %b_y : i32, i64 - } args(%c_x, %c_y) { - jeff.yield %pred : i1 + %r1, %r2, %r3 = jeff.doWhile : (i32, i64, i1) args(%b_x = %a, %b_y = %b, %b_pred = %pred) { + jeff.yield %b_x, %b_y, %b_pred : i32, i64, i1 + } args(%c_x, %c_y, %c_pred) { + jeff.yield %c_pred : i1 } return %r1, %r2 : i32, i64 } @@ -67,33 +76,34 @@ TEST_F(DoWhileOpTest, WithArgsMultiple) { TEST_F(DoWhileOpTest, Nested) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) -> i32 { - %r = jeff.doWhile : (i32) args(%b_x = %a) { - %s = jeff.doWhile : (i32) args(%bb = %b_x) { - jeff.yield %bb : i32 - } args(%cc) { - jeff.yield %pred : i1 + %r1, %r2 = jeff.doWhile : (i32, i1) args(%b_x = %a, %b_pred = %pred) { + %s1, %s2 = jeff.doWhile : (i32, i1) args(%bb = %b_x, %bbp = %b_pred) { + jeff.yield %bb, %bbp : i32, i1 + } args(%cc, %ccp) { + jeff.yield %ccp : i1 } - jeff.yield %s : i32 - } args(%c_x) { - jeff.yield %pred : i1 + jeff.yield %s1, %s2 : i32, i1 + } args(%c_x, %c_pred) { + jeff.yield %c_pred : i1 } - return %r : i32 + return %r1 : i32 } )MLIR"; ASSERT_TRUE(parseSourceString(src, &ctx)); } -// `DoWhileOp::print` elides the empty yield for the body when there are no -// in-values, but bare `jeff.yield` is still valid input. It can come from -// `YieldOp`'s own printer, generic-form output (`-mlir-print-op-generic`), -// or handwritten MLIR. The parser must accept this shape. +// `DoWhileOp::print` elides the empty yield for the body when there are no in-values, +// but bare `jeff.yield` is still valid input. It can come from `YieldOp`'s own printer, +// generic-form output (`-mlir-print-op-generic`), or handwritten MLIR. +// The parser must accept this shape. TEST_F(DoWhileOpTest, ExplicitEmptyBodyYield) { const std::string src = R"MLIR( - func.func @f(%pred: i1) { + func.func @f() { jeff.doWhile args() { jeff.yield } args() { - jeff.yield %pred : i1 + %c_pred = jeff.int_const1(true) : i1 + jeff.yield %c_pred : i1 } return } @@ -107,10 +117,10 @@ TEST_F(DoWhileOpTest, ExplicitEmptyBodyYield) { TEST_F(DoWhileOpTest, RoundTripIdempotent) { const std::string src = R"MLIR( func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { - %r1, %r2 = jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { - jeff.yield %b_x, %b_y : i32, i64 - } args(%c_x, %c_y) { - jeff.yield %pred : i1 + %r1, %r2, %r3 = jeff.doWhile : (i32, i64, i1) args(%b_x = %a, %b_y = %b, %b_pred = %pred) { + jeff.yield %b_x, %b_y, %b_pred : i32, i64, i1 + } args(%c_x, %c_y, %c_pred) { + jeff.yield %c_pred : i1 } return %r1, %r2 : i32, i64 } @@ -134,10 +144,10 @@ TEST_F(DoWhileOpTest, RoundTripIdempotent) { TEST_F(DoWhileOpTest, InvalidMissingArgsKeyword) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.doWhile : (i32) (%b_x = %a) { - jeff.yield %b_x : i32 - } args(%c_x) { - jeff.yield %pred : i1 + jeff.doWhile : (i32, i1) (%b_x = %a, %b_pred = %pred) { + jeff.yield %b_x, %b_pred : i32, i1 + } args(%c_x, %c_pred) { + jeff.yield %c_pred : i1 } return } @@ -145,14 +155,14 @@ TEST_F(DoWhileOpTest, InvalidMissingArgsKeyword) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// `: (...)` is present but `args(...)` is empty (or vice versa). +// In-values count and body args count differ. TEST_F(DoWhileOpTest, InvalidArgCountMismatchWithTypes) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.doWhile : (i32, i64) args(%b_x = %a) { - jeff.yield %b_x : i32 - } args(%c_x) { - jeff.yield %pred : i1 + jeff.doWhile : (i32, i1, i64) args(%b_x = %a, %b_pred = %pred) { + jeff.yield %b_x, %b_pred : i32, i1 + } args(%c_x, %c_pred) { + jeff.yield %c_pred : i1 } return } @@ -160,14 +170,14 @@ TEST_F(DoWhileOpTest, InvalidArgCountMismatchWithTypes) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// Body has 2 args but condition has 1. +// Body args count and condition args count differ. TEST_F(DoWhileOpTest, InvalidBodyCondArgCountMismatch) { const std::string src = R"MLIR( func.func @f(%a: i32, %b: i64, %pred: i1) { - jeff.doWhile : (i32, i64) args(%b_x = %a, %b_y = %b) { - jeff.yield %b_x, %b_y : i32, i64 - } args(%c_x) { - jeff.yield %pred : i1 + jeff.doWhile : (i32, i64, i1) args(%b_x = %a, %b_y = %b, %b_pred = %pred) { + jeff.yield %b_x, %b_y, %b_pred : i32, i64, i1 + } args(%c_x, %c_pred) { + jeff.yield %c_pred : i1 } return } @@ -175,14 +185,14 @@ TEST_F(DoWhileOpTest, InvalidBodyCondArgCountMismatch) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// `args(...)` is non-empty but no `: (...)` is provided. +// Missing in-value types with non-empty body args. TEST_F(DoWhileOpTest, InvalidMissingTypeAnnotation) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.doWhile args(%b_x = %a) { - jeff.yield %b_x : i32 - } args(%c_x) { - jeff.yield %pred : i1 + jeff.doWhile args(%b_x = %a, %b_pred = %pred) { + jeff.yield %b_x, %b_pred : i32, i1 + } args(%c_x, %c_pred) { + jeff.yield %c_pred : i1 } return } diff --git a/unittests/IR/test_parse_switch_op.cpp b/unittests/IR/test_parse_switch_op.cpp index 21a34d2..1abc88c 100644 --- a/unittests/IR/test_parse_switch_op.cpp +++ b/unittests/IR/test_parse_switch_op.cpp @@ -96,17 +96,20 @@ TEST_F(SwitchOpTest, OnlyDefault) { ASSERT_TRUE(parseSourceString(src, &ctx)); } -// In-value types and result types are independent: -// the in-values are (i32, i64), but the op yields a single i1. +// In-value types and result types are independent: the in-values are (i32, i64, i1), +// but the op yields a single i1. +// The yielded value comes from `%q`, which is the i1 in-value passed in. +// Regions are isolated from above, so `%p` from the function scope cannot be used directly inside +// the regions. TEST_F(SwitchOpTest, DecoupledInValueAndResultTypes) { const std::string src = R"MLIR( func.func @f(%sel: i32, %a: i32, %b: i64, %p: i1) -> i1 { - %r = jeff.switch (%sel, %a, %b) : (i32, i32, i64) -> (i1) - case 0 args(%x, %y) { - jeff.yield %p : i1 + %r = jeff.switch (%sel, %a, %b, %p) : (i32, i32, i64, i1) -> (i1) + case 0 args(%x, %y, %q) { + jeff.yield %q : i1 } - default args(%x, %y) { - jeff.yield %p : i1 + default args(%x, %y, %q) { + jeff.yield %q : i1 } return %r : i1 } diff --git a/unittests/IR/test_parse_while_op.cpp b/unittests/IR/test_parse_while_op.cpp index 538bf1f..ca3832b 100644 --- a/unittests/IR/test_parse_while_op.cpp +++ b/unittests/IR/test_parse_while_op.cpp @@ -21,13 +21,22 @@ class WhileOpTest : public ::testing::Test { void SetUp() override { ctx.loadDialect(); } }; +// Jeff SCF regions are isolated from above: every value used inside a region must come +// from a block argument or be computed locally. +// Tests with carried values pass the loop predicate through as an additional in-value +// (idiomatic Jeff). +// The `NoArgs` and `ExplicitEmptyBodyYield` tests exercise the no-in-values parser path +// and therefore compute the predicate inside the condition via `jeff.int_const1`, +// since they have no operands to inherit one from. + // === Valid tests === TEST_F(WhileOpTest, NoArgs) { const std::string src = R"MLIR( - func.func @f(%pred: i1) { + func.func @f() { jeff.while args() { - jeff.yield %pred : i1 + %c_pred = jeff.int_const1(true) : i1 + jeff.yield %c_pred : i1 } args() { } return @@ -39,12 +48,12 @@ TEST_F(WhileOpTest, NoArgs) { TEST_F(WhileOpTest, WithArgsSingle) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) -> i32 { - %r = jeff.while : (i32) args(%c_x = %a) { - jeff.yield %pred : i1 - } args(%b_x) { - jeff.yield %b_x : i32 + %r1, %r2 = jeff.while : (i32, i1) args(%c_x = %a, %c_pred = %pred) { + jeff.yield %c_pred : i1 + } args(%b_x, %b_pred) { + jeff.yield %b_x, %b_pred : i32, i1 } - return %r : i32 + return %r1 : i32 } )MLIR"; ASSERT_TRUE(parseSourceString(src, &ctx)); @@ -53,10 +62,10 @@ TEST_F(WhileOpTest, WithArgsSingle) { TEST_F(WhileOpTest, WithArgsMultiple) { const std::string src = R"MLIR( func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { - %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { - jeff.yield %pred : i1 - } args(%b_x, %b_y) { - jeff.yield %b_x, %b_y : i32, i64 + %r1, %r2, %r3 = jeff.while : (i32, i64, i1) args(%c_x = %a, %c_y = %b, %c_pred = %pred) { + jeff.yield %c_pred : i1 + } args(%b_x, %b_y, %b_pred) { + jeff.yield %b_x, %b_y, %b_pred : i32, i64, i1 } return %r1, %r2 : i32, i64 } @@ -67,31 +76,32 @@ TEST_F(WhileOpTest, WithArgsMultiple) { TEST_F(WhileOpTest, Nested) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) -> i32 { - %r = jeff.while : (i32) args(%c_x = %a) { - jeff.yield %pred : i1 - } args(%b_x) { - %s = jeff.while : (i32) args(%cc = %b_x) { - jeff.yield %pred : i1 - } args(%bb) { - jeff.yield %bb : i32 + %r1, %r2 = jeff.while : (i32, i1) args(%c_x = %a, %c_pred = %pred) { + jeff.yield %c_pred : i1 + } args(%b_x, %b_pred) { + %s1, %s2 = jeff.while : (i32, i1) args(%cc = %b_x, %ccp = %b_pred) { + jeff.yield %ccp : i1 + } args(%bb, %bbp) { + jeff.yield %bb, %bbp : i32, i1 } - jeff.yield %s : i32 + jeff.yield %s1, %s2 : i32, i1 } - return %r : i32 + return %r1 : i32 } )MLIR"; ASSERT_TRUE(parseSourceString(src, &ctx)); } -// `WhileOp::print` elides the empty yield for the body when there are no -// in-values, but bare `jeff.yield` is still valid input. It can come from -// `YieldOp`'s own printer, generic-form output (`-mlir-print-op-generic`), -// or handwritten MLIR. The parser must accept this shape. +// `WhileOp::print` elides the empty yield for the body when there are no in-values, +// but bare `jeff.yield` is still valid input. It can come from `YieldOp`'s own printer, +// generic-form output (`-mlir-print-op-generic`), or handwritten MLIR. +// The parser must accept this shape. TEST_F(WhileOpTest, ExplicitEmptyBodyYield) { const std::string src = R"MLIR( - func.func @f(%pred: i1) { + func.func @f() { jeff.while args() { - jeff.yield %pred : i1 + %c_pred = jeff.int_const1(true) : i1 + jeff.yield %c_pred : i1 } args() { jeff.yield } @@ -107,10 +117,10 @@ TEST_F(WhileOpTest, ExplicitEmptyBodyYield) { TEST_F(WhileOpTest, RoundTripIdempotent) { const std::string src = R"MLIR( func.func @f(%a: i32, %b: i64, %pred: i1) -> (i32, i64) { - %r1, %r2 = jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { - jeff.yield %pred : i1 - } args(%b_x, %b_y) { - jeff.yield %b_x, %b_y : i32, i64 + %r1, %r2, %r3 = jeff.while : (i32, i64, i1) args(%c_x = %a, %c_y = %b, %c_pred = %pred) { + jeff.yield %c_pred : i1 + } args(%b_x, %b_y, %b_pred) { + jeff.yield %b_x, %b_y, %b_pred : i32, i64, i1 } return %r1, %r2 : i32, i64 } @@ -134,10 +144,10 @@ TEST_F(WhileOpTest, RoundTripIdempotent) { TEST_F(WhileOpTest, InvalidMissingArgsKeyword) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.while : (i32) (%c_x = %a) { - jeff.yield %pred : i1 - } args(%b_x) { - jeff.yield %b_x : i32 + jeff.while : (i32, i1) (%c_x = %a, %c_pred = %pred) { + jeff.yield %c_pred : i1 + } args(%b_x, %b_pred) { + jeff.yield %b_x, %b_pred : i32, i1 } return } @@ -145,14 +155,14 @@ TEST_F(WhileOpTest, InvalidMissingArgsKeyword) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// `: (...)` is present but `args(...)` is empty (or vice versa). +// In-values count and condition args count differ. TEST_F(WhileOpTest, InvalidArgCountMismatchWithTypes) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.while : (i32, i64) args(%c_x = %a) { - jeff.yield %pred : i1 - } args(%b_x) { - jeff.yield %b_x : i32 + jeff.while : (i32, i1, i64) args(%c_x = %a, %c_pred = %pred) { + jeff.yield %c_pred : i1 + } args(%b_x, %b_pred) { + jeff.yield %b_x, %b_pred : i32, i1 } return } @@ -160,14 +170,14 @@ TEST_F(WhileOpTest, InvalidArgCountMismatchWithTypes) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// Condition has 2 args but body has 1. +// Condition args count and body args count differ. TEST_F(WhileOpTest, InvalidCondBodyArgCountMismatch) { const std::string src = R"MLIR( func.func @f(%a: i32, %b: i64, %pred: i1) { - jeff.while : (i32, i64) args(%c_x = %a, %c_y = %b) { - jeff.yield %pred : i1 - } args(%b_x) { - jeff.yield %b_x : i32 + jeff.while : (i32, i64, i1) args(%c_x = %a, %c_y = %b, %c_pred = %pred) { + jeff.yield %c_pred : i1 + } args(%b_x, %b_pred) { + jeff.yield %b_x, %b_pred : i64, i1 } return } @@ -175,14 +185,14 @@ TEST_F(WhileOpTest, InvalidCondBodyArgCountMismatch) { ASSERT_FALSE(parseSourceString(src, &ctx)); } -// `args(...)` is non-empty but no `: (...)` is provided. +// Missing in-value types with non-empty condition args. TEST_F(WhileOpTest, InvalidMissingTypeAnnotation) { const std::string src = R"MLIR( func.func @f(%a: i32, %pred: i1) { - jeff.while args(%c_x = %a) { - jeff.yield %pred : i1 - } args(%b_x) { - jeff.yield %b_x : i32 + jeff.while args(%c_x = %a, %c_pred = %pred) { + jeff.yield %c_pred : i1 + } args(%b_x, %b_pred) { + jeff.yield %b_x, %b_pred : i32, i1 } return } From f62109c49fecf4b9b7d60c97d519c9aef233148a Mon Sep 17 00:00:00 2001 From: Roberto Turrado Camblor Date: Fri, 29 May 2026 15:16:50 +0200 Subject: [PATCH 14/15] Apply suggestions from code review Co-authored-by: Daniel Haag <121057143+denialhaag@users.noreply.github.com> --- unittests/IR/test_parse_do_while_op.cpp | 8 ++------ unittests/IR/test_parse_while_op.cpp | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/unittests/IR/test_parse_do_while_op.cpp b/unittests/IR/test_parse_do_while_op.cpp index 71c8371..71a1ff8 100644 --- a/unittests/IR/test_parse_do_while_op.cpp +++ b/unittests/IR/test_parse_do_while_op.cpp @@ -21,13 +21,9 @@ class DoWhileOpTest : public ::testing::Test { void SetUp() override { ctx.loadDialect(); } }; -// Jeff SCF regions are isolated from above: every value used inside a region must come +// jeff SCF regions are isolated from above: every value used inside a region must come // from a block argument or be computed locally. -// Tests with carried values pass the loop predicate through as an additional in-value -// (idiomatic Jeff). -// The `NoArgs` and `ExplicitEmptyBodyYield` tests exercise the no-in-values parser path -// and therefore compute the predicate inside the condition via `jeff.int_const1`, -// since they have no operands to inherit one from. +// Tests with carried values pass the loop predicate through as an additional in-value. // === Valid tests === diff --git a/unittests/IR/test_parse_while_op.cpp b/unittests/IR/test_parse_while_op.cpp index ca3832b..24a502d 100644 --- a/unittests/IR/test_parse_while_op.cpp +++ b/unittests/IR/test_parse_while_op.cpp @@ -21,13 +21,9 @@ class WhileOpTest : public ::testing::Test { void SetUp() override { ctx.loadDialect(); } }; -// Jeff SCF regions are isolated from above: every value used inside a region must come +// jeff SCF regions are isolated from above: every value used inside a region must come // from a block argument or be computed locally. -// Tests with carried values pass the loop predicate through as an additional in-value -// (idiomatic Jeff). -// The `NoArgs` and `ExplicitEmptyBodyYield` tests exercise the no-in-values parser path -// and therefore compute the predicate inside the condition via `jeff.int_const1`, -// since they have no operands to inherit one from. +// Tests with carried values pass the loop predicate through as an additional in-value. // === Valid tests === From eaf80b13282aadb2a239a56506d153f3392a4b74 Mon Sep 17 00:00:00 2001 From: rturrado Date: Fri, 29 May 2026 17:01:03 +0200 Subject: [PATCH 15/15] Address comments from Daniel Haag's review Add jeff.yield to ForOpTest::BasicFormI{32,64} so they meaningfully differ from ForOpTest::ImplicitYield (previously both shapes looked identical). Fix ForOpTest::Nested to make %lo, %hi, and %s visible to the inner jeff.for by passing them through the outer loop's args (Jeff regions are isolated from above). Signed-off-by: rturrado --- unittests/IR/test_parse_for_op.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/unittests/IR/test_parse_for_op.cpp b/unittests/IR/test_parse_for_op.cpp index 19a2e0d..636d6b7 100644 --- a/unittests/IR/test_parse_for_op.cpp +++ b/unittests/IR/test_parse_for_op.cpp @@ -26,7 +26,9 @@ class ForOpTest : public ::testing::Test { TEST_F(ForOpTest, BasicFormI32) { const std::string src = R"MLIR( func.func @f(%lo: i32, %hi: i32, %s: i32) { - jeff.for %i = %lo to %hi step %s : i32 {} + jeff.for %i = %lo to %hi step %s : i32 { + jeff.yield + } return } )MLIR"; @@ -36,7 +38,9 @@ TEST_F(ForOpTest, BasicFormI32) { TEST_F(ForOpTest, BasicFormI64) { const std::string src = R"MLIR( func.func @f(%lo: i64, %hi: i64, %s: i64) { - jeff.for %i = %lo to %hi step %s : i64 {} + jeff.for %i = %lo to %hi step %s : i64 { + jeff.yield + } return } )MLIR"; @@ -80,22 +84,23 @@ TEST_F(ForOpTest, WithArgsMultiple) { ASSERT_TRUE(parseSourceString(src, &ctx)); } +// Inner `jeff.for` cannot see %lo, %hi, and %s from the enclosing function, +// so the outer `jeff.for` has to pass them in as args. TEST_F(ForOpTest, Nested) { const std::string src = R"MLIR( - func.func @f(%lo: i32, %hi: i32, %s: i32) { - jeff.for %i = %lo to %hi step %s : i32 { - jeff.for %j = %lo to %hi step %s : i32 {} + func.func @f(%lo: i32, %hi: i32, %s: i32) -> (i32, i32, i32) { + %r1, %r2, %r3 = jeff.for %i = %lo to %hi step %s args(%lo_arg = %lo, %hi_arg = %hi, %s_arg = %s) -> (i32, i32, i32) : i32 { + jeff.for %j = %lo_arg to %hi_arg step %s_arg : i32 {} + jeff.yield %lo_arg, %hi_arg, %s_arg : i32, i32, i32 } - return + return %r1, %r2, %r3 : i32, i32, i32 } )MLIR"; ASSERT_TRUE(parseSourceString(src, &ctx)); } -// `ForOp::print` elides the empty yield, -// but bare `jeff.yield` is still valid input. -// It can come from `YieldOp`'s own printer, -// generic-form output (`-mlir-print-op-generic`), or +// `ForOp::print` elides the empty yield, but bare `jeff.yield` is still valid input. +// It can come from `YieldOp`'s own printer, generic-form output (`-mlir-print-op-generic`), or // handwritten MLIR. // The parser must accept this shape. TEST_F(ForOpTest, ExplicitEmptyYield) {