From 1ed3dcc65647dc7f3eabf19a40f2abb456c89bba Mon Sep 17 00:00:00 2001 From: chances190 Date: Thu, 9 Apr 2026 21:13:47 -0300 Subject: [PATCH 1/6] feat: implement parsing for tagged types (struct, union, enum) --- .github/copilot-instructions.md | 26 ++ docs/00-guide.md | 443 ++++++++++++++++++++++++++++++ src/interpreter/eval_expr.rs | 4 +- src/ir/ast.rs | 45 ++- src/parser/functions.rs | 64 +---- src/parser/identifiers.rs | 23 +- src/parser/mod.rs | 2 + src/parser/program.rs | 34 ++- src/parser/statements.rs | 6 +- src/parser/types.rs | 124 +++++++++ src/semantic/type_checker.rs | 80 +++--- tests/fixtures/tagged_types.minic | 5 + tests/parser.rs | 345 ++++++++++++++++++++--- tests/program.rs | 39 ++- 14 files changed, 1087 insertions(+), 153 deletions(-) create mode 100644 .github/copilot-instructions.md create mode 100644 docs/00-guide.md create mode 100644 src/parser/types.rs create mode 100644 tests/fixtures/tagged_types.minic diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..2547d9d --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,26 @@ +--- +name: "miniC educational parser code" +description: Coding standards for this repo +--- + +When working in this project, prioritize code that is: + +- clear and easy to read for educational purposes +- self-documenting through explicit naming, structure, and straightforward control flow +- idiomatic Rust, but not at the expense of readability +- aligned with the existing miniC codebase style and parser-combinator design + +Prefer: + +- descriptive function, type, and variable names +- simple parser structure and well-scoped helper functions +- comments only when they explain why a design choice matters, not what obvious code does +- preserving spec-driven behavior and making language rules understandable + +Avoid: + +- overly terse or clever code that reduces comprehension +- large unrelated refactors when the task is focused on parser/AST/spec 
behavior +- introducing new patterns that conflict with the established codebase style + +IMPORTANT! Always thoroughly review the relevant `docs/` documentation before starting a task and again whenever you encounter a roadblock. diff --git a/docs/00-guide.md b/docs/00-guide.md new file mode 100644 index 0000000..5001970 --- /dev/null +++ b/docs/00-guide.md @@ -0,0 +1,443 @@ +# MiniC + Nom + +Este guia introduz o projeto MiniC da disciplina **sem assumir experiência prévia com Rust** ou **com Nom**. + +Tem um objetivo: levar você de "sei o que são combinadores de parsing em teoria" até "consigo ler e estender essa implementação real de linguagem em Rust". + +Use este documento como seu ponto de partida e consulte as referências vinculadas quando precisar de ajuda. + +Referências rápidas: +- Rust Book: +- Documentação da crate Nom: +- Guias do Nom: + + +## O que é MiniC + +MiniC é uma pequena linguagem similar a C implementada em Rust para aprender construção de compiladores. + +Um programa MiniC é uma lista de declarações de funções: + +```c +int factorial(int n) { + if n <= 1 { return 1; } + return n * factorial(n - 1); +} + +void main() { + int result = factorial(10); + print(result); +} +``` + +Pipeline do MiniC: +1. Fazer parsing do código-fonte em uma AST. +2. Verificar tipos da AST. +3. Interpretar (executar) a AST verificada. + +Por que isso importa pedagogicamente: +- Cada etapa tem uma responsabilidade clara. +- Você pode testar cada etapa independentemente. +- O trabalho de extensão é estruturado e previsível. + +Para mais informações sobre a linguagem e o pipeline, veja [docs/01-language.md](01-language.md) e [docs/02-pipeline.md](02-pipeline.md). + +## Parte A: Introdução Rápida a Rust (Apenas o Necessário) + +Você **não** precisa de todo o livro de Rust para trabalhar em MiniC. Você só precisa de um pequeno subconjunto. 
+ +### 1) Variáveis e Funções + +Estrutura de função em Rust: + +```rust +fn add(x: i64, y: i64) -> i64 { + x + y +} +``` + +- `fn` começa uma função. +- `x: i64` é o nome do parâmetro e seu tipo. +- `-> i64` é o tipo de retorno. +- A última expressão sem `;` é retornada. + +(Rust Book: ) + +### 2) Structs (Dados com Campos Nomeados) + +```rust +struct Point { + x: i64, + y: i64, +} +``` + +MiniC usa structs como wrappers da AST (nós de expressão com metadados). + +(Rust Book: ) + +### 3) Enums (Uma de Várias Variantes) + +```rust +enum Value { + Int(i64), + Bool(bool), + Str(String), +} +``` + +Uma enum pode conter diferentes formas sob um tipo. Isso é central para AST e valores em tempo de execução. + +(Rust Book: ) + +### 4) match (Ramificação por Variante) + +```rust +match value { + Value::Int(n) => println!("int: {}", n), + Value::Bool(b) => println!("bool: {}", b), + Value::Str(s) => println!("str: {}", s), +} +``` + +Verificação de tipos e interpretação de MiniC são principalmente grandes instruções `match` explícitas. + +(Rust Book: ) + +### 5) Result para Tratamento de Erros + +```rust +fn parse_number(s: &str) -> Result<i64, String> { + s.parse::<i64>().map_err(|e| e.to_string()) +} +``` + +`Result<T, E>` significa um de dois casos: +- `Ok(T)` sucesso +- `Err(E)` falha + +Parser, verificador de tipos e interpretador de MiniC todos dependem desse estilo. + +(Rust Book: ) + +### 6) Box para Árvores Recursivas + +Enums recursivas precisam de indireção: + +```rust +enum Expr { + Int(i64), + Add(Box<Expr>, Box<Expr>), +} +``` + +Sem `Box`, Rust não consegue determinar o tamanho recursivo em tempo de compilação. + +(Rust Book: ) + +### 7) Generics (Parametrização da AST através de Fases) + +Nós da AST de MiniC são genéricos sobre um parâmetro de tipo `Ty` que carrega metadados específicos de cada fase. 
A mesma estrutura de árvore é usada após parsing e após type-checking: +- Imediatamente após parsing: `Ty = ()` (sem informação de tipo) +- Após type-checking: `Ty = Type` (com informação de tipo para cada sub-expressão) + +Esse design evita, em tempo de compilação, misturar acidentalmente AST verificada com AST não verificada, pois são tipos diferentes (`Expr<()>` vs `Expr<Type>`). + +(Rust Book: generics , ownership and borrowing ) + +## Parte B: Introdução Rápida a Nom (Apenas o Necessário) + +Nom é uma biblioteca de combinadores de parsers para Rust. + +Conforme a documentação do Nom, a forma central de um parser é: + +```rust +fn parser(input: I) -> IResult<I, O, E> +``` + +Para MiniC, tipicamente: +- tipo de entrada: `&str` +- tipo de saída: fragmento de AST + +(Visão geral do Nom: , guia "fazendo um novo parser": ) + +### 1) O que IResult Significa + +`IResult` é essencialmente: +- `Ok((remaining_input, output_value))` +- ou `Err(...)` + +Então um parser retorna ambos: +1. O que foi parseado. +2. O que restou sem parsear. + +É por isso que a composição de parsers funciona naturalmente. + +(Referência: ) + +Modelo de erro do Nom (importante ao debugar comportamento de parsing): +- `Err::Error` é recuperável (então `alt` pode tentar outro branch) +- `Err::Failure` é irrecuperável (branch confirmado) +- `Err::Incomplete` significa que mais entrada é necessária em modo streaming (mas nunca ocorre em modo complete). + +O combinator `cut` muda erros recuperáveis para falhas quando você sabe que está no branch correto. Veja e . + +### 2) Complete vs Streaming + +Nom tem variantes `complete` e `streaming`. + +MiniC usa parsers `complete` porque arquivos de código estão totalmente disponíveis na memória. + +Conforme a documentação do Nom: +- streaming pode retornar `Incomplete` para buffers parciais. +- complete trata dados faltantes como um erro. + +Para parsing de linguagem baseado em arquivo, complete é o padrão certo. 
+ +(Referência e exemplos: ) + +### 3) Combinators Principais Usados em MiniC + +- `tag("if")`: correspondência exata de string. +- `char('(')`: correspondência exata de caractere. +- `alt((a, b, c))`: tenta alternativas em ordem. +- `tuple((a, b, c))`: faz parsing de sequência. +- `preceded(a, b)`: faz parsing de `a` depois `b`, mantém `b`. +- `delimited(a, b, c)`: faz parsing de `a b c`, mantém `b`. +- `map(p, f)`: transforma saída de parsing em nó de AST. +- `opt(p)`: parsing opcional (`Some` ou `None`). +- `many0(p)`: repete zero ou mais vezes. +- `separated_list0(sep, item)`: faz parsing de itens de lista separados por um separador. +- `verify(p, pred)`: faz parsing com `p`, depois aplica predicado. + +Quando não tiver certeza qual combinator usar, consulte o guia de escolha: . + +### 4) Dois Comportamentos Importantes do Nom + +#### alt é ordenado + +`alt((a, b, c))` tenta da esquerda para a direita e retorna o primeiro sucesso. + +Então a ordem de branches determina diretamente o comportamento da linguagem. + +#### Sucesso do parser não implica consumo completo + +`many0(p)` coleta enquanto `p` tem sucesso e para em falha recuperável. Similarmente, parsers individuais retornam `Ok((rest, value))` onde `rest` pode não estar vazio. + +A responsabilidade de verificar consumo completo da entrada é do chamador, não do parser. Use `all_consuming` em testes ou validação explícita em produção. + +Referências API úteis: `alt` , `many0` , `separated_list0` , `all_consuming` . + +## Parte C: Arquitetura do Parser de MiniC + +Módulos do parser são divididos por categoria de gramática: +- `src/parser/identifiers.rs` +- `src/parser/literals.rs` +- `src/parser/expressions.rs` +- `src/parser/statements.rs` +- `src/parser/functions.rs` +- `src/parser/program.rs` + +Essa separação reflete a organização da gramática, permite que cada módulo tenha responsabilidade clara, facilita localizar código relacionado e mapeia intuitivamente da linguagem formal para o código Rust. 
+ +Veja notas sobre arquitetura do parser em [docs/04-parser.md](04-parser.md), depois compare diretamente com [src/parser/mod.rs](../src/parser/mod.rs). + +### 1) Identificadores + +Padrão: +1. Fazer parsing da forma de identificador. +2. Rejeitar palavras-chave reservadas com `verify`. + +Isso separa claramente forma léxica de política de palavras-chave. + +(Nom `verify`: ) + +### 2) Literais + +Faz parsing de int, float, string, bool. + +Escolhas de implementação notáveis: +- Parser de inteiro rejeita `12.34` como inteiro. +- Parser de string suporta escapes (`\\`, `\"`, `\n`, `\t`) via combinators de escape. + +(Referências de parse de texto do Nom: `escaped_transform` , tabela de parser-chooser de parse de texto: ) + +### 3) Expressões (Precedência + Associatividade) + +MiniC codifica precedência via camadas de função (função por nível), em ordem crescente de precedência: +- ou lógico (precedência mais baixa) +- e lógico +- não +- relacional +- aditivo +- multiplicativo +- unário +- primário +- atômico (precedência mais alta) + +Cada camada chama a camada seguinte (de precedência mais alta) quando precisa de um operando. **Todos os operadores binários de MiniC são associativos à esquerda**, implementados com um loop de acumulador em cada nível: parseia o operando esquerdo, depois enquanto o operador esperado não falha, parseia o operador e o operando direito, combinando-os com o esquerdo em um novo nó (que se torna o novo operando esquerdo). + +Exemplo: `1 - 2 - 3` se torna `(1 - 2) - 3` porque o primeiro `2` é consumido como direito, o resultado `(1 - 2)` vira o novo esquerdo, e `3` é consumido como novo direito. + +Código concreto: [src/parser/expressions.rs](../src/parser/expressions.rs) e testes em [tests/parser.rs](../tests/parser.rs). + +### 4) Declarações (Statements) + +Parser de declaração (statement) é um conjunto ordenado de alternativas: +- bloco +- if +- while +- return +- declaração de variável +- chamada de função como declaração +- atribuição + +A ordem é deliberada, especialmente para formas com prefixos sobrepostos. 
+ +### 5) Funções e Tipos + +Parser de tipo inclui formas escalares e de array. + +Como `alt` é ordenado, prefixos mais longos (como formas de array 2D) devem vir listados antes dos mais curtos (formas 1D). + +(Nom `alt`: ) + +### 6) Parser de Programa + +Parser de nível superior usa repetição sobre declarações de função. + +Tradeoff pedagógico: +- Código simples +- Mas você deve raciocinar cuidadosamente sobre comportamento de consumo parcial + +Se quiser melhorar esse comportamento ou diagnósticos, os guias de construção de parsers e erros do Nom são o próximo passo certo: e . + +## Parte D: AST, Verificação de Tipos e Interpretação + +### 1) Design da AST + +Nós de AST representam: +- Expressões +- Declarações +- Declarações de função +- Programa + +MiniC usa decorações genéricas de nó para que parser e verificador de tipo compartilhem a mesma forma. + +Referências do projeto: [docs/03-ast.md](03-ast.md), [src/ir/ast.rs](../src/ir/ast.rs). + +### 2) Responsabilidades do Verificador de Tipos + +Verificação de tipos valida: +- Assinatura obrigatória de `main` +- Tipos de declaração e atribuição +- Contagem/tipos de argumento de chamada de função +- Tipagem dos operadores nas expressões +- Indexação de array e consistência de elementos +- Correção de tipo de retorno + +Usa um ambiente mapeando nomes para tipos. + +Referências do projeto: [docs/05-type-checker.md](05-type-checker.md), [src/semantic/type_checker.rs](../src/semantic/type_checker.rs). + +### 3) Responsabilidades do Interpretador + +Interpretador executa AST verificada: +- Avaliação de expressão em valores em tempo de execução +- Execução de declaração (incluindo fluxo de controle) +- Chamadas de função (definidas pelo usuário e nativas) +- Erros em tempo de execução (ex.: índice fora dos limites) + +Usa um ambiente mapeando nomes para valores em tempo de execução. 
+ +Referências do projeto: [docs/06-interpreter.md](06-interpreter.md), [src/interpreter/eval_expr.rs](../src/interpreter/eval_expr.rs), [src/interpreter/exec_stmt.rs](../src/interpreter/exec_stmt.rs). + +### 4) Equivalência do Ambiente + +As duas fases têm a mesma abstração central: +- Ambiente semântico (`name -> Type`) +- Ambiente de tempo de execução (`name -> Value`) + +## Parte E: Como Adicionar Funcionalidades (Fluxo de Trabalho do Aluno) + +Para cada nova funcionalidade de linguagem, use esta checklist: +1. Estender AST. +2. Estender parser. +3. Estender verificador de tipos. +4. Estender interpretador. +5. Adicionar testes. +6. Atualizar docs. + +Se você pular um passo, a funcionalidade fica incompleta. + +### Exemplo: Adicionar um Novo Operador Binário + +1. Adicionar nova variante de expressão à AST. +2. Adicionar regra de parser na camada de precedência correta. +3. Adicionar regra de tipo. +4. Adicionar regra de avaliação em tempo de execução. +5. Adicionar testes de precedência do parser + testes de tipo + testes de interpretador. + +### Exemplo: Adicionar uma Nova Declaração + +1. Adicionar variante de declaração. +2. Adicionar branch de parser na ordem correta. +3. Adicionar branch de verificação de tipos. +4. Adicionar branch de execução. +5. Adicionar testes de escopo de bloco e testes de integração. + +Para adições de funcionalidade que tocam builtins ou a integração com o ambiente de tempo de execução, consulte [docs/07-stdlib.md](07-stdlib.md), [docs/08-testing.md](08-testing.md), e [src/stdlib/mod.rs](../src/stdlib/mod.rs). + +## Parte F: Estratégia de Teste Que Você Deveria Seguir + +Camadas de teste de MiniC: +- Testes do parser +- Testes do verificador de tipos +- Testes do interpretador +- Testes CLI + +Use todas as quatro camadas ao adicionar funcionalidades não triviais. 
+ +Regra prática: +- um teste unitário para cada regra local +- um teste ponta-a-ponta para cada comportamento visível ao usuário + +Pontos de entrada de teste úteis: +- Testes de parser: [tests/parser.rs](../tests/parser.rs) +- Testes de programa: [tests/program.rs](../tests/program.rs) +- Testes de verificador de tipos: [tests/type_checker.rs](../tests/type_checker.rs) +- Testes de interpretador: [tests/interpreter.rs](../tests/interpreter.rs) +- Testes de stdlib: [tests/stdlib.rs](../tests/stdlib.rs) +- Testes CLI: [tests/cli](../tests/cli) + +Detalhes de estratégia de teste e convenções shelltest são documentados em [docs/08-testing.md](08-testing.md). + +## Parte G: Ordem de Leitura para Este Repositório + +Comece com docs: +1. [docs/01-language.md](01-language.md) +2. [docs/02-pipeline.md](02-pipeline.md) +3. [docs/03-ast.md](03-ast.md) +4. [docs/04-parser.md](04-parser.md) +5. [docs/05-type-checker.md](05-type-checker.md) +6. [docs/06-interpreter.md](06-interpreter.md) +7. [docs/07-stdlib.md](07-stdlib.md) +8. [docs/08-testing.md](08-testing.md) + +Depois leia código nesta ordem: +1. [src/ir/ast.rs](../src/ir/ast.rs) +2. [src/parser/mod.rs](../src/parser/mod.rs) e submódulos do parser +3. [src/semantic/type_checker.rs](../src/semantic/type_checker.rs) +4. [src/interpreter/eval_expr.rs](../src/interpreter/eval_expr.rs) +5. [src/interpreter/exec_stmt.rs](../src/interpreter/exec_stmt.rs) +6. [src/stdlib/mod.rs](../src/stdlib/mod.rs) + +## Conclusão + +Você pode pensar em MiniC como quatro problemas de ensino conectados: +1. Fazer parsing de sintaxe (combinadores Nom). +2. Validar significado (verificador de tipos). +3. Executar comportamento (interpretador). +4. Preservar confiança (testes). + +Uma vez que consiga rastrear uma funcionalidade através de todos os quatro, você consegue estender a linguagem com confiança. 
diff --git a/src/interpreter/eval_expr.rs b/src/interpreter/eval_expr.rs index 49fcbef..ee9ca56 100644 --- a/src/interpreter/eval_expr.rs +++ b/src/interpreter/eval_expr.rs @@ -168,8 +168,8 @@ pub fn eval_call( ))); } let snapshot = env.snapshot(); - for ((param_name, _), val) in decl.params.iter().zip(args.into_iter()) { - env.declare(param_name.clone(), val); + for (param, val) in decl.params.iter().zip(args.into_iter()) { + env.declare(param.name.clone(), val); } let result = exec_stmt(&decl.body, env)?; env.restore(snapshot); diff --git a/src/ir/ast.rs b/src/ir/ast.rs index 5f57b24..818647d 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -48,6 +48,14 @@ //! compatibility check (`types_compatible`) treats `Any` as matching //! everything, keeping the special case local to one function. +/// Tagged types: struct, union, enum +#[derive(Debug, Clone, PartialEq)] +pub enum TagType { + Struct, + Union, + Enum, +} + /// MiniC types: scalar, array, function, and Any (for polymorphic native params). #[derive(Debug, Clone, PartialEq)] pub enum Type { @@ -57,7 +65,14 @@ pub enum Type { Bool, Str, Array(Box), - Fun(Vec, Box), + Tagged { + tag_type: TagType, + tag_name: String, + }, + Function { + params: Vec, + return_type: Box, + }, /// Matches any type. Only used as a parameter type in native stdlib registrations. Any, } @@ -153,21 +168,41 @@ pub enum Statement { Return(Option>>), } -/// A typed parameter: (name, type). -pub type Param = (String, Type); +/// An identifier with a declared type. +#[derive(Debug, Clone, PartialEq)] +pub struct IdentifierDecl { + pub name: String, + pub ty: Type, +} + +/// A field or enumerator inside a tagged type declaration. +#[derive(Debug, Clone, PartialEq)] +pub enum Member { + Field(IdentifierDecl), + Enumerator { name: String, value: Option }, +} + +/// A tagged type declaration: struct, union, or enum. 
+#[derive(Debug, Clone, PartialEq)] +pub struct TaggedTypeDecl { + pub tag_type: TagType, + pub tag_name: String, + pub members: Vec, +} /// A function declaration. #[derive(Debug, Clone, PartialEq)] pub struct FunDecl { pub name: String, - pub params: Vec, + pub params: Vec, pub return_type: Type, pub body: Box>, } -/// A complete MiniC program: function declarations only. Execution starts at `main`. +/// A complete MiniC program: top-level type declarations and function declarations. #[derive(Debug, Clone, PartialEq)] pub struct Program { + pub tagged_types: Vec, pub functions: Vec>, } diff --git a/src/parser/functions.rs b/src/parser/functions.rs index 845a59c..832b4e5 100644 --- a/src/parser/functions.rs +++ b/src/parser/functions.rs @@ -1,81 +1,41 @@ -//! Function declaration and type-name parsers for MiniC. +//! Function declaration parser for MiniC. //! //! # Overview //! -//! Exposes two public functions: +//! Exposes one public function: //! //! * [`fun_decl`] — parses a complete function declaration in C style: //! `ReturnType name(Type param, …) body`. The body is any single //! statement (typically a block `{ … }`). -//! * [`type_name`] — parses a MiniC type keyword (`int`, `float`, `bool`, -//! `str`, `void`, or an array variant like `int[]`). Re-used by the -//! statement parser for variable declarations. //! //! # Design Decisions //! -//! ## 2D array types must be tried before 1D +//! ## Type syntax is a shared, lower-level concept //! -//! `nom`'s `alt` combinator tries alternatives left-to-right and stops at -//! the first match. Because `int[][]` starts with the same prefix as -//! `int[]`, the 2D variants must appear before the 1D variants in the -//! `alt` list, otherwise `int[][]` would be incorrectly parsed as `int[]` -//! followed by a leftover `[]`. +//! Function parsing should reuse the same `type_definition` parser as other +//! declaration forms. That keeps grammar ownership aligned with language +//! 
concepts rather than implementation convenience. -use crate::ir::ast::{FunDecl, Type, UncheckedFunDecl}; -use crate::parser::identifiers::identifier; +use crate::ir::ast::{FunDecl, UncheckedFunDecl}; +use crate::parser::identifiers::{identifier, identifier_decl}; use crate::parser::statements::statement; +use crate::parser::types::type_definition; use nom::{ - branch::alt, bytes::complete::tag, character::complete::{multispace0, multispace1}, - combinator::map, multi::separated_list0, - sequence::{delimited, preceded, tuple}, + sequence::{delimited, preceded}, IResult, }; -/// Parse a type name: int | float | bool | str | void | T[] | T[][] (C-style lowercase). -pub fn type_name(input: &str) -> IResult<&str, Type> { - preceded( - multispace0, - alt(( - // 2D arrays must be tried before 1D (longer prefix first) - map(tag("int[][]"), |_| Type::Array(Box::new(Type::Array(Box::new(Type::Int))))), - map(tag("float[][]"), |_| Type::Array(Box::new(Type::Array(Box::new(Type::Float))))), - map(tag("bool[][]"), |_| Type::Array(Box::new(Type::Array(Box::new(Type::Bool))))), - map(tag("str[][]"), |_| Type::Array(Box::new(Type::Array(Box::new(Type::Str))))), - map(tag("int[]"), |_| Type::Array(Box::new(Type::Int))), - map(tag("float[]"), |_| Type::Array(Box::new(Type::Float))), - map(tag("bool[]"), |_| Type::Array(Box::new(Type::Bool))), - map(tag("str[]"), |_| Type::Array(Box::new(Type::Str))), - map(tag("int"), |_| Type::Int), - map(tag("float"), |_| Type::Float), - map(tag("bool"), |_| Type::Bool), - map(tag("str"), |_| Type::Str), - map(tag("void"), |_| Type::Unit), - )), - )(input) -} - -/// Parse a typed parameter (C-style): `Type name`. -fn param(input: &str) -> IResult<&str, (String, Type)> { - map( - tuple(( - preceded(multispace0, type_name), - preceded(multispace1, identifier), - )), - |(ty, name)| -> (String, Type) { (name.to_string(), ty) }, - )(input) -} - /// Parse a function declaration (C-style): `ReturnType name(Type name, ...) body`. 
/// Example: `int add(int x, int y) { ... }` or `void main() x = 1`. pub fn fun_decl(input: &str) -> IResult<&str, UncheckedFunDecl> { - let (rest, return_type) = preceded(multispace0, type_name)(input)?; + let (rest, return_type) = preceded(multispace0, type_definition)(input)?; let (rest, name) = preceded(multispace1, identifier)(rest)?; let (rest, params) = delimited( preceded(multispace0, tag("(")), - separated_list0(preceded(multispace0, tag(",")), param), + separated_list0(preceded(multispace0, tag(",")), identifier_decl), preceded(multispace0, tag(")")), )(rest)?; let (rest, body) = preceded(multispace0, statement)(rest)?; diff --git a/src/parser/identifiers.rs b/src/parser/identifiers.rs index f6bdecb..93ecbd0 100644 --- a/src/parser/identifiers.rs +++ b/src/parser/identifiers.rs @@ -22,13 +22,18 @@ use nom::{ bytes::complete::{take_while, take_while1}, - combinator::{recognize, verify}, - sequence::pair, + character::complete::multispace1, + combinator::{map, recognize, verify}, + sequence::{pair, preceded, tuple}, IResult, }; +use crate::{ir::ast::IdentifierDecl, parser::types::type_definition}; + /// Reserved words: boolean literals and type names. -const RESERVED: &[&str] = &["true", "false", "int", "float", "bool", "str", "void", "return"]; +const RESERVED: &[&str] = &[ + "true", "false", "struct", "union", "enum", "int", "float", "bool", "str", "void", "return", +]; /// Parse an identifier (variable name). /// Must start with letter or underscore; subsequent chars may be letter, digit, or underscore. @@ -40,3 +45,15 @@ pub fn identifier(input: &str) -> IResult<&str, &str> { )); verify(id_parser, |s: &str| !RESERVED.contains(&s))(input) } + +/// Parse an identifier declaration: `Type name`. 
+/// Must only be called for parameters and tagged union members +pub fn identifier_decl(input: &str) -> IResult<&str, IdentifierDecl> { + map( + tuple((type_definition, preceded(multispace1, identifier))), + |(ty, name)| IdentifierDecl { + name: name.to_string(), + ty, + }, + )(input) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a0824ef..b8c2d6a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -60,6 +60,7 @@ pub mod identifiers; pub mod literals; pub mod program; pub mod statements; +pub mod types; pub use expressions::expression; pub use functions::fun_decl; @@ -67,3 +68,4 @@ pub use identifiers::identifier; pub use literals::{literal, Literal}; pub use program::program; pub use statements::{assignment, statement}; +pub use types::tagged_type_decl; diff --git a/src/parser/program.rs b/src/parser/program.rs index f91be58..665d231 100644 --- a/src/parser/program.rs +++ b/src/parser/program.rs @@ -26,12 +26,38 @@ //! `main` is a semantic constraint checked in the next pipeline stage, not //! a syntactic one enforced here. -use crate::ir::ast::{Program, UncheckedProgram}; +use crate::ir::ast::{Program, TaggedTypeDecl, UncheckedProgram}; use crate::parser::functions::fun_decl; -use nom::{combinator::map, multi::many0, IResult}; +use crate::parser::types::tagged_type_decl; +use nom::{branch::alt, combinator::map, multi::many0, IResult}; -/// Parse a complete MiniC program: zero or more function declarations. +/// Parse a complete MiniC program: zero or more struct or function declarations. /// Execution starts at the `main` function (validated by the type checker). 
pub fn program(input: &str) -> IResult<&str, UncheckedProgram> { - map(many0(fun_decl), |functions| Program { functions })(input) + let (rest, items) = many0(alt(( + map(tagged_type_decl, |decl| Item::TypeDecl(decl)), + map(fun_decl, |f| Item::Function(f)), + )))(input)?; + + let mut type_decls = Vec::new(); + let mut functions = Vec::new(); + for item in items { + match item { + Item::TypeDecl(decl) => type_decls.push(decl), + Item::Function(f) => functions.push(f), + } + } + + Ok(( + rest, + Program { + tagged_types: type_decls, + functions, + }, + )) +} + +enum Item { + TypeDecl(TaggedTypeDecl), + Function(crate::ir::ast::FunDecl<()>), } diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 9dcfef5..5386783 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -29,7 +29,7 @@ //! //! Both `int x = 0` (declaration) and `x = 0` (assignment) begin with an //! identifier-like token, so the order of alternatives in [`statement`] -//! matters. Declaration is tried first because it starts with a type keyword +//! matters. Declaration is tried first because it starts with type_definition keyword //! (`int`, `float`, …), which is unambiguous. If declaration fails, the //! parser backtracks and tries assignment. //! 
@@ -42,8 +42,8 @@ use crate::ir::ast::{Expr, ExprD, Statement, StatementD, UncheckedExpr, UncheckedStmt}; use crate::parser::expressions::{expression, parse_call}; -use crate::parser::functions::type_name; use crate::parser::identifiers::identifier; +use crate::parser::types::type_definition; use nom::{ branch::alt, bytes::complete::tag, @@ -86,7 +86,7 @@ fn return_statement(input: &str) -> IResult<&str, UncheckedStmt> { fn decl_statement(input: &str) -> IResult<&str, UncheckedStmt> { map( tuple(( - type_name, + type_definition, preceded(nom::character::complete::multispace1, identifier), preceded(multispace0, nom::bytes::complete::tag("=")), preceded(multispace0, expression), diff --git a/src/parser/types.rs b/src/parser/types.rs new file mode 100644 index 0000000..cbc7296 --- /dev/null +++ b/src/parser/types.rs @@ -0,0 +1,124 @@ +//! Shared type parsers for MiniC. +//! +//! This module defines the language's type syntax: base types, arrays, and +//! struct type names. It is reused by function parsing, struct field parsing, +//! and variable declarations. 
+ +use crate::ir::ast::{Member, TagType, TaggedTypeDecl, Type}; +use crate::parser::identifiers::{identifier, identifier_decl}; +use crate::parser::literals::integer_literal; +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{char, multispace0, multispace1}, + combinator::{map, opt}, + multi::{many0, many1}, + sequence::{delimited, pair, preceded, tuple}, + IResult, +}; + +fn member_field(input: &str) -> IResult<&str, Member> { + map( + tuple(( + preceded(multispace0, identifier_decl), + preceded(multispace0, char(';')), + )), + |(decl, _)| Member::Field(decl), + )(input) +} + +fn enum_variant(input: &str) -> IResult<&str, Member> { + map( + tuple(( + preceded(multispace0, identifier), + opt(preceded( + preceded(multispace0, char('=')), + preceded(multispace0, integer_literal), + )), + preceded(multispace0, char(';')), + )), + |(name, value, _)| Member::Enumerator { + name: name.to_string(), + value, + }, + )(input) +} + +fn tagged_type_and_name(input: &str) -> IResult<&str, (TagType, String)> { + alt(( + map( + tuple(( + preceded(multispace0, tag("struct")), + preceded(multispace1, identifier), + )), + |(_, name)| (TagType::Struct, name.to_string()), + ), + map( + tuple(( + preceded(multispace0, tag("union")), + preceded(multispace1, identifier), + )), + |(_, name)| (TagType::Union, name.to_string()), + ), + map( + tuple(( + preceded(multispace0, tag("enum")), + preceded(multispace1, identifier), + )), + |(_, name)| (TagType::Enum, name.to_string()), + ), + ))(input) +} + +/// Parse a tagged type: `[ struct | union | enum ] N {...}`. 
+pub fn tagged_type_decl(input: &str) -> IResult<&str, TaggedTypeDecl> { + let (rest, (tag_type, tag_name)) = tagged_type_and_name(input)?; + + let (rest, members) = match tag_type { + TagType::Struct | TagType::Union => delimited( + preceded(multispace0, char('{')), + many1(member_field), + preceded(multispace0, char('}')), + )(rest)?, + TagType::Enum => delimited( + preceded(multispace0, char('{')), + many1(enum_variant), + preceded(multispace0, char('}')), + )(rest)?, + }; + + Ok(( + rest, + TaggedTypeDecl { + tag_type, + tag_name, + members, + }, + )) +} + +fn base_type(input: &str) -> IResult<&str, Type> { + preceded( + multispace0, + alt(( + map(tagged_type_and_name, |(tag_type, tag_name)| Type::Tagged { + tag_type, + tag_name, + }), + map(tag("int"), |_| Type::Int), + map(tag("float"), |_| Type::Float), + map(tag("bool"), |_| Type::Bool), + map(tag("str"), |_| Type::Str), + map(tag("void"), |_| Type::Unit), + )), + )(input) +} + +/// Parse a type name: int | float | bool | str | void | struct N | union N | enum N | T[] | T[][]. +pub fn type_definition(input: &str) -> IResult<&str, Type> { + map(pair(base_type, many0(tag("[]"))), |(base, dimensions)| { + dimensions + .into_iter() + .fold(base, |inner, _| Type::Array(Box::new(inner))) + })(input) +} diff --git a/src/semantic/type_checker.rs b/src/semantic/type_checker.rs index 46681cf..2fb14e2 100644 --- a/src/semantic/type_checker.rs +++ b/src/semantic/type_checker.rs @@ -94,19 +94,28 @@ pub fn type_check(program: &UncheckedProgram) -> Result::new(); - // Register native stdlib functions as Type::Fun bindings. + // Register native stdlib functions as Type::Function bindings. 
let registry = NativeRegistry::default(); for (name, entry) in registry.iter() { env.declare( name.clone(), - Type::Fun(entry.params.clone(), Box::new(entry.return_type.clone())), + Type::Function { + params: entry.params.clone(), + return_type: Box::new(entry.return_type.clone()), + }, ); } - // Register user-defined function signatures as Type::Fun bindings. + // Register user-defined function signatures as Type::Function bindings. for f in &program.functions { - let param_tys = f.params.iter().map(|(_, ty)| ty.clone()).collect(); - env.declare(f.name.clone(), Type::Fun(param_tys, Box::new(f.return_type.clone()))); + let param_tys = f.params.iter().map(|param| param.ty.clone()).collect(); + env.declare( + f.name.clone(), + Type::Function { + params: param_tys, + return_type: Box::new(f.return_type.clone()), + }, + ); } // Clean snapshot: only function bindings, no variable bindings. @@ -117,7 +126,10 @@ pub fn type_check(program: &UncheckedProgram) -> Result Result { // Restore to clean function-only state, then add parameters. 
env.restore(fn_snapshot.clone()); - for (name, ty) in &f.params { - env.declare(name.clone(), ty.clone()); + for param in &f.params { + env.declare(param.name.clone(), param.ty.clone()); } let body = type_check_stmt(&f.body, env, &f.return_type)?; Ok(FunDecl { @@ -150,7 +162,10 @@ fn type_check_stmt( return Err(TypeError::new("cannot declare variable of type void")); } if env.get(name).is_some() { - return Err(TypeError::new(format!("redeclaration of variable: {}", name))); + return Err(TypeError::new(format!( + "redeclaration of variable: {}", + name + ))); } let init_checked = type_check_expr_to_typed(init, env)?; if !types_compatible(&init_checked.ty, ty) { @@ -263,13 +278,11 @@ fn type_check_stmt( }) } -fn check_call( - name: &str, - args: &[CheckedExpr], - env: &Environment, -) -> Result<(), TypeError> { +fn check_call(name: &str, args: &[CheckedExpr], env: &Environment) -> Result<(), TypeError> { match env.get(name) { - Some(Type::Fun(param_tys, _)) => { + Some(Type::Function { + params: param_tys, .. 
+ }) => { if args.len() != param_tys.len() { return Err(TypeError::new(format!( "function '{}' expects {} arguments, got {}", @@ -342,10 +355,7 @@ fn type_check_expr_to_typed( Ok(ExprD { exp, ty }) } -fn type_check_expr_inner( - e: &Expr<()>, - env: &Environment, -) -> Result, TypeError> { +fn type_check_expr_inner(e: &Expr<()>, env: &Environment) -> Result, TypeError> { match e { Expr::Literal(l) => Ok(Expr::Literal(l.clone())), Expr::Ident(name) => Ok(Expr::Ident(name.clone())), @@ -400,16 +410,20 @@ fn type_check_expr_inner( Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Call { name, args } => { - let args_checked: Result, _> = - args.iter().map(|a| type_check_expr_to_typed(a, env)).collect(); + let args_checked: Result, _> = args + .iter() + .map(|a| type_check_expr_to_typed(a, env)) + .collect(); Ok(Expr::Call { name: name.clone(), args: args_checked?, }) } Expr::ArrayLit(elems) => { - let elems_checked: Result, _> = - elems.iter().map(|e| type_check_expr_to_typed(e, env)).collect(); + let elems_checked: Result, _> = elems + .iter() + .map(|e| type_check_expr_to_typed(e, env)) + .collect(); Ok(Expr::ArrayLit(elems_checked?)) } Expr::Index { base, index } => Ok(Expr::Index { @@ -419,14 +433,11 @@ fn type_check_expr_inner( } } -fn type_check_expr( - e: &UncheckedExpr, - env: &Environment, -) -> Result { +fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result { match &e.exp { Expr::Literal(l) => Ok(literal_type(l)), Expr::Ident(name) => match env.get(name) { - Some(Type::Fun(_, _)) => Err(TypeError::new(format!( + Some(Type::Function { .. 
}) => Err(TypeError::new(format!( "cannot use function '{}' as a value", name ))), @@ -486,11 +497,16 @@ fn type_check_expr( } } Expr::Call { name, args } => { - let args_checked: Result, _> = - args.iter().map(|a| type_check_expr_to_typed(a, env)).collect(); + let args_checked: Result, _> = args + .iter() + .map(|a| type_check_expr_to_typed(a, env)) + .collect(); let args_checked = args_checked?; match env.get(name) { - Some(Type::Fun(param_tys, return_ty)) => { + Some(Type::Function { + params: param_tys, + return_type, + }) => { if args_checked.len() != param_tys.len() { return Err(TypeError::new(format!( "function '{}' expects {} arguments, got {}", @@ -512,7 +528,7 @@ fn type_check_expr( ))); } } - Ok((**return_ty).clone()) + Ok((**return_type).clone()) } Some(_) => Err(TypeError::new(format!("'{}' is not a function", name))), None => Err(TypeError::new(format!("undefined function: {}", name))), diff --git a/tests/fixtures/tagged_types.minic b/tests/fixtures/tagged_types.minic new file mode 100644 index 0000000..42b6a3b --- /dev/null +++ b/tests/fixtures/tagged_types.minic @@ -0,0 +1,5 @@ +struct Point { int x; int y; } +union Payload { int code; struct Point p; } +enum Kind { A; B = 2; } + +void main() { return; } diff --git a/tests/parser.rs b/tests/parser.rs index eca6640..7f1ef77 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -1,14 +1,17 @@ //! Integration tests for the MiniC parser. 
-use nom::combinator::all_consuming; -use mini_c::ir::ast::{Expr, ExprD, Literal, Statement, Type}; +use mini_c::ir::ast::{Expr, ExprD, IdentifierDecl, Literal, Member, Statement, TagType, Type}; use mini_c::parser::{ - assignment, expression, fun_decl, identifier, literal, + assignment, expression, fun_decl, identifier, + identifiers::identifier_decl, + literal, literals::{ boolean_literal, float_literal, integer_literal, string_literal, Literal as ParserLiteral, }, statement, + types::{tagged_type_decl, type_definition}, }; +use nom::combinator::all_consuming; // --- Literals --- @@ -115,6 +118,194 @@ fn test_identifier_accept_true_prefix() { assert_eq!(identifier("truex"), Ok(("", "truex"))); } +// --- Types --- + +#[test] +fn test_tagged_type_definition() { + assert_eq!( + type_definition("struct Point"), + Ok(( + "", + Type::Tagged { + tag_type: TagType::Struct, + tag_name: "Point".to_string(), + }, + )) + ); + + assert_eq!( + type_definition("union Value"), + Ok(( + "", + Type::Tagged { + tag_type: TagType::Union, + tag_name: "Value".to_string(), + }, + )) + ); + + assert_eq!( + type_definition("enum Kind"), + Ok(( + "", + Type::Tagged { + tag_type: TagType::Enum, + tag_name: "Kind".to_string(), + }, + )) + ); +} + +#[test] +fn test_tagged_type_definition_array() { + assert_eq!( + type_definition("struct S[]"), + Ok(( + "", + Type::Array(Box::new(Type::Tagged { + tag_type: TagType::Struct, + tag_name: "S".to_string(), + })) + )) + ); +} + +#[test] +fn test_tagged_type_identifier_decl() { + assert_eq!( + identifier_decl("struct Point p"), + Ok(( + "", + IdentifierDecl { + name: "p".to_string(), + ty: Type::Tagged { + tag_type: TagType::Struct, + tag_name: "Point".to_string(), + }, + } + )) + ); + + assert_eq!( + identifier_decl("union Value v"), + Ok(( + "", + IdentifierDecl { + name: "v".to_string(), + ty: Type::Tagged { + tag_type: TagType::Union, + tag_name: "Value".to_string(), + }, + } + )) + ); + + assert_eq!( + identifier_decl("enum Kind k"), + Ok(( + 
"", + IdentifierDecl { + name: "k".to_string(), + ty: Type::Tagged { + tag_type: TagType::Enum, + tag_name: "Kind".to_string(), + }, + } + )) + ); +} + +#[test] +fn test_struct_decl() { + let result = tagged_type_decl("struct Point { int x; float y; }") + .unwrap() + .1; + assert_eq!(result.tag_type, TagType::Struct); + assert_eq!(result.tag_name, "Point"); + assert_eq!(result.members.len(), 2); + assert_eq!( + result.members[0], + Member::Field(IdentifierDecl { + name: "x".into(), + ty: Type::Int, + }) + ); + assert_eq!( + result.members[1], + Member::Field(IdentifierDecl { + name: "y".into(), + ty: Type::Float, + }) + ); +} + +#[test] +fn test_union_decl() { + let result = tagged_type_decl("union Value { int i; float f; }") + .unwrap() + .1; + assert_eq!(result.tag_type, TagType::Union); + assert_eq!(result.tag_name, "Value"); + assert_eq!(result.members.len(), 2); + assert_eq!( + result.members[0], + Member::Field(IdentifierDecl { + name: "i".into(), + ty: Type::Int, + }) + ); + assert_eq!( + result.members[1], + Member::Field(IdentifierDecl { + name: "f".into(), + ty: Type::Float, + }) + ); +} + +#[test] +fn test_enum_decl() { + let result = tagged_type_decl("enum Kind { OK; Err = -1; }").unwrap().1; + assert_eq!(result.tag_type, TagType::Enum); + assert_eq!(result.tag_name, "Kind"); + assert_eq!(result.members.len(), 2); + assert_eq!( + result.members[0], + Member::Enumerator { + name: "OK".into(), + value: None, + } + ); + assert_eq!( + result.members[1], + Member::Enumerator { + name: "Err".into(), + value: Some(-1), + } + ); +} + +#[test] +fn test_tagged_type_decl_reject_empty_members() { + assert!(tagged_type_decl("struct S { }").is_err()); + assert!(tagged_type_decl("union U { }").is_err()); + assert!(tagged_type_decl("enum E { }").is_err()); +} + +#[test] +fn test_tagged_type_decl_reject_missing_member_semicolon() { + assert!(tagged_type_decl("struct S { int x }").is_err()); + assert!(tagged_type_decl("union U { int x }").is_err()); + 
assert!(tagged_type_decl("enum E { A = 1 }").is_err()); +} + +#[test] +fn test_tagged_type_decl_reject_reserved_tag_name() { + assert!(tagged_type_decl("struct return { int x; }").is_err()); + assert!(tagged_type_decl("union return { int x; }").is_err()); + assert!(tagged_type_decl("enum return { A; }").is_err()); +} + // --- Expressions --- #[test] @@ -233,9 +424,15 @@ fn test_parentheses() { #[test] fn test_relational() { - assert!(matches!(expression("a == b").unwrap().1.exp, Expr::Eq(_, _))); + assert!(matches!( + expression("a == b").unwrap().1.exp, + Expr::Eq(_, _) + )); assert!(matches!(expression("x < 5").unwrap().1.exp, Expr::Lt(_, _))); - assert!(matches!(expression("1 + 2 < 5").unwrap().1.exp, Expr::Lt(_, _))); + assert!(matches!( + expression("1 + 2 < 5").unwrap().1.exp, + Expr::Lt(_, _) + )); } #[test] @@ -289,24 +486,32 @@ fn test_invalid_unbalanced_paren() { #[test] fn test_simple_assignment() { let result = assignment("x = 42;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, ref value } - if matches!(target.exp, Expr::Ident(ref s) if s == "x") && value.exp == Expr::Literal(Literal::Int(42)))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, ref value } + if matches!(target.exp, Expr::Ident(ref s) if s == "x") && value.exp == Expr::Literal(Literal::Int(42))) + ); let result = assignment("count = 0;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, ref value } - if matches!(target.exp, Expr::Ident(ref s) if s == "count") && value.exp == Expr::Literal(Literal::Int(0)))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, ref value } + if matches!(target.exp, Expr::Ident(ref s) if s == "count") && value.exp == Expr::Literal(Literal::Int(0))) + ); } #[test] fn test_assignment_with_expression() { let result = assignment("sum = a + b;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, .. 
} if matches!(target.exp, Expr::Ident(ref s) if s == "sum"))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, .. } if matches!(target.exp, Expr::Ident(ref s) if s == "sum")) + ); if let Statement::Assign { value, .. } = &result.stmt { assert!(matches!(value.exp, Expr::Add(_, _))); } let result = assignment("flag = x < 5;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, .. } if matches!(target.exp, Expr::Ident(ref s) if s == "flag"))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, .. } if matches!(target.exp, Expr::Ident(ref s) if s == "flag")) + ); if let Statement::Assign { value, .. } = &result.stmt { assert!(matches!(value.exp, Expr::Lt(_, _))); } @@ -315,16 +520,22 @@ fn test_assignment_with_expression() { #[test] fn test_assignment_whitespace() { let result = assignment("x=1;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, ref value } - if matches!(target.exp, Expr::Ident(ref s) if s == "x") && value.exp == Expr::Literal(Literal::Int(1)))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, ref value } + if matches!(target.exp, Expr::Ident(ref s) if s == "x") && value.exp == Expr::Literal(Literal::Int(1))) + ); let result = assignment("x = 1;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, ref value } - if matches!(target.exp, Expr::Ident(ref s) if s == "x") && value.exp == Expr::Literal(Literal::Int(1)))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, ref value } + if matches!(target.exp, Expr::Ident(ref s) if s == "x") && value.exp == Expr::Literal(Literal::Int(1))) + ); let result = assignment("x = 1;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, ref value } - if matches!(target.exp, Expr::Ident(ref s) if s == "x") && value.exp == Expr::Literal(Literal::Int(1)))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, ref value } + if 
matches!(target.exp, Expr::Ident(ref s) if s == "x") && value.exp == Expr::Literal(Literal::Int(1))) + ); } #[test] @@ -337,19 +548,25 @@ fn test_invalid_assignment() { #[test] fn test_decl_statement() { let result = statement("int x = 42;").unwrap().1; - assert!(matches!(result.stmt, Statement::Decl { ref name, ref ty, .. } - if name == "x" && ty == &Type::Int)); + assert!( + matches!(result.stmt, Statement::Decl { ref name, ref ty, .. } + if name == "x" && ty == &Type::Int) + ); if let Statement::Decl { ref init, .. } = result.stmt { assert_eq!(init.exp, Expr::Literal(Literal::Int(42))); } let result = statement("float y = 3.14;").unwrap().1; - assert!(matches!(result.stmt, Statement::Decl { ref name, ref ty, .. } - if name == "y" && ty == &Type::Float)); + assert!( + matches!(result.stmt, Statement::Decl { ref name, ref ty, .. } + if name == "y" && ty == &Type::Float) + ); let result = statement("int[] arr = [1, 2, 3];").unwrap().1; - assert!(matches!(result.stmt, Statement::Decl { ref name, ref ty, .. } - if name == "arr" && matches!(ty, Type::Array(_)))); + assert!( + matches!(result.stmt, Statement::Decl { ref name, ref ty, .. } + if name == "arr" && matches!(ty, Type::Array(_))) + ); } #[test] @@ -363,7 +580,9 @@ fn test_if_without_else() { } )); if let Statement::If { - ref cond, ref then_branch, .. + ref cond, + ref then_branch, + .. } = result.stmt { assert!(matches!(cond.exp, Expr::Ident(ref s) if s == "x")); @@ -384,7 +603,10 @@ fn test_if_with_else() { .. } )); - if let Statement::If { ref else_branch, .. } = &result.stmt { + if let Statement::If { + ref else_branch, .. 
+ } = &result.stmt + { let else_s = else_branch.as_ref().unwrap(); assert!(matches!(else_s.stmt, Statement::Block { ref seq } if seq.len() == 1 @@ -400,9 +622,14 @@ fn test_if_with_else() { #[test] fn test_nested_if() { - let result = statement("if a { if b { x = 1; } else { x = 2; } }").unwrap().1; + let result = statement("if a { if b { x = 1; } else { x = 2; } }") + .unwrap() + .1; assert!(matches!(result.stmt, Statement::If { .. })); - if let Statement::If { ref then_branch, .. } = &result.stmt { + if let Statement::If { + ref then_branch, .. + } = &result.stmt + { assert!(matches!(then_branch.stmt, Statement::Block { ref seq } if seq.len() == 1 && matches!(seq[0].stmt, Statement::If { .. }))); } @@ -474,7 +701,16 @@ fn test_fun_decl_with_params() { assert_eq!(result.name, "foo"); assert_eq!( result.params, - vec![("x".to_string(), Type::Int), ("y".to_string(), Type::Int)] + vec![ + IdentifierDecl { + name: "x".to_string(), + ty: Type::Int + }, + IdentifierDecl { + name: "y".to_string(), + ty: Type::Int + }, + ] ); assert!(matches!(result.body.stmt, Statement::Block { ref seq } if seq.len() == 1 @@ -498,13 +734,11 @@ fn test_fun_decl_no_params() { let result = fun_decl("void bar() { x = 1; }").unwrap().1; assert_eq!(result.name, "bar"); assert!(result.params.is_empty()); - assert!( - matches!(result.body.stmt, Statement::Block { ref seq } + assert!(matches!(result.body.stmt, Statement::Block { ref seq } if seq.len() == 1 && matches!(seq[0].stmt, Statement::Assign { ref target, ref value } if matches!(target.exp, Expr::Ident(ref s) if s == "x") - && value.exp == Expr::Literal(Literal::Int(1)))) - ); + && value.exp == Expr::Literal(Literal::Int(1))))); } #[test] @@ -560,7 +794,9 @@ fn test_block_single_statement() { let result = statement("{ x = 1; }").unwrap().1; assert!(matches!(result.stmt, Statement::Block { ref seq } if seq.len() == 1)); if let Statement::Block { ref seq } = result.stmt { - assert!(matches!(seq[0].stmt, Statement::Assign { ref target, .. 
} if matches!(target.exp, Expr::Ident(ref s) if s == "x"))); + assert!( + matches!(seq[0].stmt, Statement::Assign { ref target, .. } if matches!(target.exp, Expr::Ident(ref s) if s == "x")) + ); } } @@ -569,8 +805,12 @@ fn test_block_multiple_statements() { let result = statement("{ x = 1; y = 2; }").unwrap().1; assert!(matches!(result.stmt, Statement::Block { ref seq } if seq.len() == 2)); if let Statement::Block { ref seq } = result.stmt { - assert!(matches!(seq[0].stmt, Statement::Assign { ref target, .. } if matches!(target.exp, Expr::Ident(ref s) if s == "x"))); - assert!(matches!(seq[1].stmt, Statement::Assign { ref target, .. } if matches!(target.exp, Expr::Ident(ref s) if s == "y"))); + assert!( + matches!(seq[0].stmt, Statement::Assign { ref target, .. } if matches!(target.exp, Expr::Ident(ref s) if s == "x")) + ); + assert!( + matches!(seq[1].stmt, Statement::Assign { ref target, .. } if matches!(target.exp, Expr::Ident(ref s) if s == "y")) + ); } } @@ -586,7 +826,10 @@ fn test_block_in_function_body() { fn test_block_in_if_body() { let result = statement("if x { a = 1; b = 2; }").unwrap().1; assert!(matches!(result.stmt, Statement::If { .. })); - if let Statement::If { ref then_branch, .. } = &result.stmt { + if let Statement::If { + ref then_branch, .. + } = &result.stmt + { assert!(matches!(then_branch.stmt, Statement::Block { ref seq } if seq.len() == 2)); } } @@ -629,10 +872,16 @@ fn test_index_read() { #[test] fn test_indexed_assignment() { let result = statement("arr[i] = 1;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, ref value } - if matches!(target.exp, Expr::Index { .. }) && value.exp == Expr::Literal(Literal::Int(1)))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, ref value } + if matches!(target.exp, Expr::Index { .. }) && value.exp == Expr::Literal(Literal::Int(1))) + ); if let Statement::Assign { ref target, .. 
} = result.stmt { - if let Expr::Index { ref base, ref index } = target.exp { + if let Expr::Index { + ref base, + ref index, + } = target.exp + { assert!(matches!(base.exp, Expr::Ident(ref s) if s == "arr")); assert!(matches!(index.exp, Expr::Ident(ref s) if s == "i")); } @@ -642,12 +891,22 @@ fn test_indexed_assignment() { #[test] fn test_multidimensional_indexed_assignment() { let result = statement("arr[i][j] = x;").unwrap().1; - assert!(matches!(result.stmt, Statement::Assign { ref target, ref value } - if matches!(target.exp, Expr::Index { .. }) && matches!(value.exp, Expr::Ident(ref s) if s == "x"))); + assert!( + matches!(result.stmt, Statement::Assign { ref target, ref value } + if matches!(target.exp, Expr::Index { .. }) && matches!(value.exp, Expr::Ident(ref s) if s == "x")) + ); if let Statement::Assign { ref target, .. } = result.stmt { - if let Expr::Index { ref base, ref index } = target.exp { + if let Expr::Index { + ref base, + ref index, + } = target.exp + { assert!(matches!(index.exp, Expr::Ident(ref s) if s == "j")); - if let Expr::Index { ref base, ref index } = base.exp { + if let Expr::Index { + ref base, + ref index, + } = base.exp + { assert!(matches!(base.exp, Expr::Ident(ref s) if s == "arr")); assert!(matches!(index.exp, Expr::Ident(ref s) if s == "i")); } diff --git a/tests/program.rs b/tests/program.rs index dd8160b..2116f76 100644 --- a/tests/program.rs +++ b/tests/program.rs @@ -1,9 +1,9 @@ //! Integration tests for parsing MiniC programs from files. 
+use mini_c::ir::ast::{IdentifierDecl, Statement, Type, UncheckedProgram}; +use mini_c::parser::program; use nom::combinator::all_consuming; use std::path::Path; -use mini_c::ir::ast::{Statement, Type, UncheckedProgram}; -use mini_c::parser::program; fn fixtures_dir() -> std::path::PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures") @@ -28,8 +28,7 @@ fn test_parse_empty_program() { #[test] fn test_parse_main_only() { - let prog = - parse_program_file("statements_only.minic").expect("main-only program should parse"); + let prog = parse_program_file("statements_only.minic").expect("main-only program should parse"); assert_eq!(prog.functions.len(), 1); assert_eq!(prog.functions[0].name, "main"); assert!(matches!(prog.functions[0].body.stmt, Statement::Block { ref seq } if seq.len() == 2)); @@ -46,9 +45,7 @@ fn test_parse_function_single() { assert_eq!(prog.functions.len(), 1); assert_eq!(prog.functions[0].name, "foo"); assert!(prog.functions[0].params.is_empty()); - assert!( - matches!(prog.functions[0].body.stmt, Statement::Decl { ref name, .. } if name == "x") - ); + assert!(matches!(prog.functions[0].body.stmt, Statement::Decl { ref name, .. 
} if name == "x")); } #[test] @@ -59,7 +56,16 @@ fn test_parse_function_with_block() { assert_eq!(prog.functions[0].name, "add"); assert_eq!( prog.functions[0].params, - vec![("x".to_string(), Type::Int), ("y".to_string(), Type::Int)] + vec![ + IdentifierDecl { + name: "x".to_string(), + ty: Type::Int, + }, + IdentifierDecl { + name: "y".to_string(), + ty: Type::Int, + }, + ] ); assert!(matches!(prog.functions[0].body.stmt, Statement::Block { ref seq } if seq.len() == 2)); } @@ -89,5 +95,20 @@ fn test_parse_invalid_syntax_fails() { #[test] fn test_parse_top_level_statements_fail() { let result = parse_program_file("top_level_statements.minic"); - assert!(result.is_err(), "top-level statements without def should fail to parse"); + assert!( + result.is_err(), + "top-level statements without def should fail to parse" + ); +} + +#[test] +fn test_parse_program_with_tagged_declarations() { + let prog = parse_program_file("tagged_types.minic") + .expect("program with tagged type declarations should parse"); + assert_eq!(prog.tagged_types.len(), 3); + assert_eq!(prog.tagged_types[0].tag_name, "Point"); + assert_eq!(prog.tagged_types[1].tag_name, "Payload"); + assert_eq!(prog.tagged_types[2].tag_name, "Kind"); + assert_eq!(prog.functions.len(), 1); + assert_eq!(prog.functions[0].name, "main"); } From 01919703ef0eddeea302945043b95b442900883d Mon Sep 17 00:00:00 2001 From: chances190 Date: Thu, 9 Apr 2026 21:54:21 -0300 Subject: [PATCH 2/6] feat: implement member access for tagged types --- src/ir/ast.rs | 5 +++++ src/parser/expressions.rs | 35 +++++++++++++++++++---------- src/parser/statements.rs | 46 ++++++++++++++++++++++++++------------- tests/parser.rs | 41 ++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 26 deletions(-) diff --git a/src/ir/ast.rs b/src/ir/ast.rs index 818647d..5cdaa12 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -125,6 +125,11 @@ pub enum Expr { base: Box>, index: Box>, }, + /// Member access: `base.member` + Member { + base: 
Box>, + member: String, + }, } /// Statement with type decoration. diff --git a/src/parser/expressions.rs b/src/parser/expressions.rs index 8cfcab4..622e98a 100644 --- a/src/parser/expressions.rs +++ b/src/parser/expressions.rs @@ -94,21 +94,34 @@ fn atom(input: &str) -> IResult<&str, UncheckedExpr> { fn primary(input: &str) -> IResult<&str, UncheckedExpr> { let (mut rest, mut acc) = atom(input)?; loop { - let index_parse = delimited( + if let Ok((r, index)) = delimited( preceded(multispace0, char('[')), preceded(multispace0, expression), preceded(multispace0, char(']')), - )(rest); - match index_parse { - Ok((r, index)) => { - acc = wrap(Expr::Index { - base: Box::new(acc), - index: Box::new(index), - }); - rest = r; - } - Err(_) => break, + )(rest) + { + acc = wrap(Expr::Index { + base: Box::new(acc), + index: Box::new(index), + }); + rest = r; + continue; } + + if let Ok((r, (_, member))) = pair( + preceded(multispace0, char('.')), + preceded(multispace0, identifier), + )(rest) + { + acc = wrap(Expr::Member { + base: Box::new(acc), + member: member.to_string(), + }); + rest = r; + continue; + } + + break; } Ok((rest, acc)) } diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 5386783..506ac54 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -160,7 +160,7 @@ fn while_statement(input: &str) -> IResult<&str, UncheckedStmt> { )) } -/// Parse an lvalue: identifier followed by zero or more `[ expr ]` suffixes. +/// Parse an lvalue: identifier followed by zero or more `[ expr ]` or `.member` suffixes. 
fn lvalue(input: &str) -> IResult<&str, UncheckedExpr> { let (mut rest, id) = preceded(multispace0, identifier)(input)?; let mut acc = ExprD { @@ -168,24 +168,40 @@ fn lvalue(input: &str) -> IResult<&str, UncheckedExpr> { ty: (), }; loop { - let index_parse = delimited( + if let Ok((r, index)) = delimited( preceded(multispace0, char('[')), preceded(multispace0, expression), preceded(multispace0, char(']')), - )(rest); - match index_parse { - Ok((r, index)) => { - acc = ExprD { - exp: Expr::Index { - base: Box::new(acc), - index: Box::new(index), - }, - ty: (), - }; - rest = r; - } - Err(_) => break, + )(rest) + { + acc = ExprD { + exp: Expr::Index { + base: Box::new(acc), + index: Box::new(index), + }, + ty: (), + }; + rest = r; + continue; } + + if let Ok((r, (_, member))) = tuple(( + preceded(multispace0, char('.')), + preceded(multispace0, identifier), + ))(rest) + { + acc = ExprD { + exp: Expr::Member { + base: Box::new(acc), + member: member.to_string(), + }, + ty: (), + }; + rest = r; + continue; + } + + break; } Ok((rest, acc)) } diff --git a/tests/parser.rs b/tests/parser.rs index 7f1ef77..f1b1f4b 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -931,3 +931,44 @@ fn test_array_in_expression() { assert!(matches!(result.exp, Expr::Index { ref base, ref index } if matches!(base.exp, Expr::ArrayLit(_)) && index.exp == Expr::Literal(Literal::Int(0)))); } + +#[test] +fn test_member_access_expression() { + let result = expression("point.x").unwrap().1; + assert!(matches!( + result.exp, + Expr::Member { ref base, ref member } + if matches!(base.exp, Expr::Ident(ref s) if s == "point") && member == "x" + )); +} + +#[test] +fn test_chained_member_access_expression() { + let result = expression("root.left.value").unwrap().1; + assert!(matches!(result.exp, Expr::Member { ref member, .. } if member == "value")); + if let Expr::Member { ref base, .. } = result.exp { + assert!(matches!(base.exp, Expr::Member { ref member, .. 
} if member == "left")); + } +} + +#[test] +fn test_member_access_with_index_expression() { + let result = expression("items.head[0]").unwrap().1; + assert!(matches!(result.exp, Expr::Index { ref base, .. } if matches!(base.exp, Expr::Member { ref member, .. } if member == "head"))); +} + +#[test] +fn test_member_assignment_target() { + let result = assignment("p.x = 1;").unwrap().1; + assert!(matches!( + result.stmt, + Statement::Assign { ref target, ref value } + if matches!(target.exp, Expr::Member { ref member, .. } if member == "x") + && value.exp == Expr::Literal(Literal::Int(1)) + )); +} + +#[test] +fn test_invalid_member_access_trailing_dot() { + assert!(all_consuming(expression)("point.").is_err()); +} From b65ab5a9644cf4d8dc5235ef820c9335bb204a4e Mon Sep 17 00:00:00 2001 From: chances190 Date: Sun, 12 Apr 2026 21:44:01 -0300 Subject: [PATCH 3/6] feat: implement type-checking for tagged types --- src/ir/ast.rs | 2 +- src/semantic/type_checker.rs | 314 +++++++++++++++++++++++++++-------- tests/type_checker.rs | 109 +++++++++++- 3 files changed, 350 insertions(+), 75 deletions(-) diff --git a/src/ir/ast.rs b/src/ir/ast.rs index 5cdaa12..4761fb8 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -49,7 +49,7 @@ //! everything, keeping the special case local to one function. 
/// Tagged types: struct, union, enum -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum TagType { Struct, Union, diff --git a/src/semantic/type_checker.rs b/src/semantic/type_checker.rs index 2fb14e2..bd28dc0 100644 --- a/src/semantic/type_checker.rs +++ b/src/semantic/type_checker.rs @@ -49,8 +49,8 @@ use std::collections::HashMap; use crate::environment::Environment; use crate::ir::ast::{ CheckedExpr, CheckedFunDecl, CheckedProgram, CheckedStmt, Expr, ExprD, FunDecl, Literal, - Program, Statement, StatementD, Type, UncheckedExpr, UncheckedFunDecl, UncheckedProgram, - UncheckedStmt, + Member, Program, Statement, StatementD, TagType, TaggedTypeDecl, Type, UncheckedExpr, + UncheckedFunDecl, UncheckedProgram, UncheckedStmt, }; use crate::stdlib::NativeRegistry; @@ -76,9 +76,13 @@ impl std::fmt::Display for TypeError { impl std::error::Error for TypeError {} +type TaggedTypeTable = HashMap<(TagType, String), TaggedTypeDecl>; + /// Type-check a program. Returns `Ok(CheckedProgram)` if well-typed, `Err(TypeError)` on first error. /// Requires a `main` function with signature `void main()`. pub fn type_check(program: &UncheckedProgram) -> Result { + let tagged_table = build_tagged_type_table(&program.tagged_types)?; + let main_fn = program.functions.iter().find(|f| f.name == "main"); match main_fn { None => return Err(TypeError::new("program must have a main function")), @@ -123,7 +127,7 @@ pub fn type_check(program: &UncheckedProgram) -> Result, fn_snapshot: &HashMap, + tagged_table: &TaggedTypeTable, ) -> Result { // Restore to clean function-only state, then add parameters. 
env.restore(fn_snapshot.clone()); for param in &f.params { env.declare(param.name.clone(), param.ty.clone()); } - let body = type_check_stmt(&f.body, env, &f.return_type)?; + let body = type_check_stmt(&f.body, env, &f.return_type, tagged_table)?; Ok(FunDecl { name: f.name.clone(), params: f.params.clone(), @@ -155,20 +160,35 @@ fn type_check_stmt( s: &UncheckedStmt, env: &mut Environment, expected_return: &Type, + tagged_table: &TaggedTypeTable, ) -> Result { let stmt = match &s.stmt { Statement::Decl { name, ty, init } => { if ty == &Type::Unit { return Err(TypeError::new("cannot declare variable of type void")); } + if let Type::Tagged { tag_type, tag_name } = ty { + if !tagged_table.contains_key(&(tag_type.clone(), tag_name.clone())) { + return Err(TypeError::new(format!( + "unknown tagged type: {:?} {}", + tag_type, tag_name + ))); + } + } if env.get(name).is_some() { return Err(TypeError::new(format!( "redeclaration of variable: {}", name ))); } - let init_checked = type_check_expr_to_typed(init, env)?; - if !types_compatible(&init_checked.ty, ty) { + let init_checked = type_check_expr_to_typed(init, env, tagged_table)?; + if matches!(ty, Type::Tagged { .. 
}) { + if init_checked.ty != Type::Int { + return Err(TypeError::new( + "tagged-typed variable declarations currently require integer placeholder initializer", + )); + } + } else if !types_compatible(&init_checked.ty, ty) { return Err(TypeError::new(format!( "declaration of {}: expected {:?}, got {:?}", name, ty, init_checked.ty @@ -182,10 +202,10 @@ fn type_check_stmt( } } Statement::Assign { target, value } => { - let value_checked = type_check_expr_to_typed(value, env)?; - type_check_assign_target(&target.exp, &value_checked.ty, env)?; + let value_checked = type_check_expr_to_typed(value, env, tagged_table)?; + type_check_assign_target(&target.exp, &value_checked.ty, env, tagged_table)?; Statement::Assign { - target: Box::new(type_check_expr_to_typed(target, env)?), + target: Box::new(type_check_expr_to_typed(target, env, tagged_table)?), value: Box::new(value_checked), } } @@ -193,7 +213,7 @@ fn type_check_stmt( let snapshot = env.snapshot(); let mut checked = Vec::new(); for st in seq { - checked.push(type_check_stmt(st, env, expected_return)?); + checked.push(type_check_stmt(st, env, expected_return, tagged_table)?); } env.restore(snapshot); Statement::Block { seq: checked } @@ -201,7 +221,7 @@ fn type_check_stmt( Statement::Call { name, args } => { let args_checked: Result, _> = args .iter() - .map(|a| type_check_expr_to_typed(a, env)) + .map(|a| type_check_expr_to_typed(a, env, tagged_table)) .collect(); let args_checked = args_checked?; check_call(name, &args_checked, env)?; @@ -215,17 +235,17 @@ fn type_check_stmt( then_branch, else_branch, } => { - let cond_checked = type_check_expr_to_typed(cond, env)?; + let cond_checked = type_check_expr_to_typed(cond, env, tagged_table)?; if cond_checked.ty != Type::Bool { return Err(TypeError::new(format!( "if condition must be Bool, got {:?}", cond_checked.ty ))); } - let then_checked = type_check_stmt(then_branch, env, expected_return)?; + let then_checked = type_check_stmt(then_branch, env, expected_return, 
tagged_table)?; let else_checked = else_branch .as_ref() - .map(|e| type_check_stmt(e, env, expected_return)) + .map(|e| type_check_stmt(e, env, expected_return, tagged_table)) .transpose()?; Statement::If { cond: Box::new(cond_checked), @@ -234,14 +254,14 @@ fn type_check_stmt( } } Statement::While { cond, body } => { - let cond_checked = type_check_expr_to_typed(cond, env)?; + let cond_checked = type_check_expr_to_typed(cond, env, tagged_table)?; if cond_checked.ty != Type::Bool { return Err(TypeError::new(format!( "while condition must be Bool, got {:?}", cond_checked.ty ))); } - let body_checked = type_check_stmt(body, env, expected_return)?; + let body_checked = type_check_stmt(body, env, expected_return, tagged_table)?; Statement::While { cond: Box::new(cond_checked), body: Box::new(body_checked), @@ -261,7 +281,7 @@ fn type_check_stmt( if *expected_return == Type::Unit { return Err(TypeError::new("void function must not return a value")); } - let checked = type_check_expr_to_typed(e, env)?; + let checked = type_check_expr_to_typed(e, env, tagged_table)?; if !types_compatible(&checked.ty, expected_return) { return Err(TypeError::new(format!( "return type mismatch: expected {:?}, got {:?}", @@ -313,6 +333,7 @@ fn type_check_assign_target( target: &Expr<()>, value_ty: &Type, env: &Environment, + tagged_table: &TaggedTypeTable, ) -> Result<(), TypeError> { match target { Expr::Ident(name) => { @@ -328,11 +349,11 @@ fn type_check_assign_target( Ok(()) } Expr::Index { base, index } => { - let index_ty = type_check_expr(index, env)?; + let index_ty = type_check_expr(index, env, tagged_table)?; if index_ty != Type::Int { return Err(TypeError::new("array index must be Int")); } - let base_ty = type_check_expr(base, env)?; + let base_ty = type_check_expr(base, env, tagged_table)?; if let Type::Array(elem) = &base_ty { if **elem != *value_ty { return Err(TypeError::new("assignment type mismatch")); @@ -342,6 +363,54 @@ fn type_check_assign_target( } Ok(()) } + 
Expr::Member { base, member } => { + let base_ty = type_check_expr(base, env, tagged_table)?; + match base_ty { + Type::Tagged { tag_type, tag_name } => { + let decl = tagged_table + .get(&(tag_type.clone(), tag_name.clone())) + .ok_or_else(|| { + TypeError::new(format!( + "unknown tagged type in member assignment: {:?} {}", + tag_type, tag_name + )) + })?; + + match tag_type { + TagType::Struct | TagType::Union => { + let field_ty = decl + .members + .iter() + .find_map(|m| match m { + Member::Field(decl) if decl.name == *member => { + Some(decl.ty.clone()) + } + _ => None, + }) + .ok_or_else(|| { + TypeError::new(format!( + "unknown member '{}' on {:?} {}", + member, tag_type, tag_name + )) + })?; + + if !types_compatible(value_ty, &field_ty) { + return Err(TypeError::new(format!( + "assignment to {}.{}: expected {:?}, got {:?}", + tag_name, member, field_ty, value_ty + ))); + } + Ok(()) + } + TagType::Enum => Err(TypeError::new("cannot assign to enum members")), + } + } + other => Err(TypeError::new(format!( + "member assignment requires tagged base type, got {:?}", + other + ))), + } + } _ => Err(TypeError::new("invalid assignment target")), } } @@ -349,70 +418,83 @@ fn type_check_assign_target( fn type_check_expr_to_typed( e: &UncheckedExpr, env: &Environment, + tagged_table: &TaggedTypeTable, ) -> Result { - let ty = type_check_expr(e, env)?; - let exp = type_check_expr_inner(&e.exp, env)?; + let ty = type_check_expr(e, env, tagged_table)?; + let exp = type_check_expr_inner(&e.exp, env, tagged_table)?; Ok(ExprD { exp, ty }) } -fn type_check_expr_inner(e: &Expr<()>, env: &Environment) -> Result, TypeError> { +fn type_check_expr_inner( + e: &Expr<()>, + env: &Environment, + tagged_table: &TaggedTypeTable, +) -> Result, TypeError> { match e { Expr::Literal(l) => Ok(Expr::Literal(l.clone())), Expr::Ident(name) => Ok(Expr::Ident(name.clone())), - Expr::Neg(inner) => Ok(Expr::Neg(Box::new(type_check_expr_to_typed(inner, env)?))), + Expr::Neg(inner) => 
Ok(Expr::Neg(Box::new(type_check_expr_to_typed( + inner, + env, + tagged_table, + )?))), Expr::Add(l, r) => Ok(Expr::Add( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Sub(l, r) => Ok(Expr::Sub( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Mul(l, r) => Ok(Expr::Mul( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Div(l, r) => Ok(Expr::Div( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Eq(l, r) => Ok(Expr::Eq( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Ne(l, r) => Ok(Expr::Ne( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Lt(l, r) => Ok(Expr::Lt( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Le(l, r) => Ok(Expr::Le( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + 
Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Gt(l, r) => Ok(Expr::Gt( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Ge(l, r) => Ok(Expr::Ge( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), - Expr::Not(inner) => Ok(Expr::Not(Box::new(type_check_expr_to_typed(inner, env)?))), + Expr::Not(inner) => Ok(Expr::Not(Box::new(type_check_expr_to_typed( + inner, + env, + tagged_table, + )?))), Expr::And(l, r) => Ok(Expr::And( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Or(l, r) => Ok(Expr::Or( - Box::new(type_check_expr_to_typed(l, env)?), - Box::new(type_check_expr_to_typed(r, env)?), + Box::new(type_check_expr_to_typed(l, env, tagged_table)?), + Box::new(type_check_expr_to_typed(r, env, tagged_table)?), )), Expr::Call { name, args } => { let args_checked: Result, _> = args .iter() - .map(|a| type_check_expr_to_typed(a, env)) + .map(|a| type_check_expr_to_typed(a, env, tagged_table)) .collect(); Ok(Expr::Call { name: name.clone(), @@ -422,18 +504,26 @@ fn type_check_expr_inner(e: &Expr<()>, env: &Environment) -> Result { let elems_checked: Result, _> = elems .iter() - .map(|e| type_check_expr_to_typed(e, env)) + .map(|e| type_check_expr_to_typed(e, env, tagged_table)) .collect(); Ok(Expr::ArrayLit(elems_checked?)) } Expr::Index { base, index } => Ok(Expr::Index { - base: Box::new(type_check_expr_to_typed(base, env)?), - index: Box::new(type_check_expr_to_typed(index, 
env)?), + base: Box::new(type_check_expr_to_typed(base, env, tagged_table)?), + index: Box::new(type_check_expr_to_typed(index, env, tagged_table)?), + }), + Expr::Member { base, member } => Ok(Expr::Member { + base: Box::new(type_check_expr_to_typed(base, env, tagged_table)?), + member: member.clone(), }), } } -fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result { +fn type_check_expr( + e: &UncheckedExpr, + env: &Environment, + tagged_table: &TaggedTypeTable, +) -> Result { match &e.exp { Expr::Literal(l) => Ok(literal_type(l)), Expr::Ident(name) => match env.get(name) { @@ -445,7 +535,7 @@ fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result Err(TypeError::new(format!("undeclared variable: {}", name))), }, Expr::Neg(inner) => { - let ty = type_check_expr(inner, env)?; + let ty = type_check_expr(inner, env, tagged_table)?; if matches!(ty, Type::Int | Type::Float) { Ok(ty) } else { @@ -453,13 +543,13 @@ fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result { - let lt = type_check_expr(l, env)?; - let rt = type_check_expr(r, env)?; + let lt = type_check_expr(l, env, tagged_table)?; + let rt = type_check_expr(r, env, tagged_table)?; numeric_binop_result(<, &rt) } Expr::Eq(l, r) | Expr::Ne(l, r) => { - let lt = type_check_expr(l, env)?; - let rt = type_check_expr(r, env)?; + let lt = type_check_expr(l, env, tagged_table)?; + let rt = type_check_expr(r, env, tagged_table)?; if !types_compatible(<, &rt) { return Err(TypeError::new(format!( "equality operands must have compatible types, got {:?} and {:?}", @@ -469,8 +559,8 @@ fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result { - let lt = type_check_expr(l, env)?; - let rt = type_check_expr(r, env)?; + let lt = type_check_expr(l, env, tagged_table)?; + let rt = type_check_expr(r, env, tagged_table)?; if !is_numeric(<) || !is_numeric(&rt) { return Err(TypeError::new(format!( "ordering comparison requires numeric operands, got {:?} and {:?}", @@ -480,7 +570,7 @@ fn 
type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result { - let ty = type_check_expr(inner, env)?; + let ty = type_check_expr(inner, env, tagged_table)?; if ty == Type::Bool { Ok(Type::Bool) } else { @@ -488,8 +578,8 @@ fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result { - let lt = type_check_expr(l, env)?; - let rt = type_check_expr(r, env)?; + let lt = type_check_expr(l, env, tagged_table)?; + let rt = type_check_expr(r, env, tagged_table)?; if lt == Type::Bool && rt == Type::Bool { Ok(Type::Bool) } else { @@ -499,7 +589,7 @@ fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result { let args_checked: Result, _> = args .iter() - .map(|a| type_check_expr_to_typed(a, env)) + .map(|a| type_check_expr_to_typed(a, env, tagged_table)) .collect(); let args_checked = args_checked?; match env.get(name) { @@ -538,9 +628,9 @@ fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result) -> Result { - let index_ty = type_check_expr(index, env)?; + let index_ty = type_check_expr(index, env, tagged_table)?; if index_ty != Type::Int { return Err(TypeError::new("array index must be Int")); } - let base_ty = type_check_expr(base, env)?; + let base_ty = type_check_expr(base, env, tagged_table)?; if let Type::Array(elem) = base_ty { Ok(*elem) } else { Err(TypeError::new("indexed expression must be array")) } } + Expr::Member { base, member } => { + let base_ty = type_check_expr(base, env, tagged_table)?; + match base_ty { + Type::Tagged { tag_type, tag_name } => { + let decl = tagged_table + .get(&(tag_type.clone(), tag_name.clone())) + .ok_or_else(|| { + TypeError::new(format!( + "unknown tagged type in member access: {:?} {}", + tag_type, tag_name + )) + })?; + + match tag_type { + TagType::Struct | TagType::Union => decl + .members + .iter() + .find_map(|m| match m { + Member::Field(decl) if decl.name == *member => { + Some(decl.ty.clone()) + } + _ => None, + }) + .ok_or_else(|| { + TypeError::new(format!( + "unknown member '{}' on {:?} 
{}", + member, tag_type, tag_name + )) + }), + TagType::Enum => { + let exists = decl.members.iter().any(|m| match m { + Member::Enumerator { name, .. } => name == member, + _ => false, + }); + if exists { + Ok(Type::Int) + } else { + Err(TypeError::new(format!( + "unknown enumerator '{}' on enum {}", + member, tag_name + ))) + } + } + } + } + other => Err(TypeError::new(format!( + "member access requires tagged base type, got {:?}", + other + ))), + } + } } } @@ -596,6 +737,43 @@ fn types_compatible(a: &Type, b: &Type) -> bool { | (Type::Unit, Type::Unit) => true, (Type::Int, Type::Float) | (Type::Float, Type::Int) => true, (Type::Array(a), Type::Array(b)) => types_compatible(a, b), + ( + Type::Tagged { + tag_type: a_kind, + tag_name: a_name, + }, + Type::Tagged { + tag_type: b_kind, + tag_name: b_name, + }, + ) => a_kind == b_kind && a_name == b_name, _ => false, } } + +fn build_tagged_type_table(tagged_types: &[TaggedTypeDecl]) -> Result { + let mut seen = std::collections::HashSet::<(TagType, String)>::new(); + let mut table = TaggedTypeTable::new(); + + for decl in tagged_types { + let key = (decl.tag_type.clone(), decl.tag_name.clone()); + + if !seen.insert(key.clone()) { + return Err(TypeError::new(format!( + "duplicate tagged type declaration: {:?} {}", + decl.tag_type, decl.tag_name + ))); + } + + if decl.members.is_empty() { + return Err(TypeError::new(format!( + "tagged type declaration '{}' must declare at least one member", + decl.tag_name + ))); + } + + table.insert(key, decl.clone()); + } + + Ok(table) +} diff --git a/tests/type_checker.rs b/tests/type_checker.rs index 3357161..c9e9996 100644 --- a/tests/type_checker.rs +++ b/tests/type_checker.rs @@ -1,15 +1,13 @@ //! Integration tests for the MiniC type checker. 
-use nom::combinator::all_consuming; use mini_c::ir::ast::{CheckedProgram, Type}; use mini_c::parser::program; use mini_c::semantic::type_check; +use nom::combinator::all_consuming; fn parse_and_type_check(src: &str) -> Result { - let (_, prog) = all_consuming(program)(src).map_err(|_| { - mini_c::semantic::TypeError { - message: "parse failed".to_string(), - } + let (_, prog) = all_consuming(program)(src).map_err(|_| mini_c::semantic::TypeError { + message: "parse failed".to_string(), })?; type_check(&prog) } @@ -136,7 +134,10 @@ fn test_type_check_return_void_ok() { fn test_type_check_return_value_in_void_fn() { let result = parse_and_type_check("void main() return 1;"); assert!(result.is_err()); - assert!(result.unwrap_err().message.contains("void function must not return a value")); + assert!(result + .unwrap_err() + .message + .contains("void function must not return a value")); } #[test] @@ -202,3 +203,99 @@ fn test_type_check_print_wrong_arity() { let result = parse_and_type_check("void main() { print(1, 2); }"); assert!(result.is_err(), "expected arity error for print(1, 2)"); } + +// --------------------------------------------------------------------------- +// Tagged Types +// --------------------------------------------------------------------------- + +#[test] +fn test_type_check_accepts_struct_decl_and_member_access() { + let result = parse_and_type_check( + "struct Point { int x; int y; }\nvoid main() { struct Point p = 0; p.x = 12; int v = p.x; }", + ); + assert!(result.is_ok()); +} + +#[test] +fn test_type_check_accepts_union_decl_and_member_access() { + let result = parse_and_type_check( + "union Number { int i; float f; }\nvoid main() { union Number n = 0; n.i = 7; int v = n.i; }", + ); + assert!(result.is_ok()); +} + +#[test] +fn test_type_check_accepts_enum_decl_and_member_access() { + let result = parse_and_type_check( + "enum Color { Red; Green = 5; Blue; }\nvoid main() { enum Color c = 0; int v = c.Blue; }", + ); + assert!(result.is_ok()); 
+} + +#[test] +fn test_type_check_rejects_unknown_struct_member_access() { + let result = parse_and_type_check( + "struct Point { int x; }\nvoid main() { struct Point p = 0; int v = p.y; }", + ); + assert!(result.is_err()); + assert!(result.unwrap_err().message.contains("unknown member")); +} + +#[test] +fn test_type_check_rejects_enum_member_assignment() { + let result = parse_and_type_check( + "enum Color { Red; Green; }\nvoid main() { enum Color c = 0; c.Red = 1; }", + ); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .message + .contains("cannot assign to enum members")); +} + +#[test] +fn test_type_check_rejects_unknown_tagged_type_declaration_use() { + let result = parse_and_type_check("void main() { struct Missing x = 0; }"); + assert!(result.is_err()); + assert!(result.unwrap_err().message.contains("unknown tagged type")); +} + +#[test] +fn test_type_check_rejects_union_member_assignment_type_mismatch() { + let result = parse_and_type_check( + "union Number { int i; float f; }\nvoid main() { union Number n = 0; n.i = true; }", + ); + assert!(result.is_err()); + assert!(result.unwrap_err().message.contains("expected Int")); +} + +#[test] +fn test_type_check_rejects_unknown_enum_member_access() { + let result = parse_and_type_check( + "enum Color { Red; Green; }\nvoid main() { enum Color c = 0; int v = c.Blue; }", + ); + assert!(result.is_err()); + assert!(result.unwrap_err().message.contains("unknown enumerator")); +} + +#[test] +fn test_type_check_rejects_member_access_on_non_tagged_value() { + let result = parse_and_type_check("void main() { int x = 0; int y = x.foo; }"); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .message + .contains("member access requires tagged base type")); +} + +#[test] +fn test_type_check_rejects_duplicate_tagged_declarations() { + let result = parse_and_type_check( + "struct Point { int x; }\nstruct Point { int y; }\nvoid main() { int z = 0; }", + ); + assert!(result.is_err()); + assert!(result + 
.unwrap_err() + .message + .contains("duplicate tagged type declaration")); +} From bd169bd178199aebe85656b049e20accf04174bb Mon Sep 17 00:00:00 2001 From: chances190 Date: Sun, 12 Apr 2026 21:44:17 -0300 Subject: [PATCH 4/6] feat: implement interpretation for tagged types --- src/interpreter/eval_expr.rs | 184 +++++++++++++++++++++++++----- src/interpreter/exec_stmt.rs | 209 +++++++++++++++++++++++++++++++---- src/interpreter/mod.rs | 31 +++++- src/interpreter/value.rs | 20 ++++ tests/interpreter.rs | 119 ++++++++++++++++++++ 5 files changed, 512 insertions(+), 51 deletions(-) diff --git a/src/interpreter/eval_expr.rs b/src/interpreter/eval_expr.rs index ee9ca56..ae405cc 100644 --- a/src/interpreter/eval_expr.rs +++ b/src/interpreter/eval_expr.rs @@ -40,13 +40,21 @@ //! for more detail on this mechanism. use crate::environment::Environment; -use crate::ir::ast::{CheckedExpr, Expr, Literal}; +use crate::ir::ast::{CheckedExpr, Expr, Literal, Member, TagType, TaggedTypeDecl, Type}; + +use std::collections::HashMap; use super::exec_stmt::exec_stmt; use super::value::{FnValue, RuntimeError, Value}; +type TaggedRuntimeTable = HashMap<(TagType, String), TaggedTypeDecl>; + /// Evaluate a checked expression to a runtime value. -pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result { +pub fn eval_expr( + expr: &CheckedExpr, + env: &mut Environment, + tagged_table: &TaggedRuntimeTable, +) -> Result { match &expr.exp { Expr::Literal(lit) => Ok(eval_literal(lit)), @@ -55,7 +63,7 @@ pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result match eval_expr(inner, env)? { + Expr::Neg(inner) => match eval_expr(inner, env, tagged_table)? 
{ Value::Int(n) => Ok(Value::Int(-n)), Value::Float(x) => Ok(Value::Float(-x)), v => Err(RuntimeError::new(format!( @@ -64,28 +72,68 @@ pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result numeric_binop(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a + b, |a, b| a + b), - Expr::Sub(l, r) => numeric_binop(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a - b, |a, b| a - b), - Expr::Mul(l, r) => numeric_binop(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a * b, |a, b| a * b), - Expr::Div(l, r) => numeric_binop(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a / b, |a, b| a / b), + Expr::Add(l, r) => numeric_binop( + eval_expr(l, env, tagged_table)?, + eval_expr(r, env, tagged_table)?, + |a, b| a + b, + |a, b| a + b, + ), + Expr::Sub(l, r) => numeric_binop( + eval_expr(l, env, tagged_table)?, + eval_expr(r, env, tagged_table)?, + |a, b| a - b, + |a, b| a - b, + ), + Expr::Mul(l, r) => numeric_binop( + eval_expr(l, env, tagged_table)?, + eval_expr(r, env, tagged_table)?, + |a, b| a * b, + |a, b| a * b, + ), + Expr::Div(l, r) => numeric_binop( + eval_expr(l, env, tagged_table)?, + eval_expr(r, env, tagged_table)?, + |a, b| a / b, + |a, b| a / b, + ), - Expr::Lt(l, r) => numeric_cmp(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a < b, |a, b| a < b), - Expr::Le(l, r) => numeric_cmp(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a <= b, |a, b| a <= b), - Expr::Gt(l, r) => numeric_cmp(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a > b, |a, b| a > b), - Expr::Ge(l, r) => numeric_cmp(eval_expr(l, env)?, eval_expr(r, env)?, |a, b| a >= b, |a, b| a >= b), + Expr::Lt(l, r) => numeric_cmp( + eval_expr(l, env, tagged_table)?, + eval_expr(r, env, tagged_table)?, + |a, b| a < b, + |a, b| a < b, + ), + Expr::Le(l, r) => numeric_cmp( + eval_expr(l, env, tagged_table)?, + eval_expr(r, env, tagged_table)?, + |a, b| a <= b, + |a, b| a <= b, + ), + Expr::Gt(l, r) => numeric_cmp( + eval_expr(l, env, tagged_table)?, + eval_expr(r, env, tagged_table)?, + |a, b| a > b, + 
|a, b| a > b, + ), + Expr::Ge(l, r) => numeric_cmp( + eval_expr(l, env, tagged_table)?, + eval_expr(r, env, tagged_table)?, + |a, b| a >= b, + |a, b| a >= b, + ), Expr::Eq(l, r) => { - let lv = eval_expr(l, env)?; - let rv = eval_expr(r, env)?; + let lv = eval_expr(l, env, tagged_table)?; + let rv = eval_expr(r, env, tagged_table)?; Ok(Value::Bool(values_equal(&lv, &rv))) } Expr::Ne(l, r) => { - let lv = eval_expr(l, env)?; - let rv = eval_expr(r, env)?; + let lv = eval_expr(l, env, tagged_table)?; + let rv = eval_expr(r, env, tagged_table)?; Ok(Value::Bool(!values_equal(&lv, &rv))) } - Expr::Not(inner) => match eval_expr(inner, env)? { + Expr::Not(inner) => match eval_expr(inner, env, tagged_table)? { Value::Bool(b) => Ok(Value::Bool(!b)), v => Err(RuntimeError::new(format!( "expected bool for '!', got: {}", @@ -93,10 +141,10 @@ pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result { - let lv = eval_expr(l, env)?; + let lv = eval_expr(l, env, tagged_table)?; match lv { Value::Bool(false) => Ok(Value::Bool(false)), - Value::Bool(true) => eval_expr(r, env), + Value::Bool(true) => eval_expr(r, env, tagged_table), v => Err(RuntimeError::new(format!( "expected bool for 'and', got: {}", v @@ -104,10 +152,10 @@ pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result { - let lv = eval_expr(l, env)?; + let lv = eval_expr(l, env, tagged_table)?; match lv { Value::Bool(true) => Ok(Value::Bool(true)), - Value::Bool(false) => eval_expr(r, env), + Value::Bool(false) => eval_expr(r, env, tagged_table), v => Err(RuntimeError::new(format!( "expected bool for 'or', got: {}", v @@ -116,14 +164,16 @@ pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result { - let vals: Result, RuntimeError> = - elems.iter().map(|e| eval_expr(e, env)).collect(); + let vals: Result, RuntimeError> = elems + .iter() + .map(|e| eval_expr(e, env, tagged_table)) + .collect(); Ok(Value::Array(vals?)) } Expr::Index { base, index } => { - let base_val = 
eval_expr(base, env)?; - let idx_val = eval_expr(index, env)?; + let base_val = eval_expr(base, env, tagged_table)?; + let idx_val = eval_expr(index, env, tagged_table)?; match (base_val, idx_val) { (Value::Array(elems), Value::Int(i)) => { let i = i as usize; @@ -143,9 +193,60 @@ pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result { - let arg_vals: Result, RuntimeError> = - args.iter().map(|a| eval_expr(a, env)).collect(); - eval_call(name, arg_vals?, env) + let arg_vals: Result, RuntimeError> = args + .iter() + .map(|a| eval_expr(a, env, tagged_table)) + .collect(); + eval_call(name, arg_vals?, env, tagged_table) + } + + Expr::Member { base, member } => { + let base_val = eval_expr(base, env, tagged_table)?; + match &base.ty { + Type::Tagged { tag_type, tag_name } => match tag_type { + TagType::Struct => match base_val { + Value::Struct { fields, .. } => { + fields.get(member).cloned().ok_or_else(|| { + RuntimeError::new(format!( + "missing struct member '{}.{}'", + tag_name, member + )) + }) + } + other => Err(RuntimeError::new(format!( + "expected struct runtime value for {}, got {}", + tag_name, other + ))), + }, + TagType::Union => match base_val { + Value::Union { + active_field, + value, + .. 
+ } => { + if &active_field == member { + Ok(*value) + } else { + Err(RuntimeError::new(format!( + "union member '{}.{}' is inactive (active field: {})", + tag_name, member, active_field + ))) + } + } + other => Err(RuntimeError::new(format!( + "expected union runtime value for {}, got {}", + tag_name, other + ))), + }, + TagType::Enum => { + enum_member_value(tag_name, member, tagged_table).map(Value::Int) + } + }, + other => Err(RuntimeError::new(format!( + "member access requires tagged base type, got {:?}", + other + ))), + } } } } @@ -155,6 +256,7 @@ pub fn eval_call( name: &str, args: Vec, env: &mut Environment, + tagged_table: &TaggedRuntimeTable, ) -> Result { match env.get(name).cloned() { Some(Value::Fn(FnValue::Native(f))) => (f)(args), @@ -171,7 +273,7 @@ pub fn eval_call( for (param, val) in decl.params.iter().zip(args.into_iter()) { env.declare(param.name.clone(), val); } - let result = exec_stmt(&decl.body, env)?; + let result = exec_stmt(&decl.body, env, tagged_table)?; env.restore(snapshot); Ok(result.unwrap_or(Value::Void)) } @@ -180,6 +282,32 @@ pub fn eval_call( } } +fn enum_member_value( + tag_name: &str, + member: &str, + tagged_table: &TaggedRuntimeTable, +) -> Result { + let decl = tagged_table + .get(&(TagType::Enum, tag_name.to_string())) + .ok_or_else(|| RuntimeError::new(format!("unknown enum type '{}'", tag_name)))?; + + let mut next_value: i64 = 0; + for entry in &decl.members { + if let Member::Enumerator { name, value } = entry { + let resolved = value.unwrap_or(next_value); + if name == member { + return Ok(resolved); + } + next_value = resolved + 1; + } + } + + Err(RuntimeError::new(format!( + "unknown enumerator '{}.{}'", + tag_name, member + ))) +} + // --- Helpers --- fn eval_literal(lit: &Literal) -> Value { diff --git a/src/interpreter/exec_stmt.rs b/src/interpreter/exec_stmt.rs index ceeda2c..1e98e81 100644 --- a/src/interpreter/exec_stmt.rs +++ b/src/interpreter/exec_stmt.rs @@ -34,28 +34,44 @@ //! 
This gives MiniC correct lexical block scoping without a scope stack. use crate::environment::Environment; -use crate::ir::ast::{CheckedExpr, CheckedStmt, Expr, Statement}; +use crate::ir::ast::{ + CheckedExpr, CheckedStmt, Expr, Member, Statement, TagType, TaggedTypeDecl, Type, +}; + +use std::collections::HashMap; use super::eval_expr::{eval_call, eval_expr}; use super::value::{RuntimeError, Value}; +type TaggedRuntimeTable = HashMap<(TagType, String), TaggedTypeDecl>; + /// `None` = normal fall-through; `Some(v)` = early return with value. pub type ExecResult = Result, RuntimeError>; /// Execute a checked statement. Returns `Some(v)` if a `return` was hit. -pub fn exec_stmt(stmt: &CheckedStmt, env: &mut Environment) -> ExecResult { +pub fn exec_stmt( + stmt: &CheckedStmt, + env: &mut Environment, + tagged_table: &TaggedRuntimeTable, +) -> ExecResult { match &stmt.stmt { // --- Variable declaration --- - Statement::Decl { name, init, .. } => { - let val = eval_expr(init, env)?; - env.declare(name.clone(), val); + Statement::Decl { name, ty, init } => { + let init_val = eval_expr(init, env, tagged_table)?; + let stored = match ty { + Type::Tagged { tag_type, tag_name } => { + build_tagged_value(tag_type, tag_name, init_val, tagged_table)? + } + _ => init_val, + }; + env.declare(name.clone(), stored); Ok(None) } // --- Assignment --- Statement::Assign { target, value } => { - let val = eval_expr(value, env)?; - assign_lvalue(target, val, env)?; + let val = eval_expr(value, env, tagged_table)?; + assign_lvalue(target, val, env, tagged_table)?; Ok(None) } @@ -65,7 +81,7 @@ pub fn exec_stmt(stmt: &CheckedStmt, env: &mut Environment) -> ExecResult Statement::Block { seq } => { let outer_keys = env.names(); for s in seq { - if let Some(ret) = exec_stmt(s, env)? { + if let Some(ret) = exec_stmt(s, env, tagged_table)? 
{ env.remove_new(&outer_keys); return Ok(Some(ret)); } @@ -79,11 +95,11 @@ pub fn exec_stmt(stmt: &CheckedStmt, env: &mut Environment) -> ExecResult cond, then_branch, else_branch, - } => match eval_expr(cond, env)? { - Value::Bool(true) => exec_stmt(then_branch, env), + } => match eval_expr(cond, env, tagged_table)? { + Value::Bool(true) => exec_stmt(then_branch, env, tagged_table), Value::Bool(false) => { if let Some(eb) = else_branch { - exec_stmt(eb, env) + exec_stmt(eb, env, tagged_table) } else { Ok(None) } @@ -96,9 +112,9 @@ pub fn exec_stmt(stmt: &CheckedStmt, env: &mut Environment) -> ExecResult // --- While --- Statement::While { cond, body } => loop { - match eval_expr(cond, env)? { + match eval_expr(cond, env, tagged_table)? { Value::Bool(true) => { - if let Some(ret) = exec_stmt(body, env)? { + if let Some(ret) = exec_stmt(body, env, tagged_table)? { return Ok(Some(ret)); } } @@ -114,16 +130,18 @@ pub fn exec_stmt(stmt: &CheckedStmt, env: &mut Environment) -> ExecResult // --- Return --- Statement::Return(Some(expr)) => { - let val = eval_expr(expr, env)?; + let val = eval_expr(expr, env, tagged_table)?; Ok(Some(val)) } Statement::Return(None) => Ok(Some(Value::Void)), // --- Statement-level function call --- Statement::Call { name, args } => { - let arg_vals: Result, RuntimeError> = - args.iter().map(|a| eval_expr(a, env)).collect(); - eval_call(name, arg_vals?, env)?; + let arg_vals: Result, RuntimeError> = args + .iter() + .map(|a| eval_expr(a, env, tagged_table)) + .collect(); + eval_call(name, arg_vals?, env, tagged_table)?; Ok(None) } } @@ -134,6 +152,7 @@ fn assign_lvalue( target: &CheckedExpr, val: Value, env: &mut Environment, + tagged_table: &TaggedRuntimeTable, ) -> Result<(), RuntimeError> { match &target.exp { Expr::Ident(name) => { @@ -147,7 +166,7 @@ fn assign_lvalue( } } Expr::Index { base, index } => { - let idx = match eval_expr(index, &mut *env)? { + let idx = match eval_expr(index, &mut *env, tagged_table)? 
{ Value::Int(i) => i as usize, v => { return Err(RuntimeError::new(format!( @@ -156,8 +175,9 @@ fn assign_lvalue( ))) } }; - assign_index(base, idx, val, env) + assign_index(base, idx, val, env, tagged_table) } + Expr::Member { base, member } => assign_member(base, member, val, env, tagged_table), _ => Err(RuntimeError::new("invalid assignment target".to_string())), } } @@ -168,6 +188,7 @@ fn assign_index( idx: usize, val: Value, env: &mut Environment, + tagged_table: &TaggedRuntimeTable, ) -> Result<(), RuntimeError> { match &base.exp { Expr::Ident(name) => { @@ -198,7 +219,7 @@ fn assign_index( base: inner_base, index: inner_index, } => { - let inner_idx = match eval_expr(inner_index, env)? { + let inner_idx = match eval_expr(inner_index, env, tagged_table)? { Value::Int(i) => i as usize, v => { return Err(RuntimeError::new(format!( @@ -252,6 +273,154 @@ fn assign_index( } } +fn assign_member( + base: &CheckedExpr, + member: &str, + val: Value, + env: &mut Environment, + _tagged_table: &TaggedRuntimeTable, +) -> Result<(), RuntimeError> { + match &base.exp { + Expr::Ident(name) => { + let current = env + .get(name) + .cloned() + .ok_or_else(|| RuntimeError::new(format!("undefined variable '{}'", name)))?; + let updated = match current { + Value::Struct { + tag_name, + mut fields, + } => { + fields.insert(member.to_string(), val); + Value::Struct { tag_name, fields } + } + Value::Union { tag_name, .. 
} => Value::Union { + tag_name, + active_field: member.to_string(), + value: Box::new(val), + }, + other => { + return Err(RuntimeError::new(format!( + "cannot assign member on non-tagged value: {}", + other + ))) + } + }; + env.set(name, updated); + Ok(()) + } + _ => Err(RuntimeError::new( + "member assignment currently requires a simple variable base".to_string(), + )), + } +} + +fn build_tagged_value( + tag_type: &TagType, + tag_name: &str, + init_val: Value, + tagged_table: &TaggedRuntimeTable, +) -> Result { + let decl = tagged_table + .get(&(tag_type.clone(), tag_name.to_string())) + .ok_or_else(|| { + RuntimeError::new(format!( + "unknown tagged type at runtime: {:?} {}", + tag_type, tag_name + )) + })?; + + match tag_type { + TagType::Struct => { + let mut fields = HashMap::new(); + for member in &decl.members { + if let Member::Field(field) = member { + fields.insert( + field.name.clone(), + default_value_for_type(&field.ty, tagged_table)?, + ); + } + } + Ok(Value::Struct { + tag_name: tag_name.to_string(), + fields, + }) + } + TagType::Union => { + let first_field = decl + .members + .iter() + .find_map(|member| match member { + Member::Field(field) => Some(field), + _ => None, + }) + .ok_or_else(|| { + RuntimeError::new(format!("union {} has no fields at runtime", tag_name)) + })?; + + let coerced = coerce_value_to_type(init_val, &first_field.ty)?; + Ok(Value::Union { + tag_name: tag_name.to_string(), + active_field: first_field.name.clone(), + value: Box::new(coerced), + }) + } + TagType::Enum => { + let numeric = match init_val { + Value::Int(n) => n, + other => { + return Err(RuntimeError::new(format!( + "enum initializer must be integer, got {}", + other + ))) + } + }; + + Ok(Value::Enum { + tag_name: tag_name.to_string(), + value: numeric, + }) + } + } +} + +fn default_value_for_type( + ty: &Type, + tagged_table: &TaggedRuntimeTable, +) -> Result { + match ty { + Type::Unit => Ok(Value::Void), + Type::Int => Ok(Value::Int(0)), + Type::Float => 
Ok(Value::Float(0.0)), + Type::Bool => Ok(Value::Bool(false)), + Type::Str => Ok(Value::Str(String::new())), + Type::Array(_) => Ok(Value::Array(vec![])), + Type::Tagged { tag_type, tag_name } => { + build_tagged_value(tag_type, tag_name, Value::Int(0), tagged_table) + } + Type::Function { .. } | Type::Any => Err(RuntimeError::new( + "cannot create default runtime value for this type", + )), + } +} + +fn coerce_value_to_type(val: Value, ty: &Type) -> Result { + match (val, ty) { + (Value::Int(n), Type::Int) => Ok(Value::Int(n)), + (Value::Int(n), Type::Float) => Ok(Value::Float(n as f64)), + (Value::Int(n), Type::Bool) => Ok(Value::Bool(n != 0)), + (Value::Int(n), Type::Str) => Ok(Value::Str(n.to_string())), + (Value::Float(x), Type::Float) => Ok(Value::Float(x)), + (Value::Float(x), Type::Int) => Ok(Value::Int(x as i64)), + (Value::Bool(b), Type::Bool) => Ok(Value::Bool(b)), + (Value::Str(s), Type::Str) => Ok(Value::Str(s)), + (other, _) => Err(RuntimeError::new(format!( + "cannot coerce value {} to required type {:?}", + other, ty + ))), + } +} + fn extract_ident_name(expr: &CheckedExpr) -> Result { match &expr.exp { Expr::Ident(name) => Ok(name.clone()), diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index 08b93c1..3cd9346 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -55,14 +55,20 @@ pub mod exec_stmt; pub mod value; use crate::environment::Environment; -use crate::ir::ast::CheckedProgram; +use crate::ir::ast::{CheckedProgram, TagType, TaggedTypeDecl}; use crate::stdlib::NativeRegistry; +use std::collections::HashMap; + use eval_expr::eval_call; use value::{FnValue, RuntimeError, Value}; +type TaggedRuntimeTable = HashMap<(TagType, String), TaggedTypeDecl>; + /// Interpret a type-checked MiniC program, starting execution at `main`. 
pub fn interpret(program: &CheckedProgram) -> Result<(), RuntimeError> { + let tagged_table = build_tagged_runtime_table(&program.tagged_types)?; + let mut env = Environment::::new(); // Register native stdlib functions as Value::Fn(FnValue::Native) bindings. @@ -73,13 +79,32 @@ pub fn interpret(program: &CheckedProgram) -> Result<(), RuntimeError> { // Register user-defined functions as Value::Fn(FnValue::UserDefined) bindings. for fun in &program.functions { - env.declare(fun.name.clone(), Value::Fn(FnValue::UserDefined(fun.clone()))); + env.declare( + fun.name.clone(), + Value::Fn(FnValue::UserDefined(fun.clone())), + ); } if env.get("main").is_none() { return Err(RuntimeError::new("no 'main' function found")); } - eval_call("main", vec![], &mut env)?; + eval_call("main", vec![], &mut env, &tagged_table)?; Ok(()) } + +fn build_tagged_runtime_table( + tagged_types: &[TaggedTypeDecl], +) -> Result { + let mut table = TaggedRuntimeTable::new(); + for decl in tagged_types { + let key = (decl.tag_type.clone(), decl.tag_name.clone()); + if table.insert(key, decl.clone()).is_some() { + return Err(RuntimeError::new(format!( + "duplicate tagged type declaration at runtime: {:?} {}", + decl.tag_type, decl.tag_name + ))); + } + } + Ok(table) +} diff --git a/src/interpreter/value.rs b/src/interpreter/value.rs index 2abdc6e..7a11d28 100644 --- a/src/interpreter/value.rs +++ b/src/interpreter/value.rs @@ -101,6 +101,19 @@ pub enum Value { Bool(bool), Str(String), Array(Vec), + Struct { + tag_name: String, + fields: std::collections::HashMap, + }, + Union { + tag_name: String, + active_field: String, + value: Box, + }, + Enum { + tag_name: String, + value: i64, + }, Void, Fn(FnValue), } @@ -123,6 +136,13 @@ impl fmt::Display for Value { } write!(f, "]") } + Value::Struct { tag_name, .. } => write!(f, "", tag_name), + Value::Union { + tag_name, + active_field, + .. 
+ } => write!(f, "", tag_name, active_field), + Value::Enum { tag_name, value } => write!(f, "", tag_name, value), Value::Fn(_) => write!(f, ""), } } diff --git a/tests/interpreter.rs b/tests/interpreter.rs index 51696c9..5f0b18b 100644 --- a/tests/interpreter.rs +++ b/tests/interpreter.rs @@ -256,3 +256,122 @@ fn test_stdlib_pow_float_args() { "#; assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); } + +// --------------------------------------------------------------------------- +// Tagged Unions +// --------------------------------------------------------------------------- + +#[test] +fn test_struct_member_assign_and_read() { + let src = r#" + struct Point { int x; int y; } + void main() { + struct Point p = 0; + p.x = 21; + int v = p.x; + } + "#; + + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_union_member_assign_and_read_active_field() { + let src = r#" + union Number { int i; float f; } + void main() { + union Number n = 0; + n.i = 10; + int v = n.i; + } + "#; + + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_union_member_read_inactive_field_errors() { + let src = r#" + union Number { int i; float f; } + void main() { + union Number n = 0; + n.i = 10; + float v = n.f; + } + "#; + + let result = run(src); + assert!( + result.is_err(), + "expected inactive union member runtime error" + ); + assert!( + result.unwrap_err().contains("inactive"), + "error should mention inactive field" + ); +} + +#[test] +fn test_enum_member_access() { + let src = r#" + enum Color { Red; Green = 5; Blue; } + void main() { + enum Color c = 0; + int v = c.Blue; + } + "#; + + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_union_member_switching_makes_previous_field_inactive() { + let src = r#" + union Number { int i; float f; } + void main() { + union Number n = 0; + n.i = 10; + n.f = 2.5; + int v = n.i; + } + "#; + + let result = run(src); + assert!( + result.is_err(), + 
"expected inactive union member runtime error" + ); + assert!( + result.unwrap_err().contains("inactive"), + "error should mention inactive field" + ); +} + +#[test] +fn test_union_read_after_switch_on_active_field_is_ok() { + let src = r#" + union Number { int i; float f; } + void main() { + union Number n = 0; + n.i = 10; + n.f = 2.5; + float v = n.f; + } + "#; + + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} + +#[test] +fn test_enum_member_with_explicit_and_implicit_values_is_valid() { + let src = r#" + enum Flags { A = 3; B; C = 9; D; } + void main() { + enum Flags f = 0; + int x = f.B; + int y = f.D; + } + "#; + + assert!(run(src).is_ok(), "{}", run(src).unwrap_err()); +} From bfe94f99dd5ea7529645029839fcc841b3d044a1 Mon Sep 17 00:00:00 2001 From: chances190 Date: Sun, 19 Apr 2026 00:37:54 -0300 Subject: [PATCH 5/6] docs: update guide --- docs/00-guide.md | 351 +++++++++++++++++++++++------------------------ 1 file changed, 168 insertions(+), 183 deletions(-) diff --git a/docs/00-guide.md b/docs/00-guide.md index 5001970..525ed2c 100644 --- a/docs/00-guide.md +++ b/docs/00-guide.md @@ -31,7 +31,7 @@ void main() { ``` Pipeline do MiniC: -1. Fazer parsing do código-fonte em uma AST. +1. Fazer parsing do código-fonte para uma AST. 2. Verificar tipos da AST. 3. Interpretar (executar) a AST verificada. @@ -66,84 +66,135 @@ fn add(x: i64, y: i64) -> i64 { ### 2) Structs (Dados com Campos Nomeados) ```rust -struct Point { +struct Example { x: i64, y: i64, } ``` -MiniC usa structs como wrappers da AST (nós de expressão com metadados). - (Rust Book: ) ### 3) Enums (Uma de Várias Variantes) +Enums em Rust têm três estilos principais de variante: +- Tuple-like (`Name(T1, T2)`): campos posicionais, útil quando a ordem importa (ex.: operandos). +- Struct-like (`Name { f1: T1, f2: T2 }`): campos nomeados, mais explícito e resistente à reordenação. +- Unit-like (`Name`): etiqueta sem dados, para estados/flags. 
+ ```rust -enum Value { - Int(i64), - Bool(bool), - Str(String), +enum Example { + Tuple(i64, i64), // tuple-like (posicional) + Record { x: i64, y: i64 }, // struct-like (campos nomeados) + Unit, // unit-like (sem dados) } ``` -Uma enum pode conter diferentes formas sob um tipo. Isso é central para AST e valores em tempo de execução. - (Rust Book: ) ### 4) match (Ramificação por Variante) +Use `match` ou `if let` para desestruturar enums: + ```rust -match value { - Value::Int(n) => println!("int: {}", n), - Value::Bool(b) => println!("bool: {}", b), - Value::Str(s) => println!("str: {}", s), +match some_enum { + Example::Tuple(a, b) => println!("posicional: {} {}", a, b), + Example::Record { x, .. } => println!("campo x = {}", x), + Example::Unit => println!("unit"), } ``` -Verificação de tipos e interpretação de MiniC são principalmente grandes instruções `match` explícitas. +O compilador garante que o `match` seja exaustivo sobre todas as possibilidades do enum. Para ter um caso "catch-all", podemos usar `_ => {}` como o último match. (Rust Book: ) -### 5) Result para Tratamento de Erros +### 5) Box para Árvores Recursivas + +Tipos recursivos (como nós de expressão) precisam de `Box` para que o compilador saiba calcular seu tamanho: ```rust -fn parse_number(s: &str) -> Result { - s.parse::().map_err(|e| e.to_string()) +enum Expr { + Int(i64), + Add(Box, Box), +} + +// Construindo um nó Add: +let left = Expr::Int(1); +let right = Expr::Int(2); +let add = Expr::Add(Box::new(left), Box::new(right)); + +// Desestruturando com `match`: +match expr_example { + Expr::Add(box Expr::Int(l), box Expr::Int(r)) => { + println!("Add node: {} + {} = {}", l, r, l + r); + } + _ => {} } ``` +(Rust Book: ) + +### 6) Result para Tratamento de Erros + `Result` significa um de dois casos: - `Ok(T)` sucesso - `Err(E)` falha -Parser, verificador de tipos e interpretador de MiniC todos dependem desse estilo.
- -(Rust Book: ) +Ao chamar uma função que retorna `Result`, podemos: +- Desempacotá-lo para lidar com ambos os casos, ou +- Propagar o erro caso a função atual também retorne `Result`. -### 6) Box para Árvores Recursivas +```rust +fn parse_number(input: &str) -> Result { + input.parse::() +} -Enums recursivas precisam de indireção: +// chamando e lidando com o resultado explicitamente +fn try_parse_number() { + match parse_number("42") { + Ok(n) => println!("numero: {}", n), + Err(e) => eprintln!("erro ao parsear: {}", e), + } +} -```rust -enum Expr { - Int(i64), - Add(Box, Box), +// propagando erros com `?` +fn try_double(s: &str) -> Result { + let n = parse_number(s)?; + Ok(n * 2) } ``` -Sem `Box`, Rust não consegue determinar o tamanho recursivo em tempo de compilação. +(Rust Book: ) -(Rust Book: ) +### 7) Generics + +Generics são parâmetros de tipo: permitem escrever uma definição uma única vez e instanciá-la com diferentes tipos. + +No MiniC, a [AST](../src/ir/ast.rs#L214) usa `Ty` como o tipo genérico nos nós. O parser produz `ExprD<()>` (sem tipos) e o type-checker produz `ExprD` (cada nó carrega seu `Type`). + +(Rust Book: ) + +### 8) Macros `#[derive(...)]` + +Na [AST](../src/ir/ast.rs), muitas structs/enums usam `#[derive(...)]`. Derives geram código boilerplate automaticamente durante a compilação. -### 7) Generics (Parametrização da AST através de Fases) +- `Debug`: permite imprimir o nó para debugging/tests (`{:?}`). +- `Clone`: gera uma implementação automática de `clone()` para copiar nós quando necessário. +- `PartialEq`/`Eq`: permitem comparar nós (útil em testes e transformações). +- `Hash`: permite usar o valor como chave em `HashMap`/`HashSet`. -Nós da AST de MiniC são genéricos sobre um parâmetro de tipo `Ty` que carrega metadados específicos de cada fase.
A mesma estrutura de árvore é usada após parsing e após type-checking: -- Imediatamente após parsing: `Ty = ()` (sem informação de tipo) -- Após type-checking: `Ty = Type` (com informação de tipo para cada sub-expressão) +(Rust Book: ) -Esse design previne acidentalmente misturar AST verificada com AST não verificada em tempo de compilação, pois são tipos diferentes (`Expr<()>` vs `Expr`). +### 9) Ownership e Borrowing -(Rust Book: generics , ownership and borowing ) +Breve resumo das regras essenciais do Rust aplicáveis ao projeto: + +- O borrow-checker é um verificador em tempo de compilação que evita dangling references e condições de corrida sem custo em tempo de execução. +- Cada valor tem um dono. Quando o dono sai de escopo, o valor é liberado. +- `&T` e `&mut T` são empréstimos (borrows). O *borrow-checker* garante que essas referências não ultrapassem o tempo de vida do dono e impede usos concorrentes inválidos. +- Use `Box` para tipos recursivos (p.ex., nós de AST). +- Prefira `&str` para views de string e faça `clone()` só quando necessário. + +(Rust Book: ) ## Parte B: Introdução Rápida a Nom (Apenas o Necessário) @@ -171,8 +222,6 @@ Então um parser retorna ambos: 1. O que foi parseado. 2. O que restou sem parsear. -É por isso que a composição de parsers funciona naturalmente. - (Referência: ) Modelo de erro do Nom (importante ao debugar comportamento de parsing): @@ -200,71 +249,71 @@ Para parsing de linguagem baseado em arquivo, complete é o padrão certo. - `tag("if")`: correspondência exata de string. - `char('(')`: correspondência exata de caractere. -- `alt((a, b, c))`: tenta alternativas em ordem. -- `tuple((a, b, c))`: faz parsing de sequência. -- `preceded(a, b)`: faz parsing de `a` depois `b`, mantém `b`. -- `delimited(a, b, c)`: faz parsing de `a b c`, mantém `b`. -- `map(p, f)`: transforma saída de parsing em nó de AST. -- `opt(p)`: parsing opcional (`Some` ou `None`). -- `many0(p)`: repete zero ou mais vezes. 
-- `separated_list0(sep, item)`: faz parsing de itens de lista separados por um separador. -- `verify(p, pred)`: faz parsing com `p`, depois aplica predicado. +- `alt((a, b, c))`: tenta parsers em ordem, aplica o primeiro que funcionar. +- `tuple((a, b, c))`: aplica parsers em sequência. +- `preceded(a, b)`: retorna `b` se for precedido por `a`. Descarta `a`. Usado para descartar whitespace. +- `delimited(a, b, c)`: retorna `b` se estiver entre `a` e `c`. Descarta `a` e `c`. Usado para encontrar "{}", "()" e "[]". +- `opt(p)`: pode ou não estar presente (p?). +- `many0(p)`: se repete zero ou mais vezes (p*). +- `many1(p)`: se repete um ou mais vezes (p+). +- `separated_list0(sep, item)`: uma lista de `item` separada por `sep`. (Ex. parâmetros de função) +- `verify(p, pred)`: verifica a condição `pred` sobre o resultado do parser `p`. +- `map(p, f)`: aplica `f` sobre o resultado do parser `p`. Usado para transformar a saída dos parsers em nós da AST. Quando não tiver certeza qual combinator usar, consulte o guia de escolha: . -### 4) Três Comportamentos Importantes do Nom +### 4) Comportamentos Importantes do Nom -#### alt é ordenado +#### Sucesso do parser não implica consumo completo -`alt((a, b, c))` tenta da esquerda para a direita e retorna o primeiro sucesso. +Um parser em Nom retorna `Ok((rest, value))`. Mesmo quando `value` foi reconhecido com sucesso, parte da entrada pode sobrar em `rest`. Se você precisa garantir que o parser consuma toda a entrada (útil nos testes unitários), envolva-o com `all_consuming`: -Então a ordem de branches indica diretamente o comportamento da linguagem. +```rust +let all = all_consuming(tag("str")); +assert!(all("str").is_ok()); // consome tudo +assert!(all("struct").is_err()); // sobra entrada -> erro +``` -#### Sucesso do parser não implica consumo completo +(Nom:`all_consuming` ) -`many0(p)` coleta enquanto `p` sucede e para em falha recuperável. 
Similarmente, parsers individuais retornam `Ok((rest, value))` onde `rest` pode não estar vazio. +#### alt é ordenado -A responsabilidade de verificar consumo completo da entrada é do chamador, não do parser. Use `all_consuming` em testes ou validação explícita em produção. +`alt((a, b, c))` tenta da esquerda para a direita e retorna o primeiro sucesso. A ordem dos branches afeta diretamente comportamento da linguagem. -Referências API úteis: `alt` , `many0` , `separated_list0` , `all_consuming` . +Exemplo onde a ordem importa (um branch é prefixo de outro): -## Parte C: Arquitetura do Parser de MiniC +```rust +let parser_wrong = alt((tag("str"), tag("struct"))); +assert_eq!(parser_wrong("struct"), Ok(("uct", "str"))); +// branch curto primeiro -> escolhe "str" e deixa "uct" sobrando -Módulos do parser são divididos por categoria de gramática: -- `src/parser/identifiers.rs` -- `src/parser/literals.rs` -- `src/parser/expressions.rs` -- `src/parser/statements.rs` -- `src/parser/functions.rs` -- `src/parser/program.rs` +let parser_right = alt((tag("struct"), tag("str"))); +assert_eq!(parser_right("struct"), Ok(("", "struct"))); +// branch longo primeiro -> escolhe "struct" como esperado +``` -Essa separação reflete a organização da gramática, permite que cada módulo tenha responsabilidade clara, facilita localizar code relacionado e mapeia intuitivamente da linguagem formal para o código Rust. +(Nom: `alt` ) -Veja notas sobre arquitetura do parser em [docs/04-parser.md](04-parser.md), depois compare diretamente com [src/parser/mod.rs](../src/parser/mod.rs). +## Parte C: Arquitetura do Parser de MiniC -### 1) Identificadores +Módulos do parser são divididos por categoria de gramática. -Padrão: -1. Fazer parsing da forma de identificador. -2. Rejeitar palavras-chave reservadas com `verify`. +Veja notas sobre arquitetura do parser em [docs/04-parser.md](04-parser.md), depois compare diretamente com a implementação. 
-Isso separa claramente forma léxica de política de palavras-chave. +### 1) [Identificadores](../src/parser/identifiers.rs) -(Nom `verify`: ) +Nomes de variáveis, funções, etc. Rejeita palavras-chave reservadas com `verify`. -### 2) Literais +(Nom `verify`: ) -Faz parsing de int, float, string, bool. +### 2) [Literais](../src/parser/literals.rs) -Escolhas de implementação notáveis: -- Parser inteiro rejeita `12.34` como inteiro. -- Parser de string suporta escapes (`\\`, `\"`, `\n`, `\t`) via combinators de escape. +Faz parsing de int, float, string, bool. Parser de string suporta escapes (`\\`, `\"`, `\n`, `\t`). -(Referências de parse de texto do Nom: `escaped_transform` , tabela de parser-chooser de parse de texto: ) +(Nom: `escaped_transform` ) -### 3) Expressões (Precedência + Associatividade) +### 3) [Expressões (Precedência + Associatividade)](../src/parser/expressions.rs) -MiniC codifica precedência via camadas de função (função por nível), em ordem decrescente de precedência: - ou lógico (precedência mais baixa) - e lógico - não @@ -275,56 +324,45 @@ MiniC codifica precedência via camadas de função (função por nível), em or - primário - atômico (precedência mais alta) -Cada camada chama a camada anterior quando precisa de um operando. **Todos os operadores binários de MiniC são associativos à esquerda**, implementados com um loop de acumulador em cada nível: parseia o operando esquerdo, depois enquanto o operador esperado não falha, parseia o operador e o operando direito (que se torna o novo operando esquerdo). +MiniC codifica precedência de operadores via camadas de função. Cada camada chama a camada anterior quando precisa de um operando. **Todos os operadores binários de MiniC são associativos à esquerda**, implementados com um loop de acumulador em cada nível: parseia o operando esquerdo, depois enquanto o operador esperado não falha, parseia o operador e o operando direito (que se torna o novo operando esquerdo). 
Exemplo: `1 - 2 - 3` se torna `(1 - 2) - 3` porque o primeiro `2` é consumido como direito, o resultado `(1 - 2)` vira o novo esquerdo, e `3` é consumido como novo direito. -Código concreto: [src/parser/expressions.rs](../src/parser/expressions.rs) e testes em [tests/parser.rs](../tests/parser.rs). +### 4) [Declarações](../src/parser/statements.rs) -### 4) Declarações - -Parser de declaração é um conjunto ordenado de alternativas: - bloco -- if +- if - while - return -- declaração -- declaração de chamada +- declaração de variável +- chamada de função - atribuição A ordem é deliberada, especialmente para formas com prefixos sobrepostos. -### 5) Funções e Tipos - -Parser de tipo inclui formas escalares e de array. - -Porque `alt` é ordenado, prefixos mais longos (como formas de array 2D) devem vir listados antes dos mais curtos (formas 1D). - -(Nom `alt`: ) +### 5) [Funções e Tipos](../src/parser/functions.rs) -### 6) Parser de Programa +Declaração de função e tipos. -Parser de nível superior usa repetição sobre declarações de função. +Parser de tipo inclui formas escalares e de array. Porque `alt` é ordenado, prefixos mais longos (como formas de array 2D) são listados antes dos mais curtos (formas 1D). -Tradeoff pedagógico: -- Código simples -- Mas você deve raciocinar cuidadosamente sobre comportamento de consumo parcial +### 6) [Parser de Programa](../src/parser/program.rs) -Se quiser melhorar esse comportamento ou diagnósticos, os guias de construção de parsers e erros do Nom são o próximo passo certo: e . +Parser de declarações top-level. Usa repetição sobre declarações de função. ## Parte D: AST, Verificação de Tipos e Interpretação ### 1) Design da AST -Nós de AST representam: -- Expressões -- Declarações -- Declarações de função -- Programa +Um nó da AST é uma unidade da árvore que representa uma construção sintática (ex.: expressão, declaração) e agrupa os campos necessários para representá-la.
-MiniC usa decorações genéricas de nó para que parser e verificador de tipo compartilhem a mesma forma. +Nós seguem o padrão `Object` + `ObjectD`: +- `Object` descreve a forma de um objeto. +- `ObjectD` agrupa o objeto com o tipo que ele carrega para execução. -Referências do projeto: [docs/03-ast.md](03-ast.md), [src/ir/ast.rs](../src/ir/ast.rs). +Na prática: o parser produz `ObjectD<()>` (sem tipos) e o type-checker produz `ObjectD` (com tipos). Isso reaproveita a mesma forma estrutural entre fases sem duplicação. + +Arquivos: [docs/03-ast.md](03-ast.md), [src/ir/ast.rs](../src/ir/ast.rs). ### 2) Responsabilidades do Verificador de Tipos @@ -334,11 +372,11 @@ Verificação de tipos valida: - Contagem/tipos de argumento de chamada de função - Digitação de operador de expressão - Indexação de array e consistência de elementos -- cCorreção de tipo de retorno +- Correção de tipo de retorno Usa um ambiente mapeando nomes para tipos. -Referências do projeto: [docs/05-type-checker.md](05-type-checker.md), [src/semantic/type_checker.rs](../src/semantic/type_checker.rs). +Arquivos: [docs/05-type-checker.md](05-type-checker.md), [src/semantic/type_checker.rs](../src/semantic/type_checker.rs). ### 3) Responsabilidades do Interpretador @@ -350,94 +388,41 @@ Interpretador executa AST verificada: Usa um ambiente mapeando nomes para valores em tempo de execução. -Referências do projeto: [docs/06-interpreter.md](06-interpreter.md), [src/interpreter/eval_expr.rs](../src/interpreter/eval_expr.rs), [src/interpreter/exec_stmt.rs](../src/interpreter/exec_stmt.rs). - -### 4) Equivalência do Ambiente - -As duas fases tem a mesma abstração central: -- Ambiente semântico (`name -> Type`) -- Ambiente de tempo de execução (`name -> Value`) +Arquivos: [docs/06-interpreter.md](06-interpreter.md), [src/interpreter/eval_expr.rs](../src/interpreter/eval_expr.rs), [src/interpreter/exec_stmt.rs](../src/interpreter/exec_stmt.rs). 
-## Parte E: Como Adicionar Funcionalidades (Fluxo de Trabalho do Aluno) +## Parte E: Como Adicionar Funcionalidades -Para cada nova funcionalidade de linguagem, use esta checklist: +Para cada nova funcionalidade de linguagem, o ideal é fazer nessa ordem: 1. Estender AST. 2. Estender parser. -3. Estender verificador de tipos. -4. Estender interpretador. -5. Adicionar testes. -6. Atualizar docs. +3. Adicionar testes de parser/programa. +4. Estender type-checker. +5. Adicionar testes de type-checker. +6. Estender interpretador. +7. Adicionar testes de interpretador. +8. Verificar testes de stdlib e CLI. +9. Atualizar docs. -Se você pular um passo, a funcionalidade fica incompleta. - -### Exemplo: Adicionar um Novo Operador Binário - -1. Adicionar variante de expressão nova à AST. -2. Adicionar regra de parser na camada de precedência correta. -3. Adicionar regra de tipo. -4. Adicionar regra de avaliação em tempo de execução. -5. Adicionar testes de precedência do parser + testes de tipo + testes de interpretador. - -### Exemplo: Adicionar uma Nova Declaração - -1. Adicionar variante de declaração. -2. Adicionar branch de parser na ordem correta. -3. Adicionar branch de verificação de tipos. -4. Adicionar branch de execução. -5. Adicionar testes de escopo de bloco e testes de integração. - -Para adições de funcionalidade que tocam builtins ou fiação de tempo de execução, consulte [docs/07-stdlib.md](07-stdlib.md), [docs/08-testing.md](08-testing.md), e [src/stdlib/mod.rs](../src/stdlib/mod.rs). +Para adição de funcionalidades que impactam nas funções builtin em tempo de execução, consulte [docs/07-stdlib.md](07-stdlib.md) e [src/stdlib/mod.rs](../src/stdlib/mod.rs). ## Parte F: Estratégia de Teste Que Você Deveria Seguir Camadas de teste de MiniC: -- Testes do parser -- Testes do verificador de tipos +- Testes do parser / programa +- Testes do type-checker - Testes do interpretador - Testes CLI -Use todas as quatro camadas ao adicionar funcionalidades não triviais.
- Regra prática: -- um teste unitário para cada regra local -- um teste ponta-a-ponta para cada comportamento visível ao usuário +- em cada camada, pelo menos um teste unitário para cada regra de funcionamento da funcionalidade adicionada +- um teste CLI para cada comportamento visível ao usuário -Pontos de entrada de teste úteis: -- Testes de parser: [tests/parser.rs](../tests/parser.rs) -- Testes de programa: [tests/program.rs](../tests/program.rs) -- Testes de verificador de tipos: [tests/type_checker.rs](../tests/type_checker.rs) -- Testes de interpretador: [tests/interpreter.rs](../tests/interpreter.rs) -- Testes de stdlib: [tests/stdlib.rs](../tests/stdlib.rs) -- Testes CLI: [tests/cli](../tests/cli) +Arquivos: +- [tests/parser.rs](../tests/parser.rs) +- [tests/program.rs](../tests/program.rs) +- [tests/type_checker.rs](../tests/type_checker.rs) +- [tests/interpreter.rs](../tests/interpreter.rs) +- [tests/stdlib.rs](../tests/stdlib.rs) +- [tests/cli](../tests/cli) Detalhes de estratégia de teste e convenções shelltest são documentados em [docs/08-testing.md](08-testing.md). - -## Parte G: Ordem de Leitura para Este Repositório - -Comece com docs: -1. [docs/01-pipeline.md](01-language.md) -2. [docs/02-pipeline.md](02-pipeline.md) -3. [docs/03-ast.md](03-ast.md) -4. [docs/04-parser.md](04-parser.md) -5. [docs/05-type-checker.md](05-type-checker.md) -6. [docs/06-interpreter.md](06-interpreter.md) -7. [docs/07-stdlib.md](07-stdlib.md) -8. [docs/08-testing.md](08-testing.md) - -Depois leia código nesta ordem: -1. [src/ir/ast.rs](../src/ir/ast.rs) -2. [src/parser/mod.rs](../src/parser/mod.rs) e submódulos do parser -3. [src/semantic/type_checker.rs](../src/semantic/type_checker.rs) -4. [src/interpreter/eval_expr.rs](../src/interpreter/eval_expr.rs) -5. [src/interpreter/exec_stmt.rs](../src/interpreter/exec_stmt.rs) -6. [src/stdlib/mod.rs](../src/stdlib/mod.rs) - -## Conclusão - -Você pode pensar em MiniC como quatro problemas de ensino conectados: -1. 
Fazer parsing de sintaxe (combinadores Nom). -2. Validar significado (verificador de tipos). -3. Executar comportamento (interpretador). -4. Preservar confiança (testes). - -Uma vez que consiga rastrear uma funcionalidade através de todas as quatro, você consegue estender a linguagem com confiança. From baee47c6002e53c4d49a40cf2bebc6530b6b5ccc Mon Sep 17 00:00:00 2001 From: chances190 Date: Sun, 19 Apr 2026 03:52:21 -0300 Subject: [PATCH 6/6] refactor: organize aggregate types - Rename "tagged types" -> "aggregate types", kind -> specifier, tag -> identifier, for semantic clarity - Move types hashmap to environment for reuse in type-checker and interpreter --- src/environment/env.rs | 39 ++++ src/environment/mod.rs | 2 +- src/interpreter/eval_expr.rs | 125 +++++------ src/interpreter/exec_stmt.rs | 127 +++++------ src/interpreter/mod.rs | 30 +-- src/interpreter/value.rs | 14 +- src/ir/ast.rs | 26 +-- src/parser/identifiers.rs | 2 +- src/parser/mod.rs | 2 +- src/parser/program.rs | 14 +- src/parser/types.rs | 61 +++--- src/semantic/type_checker.rs | 316 ++++++++++++--------------- tests/cli/run.test | 6 + tests/fixtures/aggregate_types.minic | 12 + tests/fixtures/tagged_types.minic | 5 - tests/interpreter.rs | 2 +- tests/parser.rs | 114 +++++----- tests/program.rs | 14 +- tests/type_checker.rs | 17 +- 19 files changed, 458 insertions(+), 470 deletions(-) create mode 100644 tests/fixtures/aggregate_types.minic delete mode 100644 tests/fixtures/tagged_types.minic diff --git a/src/environment/env.rs b/src/environment/env.rs index dd83ba1..5553b94 100644 --- a/src/environment/env.rs +++ b/src/environment/env.rs @@ -11,6 +11,8 @@ //! * [`set`](Environment::set) — update an existing binding. //! * [`snapshot`](Environment::snapshot) / [`restore`](Environment::restore) //! — save and restore the entire map (used for scoping). +//! * [`aggregate_type`](Environment::aggregate_type) — look up an aggregate +//! type declaration from the shared type-declaration table. //! //! 
Additionally, [`names`](Environment::names) and //! [`remove_new`](Environment::remove_new) support block-exit cleanup. @@ -54,20 +56,57 @@ //! acceptable at MiniC's scale. use std::collections::{HashMap, HashSet}; +use std::rc::Rc; + +use crate::ir::ast::{AggregateTypeDecl, AgtTypeSpecifier}; + +pub type TypeDeclKey = (AgtTypeSpecifier, String); +pub type TypeDeclMap = HashMap; + +pub fn build_type_decl_map(decls: &[AggregateTypeDecl]) -> TypeDeclMap { + let mut type_map = TypeDeclMap::new(); + for decl in decls { + let key = (decl.specifier.clone(), decl.identifier.clone()); + type_map.insert(key, decl.clone()); + } + type_map +} /// Unified parametric environment: maps names to values of type `V`. /// Both variable bindings and function bindings are stored in the same map. pub struct Environment { bindings: HashMap, + type_decls: Rc, } impl Environment { pub fn new() -> Self { Self { bindings: HashMap::new(), + type_decls: Rc::new(TypeDeclMap::new()), + } + } + + pub fn with_type_decls(type_decls: TypeDeclMap) -> Self { + Self { + bindings: HashMap::new(), + type_decls: Rc::new(type_decls), } } + pub fn aggregate_type( + &self, + specifier: &AgtTypeSpecifier, + identifier: &str, + ) -> Option<&AggregateTypeDecl> { + self.type_decls + .get(&(specifier.clone(), identifier.to_string())) + } + + pub fn has_aggregate_type(&self, specifier: &AgtTypeSpecifier, identifier: &str) -> bool { + self.aggregate_type(specifier, identifier).is_some() + } + /// Bind `name` to `value`, overwriting any existing binding. 
pub fn declare(&mut self, name: impl Into, value: V) { self.bindings.insert(name.into(), value); diff --git a/src/environment/mod.rs b/src/environment/mod.rs index ee49f46..b96bf91 100644 --- a/src/environment/mod.rs +++ b/src/environment/mod.rs @@ -21,4 +21,4 @@ pub mod env; -pub use env::Environment; \ No newline at end of file +pub use env::{build_type_decl_map, Environment, TypeDeclKey, TypeDeclMap}; diff --git a/src/interpreter/eval_expr.rs b/src/interpreter/eval_expr.rs index ae405cc..9f64518 100644 --- a/src/interpreter/eval_expr.rs +++ b/src/interpreter/eval_expr.rs @@ -40,21 +40,13 @@ //! for more detail on this mechanism. use crate::environment::Environment; -use crate::ir::ast::{CheckedExpr, Expr, Literal, Member, TagType, TaggedTypeDecl, Type}; - -use std::collections::HashMap; +use crate::ir::ast::{AgtTypeMember, AgtTypeSpecifier, CheckedExpr, Expr, Literal, Type}; use super::exec_stmt::exec_stmt; use super::value::{FnValue, RuntimeError, Value}; -type TaggedRuntimeTable = HashMap<(TagType, String), TaggedTypeDecl>; - /// Evaluate a checked expression to a runtime value. -pub fn eval_expr( - expr: &CheckedExpr, - env: &mut Environment, - tagged_table: &TaggedRuntimeTable, -) -> Result { +pub fn eval_expr(expr: &CheckedExpr, env: &mut Environment) -> Result { match &expr.exp { Expr::Literal(lit) => Ok(eval_literal(lit)), @@ -63,7 +55,7 @@ pub fn eval_expr( .cloned() .ok_or_else(|| RuntimeError::new(format!("undefined variable '{}'", name))), - Expr::Neg(inner) => match eval_expr(inner, env, tagged_table)? { + Expr::Neg(inner) => match eval_expr(inner, env)? 
{ Value::Int(n) => Ok(Value::Int(-n)), Value::Float(x) => Ok(Value::Float(-x)), v => Err(RuntimeError::new(format!( @@ -73,67 +65,67 @@ pub fn eval_expr( }, Expr::Add(l, r) => numeric_binop( - eval_expr(l, env, tagged_table)?, - eval_expr(r, env, tagged_table)?, + eval_expr(l, env)?, + eval_expr(r, env)?, |a, b| a + b, |a, b| a + b, ), Expr::Sub(l, r) => numeric_binop( - eval_expr(l, env, tagged_table)?, - eval_expr(r, env, tagged_table)?, + eval_expr(l, env)?, + eval_expr(r, env)?, |a, b| a - b, |a, b| a - b, ), Expr::Mul(l, r) => numeric_binop( - eval_expr(l, env, tagged_table)?, - eval_expr(r, env, tagged_table)?, + eval_expr(l, env)?, + eval_expr(r, env)?, |a, b| a * b, |a, b| a * b, ), Expr::Div(l, r) => numeric_binop( - eval_expr(l, env, tagged_table)?, - eval_expr(r, env, tagged_table)?, + eval_expr(l, env)?, + eval_expr(r, env)?, |a, b| a / b, |a, b| a / b, ), Expr::Lt(l, r) => numeric_cmp( - eval_expr(l, env, tagged_table)?, - eval_expr(r, env, tagged_table)?, + eval_expr(l, env)?, + eval_expr(r, env)?, |a, b| a < b, |a, b| a < b, ), Expr::Le(l, r) => numeric_cmp( - eval_expr(l, env, tagged_table)?, - eval_expr(r, env, tagged_table)?, + eval_expr(l, env)?, + eval_expr(r, env)?, |a, b| a <= b, |a, b| a <= b, ), Expr::Gt(l, r) => numeric_cmp( - eval_expr(l, env, tagged_table)?, - eval_expr(r, env, tagged_table)?, + eval_expr(l, env)?, + eval_expr(r, env)?, |a, b| a > b, |a, b| a > b, ), Expr::Ge(l, r) => numeric_cmp( - eval_expr(l, env, tagged_table)?, - eval_expr(r, env, tagged_table)?, + eval_expr(l, env)?, + eval_expr(r, env)?, |a, b| a >= b, |a, b| a >= b, ), Expr::Eq(l, r) => { - let lv = eval_expr(l, env, tagged_table)?; - let rv = eval_expr(r, env, tagged_table)?; + let lv = eval_expr(l, env)?; + let rv = eval_expr(r, env)?; Ok(Value::Bool(values_equal(&lv, &rv))) } Expr::Ne(l, r) => { - let lv = eval_expr(l, env, tagged_table)?; - let rv = eval_expr(r, env, tagged_table)?; + let lv = eval_expr(l, env)?; + let rv = eval_expr(r, env)?; 
Ok(Value::Bool(!values_equal(&lv, &rv))) } - Expr::Not(inner) => match eval_expr(inner, env, tagged_table)? { + Expr::Not(inner) => match eval_expr(inner, env)? { Value::Bool(b) => Ok(Value::Bool(!b)), v => Err(RuntimeError::new(format!( "expected bool for '!', got: {}", @@ -141,10 +133,10 @@ pub fn eval_expr( ))), }, Expr::And(l, r) => { - let lv = eval_expr(l, env, tagged_table)?; + let lv = eval_expr(l, env)?; match lv { Value::Bool(false) => Ok(Value::Bool(false)), - Value::Bool(true) => eval_expr(r, env, tagged_table), + Value::Bool(true) => eval_expr(r, env), v => Err(RuntimeError::new(format!( "expected bool for 'and', got: {}", v @@ -152,10 +144,10 @@ pub fn eval_expr( } } Expr::Or(l, r) => { - let lv = eval_expr(l, env, tagged_table)?; + let lv = eval_expr(l, env)?; match lv { Value::Bool(true) => Ok(Value::Bool(true)), - Value::Bool(false) => eval_expr(r, env, tagged_table), + Value::Bool(false) => eval_expr(r, env), v => Err(RuntimeError::new(format!( "expected bool for 'or', got: {}", v @@ -164,16 +156,14 @@ pub fn eval_expr( } Expr::ArrayLit(elems) => { - let vals: Result, RuntimeError> = elems - .iter() - .map(|e| eval_expr(e, env, tagged_table)) - .collect(); + let vals: Result, RuntimeError> = + elems.iter().map(|e| eval_expr(e, env)).collect(); Ok(Value::Array(vals?)) } Expr::Index { base, index } => { - let base_val = eval_expr(base, env, tagged_table)?; - let idx_val = eval_expr(index, env, tagged_table)?; + let base_val = eval_expr(base, env)?; + let idx_val = eval_expr(index, env)?; match (base_val, idx_val) { (Value::Array(elems), Value::Int(i)) => { let i = i as usize; @@ -193,32 +183,33 @@ pub fn eval_expr( } Expr::Call { name, args } => { - let arg_vals: Result, RuntimeError> = args - .iter() - .map(|a| eval_expr(a, env, tagged_table)) - .collect(); - eval_call(name, arg_vals?, env, tagged_table) + let arg_vals: Result, RuntimeError> = + args.iter().map(|a| eval_expr(a, env)).collect(); + eval_call(name, arg_vals?, env) } Expr::Member { 
base, member } => { - let base_val = eval_expr(base, env, tagged_table)?; + let base_val = eval_expr(base, env)?; match &base.ty { - Type::Tagged { tag_type, tag_name } => match tag_type { - TagType::Struct => match base_val { + Type::Aggregate { + specifier, + identifier, + } => match specifier { + AgtTypeSpecifier::Struct => match base_val { Value::Struct { fields, .. } => { fields.get(member).cloned().ok_or_else(|| { RuntimeError::new(format!( "missing struct member '{}.{}'", - tag_name, member + identifier, member )) }) } other => Err(RuntimeError::new(format!( "expected struct runtime value for {}, got {}", - tag_name, other + identifier, other ))), }, - TagType::Union => match base_val { + AgtTypeSpecifier::Union => match base_val { Value::Union { active_field, value, @@ -229,21 +220,21 @@ pub fn eval_expr( } else { Err(RuntimeError::new(format!( "union member '{}.{}' is inactive (active field: {})", - tag_name, member, active_field + identifier, member, active_field ))) } } other => Err(RuntimeError::new(format!( "expected union runtime value for {}, got {}", - tag_name, other + identifier, other ))), }, - TagType::Enum => { - enum_member_value(tag_name, member, tagged_table).map(Value::Int) + AgtTypeSpecifier::Enum => { + enum_member_value(identifier, member, env).map(Value::Int) } }, other => Err(RuntimeError::new(format!( - "member access requires tagged base type, got {:?}", + "member access requires aggregate base type, got {:?}", other ))), } @@ -256,7 +247,6 @@ pub fn eval_call( name: &str, args: Vec, env: &mut Environment, - tagged_table: &TaggedRuntimeTable, ) -> Result { match env.get(name).cloned() { Some(Value::Fn(FnValue::Native(f))) => (f)(args), @@ -273,7 +263,7 @@ pub fn eval_call( for (param, val) in decl.params.iter().zip(args.into_iter()) { env.declare(param.name.clone(), val); } - let result = exec_stmt(&decl.body, env, tagged_table)?; + let result = exec_stmt(&decl.body, env)?; env.restore(snapshot); Ok(result.unwrap_or(Value::Void)) } 
@@ -283,17 +273,17 @@ pub fn eval_call( } fn enum_member_value( - tag_name: &str, + agt_identifier: &str, member: &str, - tagged_table: &TaggedRuntimeTable, + env: &Environment, ) -> Result { - let decl = tagged_table - .get(&(TagType::Enum, tag_name.to_string())) - .ok_or_else(|| RuntimeError::new(format!("unknown enum type '{}'", tag_name)))?; + let decl = env + .aggregate_type(&AgtTypeSpecifier::Enum, agt_identifier) + .ok_or_else(|| RuntimeError::new(format!("unknown enum type '{}'", agt_identifier)))?; let mut next_value: i64 = 0; for entry in &decl.members { - if let Member::Enumerator { name, value } = entry { + if let AgtTypeMember::Enumerator { name, value } = entry { let resolved = value.unwrap_or(next_value); if name == member { return Ok(resolved); @@ -304,12 +294,11 @@ fn enum_member_value( Err(RuntimeError::new(format!( "unknown enumerator '{}.{}'", - tag_name, member + agt_identifier, member ))) } // --- Helpers --- - fn eval_literal(lit: &Literal) -> Value { match lit { Literal::Int(n) => Value::Int(*n), diff --git a/src/interpreter/exec_stmt.rs b/src/interpreter/exec_stmt.rs index 1e98e81..638611b 100644 --- a/src/interpreter/exec_stmt.rs +++ b/src/interpreter/exec_stmt.rs @@ -35,33 +35,28 @@ use crate::environment::Environment; use crate::ir::ast::{ - CheckedExpr, CheckedStmt, Expr, Member, Statement, TagType, TaggedTypeDecl, Type, + AgtTypeMember, AgtTypeSpecifier, CheckedExpr, CheckedStmt, Expr, Statement, Type, }; -use std::collections::HashMap; - use super::eval_expr::{eval_call, eval_expr}; use super::value::{RuntimeError, Value}; -type TaggedRuntimeTable = HashMap<(TagType, String), TaggedTypeDecl>; +use std::collections::HashMap; /// `None` = normal fall-through; `Some(v)` = early return with value. pub type ExecResult = Result, RuntimeError>; /// Execute a checked statement. Returns `Some(v)` if a `return` was hit. 
-pub fn exec_stmt( - stmt: &CheckedStmt, - env: &mut Environment, - tagged_table: &TaggedRuntimeTable, -) -> ExecResult { +pub fn exec_stmt(stmt: &CheckedStmt, env: &mut Environment) -> ExecResult { match &stmt.stmt { // --- Variable declaration --- Statement::Decl { name, ty, init } => { - let init_val = eval_expr(init, env, tagged_table)?; + let init_val = eval_expr(init, env)?; let stored = match ty { - Type::Tagged { tag_type, tag_name } => { - build_tagged_value(tag_type, tag_name, init_val, tagged_table)? - } + Type::Aggregate { + specifier, + identifier, + } => build_aggregate_value(specifier, identifier, init_val, env)?, _ => init_val, }; env.declare(name.clone(), stored); @@ -70,8 +65,8 @@ pub fn exec_stmt( // --- Assignment --- Statement::Assign { target, value } => { - let val = eval_expr(value, env, tagged_table)?; - assign_lvalue(target, val, env, tagged_table)?; + let val = eval_expr(value, env)?; + assign_lvalue(target, val, env)?; Ok(None) } @@ -81,7 +76,7 @@ pub fn exec_stmt( Statement::Block { seq } => { let outer_keys = env.names(); for s in seq { - if let Some(ret) = exec_stmt(s, env, tagged_table)? { + if let Some(ret) = exec_stmt(s, env)? { env.remove_new(&outer_keys); return Ok(Some(ret)); } @@ -95,11 +90,11 @@ pub fn exec_stmt( cond, then_branch, else_branch, - } => match eval_expr(cond, env, tagged_table)? { - Value::Bool(true) => exec_stmt(then_branch, env, tagged_table), + } => match eval_expr(cond, env)? { + Value::Bool(true) => exec_stmt(then_branch, env), Value::Bool(false) => { if let Some(eb) = else_branch { - exec_stmt(eb, env, tagged_table) + exec_stmt(eb, env) } else { Ok(None) } @@ -112,9 +107,9 @@ pub fn exec_stmt( // --- While --- Statement::While { cond, body } => loop { - match eval_expr(cond, env, tagged_table)? { + match eval_expr(cond, env)? { Value::Bool(true) => { - if let Some(ret) = exec_stmt(body, env, tagged_table)? { + if let Some(ret) = exec_stmt(body, env)? 
{ return Ok(Some(ret)); } } @@ -130,18 +125,16 @@ pub fn exec_stmt( // --- Return --- Statement::Return(Some(expr)) => { - let val = eval_expr(expr, env, tagged_table)?; + let val = eval_expr(expr, env)?; Ok(Some(val)) } Statement::Return(None) => Ok(Some(Value::Void)), // --- Statement-level function call --- Statement::Call { name, args } => { - let arg_vals: Result, RuntimeError> = args - .iter() - .map(|a| eval_expr(a, env, tagged_table)) - .collect(); - eval_call(name, arg_vals?, env, tagged_table)?; + let arg_vals: Result, RuntimeError> = + args.iter().map(|a| eval_expr(a, env)).collect(); + eval_call(name, arg_vals?, env)?; Ok(None) } } @@ -152,7 +145,6 @@ fn assign_lvalue( target: &CheckedExpr, val: Value, env: &mut Environment, - tagged_table: &TaggedRuntimeTable, ) -> Result<(), RuntimeError> { match &target.exp { Expr::Ident(name) => { @@ -166,7 +158,7 @@ fn assign_lvalue( } } Expr::Index { base, index } => { - let idx = match eval_expr(index, &mut *env, tagged_table)? { + let idx = match eval_expr(index, &mut *env)? { Value::Int(i) => i as usize, v => { return Err(RuntimeError::new(format!( @@ -175,9 +167,9 @@ fn assign_lvalue( ))) } }; - assign_index(base, idx, val, env, tagged_table) + assign_index(base, idx, val, env) } - Expr::Member { base, member } => assign_member(base, member, val, env, tagged_table), + Expr::Member { base, member } => assign_member(base, member, val, env), _ => Err(RuntimeError::new("invalid assignment target".to_string())), } } @@ -188,7 +180,6 @@ fn assign_index( idx: usize, val: Value, env: &mut Environment, - tagged_table: &TaggedRuntimeTable, ) -> Result<(), RuntimeError> { match &base.exp { Expr::Ident(name) => { @@ -219,7 +210,7 @@ fn assign_index( base: inner_base, index: inner_index, } => { - let inner_idx = match eval_expr(inner_index, env, tagged_table)? { + let inner_idx = match eval_expr(inner_index, env)? 
{ Value::Int(i) => i as usize, v => { return Err(RuntimeError::new(format!( @@ -278,7 +269,6 @@ fn assign_member( member: &str, val: Value, env: &mut Environment, - _tagged_table: &TaggedRuntimeTable, ) -> Result<(), RuntimeError> { match &base.exp { Expr::Ident(name) => { @@ -288,20 +278,20 @@ fn assign_member( .ok_or_else(|| RuntimeError::new(format!("undefined variable '{}'", name)))?; let updated = match current { Value::Struct { - tag_name, + identifier, mut fields, } => { fields.insert(member.to_string(), val); - Value::Struct { tag_name, fields } + Value::Struct { identifier, fields } } - Value::Union { tag_name, .. } => Value::Union { - tag_name, + Value::Union { identifier, .. } => Value::Union { + identifier, active_field: member.to_string(), value: Box::new(val), }, other => { return Err(RuntimeError::new(format!( - "cannot assign member on non-tagged value: {}", + "cannot assign member on non-aggregate value: {}", other ))) } @@ -315,57 +305,52 @@ fn assign_member( } } -fn build_tagged_value( - tag_type: &TagType, - tag_name: &str, +fn build_aggregate_value( + specifier: &AgtTypeSpecifier, + identifier: &str, init_val: Value, - tagged_table: &TaggedRuntimeTable, + env: &Environment, ) -> Result { - let decl = tagged_table - .get(&(tag_type.clone(), tag_name.to_string())) - .ok_or_else(|| { - RuntimeError::new(format!( - "unknown tagged type at runtime: {:?} {}", - tag_type, tag_name - )) - })?; + let decl = env.aggregate_type(specifier, identifier).ok_or_else(|| { + RuntimeError::new(format!( + "unknown aggregate type at runtime: {:?} {}", + specifier, identifier + )) + })?; - match tag_type { - TagType::Struct => { + match specifier { + AgtTypeSpecifier::Struct => { let mut fields = HashMap::new(); for member in &decl.members { - if let Member::Field(field) = member { - fields.insert( - field.name.clone(), - default_value_for_type(&field.ty, tagged_table)?, - ); + if let AgtTypeMember::Field(field) = member { + fields.insert(field.name.clone(), 
default_value_for_type(&field.ty, env)?); } } Ok(Value::Struct { - tag_name: tag_name.to_string(), + identifier: identifier.to_string(), fields, }) } - TagType::Union => { + AgtTypeSpecifier::Union => { let first_field = decl .members .iter() .find_map(|member| match member { - Member::Field(field) => Some(field), + AgtTypeMember::Field(field) => Some(field), _ => None, }) .ok_or_else(|| { - RuntimeError::new(format!("union {} has no fields at runtime", tag_name)) + RuntimeError::new(format!("union {} has no fields at runtime", identifier)) })?; let coerced = coerce_value_to_type(init_val, &first_field.ty)?; Ok(Value::Union { - tag_name: tag_name.to_string(), + identifier: identifier.to_string(), active_field: first_field.name.clone(), value: Box::new(coerced), }) } - TagType::Enum => { + AgtTypeSpecifier::Enum => { let numeric = match init_val { Value::Int(n) => n, other => { @@ -377,17 +362,14 @@ fn build_tagged_value( }; Ok(Value::Enum { - tag_name: tag_name.to_string(), + identifier: identifier.to_string(), value: numeric, }) } } } -fn default_value_for_type( - ty: &Type, - tagged_table: &TaggedRuntimeTable, -) -> Result { +fn default_value_for_type(ty: &Type, env: &Environment) -> Result { match ty { Type::Unit => Ok(Value::Void), Type::Int => Ok(Value::Int(0)), @@ -395,9 +377,10 @@ fn default_value_for_type( Type::Bool => Ok(Value::Bool(false)), Type::Str => Ok(Value::Str(String::new())), Type::Array(_) => Ok(Value::Array(vec![])), - Type::Tagged { tag_type, tag_name } => { - build_tagged_value(tag_type, tag_name, Value::Int(0), tagged_table) - } + Type::Aggregate { + specifier, + identifier, + } => build_aggregate_value(specifier, identifier, Value::Int(0), env), Type::Function { .. 
} | Type::Any => Err(RuntimeError::new( "cannot create default runtime value for this type", )), diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index 3cd9346..36360c4 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -54,22 +54,18 @@ pub mod eval_expr; pub mod exec_stmt; pub mod value; -use crate::environment::Environment; -use crate::ir::ast::{CheckedProgram, TagType, TaggedTypeDecl}; +use crate::environment::{build_type_decl_map, Environment}; +use crate::ir::ast::CheckedProgram; use crate::stdlib::NativeRegistry; -use std::collections::HashMap; - use eval_expr::eval_call; use value::{FnValue, RuntimeError, Value}; -type TaggedRuntimeTable = HashMap<(TagType, String), TaggedTypeDecl>; - /// Interpret a type-checked MiniC program, starting execution at `main`. pub fn interpret(program: &CheckedProgram) -> Result<(), RuntimeError> { - let tagged_table = build_tagged_runtime_table(&program.tagged_types)?; + let type_map = build_type_decl_map(&program.type_declarations); - let mut env = Environment::::new(); + let mut env = Environment::with_type_decls(type_map); // Register native stdlib functions as Value::Fn(FnValue::Native) bindings. 
let registry = NativeRegistry::default(); @@ -89,22 +85,6 @@ pub fn interpret(program: &CheckedProgram) -> Result<(), RuntimeError> { return Err(RuntimeError::new("no 'main' function found")); } - eval_call("main", vec![], &mut env, &tagged_table)?; + eval_call("main", vec![], &mut env)?; Ok(()) } - -fn build_tagged_runtime_table( - tagged_types: &[TaggedTypeDecl], -) -> Result { - let mut table = TaggedRuntimeTable::new(); - for decl in tagged_types { - let key = (decl.tag_type.clone(), decl.tag_name.clone()); - if table.insert(key, decl.clone()).is_some() { - return Err(RuntimeError::new(format!( - "duplicate tagged type declaration at runtime: {:?} {}", - decl.tag_type, decl.tag_name - ))); - } - } - Ok(table) -} diff --git a/src/interpreter/value.rs b/src/interpreter/value.rs index 7a11d28..58ac943 100644 --- a/src/interpreter/value.rs +++ b/src/interpreter/value.rs @@ -102,16 +102,16 @@ pub enum Value { Str(String), Array(Vec), Struct { - tag_name: String, + identifier: String, fields: std::collections::HashMap, }, Union { - tag_name: String, + identifier: String, active_field: String, value: Box, }, Enum { - tag_name: String, + identifier: String, value: i64, }, Void, @@ -136,13 +136,13 @@ impl fmt::Display for Value { } write!(f, "]") } - Value::Struct { tag_name, .. } => write!(f, "", tag_name), + Value::Struct { identifier, .. } => write!(f, "", identifier), Value::Union { - tag_name, + identifier, active_field, .. - } => write!(f, "", tag_name, active_field), - Value::Enum { tag_name, value } => write!(f, "", tag_name, value), + } => write!(f, "", identifier, active_field), + Value::Enum { identifier, value } => write!(f, "", identifier, value), Value::Fn(_) => write!(f, ""), } } diff --git a/src/ir/ast.rs b/src/ir/ast.rs index 4761fb8..f0e908e 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -48,9 +48,9 @@ //! compatibility check (`types_compatible`) treats `Any` as matching //! everything, keeping the special case local to one function. 
-/// Tagged types: struct, union, enum +/// Aggregate types: struct, union, enum #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TagType { +pub enum AgtTypeSpecifier { Struct, Union, Enum, @@ -65,9 +65,9 @@ pub enum Type { Bool, Str, Array(Box), - Tagged { - tag_type: TagType, - tag_name: String, + Aggregate { + specifier: AgtTypeSpecifier, + identifier: String, }, Function { params: Vec, @@ -180,19 +180,19 @@ pub struct IdentifierDecl { pub ty: Type, } -/// A field or enumerator inside a tagged type declaration. +/// A field or enumerator inside an aggregate type declaration. #[derive(Debug, Clone, PartialEq)] -pub enum Member { +pub enum AgtTypeMember { Field(IdentifierDecl), Enumerator { name: String, value: Option }, } -/// A tagged type declaration: struct, union, or enum. +/// An aggregate type declaration: struct, union, or enum. #[derive(Debug, Clone, PartialEq)] -pub struct TaggedTypeDecl { - pub tag_type: TagType, - pub tag_name: String, - pub members: Vec, +pub struct AggregateTypeDecl { + pub specifier: AgtTypeSpecifier, + pub identifier: String, + pub members: Vec, } /// A function declaration. @@ -207,7 +207,7 @@ pub struct FunDecl { /// A complete MiniC program: top-level type declarations and function declarations. #[derive(Debug, Clone, PartialEq)] pub struct Program { - pub tagged_types: Vec, + pub type_declarations: Vec, pub functions: Vec>, } diff --git a/src/parser/identifiers.rs b/src/parser/identifiers.rs index 93ecbd0..a8200e3 100644 --- a/src/parser/identifiers.rs +++ b/src/parser/identifiers.rs @@ -47,7 +47,7 @@ pub fn identifier(input: &str) -> IResult<&str, &str> { } /// Parse an identifier declaration: `Type name`. 
-/// Must only be called for parameters and tagged union members +/// Must only be called for parameters and struct/union members pub fn identifier_decl(input: &str) -> IResult<&str, IdentifierDecl> { map( tuple((type_definition, preceded(multispace1, identifier))), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b8c2d6a..f796bde 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -68,4 +68,4 @@ pub use identifiers::identifier; pub use literals::{literal, Literal}; pub use program::program; pub use statements::{assignment, statement}; -pub use types::tagged_type_decl; +pub use types::aggregate_type_decl; diff --git a/src/parser/program.rs b/src/parser/program.rs index 665d231..bcdc292 100644 --- a/src/parser/program.rs +++ b/src/parser/program.rs @@ -26,24 +26,24 @@ //! `main` is a semantic constraint checked in the next pipeline stage, not //! a syntactic one enforced here. -use crate::ir::ast::{Program, TaggedTypeDecl, UncheckedProgram}; +use crate::ir::ast::{AggregateTypeDecl, Program, UncheckedProgram}; use crate::parser::functions::fun_decl; -use crate::parser::types::tagged_type_decl; +use crate::parser::types::aggregate_type_decl; use nom::{branch::alt, combinator::map, multi::many0, IResult}; /// Parse a complete MiniC program: zero or more struct or function declarations. /// Execution starts at the `main` function (validated by the type checker). 
pub fn program(input: &str) -> IResult<&str, UncheckedProgram> { let (rest, items) = many0(alt(( - map(tagged_type_decl, |decl| Item::TypeDecl(decl)), + map(aggregate_type_decl, |decl| Item::TypeDecl(decl)), map(fun_decl, |f| Item::Function(f)), )))(input)?; - let mut type_decls = Vec::new(); + let mut type_declarations = Vec::new(); let mut functions = Vec::new(); for item in items { match item { - Item::TypeDecl(decl) => type_decls.push(decl), + Item::TypeDecl(decl) => type_declarations.push(decl), Item::Function(f) => functions.push(f), } } @@ -51,13 +51,13 @@ pub fn program(input: &str) -> IResult<&str, UncheckedProgram> { Ok(( rest, Program { - tagged_types: type_decls, + type_declarations, functions, }, )) } enum Item { - TypeDecl(TaggedTypeDecl), + TypeDecl(AggregateTypeDecl), Function(crate::ir::ast::FunDecl<()>), } diff --git a/src/parser/types.rs b/src/parser/types.rs index cbc7296..159978f 100644 --- a/src/parser/types.rs +++ b/src/parser/types.rs @@ -4,30 +4,31 @@ //! struct type names. It is reused by function parsing, struct field parsing, //! and variable declarations. 
-use crate::ir::ast::{Member, TagType, TaggedTypeDecl, Type}; +use crate::ir::ast::{AggregateTypeDecl, AgtTypeMember, AgtTypeSpecifier, Type}; use crate::parser::identifiers::{identifier, identifier_decl}; use crate::parser::literals::integer_literal; +use nom::multi::many1; use nom::{ branch::alt, bytes::complete::tag, character::complete::{char, multispace0, multispace1}, combinator::{map, opt}, - multi::{many0, many1}, + multi::many0, sequence::{delimited, pair, preceded, tuple}, IResult, }; -fn member_field(input: &str) -> IResult<&str, Member> { +fn agt_member_field(input: &str) -> IResult<&str, AgtTypeMember> { map( tuple(( preceded(multispace0, identifier_decl), preceded(multispace0, char(';')), )), - |(decl, _)| Member::Field(decl), + |(decl, _)| AgtTypeMember::Field(decl), )(input) } -fn enum_variant(input: &str) -> IResult<&str, Member> { +fn agt_member_enumerator(input: &str) -> IResult<&str, AgtTypeMember> { map( tuple(( preceded(multispace0, identifier), @@ -37,61 +38,59 @@ fn enum_variant(input: &str) -> IResult<&str, Member> { )), preceded(multispace0, char(';')), )), - |(name, value, _)| Member::Enumerator { + |(name, value, _)| AgtTypeMember::Enumerator { name: name.to_string(), value, }, )(input) } -fn tagged_type_and_name(input: &str) -> IResult<&str, (TagType, String)> { +fn aggregate_type_name(input: &str) -> IResult<&str, (AgtTypeSpecifier, String)> { alt(( map( tuple(( preceded(multispace0, tag("struct")), preceded(multispace1, identifier), )), - |(_, name)| (TagType::Struct, name.to_string()), + |(_, name)| (AgtTypeSpecifier::Struct, name.to_string()), ), map( tuple(( preceded(multispace0, tag("union")), preceded(multispace1, identifier), )), - |(_, name)| (TagType::Union, name.to_string()), + |(_, name)| (AgtTypeSpecifier::Union, name.to_string()), ), map( tuple(( preceded(multispace0, tag("enum")), preceded(multispace1, identifier), )), - |(_, name)| (TagType::Enum, name.to_string()), + |(_, name)| (AgtTypeSpecifier::Enum, 
name.to_string()), ), ))(input) } -/// Parse a tagged type: `[ struct | union | enum ] N {...}`. -pub fn tagged_type_decl(input: &str) -> IResult<&str, TaggedTypeDecl> { - let (rest, (tag_type, tag_name)) = tagged_type_and_name(input)?; +/// Parse an aggregate type: `[ struct | union | enum ] N {...}`. +pub fn aggregate_type_decl(input: &str) -> IResult<&str, AggregateTypeDecl> { + let (rest, (specifier, identifier)) = aggregate_type_name(input)?; - let (rest, members) = match tag_type { - TagType::Struct | TagType::Union => delimited( - preceded(multispace0, char('{')), - many1(member_field), - preceded(multispace0, char('}')), - )(rest)?, - TagType::Enum => delimited( - preceded(multispace0, char('{')), - many1(enum_variant), - preceded(multispace0, char('}')), - )(rest)?, + let member_parser = match specifier { + AgtTypeSpecifier::Struct | AgtTypeSpecifier::Union => agt_member_field, + AgtTypeSpecifier::Enum => agt_member_enumerator, }; + let (rest, members) = delimited( + preceded(multispace0, char('{')), + many1(preceded(multispace0, member_parser)), + preceded(multispace0, char('}')), + )(rest)?; + Ok(( rest, - TaggedTypeDecl { - tag_type, - tag_name, + AggregateTypeDecl { + specifier, + identifier, members, }, )) @@ -101,9 +100,11 @@ fn base_type(input: &str) -> IResult<&str, Type> { preceded( multispace0, alt(( - map(tagged_type_and_name, |(tag_type, tag_name)| Type::Tagged { - tag_type, - tag_name, + map(aggregate_type_name, |(specifier, identifier)| { + Type::Aggregate { + specifier, + identifier, + } }), map(tag("int"), |_| Type::Int), map(tag("float"), |_| Type::Float), diff --git a/src/semantic/type_checker.rs b/src/semantic/type_checker.rs index bd28dc0..ade506f 100644 --- a/src/semantic/type_checker.rs +++ b/src/semantic/type_checker.rs @@ -44,13 +44,13 @@ //! Centralising compatibility logic here means all callers (declaration, //! assignment, call-argument checking) share one consistent definition. 
-use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; -use crate::environment::Environment; +use crate::environment::{build_type_decl_map, Environment}; use crate::ir::ast::{ - CheckedExpr, CheckedFunDecl, CheckedProgram, CheckedStmt, Expr, ExprD, FunDecl, Literal, - Member, Program, Statement, StatementD, TagType, TaggedTypeDecl, Type, UncheckedExpr, - UncheckedFunDecl, UncheckedProgram, UncheckedStmt, + AggregateTypeDecl, AgtTypeMember, AgtTypeSpecifier, CheckedExpr, CheckedFunDecl, + CheckedProgram, CheckedStmt, Expr, ExprD, FunDecl, Literal, Program, Statement, StatementD, + Type, UncheckedExpr, UncheckedFunDecl, UncheckedProgram, UncheckedStmt, }; use crate::stdlib::NativeRegistry; @@ -76,12 +76,25 @@ impl std::fmt::Display for TypeError { impl std::error::Error for TypeError {} -type TaggedTypeTable = HashMap<(TagType, String), TaggedTypeDecl>; +fn check_type_decls_unique(decls: &[AggregateTypeDecl]) -> Result<(), TypeError> { + let mut seen = HashSet::new(); + for decl in decls { + let key = (decl.specifier.clone(), decl.identifier.clone()); + if !seen.insert(key) { + return Err(TypeError::new(format!( + "duplicate type declaration: {:?} {}", + decl.specifier, decl.identifier + ))); + } + } + Ok(()) +} /// Type-check a program. Returns `Ok(CheckedProgram)` if well-typed, `Err(TypeError)` on first error. /// Requires a `main` function with signature `void main()`. pub fn type_check(program: &UncheckedProgram) -> Result { - let tagged_table = build_tagged_type_table(&program.tagged_types)?; + check_type_decls_unique(&program.type_declarations)?; + let type_map = build_type_decl_map(&program.type_declarations); let main_fn = program.functions.iter().find(|f| f.name == "main"); match main_fn { @@ -96,7 +109,7 @@ pub fn type_check(program: &UncheckedProgram) -> Result::new(); + let mut env = Environment::with_type_decls(type_map); // Register native stdlib functions as Type::Function bindings. 
let registry = NativeRegistry::default(); @@ -127,11 +140,11 @@ pub fn type_check(program: &UncheckedProgram) -> Result, fn_snapshot: &HashMap, - tagged_table: &TaggedTypeTable, ) -> Result { // Restore to clean function-only state, then add parameters. env.restore(fn_snapshot.clone()); for param in &f.params { env.declare(param.name.clone(), param.ty.clone()); } - let body = type_check_stmt(&f.body, env, &f.return_type, tagged_table)?; + let body = type_check_stmt(&f.body, env, &f.return_type)?; Ok(FunDecl { name: f.name.clone(), params: f.params.clone(), @@ -160,18 +172,21 @@ fn type_check_stmt( s: &UncheckedStmt, env: &mut Environment, expected_return: &Type, - tagged_table: &TaggedTypeTable, ) -> Result { let stmt = match &s.stmt { Statement::Decl { name, ty, init } => { if ty == &Type::Unit { return Err(TypeError::new("cannot declare variable of type void")); } - if let Type::Tagged { tag_type, tag_name } = ty { - if !tagged_table.contains_key(&(tag_type.clone(), tag_name.clone())) { + if let Type::Aggregate { + specifier, + identifier, + } = ty + { + if !env.has_aggregate_type(specifier, identifier) { return Err(TypeError::new(format!( - "unknown tagged type: {:?} {}", - tag_type, tag_name + "unknown aggregate type: {:?} {}", + specifier, identifier ))); } } @@ -181,11 +196,11 @@ fn type_check_stmt( name ))); } - let init_checked = type_check_expr_to_typed(init, env, tagged_table)?; - if matches!(ty, Type::Tagged { .. }) { + let init_checked = type_check_expr_to_typed(init, env)?; + if matches!(ty, Type::Aggregate { .. 
}) { if init_checked.ty != Type::Int { return Err(TypeError::new( - "tagged-typed variable declarations currently require integer placeholder initializer", + "aggregate-typed variable declarations currently require integer placeholder initializer", )); } } else if !types_compatible(&init_checked.ty, ty) { @@ -202,10 +217,10 @@ fn type_check_stmt( } } Statement::Assign { target, value } => { - let value_checked = type_check_expr_to_typed(value, env, tagged_table)?; - type_check_assign_target(&target.exp, &value_checked.ty, env, tagged_table)?; + let value_checked = type_check_expr_to_typed(value, env)?; + type_check_assign_target(&target.exp, &value_checked.ty, env)?; Statement::Assign { - target: Box::new(type_check_expr_to_typed(target, env, tagged_table)?), + target: Box::new(type_check_expr_to_typed(target, env)?), value: Box::new(value_checked), } } @@ -213,7 +228,7 @@ fn type_check_stmt( let snapshot = env.snapshot(); let mut checked = Vec::new(); for st in seq { - checked.push(type_check_stmt(st, env, expected_return, tagged_table)?); + checked.push(type_check_stmt(st, env, expected_return)?); } env.restore(snapshot); Statement::Block { seq: checked } @@ -221,7 +236,7 @@ fn type_check_stmt( Statement::Call { name, args } => { let args_checked: Result, _> = args .iter() - .map(|a| type_check_expr_to_typed(a, env, tagged_table)) + .map(|a| type_check_expr_to_typed(a, env)) .collect(); let args_checked = args_checked?; check_call(name, &args_checked, env)?; @@ -235,17 +250,17 @@ fn type_check_stmt( then_branch, else_branch, } => { - let cond_checked = type_check_expr_to_typed(cond, env, tagged_table)?; + let cond_checked = type_check_expr_to_typed(cond, env)?; if cond_checked.ty != Type::Bool { return Err(TypeError::new(format!( "if condition must be Bool, got {:?}", cond_checked.ty ))); } - let then_checked = type_check_stmt(then_branch, env, expected_return, tagged_table)?; + let then_checked = type_check_stmt(then_branch, env, expected_return)?; let 
else_checked = else_branch .as_ref() - .map(|e| type_check_stmt(e, env, expected_return, tagged_table)) + .map(|e| type_check_stmt(e, env, expected_return)) .transpose()?; Statement::If { cond: Box::new(cond_checked), @@ -254,14 +269,14 @@ fn type_check_stmt( } } Statement::While { cond, body } => { - let cond_checked = type_check_expr_to_typed(cond, env, tagged_table)?; + let cond_checked = type_check_expr_to_typed(cond, env)?; if cond_checked.ty != Type::Bool { return Err(TypeError::new(format!( "while condition must be Bool, got {:?}", cond_checked.ty ))); } - let body_checked = type_check_stmt(body, env, expected_return, tagged_table)?; + let body_checked = type_check_stmt(body, env, expected_return)?; Statement::While { cond: Box::new(cond_checked), body: Box::new(body_checked), @@ -281,7 +296,7 @@ fn type_check_stmt( if *expected_return == Type::Unit { return Err(TypeError::new("void function must not return a value")); } - let checked = type_check_expr_to_typed(e, env, tagged_table)?; + let checked = type_check_expr_to_typed(e, env)?; if !types_compatible(&checked.ty, expected_return) { return Err(TypeError::new(format!( "return type mismatch: expected {:?}, got {:?}", @@ -333,7 +348,6 @@ fn type_check_assign_target( target: &Expr<()>, value_ty: &Type, env: &Environment, - tagged_table: &TaggedTypeTable, ) -> Result<(), TypeError> { match target { Expr::Ident(name) => { @@ -349,11 +363,11 @@ fn type_check_assign_target( Ok(()) } Expr::Index { base, index } => { - let index_ty = type_check_expr(index, env, tagged_table)?; + let index_ty = type_check_expr(index, env)?; if index_ty != Type::Int { return Err(TypeError::new("array index must be Int")); } - let base_ty = type_check_expr(base, env, tagged_table)?; + let base_ty = type_check_expr(base, env)?; if let Type::Array(elem) = &base_ty { if **elem != *value_ty { return Err(TypeError::new("assignment type mismatch")); @@ -364,25 +378,26 @@ fn type_check_assign_target( Ok(()) } Expr::Member { base, member } 
=> { - let base_ty = type_check_expr(base, env, tagged_table)?; + let base_ty = type_check_expr(base, env)?; match base_ty { - Type::Tagged { tag_type, tag_name } => { - let decl = tagged_table - .get(&(tag_type.clone(), tag_name.clone())) - .ok_or_else(|| { - TypeError::new(format!( - "unknown tagged type in member assignment: {:?} {}", - tag_type, tag_name - )) - })?; - - match tag_type { - TagType::Struct | TagType::Union => { + Type::Aggregate { + specifier, + identifier, + } => { + let decl = env.aggregate_type(&specifier, &identifier).ok_or_else(|| { + TypeError::new(format!( + "unknown aggregate type in member assignment: {:?} {}", + specifier, identifier + )) + })?; + + match specifier { + AgtTypeSpecifier::Struct | AgtTypeSpecifier::Union => { let field_ty = decl .members .iter() .find_map(|m| match m { - Member::Field(decl) if decl.name == *member => { + AgtTypeMember::Field(decl) if decl.name == *member => { Some(decl.ty.clone()) } _ => None, @@ -390,23 +405,25 @@ fn type_check_assign_target( .ok_or_else(|| { TypeError::new(format!( "unknown member '{}' on {:?} {}", - member, tag_type, tag_name + member, specifier, identifier )) })?; if !types_compatible(value_ty, &field_ty) { return Err(TypeError::new(format!( "assignment to {}.{}: expected {:?}, got {:?}", - tag_name, member, field_ty, value_ty + identifier, member, field_ty, value_ty ))); } Ok(()) } - TagType::Enum => Err(TypeError::new("cannot assign to enum members")), + AgtTypeSpecifier::Enum => { + Err(TypeError::new("cannot assign to enum members")) + } } } other => Err(TypeError::new(format!( - "member assignment requires tagged base type, got {:?}", + "member assignment requires aggregate base type, got {:?}", other ))), } @@ -418,83 +435,70 @@ fn type_check_assign_target( fn type_check_expr_to_typed( e: &UncheckedExpr, env: &Environment, - tagged_table: &TaggedTypeTable, ) -> Result { - let ty = type_check_expr(e, env, tagged_table)?; - let exp = type_check_expr_inner(&e.exp, env, 
tagged_table)?; + let ty = type_check_expr(e, env)?; + let exp = type_check_expr_inner(&e.exp, env)?; Ok(ExprD { exp, ty }) } -fn type_check_expr_inner( - e: &Expr<()>, - env: &Environment, - tagged_table: &TaggedTypeTable, -) -> Result, TypeError> { +fn type_check_expr_inner(e: &Expr<()>, env: &Environment) -> Result, TypeError> { match e { Expr::Literal(l) => Ok(Expr::Literal(l.clone())), Expr::Ident(name) => Ok(Expr::Ident(name.clone())), - Expr::Neg(inner) => Ok(Expr::Neg(Box::new(type_check_expr_to_typed( - inner, - env, - tagged_table, - )?))), + Expr::Neg(inner) => Ok(Expr::Neg(Box::new(type_check_expr_to_typed(inner, env)?))), Expr::Add(l, r) => Ok(Expr::Add( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Sub(l, r) => Ok(Expr::Sub( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Mul(l, r) => Ok(Expr::Mul( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Div(l, r) => Ok(Expr::Div( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Eq(l, r) => Ok(Expr::Eq( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Ne(l, r) => Ok(Expr::Ne( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - 
Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Lt(l, r) => Ok(Expr::Lt( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Le(l, r) => Ok(Expr::Le( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Gt(l, r) => Ok(Expr::Gt( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Ge(l, r) => Ok(Expr::Ge( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), - Expr::Not(inner) => Ok(Expr::Not(Box::new(type_check_expr_to_typed( - inner, - env, - tagged_table, - )?))), + Expr::Not(inner) => Ok(Expr::Not(Box::new(type_check_expr_to_typed(inner, env)?))), Expr::And(l, r) => Ok(Expr::And( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Or(l, r) => Ok(Expr::Or( - Box::new(type_check_expr_to_typed(l, env, tagged_table)?), - Box::new(type_check_expr_to_typed(r, env, tagged_table)?), + Box::new(type_check_expr_to_typed(l, env)?), + Box::new(type_check_expr_to_typed(r, env)?), )), Expr::Call { name, args } => { let args_checked: Result, _> = args .iter() - .map(|a| type_check_expr_to_typed(a, env, tagged_table)) + 
.map(|a| type_check_expr_to_typed(a, env)) .collect(); Ok(Expr::Call { name: name.clone(), @@ -504,26 +508,22 @@ fn type_check_expr_inner( Expr::ArrayLit(elems) => { let elems_checked: Result, _> = elems .iter() - .map(|e| type_check_expr_to_typed(e, env, tagged_table)) + .map(|e| type_check_expr_to_typed(e, env)) .collect(); Ok(Expr::ArrayLit(elems_checked?)) } Expr::Index { base, index } => Ok(Expr::Index { - base: Box::new(type_check_expr_to_typed(base, env, tagged_table)?), - index: Box::new(type_check_expr_to_typed(index, env, tagged_table)?), + base: Box::new(type_check_expr_to_typed(base, env)?), + index: Box::new(type_check_expr_to_typed(index, env)?), }), Expr::Member { base, member } => Ok(Expr::Member { - base: Box::new(type_check_expr_to_typed(base, env, tagged_table)?), + base: Box::new(type_check_expr_to_typed(base, env)?), member: member.clone(), }), } } -fn type_check_expr( - e: &UncheckedExpr, - env: &Environment, - tagged_table: &TaggedTypeTable, -) -> Result { +fn type_check_expr(e: &UncheckedExpr, env: &Environment) -> Result { match &e.exp { Expr::Literal(l) => Ok(literal_type(l)), Expr::Ident(name) => match env.get(name) { @@ -535,7 +535,7 @@ fn type_check_expr( None => Err(TypeError::new(format!("undeclared variable: {}", name))), }, Expr::Neg(inner) => { - let ty = type_check_expr(inner, env, tagged_table)?; + let ty = type_check_expr(inner, env)?; if matches!(ty, Type::Int | Type::Float) { Ok(ty) } else { @@ -543,13 +543,13 @@ fn type_check_expr( } } Expr::Add(l, r) | Expr::Sub(l, r) | Expr::Mul(l, r) | Expr::Div(l, r) => { - let lt = type_check_expr(l, env, tagged_table)?; - let rt = type_check_expr(r, env, tagged_table)?; + let lt = type_check_expr(l, env)?; + let rt = type_check_expr(r, env)?; numeric_binop_result(<, &rt) } Expr::Eq(l, r) | Expr::Ne(l, r) => { - let lt = type_check_expr(l, env, tagged_table)?; - let rt = type_check_expr(r, env, tagged_table)?; + let lt = type_check_expr(l, env)?; + let rt = type_check_expr(r, env)?; if 
!types_compatible(<, &rt) { return Err(TypeError::new(format!( "equality operands must have compatible types, got {:?} and {:?}", @@ -559,8 +559,8 @@ fn type_check_expr( Ok(Type::Bool) } Expr::Lt(l, r) | Expr::Le(l, r) | Expr::Gt(l, r) | Expr::Ge(l, r) => { - let lt = type_check_expr(l, env, tagged_table)?; - let rt = type_check_expr(r, env, tagged_table)?; + let lt = type_check_expr(l, env)?; + let rt = type_check_expr(r, env)?; if !is_numeric(<) || !is_numeric(&rt) { return Err(TypeError::new(format!( "ordering comparison requires numeric operands, got {:?} and {:?}", @@ -570,7 +570,7 @@ fn type_check_expr( Ok(Type::Bool) } Expr::Not(inner) => { - let ty = type_check_expr(inner, env, tagged_table)?; + let ty = type_check_expr(inner, env)?; if ty == Type::Bool { Ok(Type::Bool) } else { @@ -578,8 +578,8 @@ fn type_check_expr( } } Expr::And(l, r) | Expr::Or(l, r) => { - let lt = type_check_expr(l, env, tagged_table)?; - let rt = type_check_expr(r, env, tagged_table)?; + let lt = type_check_expr(l, env)?; + let rt = type_check_expr(r, env)?; if lt == Type::Bool && rt == Type::Bool { Ok(Type::Bool) } else { @@ -589,7 +589,7 @@ fn type_check_expr( Expr::Call { name, args } => { let args_checked: Result, _> = args .iter() - .map(|a| type_check_expr_to_typed(a, env, tagged_table)) + .map(|a| type_check_expr_to_typed(a, env)) .collect(); let args_checked = args_checked?; match env.get(name) { @@ -628,9 +628,9 @@ fn type_check_expr( if elems.is_empty() { return Err(TypeError::new("empty array literal needs type annotation")); } - let first = type_check_expr(&elems[0], env, tagged_table)?; + let first = type_check_expr(&elems[0], env)?; for e in elems.iter().skip(1) { - let ty = type_check_expr(e, env, tagged_table)?; + let ty = type_check_expr(e, env)?; if !types_compatible(&first, &ty) { return Err(TypeError::new("array elements must have same type")); } @@ -638,11 +638,11 @@ fn type_check_expr( Ok(Type::Array(Box::new(first))) } Expr::Index { base, index } => { - let 
index_ty = type_check_expr(index, env, tagged_table)?; + let index_ty = type_check_expr(index, env)?; if index_ty != Type::Int { return Err(TypeError::new("array index must be Int")); } - let base_ty = type_check_expr(base, env, tagged_table)?; + let base_ty = type_check_expr(base, env)?; if let Type::Array(elem) = base_ty { Ok(*elem) } else { @@ -650,24 +650,25 @@ fn type_check_expr( } } Expr::Member { base, member } => { - let base_ty = type_check_expr(base, env, tagged_table)?; + let base_ty = type_check_expr(base, env)?; match base_ty { - Type::Tagged { tag_type, tag_name } => { - let decl = tagged_table - .get(&(tag_type.clone(), tag_name.clone())) - .ok_or_else(|| { - TypeError::new(format!( - "unknown tagged type in member access: {:?} {}", - tag_type, tag_name - )) - })?; - - match tag_type { - TagType::Struct | TagType::Union => decl + Type::Aggregate { + specifier, + identifier, + } => { + let decl = env.aggregate_type(&specifier, &identifier).ok_or_else(|| { + TypeError::new(format!( + "unknown aggregate type in member access: {:?} {}", + specifier, identifier + )) + })?; + + match specifier { + AgtTypeSpecifier::Struct | AgtTypeSpecifier::Union => decl .members .iter() .find_map(|m| match m { - Member::Field(decl) if decl.name == *member => { + AgtTypeMember::Field(decl) if decl.name == *member => { Some(decl.ty.clone()) } _ => None, @@ -675,12 +676,12 @@ fn type_check_expr( .ok_or_else(|| { TypeError::new(format!( "unknown member '{}' on {:?} {}", - member, tag_type, tag_name + member, specifier, identifier )) }), - TagType::Enum => { + AgtTypeSpecifier::Enum => { let exists = decl.members.iter().any(|m| match m { - Member::Enumerator { name, .. } => name == member, + AgtTypeMember::Enumerator { name, .. 
} => name == member, _ => false, }); if exists { @@ -688,14 +689,14 @@ fn type_check_expr( } else { Err(TypeError::new(format!( "unknown enumerator '{}' on enum {}", - member, tag_name + member, identifier ))) } } } } other => Err(TypeError::new(format!( - "member access requires tagged base type, got {:?}", + "member access requires aggregate base type, got {:?}", other ))), } @@ -738,42 +739,15 @@ fn types_compatible(a: &Type, b: &Type) -> bool { (Type::Int, Type::Float) | (Type::Float, Type::Int) => true, (Type::Array(a), Type::Array(b)) => types_compatible(a, b), ( - Type::Tagged { - tag_type: a_kind, - tag_name: a_name, + Type::Aggregate { + specifier: a_kind, + identifier: a_name, }, - Type::Tagged { - tag_type: b_kind, - tag_name: b_name, + Type::Aggregate { + specifier: b_kind, + identifier: b_name, }, ) => a_kind == b_kind && a_name == b_name, _ => false, } } - -fn build_tagged_type_table(tagged_types: &[TaggedTypeDecl]) -> Result { - let mut seen = std::collections::HashSet::<(TagType, String)>::new(); - let mut table = TaggedTypeTable::new(); - - for decl in tagged_types { - let key = (decl.tag_type.clone(), decl.tag_name.clone()); - - if !seen.insert(key.clone()) { - return Err(TypeError::new(format!( - "duplicate tagged type declaration: {:?} {}", - decl.tag_type, decl.tag_name - ))); - } - - if decl.members.is_empty() { - return Err(TypeError::new(format!( - "tagged type declaration '{}' must declare at least one member", - decl.tag_name - ))); - } - - table.insert(key, decl.clone()); - } - - Ok(table) -} diff --git a/tests/cli/run.test b/tests/cli/run.test index 0852bcb..2973a5d 100644 --- a/tests/cli/run.test +++ b/tests/cli/run.test @@ -19,6 +19,12 @@ $ ./target/debug/mini_c --run tests/fixtures/interpreter_array.minic 30 >=0 +# --run executes aggregate type fixture and prints bool/enum values +$ ./target/debug/mini_c --run tests/fixtures/aggregate_types.minic +true +2 +>=0 + # --run on a type-error program fails before interpretation $ 
./target/debug/mini_c --run tests/fixtures/cli_type_mismatch.minic >2 /Type error/ diff --git a/tests/fixtures/aggregate_types.minic b/tests/fixtures/aggregate_types.minic new file mode 100644 index 0000000..b0d39de --- /dev/null +++ b/tests/fixtures/aggregate_types.minic @@ -0,0 +1,12 @@ +struct Point { bool valid; int[][] coords; } +union Payload { int code; struct Point p; } +enum Kind { A; B = 2; } + +void main() { + struct Point p = 0; + p.valid = true; + print(p.valid); + enum Kind k = 0; + int v = k.B; + print(v); +} diff --git a/tests/fixtures/tagged_types.minic b/tests/fixtures/tagged_types.minic deleted file mode 100644 index 42b6a3b..0000000 --- a/tests/fixtures/tagged_types.minic +++ /dev/null @@ -1,5 +0,0 @@ -struct Point { int x; int y; } -union Payload { int code; struct Point p; } -enum Kind { A; B = 2; } - -void main() { return; } diff --git a/tests/interpreter.rs b/tests/interpreter.rs index 5f0b18b..60f33a4 100644 --- a/tests/interpreter.rs +++ b/tests/interpreter.rs @@ -258,7 +258,7 @@ fn test_stdlib_pow_float_args() { } // --------------------------------------------------------------------------- -// Tagged Unions +// Aggregate Unions // --------------------------------------------------------------------------- #[test] diff --git a/tests/parser.rs b/tests/parser.rs index f1b1f4b..493e067 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -1,6 +1,8 @@ //! Integration tests for the MiniC parser. 
-use mini_c::ir::ast::{Expr, ExprD, IdentifierDecl, Literal, Member, Statement, TagType, Type}; +use mini_c::ir::ast::{ + AgtTypeMember, AgtTypeSpecifier, Expr, ExprD, IdentifierDecl, Literal, Statement, Type, +}; use mini_c::parser::{ assignment, expression, fun_decl, identifier, identifiers::identifier_decl, @@ -9,7 +11,7 @@ use mini_c::parser::{ boolean_literal, float_literal, integer_literal, string_literal, Literal as ParserLiteral, }, statement, - types::{tagged_type_decl, type_definition}, + types::{aggregate_type_decl, type_definition}, }; use nom::combinator::all_consuming; @@ -121,14 +123,14 @@ fn test_identifier_accept_true_prefix() { // --- Types --- #[test] -fn test_tagged_type_definition() { +fn test_aggregate_type_definition() { assert_eq!( type_definition("struct Point"), Ok(( "", - Type::Tagged { - tag_type: TagType::Struct, - tag_name: "Point".to_string(), + Type::Aggregate { + specifier: AgtTypeSpecifier::Struct, + identifier: "Point".to_string(), }, )) ); @@ -137,9 +139,9 @@ fn test_tagged_type_definition() { type_definition("union Value"), Ok(( "", - Type::Tagged { - tag_type: TagType::Union, - tag_name: "Value".to_string(), + Type::Aggregate { + specifier: AgtTypeSpecifier::Union, + identifier: "Value".to_string(), }, )) ); @@ -148,39 +150,39 @@ fn test_tagged_type_definition() { type_definition("enum Kind"), Ok(( "", - Type::Tagged { - tag_type: TagType::Enum, - tag_name: "Kind".to_string(), + Type::Aggregate { + specifier: AgtTypeSpecifier::Enum, + identifier: "Kind".to_string(), }, )) ); } #[test] -fn test_tagged_type_definition_array() { +fn test_aggregate_type_definition_array() { assert_eq!( type_definition("struct S[]"), Ok(( "", - Type::Array(Box::new(Type::Tagged { - tag_type: TagType::Struct, - tag_name: "S".to_string(), + Type::Array(Box::new(Type::Aggregate { + specifier: AgtTypeSpecifier::Struct, + identifier: "S".to_string(), })) )) ); } #[test] -fn test_tagged_type_identifier_decl() { +fn test_aggregate_type_identifier_decl() { 
assert_eq!( identifier_decl("struct Point p"), Ok(( "", IdentifierDecl { name: "p".to_string(), - ty: Type::Tagged { - tag_type: TagType::Struct, - tag_name: "Point".to_string(), + ty: Type::Aggregate { + specifier: AgtTypeSpecifier::Struct, + identifier: "Point".to_string(), }, } )) @@ -192,9 +194,9 @@ fn test_tagged_type_identifier_decl() { "", IdentifierDecl { name: "v".to_string(), - ty: Type::Tagged { - tag_type: TagType::Union, - tag_name: "Value".to_string(), + ty: Type::Aggregate { + specifier: AgtTypeSpecifier::Union, + identifier: "Value".to_string(), }, } )) @@ -206,9 +208,9 @@ fn test_tagged_type_identifier_decl() { "", IdentifierDecl { name: "k".to_string(), - ty: Type::Tagged { - tag_type: TagType::Enum, - tag_name: "Kind".to_string(), + ty: Type::Aggregate { + specifier: AgtTypeSpecifier::Enum, + identifier: "Kind".to_string(), }, } )) @@ -217,22 +219,22 @@ fn test_tagged_type_identifier_decl() { #[test] fn test_struct_decl() { - let result = tagged_type_decl("struct Point { int x; float y; }") + let result = all_consuming(aggregate_type_decl)("struct Point { int x; float y; }") .unwrap() .1; - assert_eq!(result.tag_type, TagType::Struct); - assert_eq!(result.tag_name, "Point"); + assert_eq!(result.specifier, AgtTypeSpecifier::Struct); + assert_eq!(result.identifier, "Point"); assert_eq!(result.members.len(), 2); assert_eq!( result.members[0], - Member::Field(IdentifierDecl { + AgtTypeMember::Field(IdentifierDecl { name: "x".into(), ty: Type::Int, }) ); assert_eq!( result.members[1], - Member::Field(IdentifierDecl { + AgtTypeMember::Field(IdentifierDecl { name: "y".into(), ty: Type::Float, }) @@ -241,22 +243,22 @@ fn test_struct_decl() { #[test] fn test_union_decl() { - let result = tagged_type_decl("union Value { int i; float f; }") + let result = all_consuming(aggregate_type_decl)("union Value { int i; float f; }") .unwrap() .1; - assert_eq!(result.tag_type, TagType::Union); - assert_eq!(result.tag_name, "Value"); + assert_eq!(result.specifier, 
AgtTypeSpecifier::Union); + assert_eq!(result.identifier, "Value"); assert_eq!(result.members.len(), 2); assert_eq!( result.members[0], - Member::Field(IdentifierDecl { + AgtTypeMember::Field(IdentifierDecl { name: "i".into(), ty: Type::Int, }) ); assert_eq!( result.members[1], - Member::Field(IdentifierDecl { + AgtTypeMember::Field(IdentifierDecl { name: "f".into(), ty: Type::Float, }) @@ -265,20 +267,22 @@ fn test_union_decl() { #[test] fn test_enum_decl() { - let result = tagged_type_decl("enum Kind { OK; Err = -1; }").unwrap().1; - assert_eq!(result.tag_type, TagType::Enum); - assert_eq!(result.tag_name, "Kind"); + let result = all_consuming(aggregate_type_decl)("enum Kind { OK; Err = -1; }") + .unwrap() + .1; + assert_eq!(result.specifier, AgtTypeSpecifier::Enum); + assert_eq!(result.identifier, "Kind"); assert_eq!(result.members.len(), 2); assert_eq!( result.members[0], - Member::Enumerator { + AgtTypeMember::Enumerator { name: "OK".into(), value: None, } ); assert_eq!( result.members[1], - Member::Enumerator { + AgtTypeMember::Enumerator { name: "Err".into(), value: Some(-1), } @@ -286,24 +290,24 @@ fn test_enum_decl() { } #[test] -fn test_tagged_type_decl_reject_empty_members() { - assert!(tagged_type_decl("struct S { }").is_err()); - assert!(tagged_type_decl("union U { }").is_err()); - assert!(tagged_type_decl("enum E { }").is_err()); +fn test_aggregate_type_decl_reject_empty_members() { + assert!(all_consuming(aggregate_type_decl)("struct S { }").is_err()); + assert!(all_consuming(aggregate_type_decl)("union U { }").is_err()); + assert!(all_consuming(aggregate_type_decl)("enum E { }").is_err()); } #[test] -fn test_tagged_type_decl_reject_missing_member_semicolon() { - assert!(tagged_type_decl("struct S { int x }").is_err()); - assert!(tagged_type_decl("union U { int x }").is_err()); - assert!(tagged_type_decl("enum E { A = 1 }").is_err()); +fn test_aggregate_type_decl_reject_missing_member_semicolon() { + assert!(all_consuming(aggregate_type_decl)("struct 
S { int x }").is_err()); + assert!(all_consuming(aggregate_type_decl)("union U { int x }").is_err()); + assert!(all_consuming(aggregate_type_decl)("enum E { A = 1 }").is_err()); } #[test] -fn test_tagged_type_decl_reject_reserved_tag_name() { - assert!(tagged_type_decl("struct return { int x; }").is_err()); - assert!(tagged_type_decl("union return { int x; }").is_err()); - assert!(tagged_type_decl("enum return { A; }").is_err()); +fn test_aggregate_type_decl_reject_reserved_identifier_name() { + assert!(all_consuming(aggregate_type_decl)("struct return { int x; }").is_err()); + assert!(all_consuming(aggregate_type_decl)("union return { int x; }").is_err()); + assert!(all_consuming(aggregate_type_decl)("enum return { A; }").is_err()); } // --- Expressions --- @@ -954,7 +958,9 @@ fn test_chained_member_access_expression() { #[test] fn test_member_access_with_index_expression() { let result = expression("items.head[0]").unwrap().1; - assert!(matches!(result.exp, Expr::Index { ref base, .. } if matches!(base.exp, Expr::Member { ref member, .. } if member == "head"))); + assert!( + matches!(result.exp, Expr::Index { ref base, .. } if matches!(base.exp, Expr::Member { ref member, .. 
} if member == "head")) + ); } #[test] diff --git a/tests/program.rs b/tests/program.rs index 2116f76..14c1dfc 100644 --- a/tests/program.rs +++ b/tests/program.rs @@ -102,13 +102,13 @@ fn test_parse_top_level_statements_fail() { } #[test] -fn test_parse_program_with_tagged_declarations() { - let prog = parse_program_file("tagged_types.minic") - .expect("program with tagged type declarations should parse"); - assert_eq!(prog.tagged_types.len(), 3); - assert_eq!(prog.tagged_types[0].tag_name, "Point"); - assert_eq!(prog.tagged_types[1].tag_name, "Payload"); - assert_eq!(prog.tagged_types[2].tag_name, "Kind"); +fn test_parse_program_with_aggregate_declarations() { + let prog = parse_program_file("aggregate_types.minic") + .expect("program with aggregate type declarations should parse"); + assert_eq!(prog.type_declarations.len(), 3); + assert_eq!(prog.type_declarations[0].identifier, "Point"); + assert_eq!(prog.type_declarations[1].identifier, "Payload"); + assert_eq!(prog.type_declarations[2].identifier, "Kind"); assert_eq!(prog.functions.len(), 1); assert_eq!(prog.functions[0].name, "main"); } diff --git a/tests/type_checker.rs b/tests/type_checker.rs index c9e9996..c03b12d 100644 --- a/tests/type_checker.rs +++ b/tests/type_checker.rs @@ -205,7 +205,7 @@ fn test_type_check_print_wrong_arity() { } // --------------------------------------------------------------------------- -// Tagged Types +// Aggregate Types // --------------------------------------------------------------------------- #[test] @@ -254,10 +254,13 @@ fn test_type_check_rejects_enum_member_assignment() { } #[test] -fn test_type_check_rejects_unknown_tagged_type_declaration_use() { +fn test_type_check_rejects_unknown_aggregate_type_declaration_use() { let result = parse_and_type_check("void main() { struct Missing x = 0; }"); assert!(result.is_err()); - assert!(result.unwrap_err().message.contains("unknown tagged type")); + assert!(result + .unwrap_err() + .message + .contains("unknown aggregate 
type")); } #[test] @@ -279,17 +282,17 @@ fn test_type_check_rejects_unknown_enum_member_access() { } #[test] -fn test_type_check_rejects_member_access_on_non_tagged_value() { +fn test_type_check_rejects_member_access_on_non_aggregate_value() { let result = parse_and_type_check("void main() { int x = 0; int y = x.foo; }"); assert!(result.is_err()); assert!(result .unwrap_err() .message - .contains("member access requires tagged base type")); + .contains("member access requires aggregate base type")); } #[test] -fn test_type_check_rejects_duplicate_tagged_declarations() { +fn test_type_check_rejects_duplicate_aggregate_declarations() { let result = parse_and_type_check( "struct Point { int x; }\nstruct Point { int y; }\nvoid main() { int z = 0; }", ); @@ -297,5 +300,5 @@ fn test_type_check_rejects_duplicate_tagged_declarations() { assert!(result .unwrap_err() .message - .contains("duplicate tagged type declaration")); + .contains("duplicate type declaration")); }