diff --git a/gazprea-clean/Makefile b/gazprea-clean/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/gazprea-clean/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/gazprea-clean/README.md b/gazprea-clean/README.md new file mode 100644 index 0000000..b9b562b --- /dev/null +++ b/gazprea-clean/README.md @@ -0,0 +1,31 @@ +# Editing the Spec + +This project uses [Sphinx](https://www.sphinx-doc.org/) to generate +documentation from [reStructuredText](https://docutils.sourceforge.io/rst.html) +(RST). + +For a quick introduction to Sphinx, refer to the +[Sphinx Quickstart](https://www.sphinx-doc.org/en/master/usage/quickstart.html) +documentation. + +For a quick introduction to RST, refer to the +[reStructuredText Primer](https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html). + +# Building + +## Required Software +- `texlive-full` (for PDF output only) +- `python` + +## Required Python (PyPI) Packages +- `sphinx` +- `sphinx_rtd_theme` + +## Usage + +- `make html` to build html files +- `make latexpdf` to build PDF/LaTeX files +- `make clean` to delete build files + +The HTML build is output to the `_build/html` folder. +The PDF/LaTeX build is output to the `_build/latex` folder. 
diff --git a/gazprea-clean/_static/css/googleFonts.css b/gazprea-clean/_static/css/googleFonts.css new file mode 100644 index 0000000..427463f --- /dev/null +++ b/gazprea-clean/_static/css/googleFonts.css @@ -0,0 +1,18 @@ +/* + * This file is meant to reduce our upload size. Instead of uploading static + * font files, we outsource them instead to other static hosting sites. + * + * If you delete this file you must leave the generated + * _build/html/_static/css/fonts directory alone. This includes the line that + * deletes this folder in the root Makefile. + */ + +@import url('https://fonts.googleapis.com/css2?family=Lato:ital,wght@0,400;0,700;1,400&family=Roboto+Slab:wght@400;700&display=swap'); + +@font-face { + font-family: 'FontAwesome'; + src: url('https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/fonts/fontawesome-webfont.eot?v=4.7.0'); + src: url('https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/fonts/fontawesome-webfont.eot?#iefix&v=4.7.0') format('embedded-opentype'), url('https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/fonts/fontawesome-webfont.woff2?v=4.7.0') format('woff2'), url('https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/fonts/fontawesome-webfont.woff?v=4.7.0') format('woff'), url('https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/fonts/fontawesome-webfont.ttf?v=4.7.0') format('truetype'), url('https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/fonts/fontawesome-webfont.svg?v=4.7.0#fontawesomeregular') format('svg'); + font-weight: normal; + font-style: normal; +} diff --git a/gazprea-clean/_templates/.gitkeep b/gazprea-clean/_templates/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/gazprea-clean/assets/images/GazpreaLogo.png b/gazprea-clean/assets/images/GazpreaLogo.png new file mode 100644 index 0000000..a0b0419 Binary files /dev/null and b/gazprea-clean/assets/images/GazpreaLogo.png differ diff --git a/gazprea-clean/assets/images/logo-reverse.png b/gazprea-clean/assets/images/logo-reverse.png new 
file mode 100755 index 0000000..5db0b5b Binary files /dev/null and b/gazprea-clean/assets/images/logo-reverse.png differ diff --git a/gazprea-clean/conf.py b/gazprea-clean/conf.py new file mode 100644 index 0000000..a7c3404 --- /dev/null +++ b/gazprea-clean/conf.py @@ -0,0 +1,79 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'gazprea' +copyright = '2025, cmput415' +author = 'cmput415' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx_rtd_theme', + 'sphinx.ext.todo' +] + +# Toggles the display of "Todo" message boxes in the output +todo_include_todos = True + +# Toggle warnings in build log when todos are present +todo_emit_warnings = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. 
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +html_theme_options = { + 'logo_only': False, + 'style_nav_header_background': '#007C41', + # Toc options + 'collapse_navigation': False, + 'sticky_navigation': False, +} + +html_logo = 'assets/images/logo-reverse.png' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# These paths are either relative to html_static_path +# or fully qualified paths (eg. https://...) +html_css_files = [ + 'css/googleFonts.css', +] + +# Disable syntax highlighting in code blocks +highlight_language ='none' diff --git a/gazprea-clean/htaccess b/gazprea-clean/htaccess new file mode 100644 index 0000000..717ec6f --- /dev/null +++ b/gazprea-clean/htaccess @@ -0,0 +1 @@ +DirectoryIndex index.html diff --git a/gazprea-clean/impl/backend.rst b/gazprea-clean/impl/backend.rst new file mode 100644 index 0000000..cd45e19 --- /dev/null +++ b/gazprea-clean/impl/backend.rst @@ -0,0 +1,116 @@ +.. _sec:backend: + +Backend +======= + +You don’t need to implement an interpreter for Gazprea. You only need to +implement a *MLIR* code generator that outputs *LLVM IR*. + +.. _ssec:backend_memory: + +Memory Management +----------------- + +It is important that you are able to automatically free and allocate memory for +arrays when they enter and exit scope. You could allocate them on the stack, +but this could be problematic if the arrays are very large. +It is likely safer to use ``malloc`` and ``free`` for these purposes. +This may be done in either your runtime or directly within MLIR. 
+ +Below is an example of how to use ``malloc`` and ``free`` within MLIR using the LLVM dialect: + +:: + + module { + llvm.func @malloc(i32) -> !llvm.ptr + llvm.func @free(!llvm.ptr) + llvm.func @main() -> i32 { + %0 = llvm.mlir.constant(128 : i32) : i32 + %1 = llvm.call @malloc(%0) : (i32) -> !llvm.ptr + llvm.call @free(%1) : (!llvm.ptr) -> () + %c0_i32 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %c0_i32 : i32 + } + } + +It is important that the code generated by your compiler has no memory leaks, +and that all memory is freed as it leaves scope. + +.. _ssec:backend_runtime: + +Runtime Libraries +----------------- + +If you make a runtime library, the runtime library must be implemented +in a runtime directory (``runtime``). Beware that in C++ there is additional +name mangling that occurs to allow class functions. Thus, we recommend +that all runtime functions should be written in C and not in C++. There +is a Makefile in the ``runtime`` folder designed to turn all ``*.c`` and +``*.h`` pairs into part of the unified runtime library ``libruntime.a``. +An example of how to make a runtime function is provided below. + +``functions.c`` + +:: + + #include "functions.h" + + uint64_t factorial(uint64_t n) { + uint64_t fact = 1; + + while (n > 0) { + fact *= n; + n--; + } + + return fact; + } + +``functions.h`` + +:: + + #pragma once + + #include <stdint.h> + + uint64_t factorial(uint64_t n); + +If your compiler was compiling the following input + +:: + + 3! + (2 + 7)! 
+ +Here is how to call the function in the LLVM dialect of MLIR: + +``MLIR src`` + +:: + + module { + // This makes the function available for calling + llvm.func @factorial(i64) -> i64 + + llvm.func @main() -> i32 { + // Calls factorial with the constant 3 as an argument + %0 = llvm.mlir.constant(3 : i64) : i64 + %1 = llvm.call @factorial(%0) : (i64) -> (i64) + + // Adds 2 and 7 together + %2 = llvm.mlir.constant(2 : i64) : i64 + %3 = llvm.mlir.constant(7 : i64) : i64 + %4 = llvm.add %2, %3 : i64 + + // Calls factorial with the result of 2+7 + %5 = llvm.call @factorial(%4) : (i64) -> (i64) + + // Adds the result of 3! with (2+7)! + %6 = llvm.add %1, %5 : i64 + + // Done, return 0 + %c0_i32 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %c0_i32 : i32 + } + } + diff --git a/gazprea-clean/impl/errors.rst b/gazprea-clean/impl/errors.rst new file mode 100644 index 0000000..349a040 --- /dev/null +++ b/gazprea-clean/impl/errors.rst @@ -0,0 +1,323 @@ +.. _sec:errors: + +Errors +====== + +Your implementation is required to report both compile-time and run-time errors. +You must use the exceptions defined in ``include/CompileTimeExceptions.h`` and +the functions defined in ``runtime/include/run_time_errors.h``. Do not modify +these files, you can pass a string to a constructor/function to provide more +details about a particular error. You must pass the corresponding line number to +the exceptions for compile-time errors but not run-time errors. Do not create +new errors. Your compiler is only expected to report the first error it +encounters. + +Syntax Errors +~~~~~~~~~~~~~ + +ANTLR handles syntax errors automatically, but you are required to override the +behavior and throw the ``SyntaxError`` exception from +``include/CompileTimeExceptions.h``. 
+ +For example: + +:: + + /* main.cpp */ + + class MyErrorListener : public antlr4::BaseErrorListener { + void syntaxError(antlr4::Recognizer *recognizer, antlr4::Token * offendingSymbol, + size_t line, size_t charPositionInLine, const std::string &msg, + std::exception_ptr e) override { + std::vector<std::string> rule_stack = ((antlr4::Parser*) recognizer)->getRuleInvocationStack(); + // The rule_stack may be used for determining what rule and context the error has occurred in. + // You may want to print the stack along with the error message, or use the stack contents to + // make a more detailed error message. + + throw SyntaxError(line, msg); // Throw our exception with ANTLR's error message. You can customize this as appropriate. + } + }; + + int main(int argc, char **argv) { + + ... + + gazprea::GazpreaParser parser(&tokens); + + parser.removeErrorListeners(); // Remove the default console error listener + parser.addErrorListener(new MyErrorListener()); // Add our error listener + + ... + } + +For more information regarding the handling of syntax errors in ANTLR, refer to +chapter 9 of +`The Definitive ANTLR 4 Reference `__. + +Compile-time Errors +------------------- + +Compile-time errors must be handled by throwing the exceptions defined in +``include/CompileTimeExceptions.h``. To throw an exception, use the ``throw`` +keyword. + +:: + + throw MainError(1, "program does not have a main procedure"); + +Here are the compile-time errors your compiler must throw: + +* ``SyntaxError`` + + Raised during compilation if the parser encounters a syntactic error in the + program. + +* ``SymbolError`` + + Raised during compilation if an undefined symbol is referenced or a defined + symbol is re-defined in the same scope. + +* ``TypeError`` + + Raised during compilation if an operation or statement is applied to or + between expressions with invalid or incompatible types. 
+ +* ``AliasingError`` + + Raised during compilation if the compiler detects that mutable memory + locations may be aliased. + +* ``AssignError`` + + Raised during compilation if the compiler detects an assignment to a const + value or a tuple unpacking assignment with the number of lvalues different + than the number of fields in the tuple rvalue. + +* ``MainError`` + + Raised during compilation if the program does not have a procedure named + ``main`` or when the signature of ``main`` is invalid. + +* ``ReturnError`` + + Raised during compilation if the program detects a function or procedure + with a return value that does not have a return statement reachable by all + control flows. Control flow constructs may be assumed to always be undecidable, + meaning they may branch in either direction. + + If the subroutine has a ``return`` statement with a type that does not + match the owning subroutine's type, the line number of the ``return`` + statement should be reported, along with the name and (correct) type of the + enclosing routine. + + Note also that, strictly speaking, this is a type error, not a return error. + If the procedure/function is missing a ``return`` statement, then the line + number of the subroutine declaration should be printed instead. + +* ``GlobalError`` + + Raised during compilation if the program detects a ``var`` global + declaration, a global declaration without an initializing expression, a + global declaration with an invalid initializing expression or any statement + that does not belong in the global scope. + +* ``StatementError`` + + Raised during compilation if the program is syntactically valid but the + compiler detects an invalid statement in some context. For example, + ``continue`` or ``break`` outside of a loop body. + +* ``CallError`` + + Raised during compilation if the procedure call statement is used to call a + function. Also raised if a procedure is called in an invalid context. 
For + example, a procedure call in an output stream expression. + +* ``DefinitionError`` + + Raised during compilation if a procedure or function is declared but not + defined. + +* ``LiteralError`` + + Raised during compilation if a literal value in the program does not fit + into its corresponding data type. + +* ``MathError`` + + May be raised during compile time expression evaluation when division by zero occurs. + Conditions for raising are equivalent to a runtime ``MathError``. + +* ``IndexError`` + + May be raised during compilation if an expression used to index an array is an + ``integer``, but is invalid for the array size. + +* ``SizeError`` + + May be raised during compilation if the compiler detects an operation or statement + is applied to or between arrays with invalid or incompatible + sizes. Read more about when a ``SizeError`` should be raised at run-time + instead of compile-time in the :ref:`ssec:errors_sizeErrors` section. + +* ``StrideError`` + + May be raised during compilation if the ``by`` operation is used with a stride value + ``<=0``. + +Here is an example invalid program and a corresponding compile-time error: + +:: + + 1 procedure main() returns integer { + 2 integer x; + 3 } + +:: + + ReturnError on line 1: procedure "main" does not have a return statement reachable by all control flows + +Run-time Errors +--------------- + +Run-time errors must be handled by calling the functions defined in +``runtime/include/run_time_errors.h``. + +:: + + MathError("cannot divide by zero") + +The runtime errors listed below are a subset of compile time errors. Since it is not only impractical, +but undecidable to catch the following errors exclusively at compile time, Gazprea leaves the setting +at which they are raised up to the implementation. To put it simply, you can raise runtime errors either +at compile time or at runtime and the tester will accommodate to different implementations. 
+ +* ``SizeError`` + + Raised at runtime if an operation or statement is applied to or between + arrays with invalid or incompatible sizes. + +* ``IndexError`` + + Raised at runtime if an expression used to index an array is an + ``integer``, but is invalid for the array size. + +* ``MathError`` + + Raised at runtime if either zero to the power of N, where N is <= 0, or a + division by zero is evaluated. + +* ``StrideError`` + + Raised at runtime if the ``by`` operation is used with a stride value + ``<=0``. + +Here is an example invalid program. If your compiler is smart, you may raise the latter error; if you +prefer not to implement static analysis, the former error can be emitted at runtime. + +:: + + 1 procedure main() returns integer { + 2 integer[3] x = [2, 4, 6]; + 3 return x[4]; + 4 } + +:: + + IndexError: This is a runtime error, invalid index "4" on array with size 3. + +:: + + IndexError on line 3: This is a compile time error, invalid index of "4" on array with size 3. + + +More Examples +------------- + +:: + + /* Indexes */ + character[3] v = ['a', 'b', 'c']; // Indexing is harder than it looks! + integer i = 10; + v(3) = 'X'; // SyntaxError + v[i] = '?'; // Run-time error + v['a'] = '!'; // TypeError + i[1] = 1; // SymbolError + + /* Tuples */ + tuple (integer, integer) a = (9, 5); + integer b; + integer c; + integer d; + b, c, d = a; // AssignError + tuple(integer, integer, integer) z = a; // TypeError + + +How to Write an Error Test Case +------------------------------- + +Your compiler test suite can include error test cases. An error test case can include +a compile-time or run-time error. In either case, the expected output should include +exactly one line of text. In order to simplify marking, **only one error should be present in the test case** +and exactly one line of expected output should catch it. 
Below is an example: + +:: + + var integer x = 0; + + procedure main() returns integer { + return 0; + } + +:: + + GlobalError on line 1 + +Precisely defining the line number on which an error occurs can be difficult. +Should the ``AssignError`` below occur on line 3, 6 or in between? + +:: + + procedure main() returns integer { + const integer i = 5; + i + = + 5 + ; + } + +For this reason, test cases that deliberately make the line number ambiguous will be disqualified. +If an obvious line number is not apparent, refer to the reference solution on the 415 +compiler explorer. For runtime errors, the line number is not required. Here is an +example of a run-time error test case and the corresponding expected output file: + +:: + + procedure main() returns integer { + 1..1 by 0 -> std_output; + return 0; + } + +:: + + StrideError + +How to make the Tester Happy +------------------------------------------ + +For error test cases, the tester inspects the first line from ``stderr``. +Therefore, you must ensure that you do not pollute this stream with debug messages etc. + +Additionally, the tester only knows to stop the toolchain prematurely if your program +terminates with a non-zero exit code. Once you have caught an error make sure to return +a non-zero exit code. + +Finally, the tester is lenient towards the type given to a particular error. Specifically +the tester simply confirms that the substring "Error" is present and for compile +time errors that the correct line is provided. + +This leniency is motivated by the fact that sometimes determining which type to call an error is +difficult. For example, it may be arguable that a ``ReturnError`` should be interpreted as a +``TypeError`` and vice versa as previously mentioned. + + diff --git a/gazprea-clean/impl/part_1.rst b/gazprea-clean/impl/part_1.rst new file mode 100644 index 0000000..7b7128e --- /dev/null +++ b/gazprea-clean/impl/part_1.rst @@ -0,0 +1,99 @@ +.. 
_sec:part1: + +Compiler Implementation — Part 1 +================================ + +This section lists the portions of the *Gazprea* specification that must +be implemented to complete the part 1 of the compiler implementation. +All developers are advised to read the full specification for the +language prior to start the implementation of Part 1 because decisions +made while implementing Part 1 can make the implementation of Part 2 +significantly more challenging. Thus, planning ahead for Part 2 is the +recommended strategy. + +#. :ref:`sec:comments` +#. :ref:`sec:types` + + * :ref:`ssec:boolean` + * :ref:`ssec:character` + * :ref:`ssec:integer` + * :ref:`ssec:real` + * :ref:`ssec:tuple` + +#. Type Support + + * :ref:`sec:typeQualifiers` + + * :ref:`ssec:typeQualifiers_var` + * :ref:`ssec:typeQualifiers_const` + + * :ref:`sec:typePromotion` + * :ref:`sec:typeCasting` + * :ref:`sec:typeInference` + * :ref:`sec:typealias` + +#. :ref:`sec:statements` + + * :ref:`ssec:statements_assign` + * :ref:`sec:declaration` + * :ref:`sec:global` + * :ref:`ssec:statements_block` + * :ref:`ssec:statements_loop` + + * :ref:`ssec:statements_break` + * :ref:`ssec:statements_continue` + + * :ref:`ssec:statements_cond` + * :ref:`sec:streams` + * :ref:`sec:function` + * :ref:`sec:procedure` + +#. :ref:`sec:expressions` + + * unary+, unary-, not + + * ^ + + * \*,/,% + + * +,- + + * <,>,<=,>=,==,!= + + * and + + * or, xor + + * Variable references + + * Literal Values + + * Tuple reference + + * Function calls + +#. 
:ref:`sec:errors` + + * SyntaxError + + * SymbolError + + * TypeError + + * AliasingError + + * AssignError + + * MainError + + * ReturnError + + * GlobalError + + * StatementError + + * CallError + + * DefinitionError + + * MathError diff --git a/gazprea-clean/impl/part_2.rst b/gazprea-clean/impl/part_2.rst new file mode 100644 index 0000000..179ef94 --- /dev/null +++ b/gazprea-clean/impl/part_2.rst @@ -0,0 +1,38 @@ +Compiler Implementation — Part 2 +================================ + +This section list the elements of the *Gazprea* specification that must +be completed for the Part 2 of the compiler implementation. All the +elements of Part 1 must have been completed because Part 2 builds on +Part 1. + +#. :ref:`All Previous Features ` +#. :ref:`sec:types` + + * :ref:`ssec:array` + * :ref:`ssec:matrix` + * :ref:`ssec:string` + * :ref:`ssec:vector` + * :ref:`ssec:struct` + * :ref:`ssec:tuple` + +#. :ref:`sec:statements` + + * :ref:`sssec:statements_iter_loop` + +#. :ref:`sec:expressions` + + * :ref:`Operators ` + * :ref:`ssec:expressions_generators` + +#. :ref:`sec:builtIn` + + * :ref:`ssec:builtIn_reverse` + * :ref:`ssec:builtIn_rows_cols` + * :ref:`ssec:builtIn_length` + * :ref:`ssec:builtIn_format` + * :ref:`ssec:builtIn_stream_state` + +#. :ref:`ssec:backend_memory` + +#. :ref:`sec:errors` diff --git a/gazprea-clean/index.rst b/gazprea-clean/index.rst new file mode 100644 index 0000000..54a4f74 --- /dev/null +++ b/gazprea-clean/index.rst @@ -0,0 +1,46 @@ +|gazprea_logo| + +Gazprea +======= + +*Gazprea* is derived from a language originally designed at the IBM +Hardware Acceleration Laboratory in Markham, ON. + +.. toctree:: + :hidden: + + self + +.. 
toctree:: + :maxdepth: 3 + :caption: Language Specification + :numbered: + + spec/keywords + spec/identifiers + spec/comments + spec/declarations + spec/type_qualifiers + spec/types + spec/type_inference + spec/type_casting + spec/type_promotion + spec/typedef + spec/streams + spec/expressions + spec/statements + spec/functions + spec/procedures + spec/globals + spec/built_in_functions + +.. toctree:: + :maxdepth: 2 + :caption: Implementation + + impl/backend + impl/part_1 + impl/part_2 + impl/errors + +.. |gazprea_logo| image:: assets/images/GazpreaLogo.png diff --git a/gazprea-clean/make.bat b/gazprea-clean/make.bat new file mode 100644 index 0000000..2119f51 --- /dev/null +++ b/gazprea-clean/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/gazprea-clean/nextTime.txt b/gazprea-clean/nextTime.txt new file mode 100644 index 0000000..1388b5e --- /dev/null +++ b/gazprea-clean/nextTime.txt @@ -0,0 +1,144 @@ +Should multi-step implicit promotion be allowed? + - e.g. `[2.4, 3.6] * 5` +Define or disallow scalar to Matrix promotion with Matrix Multiplication operator (**) + - e.g. `[[1,2],[3,4],[5,6]] ** 2` +Two-sided promotion allowed? + - e.g. `1 + 1..10` = `[1 1 1] + [1 2 3]`? 
+How should string concatenation with a character work? + - e.g. `"abcd" || 'e'` + - Disallowed, clarify that `||` only works between `string` and `character[*]` + - As an append, resulting in `"abcde"` + - As a splat and concatenate, resulting in `"abcdeeee"` +Should we allow casting of `null` and `identity`? + - e.g. `as(null)` +Should we change to type and other symbols in a shared scope tree or remain with non-conflicting resolution paths. + - e.g. + ``` + typedef integer a; + const integer a = 0; // Redefinition of symbol `a` or legal? + ``` +How should we refer to functions and procedures collectively when something applies to both? + - Routine + - Subroutine + +How should we approach rewinding whitespace? + - Don't. + - Limit buffer. + - Run-length encoding. + +Reporting errors for size mismatches, compile vs runtime. +e.g. `integer[1 + 2] vec = 1..10;` +Quick fix implemented: +"We've decided to simplify this portion of the specification. There will be no expressions in a size declaration. +The only two valid tokens in a size declaration are * and a literal integer. If the size is a literal integer AND +the variable is initialized with 1) a literal vector or a matrix whose size is a literal in the specified dimension, + or 2) an interval whose bounds are literals, you should perform compile time size checking." + +Should tuples be mutable? +``` +tuple(integer, integer) x = (1, 6); +x.1 = 8; // is this valid? +x.2 <- std_input // is this valid? +``` +``` +// valid: +tuple(integer, integer) tup = (1, 2); +tup = (3, tup.2); // Like changing tup.1 = 3 +``` + +Allow intervals to be printed? +Intervals can't be printed. You would need to print it like this: `1..3 by 1 ->std_output` + +Allow tuple printing? + +Tuple assignment, field renaming. This should be the expected behavior? +e.g. 
+``` +tuple(real a, real b) foo = (1, 2); +tuple(real c, real d) bar = (3, 4); + +var baz = foo; +baz.a -> std_output; // 1 +baz.b -> std_output; // 2 + +baz = bar; +baz.a -> std_output; // 3 +baz.b -> std_output; // 4 +``` + + +The following code isn't legal +`boolean x = not null;` +In particular, unary operators on the polymorphic constants can't +determine their type without visiting multiple levels of the tree and eventually revisiting to determine the final value +`float x = - + - + - + identity;` + + +Function and variable with the same name. +``` +procedure f(integer a) returns integer {...} +procedure main() return integer { + integer f = 0; + call f(f); + return 0; +} +``` +Should `call f(f);` be resolved to call function f with argument integer f? Or should an error be raised: attempt to call an integer? + + +Change the `reverse` built-in function to accept/return a string instead of a character vector? +``` +string s = "hi"; +string rs = reverse(s); +``` +equivalent to +``` +string s = "hi"; +string rs = as(reverse(as(s))); +``` +but +``` +string s = "hi"; +reverse(s) -> std_output; +``` +produces (at the moment) `['i', 'h']` + + +Behavior of break statements in iterator loops with multiple domains +``` +loop i in u, j in v{ + break; +} +// is equivalent to +loop i in u { + loop j in v { + break; + } +} +``` + + +If passing by reference, types must match? +``` +procedure p(var real a) { + a = 2.5; +} +/* called in main */ +integer i = 3; +call p(i); // error, differing types? +``` + + +Operations on characters +`character x = ('x');` is invalid because characters have no defined operations. +is `character[2] y = (['x', 'y']);` valid? +This is inconvenient because many students implemented characters as a part of expressions. Then (expr) != expr + + +Indexing on intervals is not allowed, but one could cast the interval to a vector before indexing. +Couldn't the interval also be promoted to a vector automatically? 
+``` +integer interval x = 1..3; +integer a = x[2]; // Illegal +``` + diff --git a/gazprea-clean/spec/built_in_functions.rst b/gazprea-clean/spec/built_in_functions.rst new file mode 100644 index 0000000..877621e --- /dev/null +++ b/gazprea-clean/spec/built_in_functions.rst @@ -0,0 +1,127 @@ +.. _sec:builtIn: + +Built-In Functions +================== + +*Gazprea* has some built-in functions. These built in functions may have +some special behaviour that normal functions can not have, for instance +many of them will work on arrays of any element type. +Normally a function must specify the element type of an array argument. + +The name of built in functions are reserved and a user program cannot +define a function or a procedure with the same name as a built-in function. +If a declaration or a definition with the same name as a built-in function is +encountered in a *Gazprea* program, then the compiler should issue an error. + +Note that although the examples below all use arrays, all the built-ins work +on Vectors and Strings, since they are always compatible with arrays. + +.. _ssec:builtIn_length: + +Length +------ + +``length`` takes an array of any element type, and returns an integer +representing the number of elements in the array. + +:: + + integer[*] v = 1..5; + + length(v) -> std_output; /* Prints 5 */ + + +.. _ssec:builtIn_rows_cols: + +Shape +----- + +The built-in ``shape`` operates on arrays of any dimension, and returns an +array listing the size of each dimension. + +:: + + integer[*, *] M = [[1, 2, 3], [4, 5, 6]]; + + shape(M) -> std_output; /* Prints [2, 3] */ + +.. _ssec:builtIn_reverse: + +Reverse +------- + +The reverse built-in takes any array, Vector, or String, and returns a +reversed version of it. + +:: + + integer[*] v = 1..5; + integer[*] w = reverse(v); + + v -> std_output; /* Prints 12345 */ + w -> std_output; /* Prints 54321 */ + +.. 
_ssec:builtIn_format: + +Format +------- + +The ``format`` built-in takes any scalar as input and returns a ``string`` +containing the formatted value of the scalar. + +:: + + integer i = 24; + real r = 2.4; + + "i = " || format(i) || ", r = " || format(r) || '\n' -> std_output; + // Prints: "i = 24, r = 2.4\n" + +Note that ``format`` will have to allocate space to hold the return string. +You will have to figure out how to manage the memory so it is reclaimed +eventually. + +.. _ssec:builtIn_stream_state: + +Stream State +------------ + +When reading values of certain types from ``std_input`` it is possible that an +error is encountered, or that the end of the stream has been encountered. In +order to handle these situations *Gazprea* provides a built in procedure that is +implicitly defined in every file: + +:: + + procedure stream_state(var input_stream) returns integer; + +This function can only be called with the ``std_input`` as a parameter, but it’s +general enough that it could be used if the language were expanded to include +multiple input streams. + +When called, ``stream_state`` will return an integer value. The return value is +an error code defined as follows: + + - ``0``: Last read from the stream was successful. + - ``1``: Last read from the stream encountered an error. + - ``2``: Last read from the stream encountered the end of the stream. + +``stream_state`` is initialized to ``0``, which is the value returned if no +read has been issued. + +:: + + boolean b; + integer i; + + // Input stream: 9 + b <- std_input; // b = false (error reading boolean) + i = stream_state(std_input); // i = 1 (last read was error) + i <- std_input; // i = 9 (successfully read integer) + i = stream_state(std_input); // i = 0 (last read was success) + b <- std_input; // b = false (read end of stream) + i = stream_state(std_input); // i = 2 (last read was end of stream) + + +The input stream is described in more detail in the +:ref:`input stream ` section. 
diff --git a/gazprea-clean/spec/comments.rst b/gazprea-clean/spec/comments.rst new file mode 100644 index 0000000..5fd2bb3 --- /dev/null +++ b/gazprea-clean/spec/comments.rst @@ -0,0 +1,32 @@ +.. _sec:comments: + +Comments +======== + +*Gazprea* supports *C99* style comments. + +Single line comments are made using ``//``. Anything on the line after +the two adjacent forward slashes is ignored. For example: + +:: + + integer x = 2 * 3; // This is ignored + +Multi-line block comments are made using **/\*** and **\*/**. The start +of a block comment is marked using **/\***, and the end of the block +comment is the **first** occurrence of the sequence of characters +**\*/**. For example: + +:: + + /* This is a block comment. It can span as many lines as we want, and + only ends when the closing sequence is encountered. + */ + integer x = 2 * 3; /* Block comments can also be on a single line */ + +Block comments cannot be nested because the comment finishes when it +reaches the first closing sequence. For example, this is invalid: + +:: + + /* A comment /* A nested comment */ */ diff --git a/gazprea-clean/spec/constexpr.rst b/gazprea-clean/spec/constexpr.rst new file mode 100644 index 0000000..07da53c --- /dev/null +++ b/gazprea-clean/spec/constexpr.rst @@ -0,0 +1,101 @@ +.. _sec:constexpr: + +Constant Expressions +==================== + +A constant expression (sometimes called a constexpr) is an expression that can +be fully +evaluated by the compiler at compile time. This feature exists primarily +for specifying the size of +:ref:`statically-sized arrays `. + +In *Gazprea*, ``constexpr`` is not a keyword, but a property of a ``const`` +variable. A ``const`` variable is considered a ``constexpr`` if and only if its +initializer expression meets a strict set of criteria: + +.. _ssec:constexpr_rules: + +Rules for Constant Expressions +------------------------------ + +An expression is a valid ``constexpr`` if it is composed exclusively of: + +1.
Literals of base types (``boolean``, ``integer``, ``real``, ``character``). +2. Operators, including ``+``, ``-``, ``*``, ``/``, ``not``, ``and``, ``or``, + between two or more ``constexpr``\ s. +3. Constructors for aggregate types, provided that the aggregate is const and + all members are ``constexpr``\ s. +4. Index or field access on ``constexpr`` aggregate types. +5. Other variables that are themselves valid ``constexpr``\ s. + +An expression is **not** a ``constexpr`` if it contains: + +1. References to ``var`` variables. +2. Function or procedure calls. +3. Any I/O operations (``<-``). + +The compiler must perform this validation recursively. When checking if a variable +is a ``constexpr``, the compiler must trace its entire dependency chain. If the +chain ever depends on a runtime value, the check fails. + +**Examples:** + +:: + + // Legal Constant Expressions + const A = 10; + const B = A * 2; // Depends on another constexpr + const C = B + 5; // C is 25 + + // Illegal Constant Expressions + var x = 10; + const Y = x + 5; // Illegal: depends on a 'var' + + function get_val() returns integer { return 100; } + const Z = get_val(); // Illegal: depends on a function call + +.. _ssec:constexpr_aggregates: + +Constant Expressions with Aggregate Types +----------------------------------------- + +Arrays and tuples can also be ``constexpr``\ s if they meet specific criteria, +allowing them to be used to define other constants. + +#. Arrays + + A ``const`` statically-sized array is a ``constexpr`` if: + 1. Its size is a valid ``constexpr``. + 2. All of its element initializers are valid ``constexpr``\ s. + + Dynamically-sized arrays (e.g., ``integer[*]``) cannot be ``constexpr`` + aggregates as their size is not known at compile time even if it can be + inferred.
+ + :: + + const WIDTH = 5; + const LOOKUP_TABLE: integer[WIDTH] = [10, 20, 30, 40, 50]; // Legal constexpr array + + const ELEMENT = LOOKUP_TABLE[3]; // Legal: ELEMENT is a constexpr with value 30 + var my_array: integer[ELEMENT]; // Legal: creates a static array of size 30 + + const BAD_TABLE: integer[2] = [10, get_val()]; // Illegal: initializer is not a constexpr + + + Note that these rules also apply to variables marked ``const`` within + non-global scopes. + + + +#. Tuples + + A ``const`` tuple is a ``constexpr`` if all of its fields are initialized with + valid constant expressions. + + :: + + const CONFIG = (true, 10 * 2); // Legal constexpr tuple + + const IS_ENABLED = CONFIG.1; // Legal: IS_ENABLED is a constexpr with value 'true' + const VALUE = CONFIG.2; // Legal: VALUE is a constexpr with value 20 diff --git a/gazprea-clean/spec/declarations.rst b/gazprea-clean/spec/declarations.rst new file mode 100644 index 0000000..d1c4e06 --- /dev/null +++ b/gazprea-clean/spec/declarations.rst @@ -0,0 +1,91 @@ +.. _sec:declaration: + +Declarations +============ + +Variables must be declared before they are used. Aside from +a few :ref:`special cases `, declarations have the +following formats: + +:: + + = ; + ; + +Both declarations are creating a variable with an :ref:`identifier ` of +````, with :ref:`type ` ````, and optionally a :ref:`type qualifier ` of ````. +The two qualifiers are ``var`` and ``const``, which qualify the identifier as +*mutable* or *immutable*, respectively. +In *Gazprea* it is important to remember that if the optional qualifier is +omitted the default is ``const``, i.e. variables are immutable by default. + +The first declaration style explicitly initializes the value of the new +variable with the value of ````. + +In *Gazprea* all variables must be initialized in a well defined manner in order +to ensure functional purity. 
If the variables are not initialized to a known +value their initial value might change depending on when the program is run. +Therefore, the second declaration style is equivalent to setting the value to +zero. + +For simplicity *Gazprea* assumes that declarations can only appear at +the beginning of a block. For instance this would not be legal in +*Gazprea*: + +:: + + var integer i = 10; + if (blah) { + i = i + 1; + real i = 0; // Illegal placement of a declaration. + } + +because the declaration of the real version of ``i`` does not occur at +the start of the block. + +The following declaration placement is legal: + +:: + + var integer i = 10; + if (blah) { + var real i = 0; // At the start of the block. All good. + i = i + 1; + } + +The declaration of a variable happens after initialization. Thus it is +illegal to refer to a variable within its own initialization statement. + +:: + + /* All of these declarations are illegal, they would result in garbage values. */ + integer i = i; + integer[10] v = v[0] * 2; + +An error message should be raised about the use of undeclared variables +in these cases. If a variable of the same name is declared in an +enclosing scope, then it is legal to use that in the initialization of a +variable with the same name. For instance: + +:: + + integer x = 7; + if (true) { + integer y = x; /* y gets a value of 7 */ + real x = x; /* Refers to the enclosing scope's 'x', so this is legal */ + + /* Now 'x' refers to the real version, with a value of 7.0 */ + } + +.. _ssec:declaration_special: + +Special cases +------------- + +Special cases of declarations are covered in their respective sections. + +#. :ref:`Arrays ` +#. :ref:`Tuples ` +#. :ref:`Globals ` +#. :ref:`Functions ` +#. :ref:`Procedures ` diff --git a/gazprea-clean/spec/expressions.rst b/gazprea-clean/spec/expressions.rst new file mode 100644 index 0000000..bde026d --- /dev/null +++ b/gazprea-clean/spec/expressions.rst @@ -0,0 +1,154 @@ +.. 
_sec:expressions: + +Expressions +=========== + +Expressions can only exist within a statement or another expression. + +.. _ssec:expressions_toop: + +Table of Operator precedence +---------------------------- + +The following is a table containing all of the precedences and +associativities of the operators in *Gazprea*. + ++----------------+------------------------------------+-------------------+ +| **Precedence** | **Operators** | **Associativity** | ++================+====================================+===================+ +| (Highest) 1 | ``.`` | left | ++----------------+------------------------------------+-------------------+ +| 2 | ``[]`` (indexing) | left | ++----------------+------------------------------------+-------------------+ +| 3 | ``..`` | N/A | ++----------------+------------------------------------+-------------------+ +| 4 | unary ``+``, unary ``-``, ``not`` | right | ++----------------+------------------------------------+-------------------+ +| 5 | ``^`` | right | ++----------------+------------------------------------+-------------------+ +| 6 | ``*``\ , ``/``\ , ``%``, ``**`` | left | ++----------------+------------------------------------+-------------------+ +| 7 | ``+``\ , ``-`` | left | ++----------------+------------------------------------+-------------------+ +| 8 | ``by`` | left | ++----------------+------------------------------------+-------------------+ +| 9 | ``<``\ , ``>``\ , ``<=``\ , ``>=`` | left | ++----------------+------------------------------------+-------------------+ +| 10 | ``==``\ , ``!=`` | left | ++----------------+------------------------------------+-------------------+ +| 11 | ``and`` | left | ++----------------+------------------------------------+-------------------+ +| 12 | ``or``\ , ``xor`` | left | ++----------------+------------------------------------+-------------------+ +| (Lowest) 13 | ``||`` | right | ++----------------+------------------------------------+-------------------+ + +.. 
_ssec:expressions_generators: + +Generators +---------- + +A generator may be used to construct either a one- or two-dimensional array. +A generator creates a value of a 1D array type when one domain variable is +used, and a 2D array type when two domain variables are used. +Any other number of domain variables will yield an error. + +A generator consists of either one or two domain expressions, +and an additional expression on the right hand side of the bar (``|``). +This additional expression is used to create the generated values. For example: + +:: + + integer[10] v = [i in 1..10 | i * i]; + /* v[i] == i * i */ + + integer[2][3] M = [i in 1..2, j in 1..3 | i * j]; + /* M[i][j] == i * j */ + +The expression to the right of the bar (``|``) is used to generate the +value at the given index. +Let ``T`` be the type of the expression to the right of the bar (``|``). Then, +if the domain of the generator is an array of size ``N``, the result will be an +array of size ``N`` with element type ``T``. Otherwise, if the domain of the +generator is a matrix of size ``N`` x ``M``, the result will be a matrix of size +``N`` x ``M`` with element type ``T``. +Generators may be nested, and +may be used within domain expressions. For instance, the generator below +is perfectly legal: + +:: + + integer i = 7; + + /* The domain expression should use the previously defined i */ + integer[*] v = [i in [i in 1..i | i] | [i in 1..10 | i * i][i]]; + + /* v should contain the first 7 squares. */ + +.. _ssec:expressions_dom_expr: + +Domain Expressions +------------------ + +Domain expressions consist of an identifier denoting an iterator variable and +an expression that evaluates to **any** array type. +Domain expressions can only appear within iterator loops and generators. +A domain expression is a way of declaring a variable that +is local to the loop or generator, that takes on values from +the domain expression array in order.
+Domain expressions must evaluate to a type, which means empty literal arrays +yield a ``TypeError``. +The scope of the domain variables (the left hand side of the declaration) is +within the body of the generator or loop. +The domain expressions (the right hand side) are all evaluated before any of the +domain variables are initialized, and therefore the domain expression scope is +the one enclosing the iterator loop or generator. + +For instance: + +:: + + integer i = 7; + + /* This will print 1234567 */ + loop i in 1..i { + i -> std_output; + } + +Domain variables are not initialized when they are declared. For +instance, in loops they are initialized at the start of each execution of +the loop’s body statement. However, we may chain domain variables using +commas, such as in matrix generators. + +:: + + integer i = 2; + + /* The "i"s in both domain expressions are in the same scope, which is + * the one enclosing the generator. Therefore the matrix is: [[0 0 0] [0 1 2] [0 2 4]] + */ + integer[3,3] mat = [ i in 0..i, j in 0..i | i*j ]; + +The domain for the domain expression is only evaluated once. For +instance: + +:: + + var integer x = 1; + + /* 1..x is only evaluated the first time the loop executes, so it is + simply 1..1, and not an infinite loop. */ + loop i in 1..x { + x = x + 1; + } + +This is true for domain expressions within generators as well. + +Iterator variables can be assigned to and re-declared within the enclosed iterator loop. +The variable is re-initialized according to the expression each iteration. + +:: + + loop i in 1..6 { + integer i = 5; + } diff --git a/gazprea-clean/spec/extensions/borrow-checker.rst b/gazprea-clean/spec/extensions/borrow-checker.rst new file mode 100644 index 0000000..7cc1013 --- /dev/null +++ b/gazprea-clean/spec/extensions/borrow-checker.rst @@ -0,0 +1,82 @@ +..
_ssec:ext_borrow_checking: + +Ownership and Borrowing System +============================== + +One of the most powerful but complex extensions would be to replace *Gazprea*'s +default "deep copy" memory model with an ownership and borrowing system, similar +to the one pioneered by the Rust programming language. + +**High-Level Goal** +------------------- + +The goal of this system is to guarantee memory safety (no dangling pointers, no +data races) at compile time **without** requiring a garbage collector. This would +allow for highly performant code with C-like speed while providing strong safety +guarantees. It would enable powerful features like zero-cost mutable "views" +into arrays, solving the problem discussed in the main specification. + +A new keyword, such as ``ref``, could be introduced to create borrowed +references to data. + +:: + + // Hypothetical Gazprea with borrowing + procedure sort_portion(ref arr: integer[*]) { + // ... sort the slice in-place ... + } + + var my_data: integer[100] = ...; + call sort_portion(ref my_data[10..20]); // Pass a mutable view + +Major Architectural Changes Required +------------------------------------ + +Implementing a borrow checker is not a small feature; it is a fundamental change +to the entire compiler architecture. It would require: + +1. **A Lifetime-Aware Type System:** The type checker would need to be + fundamentally altered to understand lifetimes. A type like ``ref integer`` + is incomplete; the full type is ``ref<'a> integer``, where ``'a`` is a + lifetime parameter that the compiler must track and validate. This involves + complex inference and subtyping rules. + +2. **Control-Flow-Sensitive Static Analysis:** The compiler would need a new, + major analysis pass that runs after initial type checking. This pass must: + a. Build a Control-Flow Graph (CFG) for every function. + b. 
Track the state of every variable (owned, immutably borrowed, mutably + borrowed, or moved) along every possible execution path. + c. This analysis, often called "borrow checking," is a non-trivial data-flow + analysis problem. + +3. **Sophisticated Error Reporting:** The compiler must be able to generate + human-understandable error messages for complex borrow-checking failures, + such as "cannot borrow `x` as mutable because it is also borrowed as + immutable in function `f`." + +Documentation and Further Reading +--------------------------------- + +**Community and Engineering Focused:** + +* **The Rust Programming Language (Book):** The official book provides the most + accessible introduction to the concepts of ownership, borrowing, and + lifetimes. Chapters 4, 10, and 15 are particularly relevant. + * *Link:* `https://doc.rust-lang.org/book/ch04-00-understanding-ownership.html `_ +* **Rustonomicon:** For a deep dive into the memory model and the unsafe code + that a borrow checker allows you to avoid. + * *Link:* `https://doc.rust-lang.org/nomicon/ `_ +* **Niko Matsakis's Blog:** A series of blog posts from a lead Rust developer + detailing the implementation and evolution of the borrow checker. Essential + for understanding the practical engineering challenges. + * *Link:* `https://smallcultfollowing.com/babysteps/ `_ + +**Academic and Theory Focused:** + +* **"Region-based memory management"** and **"Affine types"** are the core + computer science concepts behind borrow checking. Academic papers on these + topics provide the theoretical foundation. +* **RustBelt: Securing the Foundations of the Rust Programming Language (PLDI 2018):** + A key academic paper that formally models and proves the safety of Rust's + type system. 
+ * *Link:* `https://dl.acm.org/doi/10.1145/3296979.3192384 `_ diff --git a/gazprea-clean/spec/extensions/exposing-memory-primitives.rst b/gazprea-clean/spec/extensions/exposing-memory-primitives.rst new file mode 100644 index 0000000..e69de29 diff --git a/gazprea-clean/spec/extensions/extern.rst b/gazprea-clean/spec/extensions/extern.rst new file mode 100644 index 0000000..e69de29 diff --git a/gazprea-clean/spec/extensions/multi-file-projects.rst b/gazprea-clean/spec/extensions/multi-file-projects.rst new file mode 100644 index 0000000..e69de29 diff --git a/gazprea-clean/spec/extensions/type-methods.rst b/gazprea-clean/spec/extensions/type-methods.rst new file mode 100644 index 0000000..e69de29 diff --git a/gazprea-clean/spec/functions.rst b/gazprea-clean/spec/functions.rst new file mode 100644 index 0000000..238e559 --- /dev/null +++ b/gazprea-clean/spec/functions.rst @@ -0,0 +1,218 @@ +.. _sec:function: + +Functions +========= + +A function in *Gazprea* has several requirements: + +1. All of the arguments are implicitly ``const``, and can not be mutable or + mutated within the function. + +2. Function arguments cannot contain type qualifiers. Including a type qualifier with a function argument should result in a ``SyntaxError``. + +3. Argument types must be explicit. Dynamic sized arrays are allowed + +4. Functions can not perform any I/O. + +5. Functions can not rely upon any mutable state outside of the function. + +6. Functions can not call any procedures. + +7. Functions must be declared in the global scope. + +The reason for this is to ensure that functions in *Gazprea* behave as +pure functions. Every time you call a function with the same arguments +it will perform the exact same operations. This has a lot of benefits. +It makes code easier to understand if functions only depend upon their +inputs and not some hidden state, and it also allows the compiler to +make more assumptions and as a result perform more optimizations. + +.. 
_ssec:function_syntax: + +Syntax +------ + +A function is declared using the function keyword. Each function is +given an identifier, and an arguments list enclosed in parenthesis. If +no arguments are provided an empty set of parenthesis, ``()``, must be +used. The return type of the function is specified after the arguments +using ``returns``. + +A function can be given by a single expression. For instance: + +:: + + function times_two(integer x) returns integer = 2 * x; + +This defines a function called times_two which can be used as follows: + +:: + + /* Prints 8. value gets assigned the result of calling times_two with an + argument of 4 + */ + integer value = times_two(4); + + value -> std_output; "\n" -> std_output; + +Functions can have an arbitrary number of arguments. Here are some +examples of functions with different numbers of arguments: +:: + + /* A function with no arguments */ + function f() returns integer = 1; + + /* A function with two arguments */ + function pythag(real a, real b) returns real = (a^2 + b^2)^(1./2); + + /* A function with different types of arguments */ + function get(real[*] a, integer i) returns real = a[i]; + +These can be called as follows: + +:: + + integer x = f(); /* x == 1 */ + real c = pythag(3, 4); /* Type promotion to real arguments. c == 5.0 */ + real value = get([i in 1..10 | i], 3); /* value == 3 */ + +A function’s body can also be given by a block statement instead of a +single expression. In this case the return value of the function is +given with the return statement. A return statement must be reached by +all possible control flows in the function before the end of the +function is encountered. 
+ +:: + + /* Invalid -- should cause a compiler error */ + function f (boolean b) returns integer { + if (b) { + return 3; + } + } + + /* Valid, all possible branches hit a return statement with a valid type */ + function g (boolean b) returns integer { + if (b) { + return 3; + } + else { + return 8; + } + } + +``f`` is invalid since if ``b == false``, then we reach the end of the +function without a return statement, so we don’t know what value +``f(false)`` should take on. + +:: + + /* This is invalid because if the loop ever finished executing the + function would end before a return statement is encountered. In + general the compiler can not tell when a loop would execute + forever, so we make the assumption that all branches in the control + flow could be followed. */ + function f() returns integer { + var integer x = 0; + loop { + x = x + 1; + } + } + + /* This is valid. Even though the loop goes on forever so that a + return is never reached, execution never hits the end of the + function without a return. */ + function g() returns integer { + var integer x = 0; + loop { + x = x + 1; + } + + return x; + } + +Each function has its own scope, but globals can be accessed within the +function if they were declared before the function was defined. + +.. _ssec:function_fwd_declr: + +Function Prototypes +------------------- + +Functions can be declared before they are defined in a *Gazprea* file. +This allows function definitions to be moved to more convenient +locations in the file, and allows for multiple compilation units if the +function definitions are in different source files. 
+ +:: + + /* Forward declaration, no body */ + function f(integer y, integer) returns integer; + + procedure main() returns integer { + integer y = f(13, 2); + /* Can use this in main, even though the definition is below */ + return 0; + } + + function f(integer x, integer z) returns integer = x*z; + +Note that only the type signatures of the forward declaration of the +function and the definition must be identical. That means the argument names in +the prototype are *optional*. If the prototype arguments are given names they +do not have to match the argument names in the function definition. + + +.. _ssec:function_vec_mat: + +Array and Matrix Parameters and Returns +---------------------------------------- + +The arguments and return value of functions can have both explicit and inferred sizes. For example: + +:: + + function to_real_vec(integer[*] x) returns real[*] { + /* Some code here */ + } + + function transpose3x3(real[3,3] x) returns real[3,3] { + /* Some code here */ + } + + +Like Rust, array *slices* :ref:`sssec:array_ops` may be passed as arguments: + +:: + + function to_real_vec(integer[*] x) returns real[*] { + real[*] rvec = x; + return rvec; + } + + function slicer() returns real[*] { + integer[10] a = 1..10; + var vector two_halves = to_real_vec(a[1..5]); + two_halves.append(to_real_vec(a[6..])); + return two_halves; + } + +Remember that all function parameters are ``const`` in *Gazprea*, so that all +functions are pure. That means that while it is legal to pass arrays and slices +*by reference*, the array contents cannot be modified inside the function, +because the change would be visible outside the function. You must check that +the ``const`` requirement is honored. + +.. _ssec:function_namespacing: + +Function Namespacing +-------------------- + +In *Gazprea* function declarations occur in the global scope.
+This means that two functions with the same name cannot coexist in the same +gazprea program, nor can you forward declare the same function twice. + +Additionally, functions share the following namespaces: + +- The ``procedure`` namespace: You cannot have a procedure and function with + the same name in the same gazprea program. diff --git a/gazprea-clean/spec/globals.rst b/gazprea-clean/spec/globals.rst new file mode 100644 index 0000000..9ea08bc --- /dev/null +++ b/gazprea-clean/spec/globals.rst @@ -0,0 +1,25 @@ +.. _sec:global: + +Globals +======= + +In *Gazprea* values can be assigned to a global identifier. All globals +must be immutable (``const``). If a global identifier is declared with +the ``var`` specifier, then an error should be raised. This restriction is in +place since mutable global variables would ruin functional purity. +If functions have access to mutable global state then we can not guarantee +their purity. + +Globals must be initialized with a valid :ref:`constant expression `. + +This requirement ensures that the value of every global can be determined by +the compiler before the program runs. This restriction is in place to support +functional purity and enable compile-time optimizations. As a result of this +rule: + +* Functions, procedures, or I/O operations may not appear in a global's + initializer. +* Globals cannot have a dynamically-sized array type (e.g., ``integer[*]``), + as their size cannot be determined at compile time. +* All globals are implicitly ``constexpr``. + diff --git a/gazprea-clean/spec/identifiers.rst b/gazprea-clean/spec/identifiers.rst new file mode 100644 index 0000000..4e742da --- /dev/null +++ b/gazprea-clean/spec/identifiers.rst @@ -0,0 +1,63 @@ +.. _sec:identifiers: + +Identifiers +=========== + +Identifiers in *Gazprea* must start with either an underscore or a +letter (upper or lower cased). Subsequent characters can be an +underscore, letter (upper or lower case), or number. 
An identifier may +not be any of *Gazprea*\ ’s keywords. Here are some valid identifiers in +*Gazprea*: + +:: + + hello + h3ll0 + _h3LL0 + _Hi + Hi + _3 + +The following are some examples of invalid identifiers. They begin with +a number, contain invalid characters, or are a keyword: + +:: + + 3d + in + a-bad-variable-name + no@twitter + we.don't.like.punctuation + +*Gazprea* imposes no restrictions on the length of identifiers. + +.. _ssec:namespace: + +Namespaces +========== + +Identifiers are used by variables, user-defined types, functions and procedures. + +For the most part, user-defined types are in their own namespace because their +usage does not collide with variables or functions. +The one exception is that struct literals can look like function calls: + +:: + + struct A (integer i, real j); + A a = A(i, j); + +Consequently, struct literals and functions share the same namespace. +In the above example, a definition of function ``A`` should generate a +``SymbolError``, but a definition of variable ``A`` would not. +Outside of types, variables and functions/procedures share the same namespace +in a scope and shadowing is possible between these types. + +:: + + function x() returns integer; // "x" refers to this function in the global scope + + procedure main() { + integer x = 3; // "x" refers to this variable in the scope of main + } + ... diff --git a/gazprea-clean/spec/keywords.rst b/gazprea-clean/spec/keywords.rst new file mode 100644 index 0000000..16588f0 --- /dev/null +++ b/gazprea-clean/spec/keywords.rst @@ -0,0 +1,75 @@ +.. _sec:keywords: + +Keywords +======== + +*Gazprea* has a number of built in keywords that are reserved and should +not be used by a programmer. 
+ +- and + +- as + +- boolean + +- break + +- by + +- call + +- character + +- const + +- continue + +- else + +- false + +- format + +- function + +- if + +- in + +- integer + +- shape + +- loop + +- not + +- or + +- procedure + +- real + +- return + +- returns + +- std_input + +- std_output + +- stream_state + +- string + +- true + +- tuple + +- typealias + +- var + +- while + +- xor diff --git a/gazprea-clean/spec/procedures.rst b/gazprea-clean/spec/procedures.rst new file mode 100644 index 0000000..1d6be19 --- /dev/null +++ b/gazprea-clean/spec/procedures.rst @@ -0,0 +1,256 @@ +.. _sec:procedure: + +Procedures +========== + +A procedure in *Gazprea* is like a function, except that it does not +have to be pure and as a result it may: + +- Have arguments marked with ``var`` that can be mutated. By default + arguments are ``const`` just like functions. + +- A procedure may only accept a literal or expression as an argument if + and only if the procedure declares that argument as ``const``. + +- Procedures may perform I/O. + +- A procedure can call other procedures. + +- Procedures can only be called in the RHS of declaration statements, RHS + of assignment statements or as the procedure being called in a call statement. + +- When used within a valid statement, the only legal operators which can + be applied to a procedure call are unary operators and casts. + Additionally, the result of the call may not be used in the direct construction + of a type that does not match the return type of the procedure. + +Aside from this (and the different syntax necessary to declare/define +them), procedures are very similar to functions. The extra capabilities +that procedures have makes them harder to reason about, test, and +optimize. + +.. _ssec:procedure_syntax: + +Syntax +------ + +Procedures are almost exactly the same as functions. However, because +procedures can cause side effects, the returns clause is optional. 
Due to +this, the ``= ;`` declaration format is not available for +procedures. For example, the following code is illegal: + +:: + + procedure f() returns integer = 1; + + +If a returns clause is present, then a return statement must be reached +by all possible control flows in the procedure before the end of the +procedure is encountered. For instance: + +:: + + procedure change_first(var integer[*] v) { + v[1] = 7; + } + + procedure increment(var integer x) { + x = x + 1; + } + + procedure fibonacci(var integer a, var integer b) returns integer { + integer c = a + b; + a = b; + b = c; + return c; + } + +These procedures can be called as follows: + +:: + + integer x = 12; + integer y = 21; + integer[5] v = 13; + + call change_first(v); /* v == [7, 13, 13, 13, 13] */ + call increment(x); /* x == 13 */ + call fibonacci(x,y); /* x == 21 and y == 34 */ + +It is only possible to call procedures in this way. Functions must +appear in expressions because they can not cause side effects, so using +a function in a ``call`` statement would not do anything. *Gazprea* +should raise an error if a function is used in a ``call`` statement. + +A procedure may never be called within a function, doing so would allow for +impure functions. Procedures may only be called within assignment statements +(procedures may not be used as the control expression in control flow expressions, for instance). +The return value from a procedure call can only be manipulated with unary +operators. It is illegal to use the results from a procedure call with +binary expressions. +For example: + +:: + + /* p is some procedure with no arguments */ + var x = p(); /* Legal */ + var y = -p(); /* Legal, depending on the return type of p */ + var z = not p(); /* Legal, depending on the return type of p */ + var u = p() + p(); /* Illegal */ + +These restrictions are made by *Gazprea* in order to allow for more +optimizations. + +Procedures without a return clause may not be used in an expression. 
+*Gazprea* should raise an error in such a case. +:: + + /* p is some procedure with no return clause */ + integer x = p(); /* Illegal */ + +.. _ssec:procedure_fwd_declr: + +Procedure Declarations +---------------------- + +Procedures can use :ref:`forward declaration ` +just like functions. + +.. _ssec:procedure_main: + +Main +---- + +Execution of a *Gazprea* program starts with a procedure called +``main``. This procedure takes no arguments, and has an integer return +type. ``main`` is called exclusively by the operating system, and the return value is +used by the operating system, so if you are using multiple compilation units +one and only one compilation unit must define ``main``. + +:: + + /* must be written like this */ + procedure main() returns integer { + var integer x = 1; + x = x + x; + x -> std_output; + + /* must have a return */ + return 0; + } + +.. _ssec:procedure_alias: + +Type Promotion of Arguments +--------------------------- + +Argument types can be promoted at call time, but only if the argument is +call by value (``const``). The reason is that mutable arguments are effectively +call by reference, and are therefore *l-values* (pointers). + +:: + + + procedure byvalue(String x) returns integer { + return length(x); + } + procedure byreference(var String x) returns integer { + return length(x); + } + procedure main() returns integer { + const character[3] y = ['y', 'e', 's']; + + integer size = byvalue(y); // legal + call byreference(y); // illegal + + return 0; + } + + +Aliasing +-------- + +Since procedures can have mutable arguments, it would be possible to +cause `aliasing `__. +In *Gazprea* aliasing of mutable variables is illegal (the only case +where any aliasing is allowed is that tuple members can be accessed by +name, or by number, but this is easily spotted). This helps *Gazprea* +compilers perform more optimizations.
However, the compiler must be able +to catch cases where mutable memory locations are aliased, and an error +should be raised when this is detected. For instance: + +:: + + procedure p(var integer a, var integer b, const integer c, const integer d) { + /* Some code here */ + } + + procedure main() returns integer { + var integer x = 0; + var integer y = 0; + var integer z = 0; + + /* Illegal */ + call p(x, x, x, x); /* Aliasing, this is an error. */ + call p(x, x, y, y); /* Still aliasing, error. */ + call p(x, y, x, x); /* Argument a is mutable and aliased with c and d. */ + + /* Legal */ + call p(x, y, z, z); + /* Even though 'z' is aliased with 'c' and 'd' they are both const. */ + + return 0; + } + +Whenever a procedure has a mutable argument ``x`` it must be checked that +none of the other arguments given to the procedure are ``x``. +This is simple for scalar values, but more complicated when variable arrays are +passed to procedures. For instance: + +:: + + call p(v[x], v[y]); + /* p is some procedure with two variable array arguments */ + +It is impossible to tell whether or not these overlap at compile time +due to the halting problem. Thus for simplicity, whenever an array is passed +to a procedure *Gazprea* detects aliasing whenever the same array is used, +regardless of whether or not the access would overlap. + +Another instance of aliasing relates to tuples, such as passing the +same tuple twice in one procedure, or passing the entire tuple and +separately passing a single tuple field. In both cases this can cause +aliasing. + +:: + + call p(t1, t1.1); + /* p is some procedure with a tuple argument and a real argument */ + +.. _ssec:procedure_vec_mat: + +Array Parameters and Returns +---------------------------------------- + +:ref:`As with functions `, the arguments and return +value of procedures can have both explicit and inferred sizes.
+ +Similarly, slices can be used wherever arrays are declared as parameters, and +unlike functions, array parameters in procedures can be ``var``. + +.. _ssec:function_namespacing: + +Procedure Namespacing +--------------------- + +In *Gazprea* procedure declarations occur in the global scope. +This means that two procedures with the same name cannot coexist in the same +gazprea program, nor can you forward declare the same procedure twice. + +Additionally, procedures share the following namespaces: + +- The ``struct`` namespace: you cannot have a struct and procedure with the same + name in the same gazprea program. + +- The ``function`` namespace: You cannot have a procedure and function with + the same name in the same gazprea program. diff --git a/gazprea-clean/spec/statements.rst b/gazprea-clean/spec/statements.rst new file mode 100644 index 0000000..7ecb9f2 --- /dev/null +++ b/gazprea-clean/spec/statements.rst @@ -0,0 +1,511 @@ +.. _sec:statements: + +Statements +========== + +.. _ssec:statements_assign: + +Assignment Statements +--------------------- + +In *Gazprea* a mutable variable may have different values throughout the +execution of the program. Mutable variables may have their values changed with +an assignment statement. In the simplest case an assignment statement +contains an identifier on the left hand side of an equals sign, and an +expression with a compatible type on the right hand side. + +:: + + var integer x = 7; + + x -> std_output; /* Prints 7 */ + + /* Give 'x' a new value */ + x = 2 * 3; /* This is an assignment statement */ + + x -> std_output; /* Prints 6 */ + +Type checking must be performed on assignment statements. The expression +on the right hand side must have a type that can be automatically +promoted to the type of the variable.
For instance: + +:: + + var integer int_var = 7; + var real real_var = 0.0; + var boolean bool_var = true; + + /* Since 'int_var' is an integer it can be promoted to a real number */ + real_var = int_var; /* Legal */ + + /* Real numbers can not be turned into boolean values automatically. */ + bool_var = real_var; /* Illegal */ + +Assignments can also be more complicated than this with arrays and tuples. +With arrays indices may be provided in order to change the value of an array +element. In Gazprea, arrays cannot be indexed with array expressions. +For instance, with single dimensional arrays: + +:: + + var integer[*] v = [0, 0, 0]; + + /* Can assign an entire array value -- change 'v' to [1, 2, 3] */ + v = [1, 2, 3]; + + /* Change 'v' to [1, 0, 3] */ + v[2] = 0; + +This applies to arrays of any dimension. + +:: + + var integer[*][*] M = [[1, 1], [1, 1]]; + + /* Change the entire matrix M to [[1, 2], [3, 4]] */ + M = [[1, 2], [3, 4]]; + + /* Change a single position of M */ + M[1][2] = 7; /* M is now [[1, 7], [3, 4]] */ + +Tuples also have a special unpacking syntax in *Gazprea*. A tuple’s +fields may be assigned to comma separated variables instead of a tuple +variable. For instance: + +:: + + var integer x = 0; + var real y = 0; + var real z = 0; + + tuple(integer, real) tup = (1, 2.0); + + /* x == 1, and y == 2.0 now */ + x, y = tup; + + /* Types can be promoted */ + + /* z == 1.0, y == 2.0 */ + z, y = tup; + + /* Can swap: z == 2.0, y == 1.0 */ + z, y = (y, z); + +The types of the variables must match the types of the tuple’s fields, +or the tuple’s fields must be able to be automatically promoted to the +variable’s type. The number of variables in the comma separated list +must match the number of fields in the tuple, if this is not the case an +error should be raised. This assignment is performed left-to-right. + +Assignments and initializations must perform a deep copy. It should not +be possible to cause the aliasing of memory locations with an +assignment.
For instance: + +:: + + integer[*] v = [1, 2, 3]; + var integer[*] w = v; + + w[2] = 0; /* This must not affect 'v' */ + + /* v has the value [1, 2, 3] */ + /* w has the value [1, 0, 3] */ + + /* If you are not careful, you might copy the pointer of 'v' to 'w', + which would cause them to be stored in the same location in memory. If + this happens modifying 'w' would change 'v' as well. + */ + +The above is a simple example using arrays. You must ensure that values +can not be aliased with an assignment between any types, including +arrays and tuples. + +Variables may be declared as const, and in this case it is illegal for +them to appear on the left hand side of an assignment statement. The +compiler should raise an error when this is detected, since it does not +make sense to change a constant value. + +The right hand side of an assignment statement is always evaluated +before the left hand side. This is important for cases where procedures +may change variables, for instance: + +:: + + v[x] = p(x); + /* If p changes x then it is important that p(x) is executed before v[x] */ + +.. _ssec:statements_block: + +Block Statements +---------------- + +A list of statements may be grouped into one statement using curly +braces. This is called a block statement, and is similar to block +statements in other languages such as *C/C++*. As an example: + +:: + + { + x = 3; + z = 4; + x -> std_output; "\n" -> std_output; z -> std_output; "\n" -> std_output; + } + +Is a block statement. Declarations can only appear at the start of a +block. Each block statement introduces a new scope that new variables +may be declared in. For instance this is perfectly valid: + +:: + + integer x = 3; + var integer y = 0; + var real z = 0; + + { + real x = 7.1; + z = x; + } + + y = x; + +After execution, ``y = 3`` and ``z = 7.1``. + +..
_ssec:statements_cond: + +If/Else Statements +------------------ + +An if statement takes a boolean value as a conditional expression, and a +statement for the body. If the conditional expression evaluates to true, +then the body is executed. If the conditional expression evaluates to +false then the body of the if statement is not executed. If statements +in *Gazprea* require the conditional expression to be enclosed in parentheses. + +:: + + integer x = 0; + var integer y = 0; + + /* Compute some value for x */ + + if (x == 3) { + y = 7; + } + + /* At this point y will only be 7 if x == 3, and otherwise y will be + 0, assuming it did not change throughout the rest of the program. + */ + +If statements are often paired with block statements, like in the above +example. The if statement above could also be written as: + +:: + + if (x == 3) + y = 7; + +Since ``y = 7;`` is a statement it can be used as the body statement. +All statements after this point are not in the body of the if statement. +For instance: + +:: + + if (x == 3) + y = 7; + z = 32; + +is actually equivalent to the following: + +:: + + if (x == 3) { + y = 7; + } + + z = 32; + +*Gazprea* is not sensitive to whitespace, so we could even write +something like: + +:: + + if (x == 3) y = 7; + +An if statement may also be followed by an else statement. The else has +a body statement just like the if statement, but this is only run if the +conditional expression on the if statement fails. + +:: + + if (x == 3) + y = 7; + else + y = 32; + +Now if ``x`` does not have a value of 3, ``y`` is assigned a value of +32. This can be paired with if statements as well. + +:: + + y = 0; + + if (x < 0) { + y = -1; + } + else if (x > 0) { + y = 1; + } + + /* y is negative if x is negative, positive if x is positive, + and 0 if x is 0. */ + +.. _ssec:statements_loop: + +Loop +---- + +..
_sssec:statements_inf_Loop: + +Infinite Loop +~~~~~~~~~~~~~ + +*Gazprea* provides an infinite loop, which continuously executes the +body statement given to it. For instance: + +:: + + loop "hello!\n" -> std_output; + +Would print "hello!" indefinitely. This is often used with block +statements. + +:: + + /* Infinite counter */ + var integer n = 0; + + loop { + n -> std_output; "\n" -> std_output; + n = n + 1; + } + +.. _sssec:statements_pred_loop: + +Predicated Loop +~~~~~~~~~~~~~~~ + +A loop may also be provided with a control expression. The control +expression automatically breaks from the loop if it evaluates to false +when it is checked. + +The loop can be pre-predicated, which means that the control expression +is tested before the body statement is executed. This is the same +behaviour as while loops in most languages, and is written using the +``while`` token after the ``loop``, followed by a boolean expression for the +predicate. For example: + +:: + + var integer x = 0; + + /* Print 1 to 10 */ + loop while (x < 10) { + x = x + 1; + x -> std_output; "\n" -> std_output; + } + +A post-predicated loop is also available. In this case the control +expression is tested after the body statement is executed. This also +uses the ``while`` token followed by the control expression, but it appears +at the end of the loop. Post Predicated loop statements must end in a +semicolon. + +:: + + integer x = 10; + + /* Since the conditional is tested after the execution '10' is printed */ + loop x -> std_output; while (x == 0); + +.. _sssec:statements_iter_loop: + +Iterator Loop +~~~~~~~~~~~~~ + +Loops can be used to iterate over the elements of an array of any type. +This is done by using domain expressions (for instance ``i in v``) in +conjunction with a loop statement. + +When the domain is given by an array, each time the loop is executed the +next element of the array is assigned to the domain variable. 
The +elements of the domain array are assigned to the domain variable +starting from index 1, and going up to the final element of the array. +When all of the elements of the domain array have been used the loop +automatically exits. For instance: + +:: + + /* This will print 123 */ + loop i in [1, 2, 3] { + i -> std_output; + } + +Array ranges can also be used instead: + +:: + + // This will print 123 + loop i in 1..3 { + i -> std_output; + } + +The domain is evaluated once during the first iteration of the loop. For +instance: + +:: + + var integer[*] v = [i in 1..3 | i]; + + /* Since the domain 'v' is only evaluated once this loop prints 1, 2, + and then 3 even though after the first iteration 'v' is the zero + array. */ + loop i in v { + v = 0; + i -> std_output; "\n" -> std_output; + } + +Similarly, the domain variable is assigned from the domain array at the top of +the loop for every iteration, even if it is reassigned in the body of the loop: + +:: + + // This will print 123456 + loop i in 1..6 { + i -> std_output; + i = 5; + } + +Note that multiple domain expressions are *not* allowed: + +:: + + // This is illegal + loop i in u, j in v { + "Hello!\n" -> std_output; + } + + // If you want multiple domains, use a nested loop + loop i in u { + loop j in v { + "Hello!\n" -> std_output; + } + } + +.. _ssec:statements_break: + +Break +----- + +A ``break`` statement may only appear within the body of a loop. When a +``break`` statement is executed the loop is exited, and *Gazprea* continues +to execute after the loop. This only exits the innermost loop, which +actually contains the ``break``. 
+ +:: + + /* Prints a 3x3 square of *'s */ + var integer x = 0; + var integer y = 0; + + loop while (y < 3) { + y = y + 1; + x = 0; /* Reset the column counter for each row */ + /* Normally this would loop forever, but the break exits this inner loop */ + loop { + if (x >= 3) break; + + x = x + 1; + "*" -> std_output; + } + + "\n" -> std_output; + } + +If a ``break`` statement is not contained within a loop an error must be +raised. + +.. _ssec:statements_continue: + +Continue +-------- + +Similarly to ``break``, ``continue`` may only appear within the body of +a loop. When a ``continue`` statement is executed the innermost loop +that contains the ``continue`` statement starts its next iteration. +``continue`` stops the execution of the loop’s body statement, the loop +then continues as though the body statement finished its execution +normally. + +:: + + /* Prints every number between 1 and 10, except for 7 */ + var integer x = 0; + + loop while (x < 10) { + x = x + 1; + + if (x == 7) continue; /* Start at the beginning of the loop, skip 7 */ + + x -> std_output; "\n" -> std_output; + } + +.. _ssec:statements_return: + +Return +------ + +The ``return`` statement is used to stop the execution of a function or +procedure. When a function/procedure returns then execution continues where the +function/procedure was called. + +If the function/procedure has a return type then the ``return`` statement must +be given a value that is the same as or able to be promoted to (see +:ref:`sec:typePromotion`) the return type; this will be the result of the +function/procedure call. Here is an example: + +:: + + function square(integer x) returns integer { + return x * x; + } + +If a procedure has no ``returns`` clause, then it has no return type and a +``return`` statement is not required but may still be present in order to +return early. In this case return is used as follows: + +:: + + procedure do_nothing() { + return; + } + +..
_ssec:statements_streams: + +Stream Statements +----------------- + +Stream statements are the statements used to read and write values in +*Gazprea*. + +Output example: + +:: + + 2 * 3 -> std_output; /* Prints 6 */ + +Input example: + +:: + + var integer x; + x <- std_input; /* Read an integer into x */ diff --git a/gazprea-clean/spec/streams.rst b/gazprea-clean/spec/streams.rst new file mode 100644 index 0000000..610ca35 --- /dev/null +++ b/gazprea-clean/spec/streams.rst @@ -0,0 +1,259 @@ +.. _sec:streams: + +Streams +======= + +*Gazprea* has two streams: ``std_output`` and ``std_input``, +which are used for writing to ``stdout`` and reading from ``stdin`` respectively. + + +.. _ssec:output: + +Output Stream +------------- + +Output streams use the following syntax: + +:: + + -> std_output; + +.. _sssec:output_format: + +Output Format +~~~~~~~~~~~~~ + +Values of the following base types are treated as follows when sent to +an output stream: + +- :ref:`ssec:character`: The character is printed. + +- :ref:`ssec:integer`: Converted to a string representation, and then printed. + +- :ref:`ssec:real`: Converted to a string representation, and then printed. + This is the same behaviour as the `%g specifier in + printf `__. + +- :ref:`ssec:boolean`: Prints T for true, and F for false. + +:ref:`Arrays ` print their contents according to the rules above, with square +brackets surrounding their elements and with spaces only *between* values. +For example: + +:: + + integer[*] v = 1..3; + v -> std_output; + +prints the following: + +:: + + [1 2 3] + +:ref:`strings ` print their contents as a contiguous sequence of characters. +For example: + +:: + + string str = "Hello, World!"; + str -> std_output; + +prints the following: + +:: + + Hello, World! + +:ref:`Matrices ` print like an array of arrays. For example: + +:: + + [[1, 2, 3], [4, 5, 6], [7, 8, 9]] -> std_output; + +prints the following: + +:: + + [[1 2 3] [4 5 6] [7 8 9]] + +No other type may be sent to a stream.
For instance, +procedures with no return type and tuples cannot be sent to streams. +Also, empty arrays and matrices can be sent to streams, but not empty +literals (e.g. ``[]``), because they have no type. + +Note that there is **no automatic new line or spaces printed.** To print +a new line or a space, a user must explicitly print the new line or space +character. For example: + +:: + + '\n' -> std_output; + ' ' -> std_output; + +.. _ssec:input: + +Input Stream +------------ + +Input streams use the following syntax: + +:: + + <l-value> <- std_input; + +An l-value may be anything that can appear on the left hand side of an +assignment statement. Consider reading the discussion of an l-value +`here `__. + +Input streams may only work on the following base types: + +- ``character``: Reads a single character from stdin. Note that there + can be no :ref:`error state <sssec:stream_error>` for reading characters. + +- ``integer``: Reads an integer from stdin. If an integer could not be + read, an :ref:`error state <sssec:stream_error>` is set on this stream. + +- ``real``: Reads a real from stdin. If a real could not be read, an :ref:`error state <sssec:stream_error>` is + set on this stream. + +- ``boolean``: Reads a boolean from stdin. If a boolean value could not + be read, an :ref:`error state <sssec:stream_error>` is set on this stream. + +Type promotion is not performed for stream input over any type. + +.. _sssec:input_format: + +Input Semantics +~~~~~~~~~~~~~~~ + +``std_input`` expects an input stream of values which do not need to be +whitespace separated. A read will consume the stream until a character or +EOF occurs that breaks the pattern match for the given type's specifier. The longest +successful match is returned. + +In general input stream semantics are designed for parity with ``scanf``. +The only differences are the :ref:`ssec:builtIn_stream_state`, a boolean specifier, +and a limit of 512 on the maximum number of bytes that can be consumed in a single read. + +For each of the allowed types the semantics are given below.
+ +Reading a ``character`` from stdin consumes the first byte that can be read from the +stream. If the end of the stream is encountered, then a value of ``-1`` is set. There +is no concept of skipping whitespace for characters, since space and escaped characters +must be readable. + +An ``integer`` from stdin can take any legal format described in the +:ref:`integer literal ` section. It may also be preceded by +a single negative or positive sign. All preceding whitespace before the number or +sign character may be skipped up to the limit imposed by the 512 byte read restriction. + +A ``real`` input from stdin can take any legal format described in the +:ref:`real literal ` section with the exception that no +whitespace may be present. It may also be preceded by a single negative or +positive sign. Preceding whitespace may be skipped in the same way as integers. + +A ``boolean`` input from stdin is either ``T`` or ``F``. Preceding whitespace may be +skipped in the same way as integers and reals. + +For the following program: + +:: + + var boolean b; + var character c; + var integer i; + var real r; + b <- std_input; + i <- std_input; + c <- std_input; + r <- std_input; + format(b) || " " || format(r) -> std_output; + +And this input (where '\\t' is TAB, '*' is space, and each line ends with a +newline ('\\n')): + +:: + + \tF\n + 1\n + *1.\n + +The output would be: + +:: + + F 1.0 + +because the white space is consumed for characters and skipped for other types. + + +.. _sssec:stream_error: + +Error Handling +~~~~~~~~~~~~~~ + +When reading ``boolean``, ``integer``, and ``real`` from stdin, it is +possible that the end of the stream or an error is encountered. In order to +handle these situations *Gazprea* provides a built in procedure that is +implicitly defined in every file: ``stream_state`` (see +:ref:`ssec:builtIn_stream_state`). + +Reading a ``character`` can never cause an error.
The character will either be +successfully read or the end of the stream will be reached and ``-1`` will be +returned on this read. + +When an error occurs the null value is assigned and the input stream +remains pointing to the same position as before the read occurred. + +The program below demonstrates 4 reads which set the error +states 1,0,0,2 respectively. + +:: + + var integer ss; + var integer i; + var boolean b; + var character c; + + i <- std_input; + i -> std_output; + ss = stream_state(std_input); + ss -> std_output; + + c <- std_input; //eat the . + + i <- std_input; + i -> std_output; + + c <- std_input; + ss = stream_state(std_input); + ss -> std_output; + +With the input stream: + +:: + + .7 + +And the expected output: + +:: + + 0172 + +This table summarizes an input stream’s possible error states after a read of a +particular data type. + +========= ============= ========= ================= +Type Situation Return ``stream_state`` +========= ============= ========= ================= +Boolean error ``false`` 1 +\ end of stream ``false`` 2 +Character error N/A N/A +\ end of stream ``-1`` 2 +Integer error ``0`` 1 +\ end of stream ``0`` 2 +Real error ``0.0`` 1 +\ end of stream ``0.0`` 2 +========= ============= ========= ================= diff --git a/gazprea-clean/spec/type_casting.rst b/gazprea-clean/spec/type_casting.rst new file mode 100644 index 0000000..bddd522 --- /dev/null +++ b/gazprea-clean/spec/type_casting.rst @@ -0,0 +1,126 @@ +.. _sec:typeCasting: + +Type Casting +============ + +*Gazprea* provides explicit type casting. Type casting is an expression. +A value may be converted to a +different type using the following syntax where ``value`` is an +expression and ``toType`` is our destination type: + +:: + + as<toType>(value) + +Conversion from one type to another is not always legal. For instance +converting from an ``integer`` array to an ``integer`` has no +reasonable conversion. + +..
_ssec:typeCasting_stos: + +Scalar to Scalar +---------------- + +This table summarizes all of the conversion rules between scalar types +where N/A means no conversion is possible, id means no change is +necessary, and anything else describes how to convert the value to the +new type: + ++----------+-------------------------------------------------------------------------------------------------------------------------------------+ +| | **To type** | ++----------+-----------+--------------------------------+--------------------------------+--------------------------+----------------------------+ +| | | boolean | character | integer | real | +| +-----------+--------------------------------+--------------------------------+--------------------------+----------------------------+ +| | boolean | id | ‘\\0’ if false, 0x01 otherwise | 1 if true, 0 otherwise | 1.0 if true, 0.0 otherwise | +| +-----------+--------------------------------+--------------------------------+--------------------------+----------------------------+ +| **From** | character | false if ‘\\0’, true otherwise | id | *ASCII* value as integer | *ASCII* value as real | +| +-----------+--------------------------------+--------------------------------+--------------------------+----------------------------+ +| **type** | integer | false if 0, true otherwise | unsigned integer value mod 256 | id | real version of integer | +| +-----------+--------------------------------+--------------------------------+--------------------------+----------------------------+ +| | real | N/A | N/A | truncate | id | ++----------+-----------+--------------------------------+--------------------------------+--------------------------+----------------------------+ + +.. _ssec:typeCasting_stovm: + +Scalar to Array +----------------------- + +A scalar may be promoted to an array of any dimension with an element type that +the original scalar can be cast to according to the rules in :ref:`ssec:typeCasting_stos`. 
+A scalar to array cast *must* include a size with the type to cast to as this +cannot be inferred from the scalar value. For example: + +:: + + // Create an array of reals with length three where all values are 1.0. + real[*] v = as<real[3]>(1); + + // Create an array of booleans with length 10 where all values are true. + var u = as<boolean[10]>('c'); + +.. _ssec:typeCasting_vtov: + +Array to Array +---------------- + +Conversions between array types are also possible. First, the +values of the original are cast to the destination type’s element type +according to the rules in :ref:`ssec:typeCasting_stos` and then the destination +is padded with the destination element type’s zero or truncated to match the +destination type size. Note that the size is not required for array to +array casting; if the size is not included in the cast type, the new +size is assumed to be the old size. For example: + +:: + + real[3] v = [i in 1..3 | i + 0.3 * i]; + + // Convert the real array to an integer array. + integer[3] u = as<integer[3]>(v); + + // Convert to integers and zero pad. + integer[5] x = as<integer[5]>(v); + + // Truncate the array. + real[2] y = as<real[2]>(v); + +Casting non-variable empty arrays ``[]`` is not allowed, because a literal +empty array does not have a type. + +.. _ssec:typeCasting_mtom: + +Multi-dimensional Arrays +------------------------ + +Conversions between arrays of any dimension are possible. +The process is exactly like :ref:`ssec:typeCasting_vtov` except padding and +truncation can occur in all dimensions. For example: + +:: + + real[2][2] a = [[1.2, 24], [-13e2, 4.0]]; + + // Convert to an integer matrix. + integer[2][2] b = as<integer[2][2]>(a); + + // Convert to integers and pad in both dimensions. + integer[3][3] c = as<integer[3][3]>(a); + + // Truncate in one dimension and pad in the other. + real[1][3] d = as<real[1][3]>(a); + real[3][1] e = as<real[3][1]>(a); + +.. _ssec:typeCasting_ttot: + +Tuple to Tuple +-------------- + +Conversions between ``tuple`` types are also possible.
The original type +and the destination type must have an equal number of internal types and +each element must be pairwise castable according to the rules +in :ref:`ssec:typeCasting_stos`. For example: + +:: + + tuple(integer, integer) int_tup = (1, 2); + tuple(real, boolean) rb_tup = as<tuple(real, boolean)>(int_tup); diff --git a/gazprea-clean/spec/type_inference.rst b/gazprea-clean/spec/type_inference.rst new file mode 100644 index 0000000..8877704 --- /dev/null +++ b/gazprea-clean/spec/type_inference.rst @@ -0,0 +1,48 @@ +.. _sec:typeInference: + +Type Inference +============== + +In many cases the compiler can figure out what a variable’s type, or a +function’s return type, should be without an explicit type being +provided. For instance, instead of writing: + +:: + + integer x = 2; + const integer y = x * 2; + +*Gazprea* allows you to just write: + +:: + + var x = 2; + const y = x * 2; + +This is allowed because the compiler knows that the initialization +expression, 2, has the type integer. Because of this the compiler can +automatically give x an integer type. A *Gazprea* programmer can use +``var`` or ``const`` for any declaration with an initial value +expression, as long as the compiler can guess the type for the +expression. + +Note that although the qualifier may be elided (default is ``const``) and the +type may be elided (inferred from the RHS), it is not legal to imply both: + +:: + + x = 2; // assignment or declaration? + +Interpreted as a declaration, the full form would be ``const integer x = 2;``. +However, with both the modifier and type assumed we can't differentiate this +declaration from an assignment statement. To prevent this ambiguity, we require +at least one of the qualifier or the type to be present: + +:: + + const integer x = 2; // full form - legal + integer x = 2; // defaults to const - legal + var x = 2; // infers integer - legal + x = 2; // assignment to undeclared variable?
- illegal + var x; // can't infer type - illegal + integer x; // const integer initialized to 0 - legal diff --git a/gazprea-clean/spec/type_promotion.rst b/gazprea-clean/spec/type_promotion.rst new file mode 100644 index 0000000..e22446a --- /dev/null +++ b/gazprea-clean/spec/type_promotion.rst @@ -0,0 +1,129 @@ +.. _sec:typePromotion: + +Type Promotion +============== + +Type promotion is a sub-problem of casting and refers to casts that happen +implicitly. + +Any conversion that can be done implicitly via promotion can also be done +explicitly via typecast expression. +The notable exception is array promotion to a higher dimension, which occurs as +a consequence of scalar to array promotion. + +.. _ssec:typePromotion_scalar: + +Scalars +------- + +The only automatic type promotion for scalars is ``integer`` to +``real``. This promotion is one way - a ``real`` cannot be automatically +converted to ``integer``. + +Automatic type conversion follows this table where N/A means no implicit +conversion possible, id means no conversion necessary, +``as<toType>(var)`` means var of type "From type" is converted to type +"toType" using semantics from :ref:`sec:typeCasting`. + ++----------+-----------+---------+-----------+---------+---------------+ +| | **To type** | ++----------+-----------+---------+-----------+---------+---------------+ +| | | boolean | character | integer | real | ++ +-----------+---------+-----------+---------+---------------+ +| **From** | boolean | id | N/A | N/A | N/A | ++ +-----------+---------+-----------+---------+---------------+ +| **type** | character | N/A | id | N/A | N/A | ++ +-----------+---------+-----------+---------+---------------+ +| | integer | N/A | N/A | id | as<real>(var) | ++ +-----------+---------+-----------+---------+---------------+ +| | real | N/A | N/A | N/A | id | ++----------+-----------+---------+-----------+---------+---------------+ + +..
_ssec:typePromotion_stoa: + +Scalar to Array +-------------------------- + +All scalar types can be promoted to arrays that have an internal type that the +scalar can be :ref:`converted to implicitly <ssec:typePromotion_scalar>`. +This can occur when an array is used in an operation with a scalar value. + +The scalar will be implicitly converted to an array of +equivalent dimensions and equivalent internal type. For example: + +:: + + integer i = 1; + integer[*] v = [1, 2, 3, 4, 5]; + integer[*] res = v + i; + + res -> std_output; + +would print the following: + +:: + + [2 3 4 5 6] + +Other examples: + +:: + + 1 == [1, 1] // True + 1..2 || 3 // [1, 2, 3] + +Note that an array can never be downcast to a scalar, +even if type casting is used. Also note that matrix multiply imposes strict +requirements on the dimensionality of the operands. The consequence is +that scalars can only be promoted to a matrix if the matrix multiply +operand is a square matrix (:math:`m \times m`). + +Tuple to Tuple +-------------- + +Tuples may be promoted to another tuple type if it has an equal number of +internal types and the original internal types can be implicitly +converted to the new internal types. For example: + +:: + + tuple(integer, integer) int_tup = (1, 2); + tuple(real, real) real_tup = int_tup; + + tuple(character, integer, boolean[2]) many_tup = ('a', 1, [true, false]); + tuple(character, real, boolean[2]) other_tup = many_tup; + +If initializing a variable with a tuple via :ref:`sec:typeInference`, the +variable is assumed to be the same type. +Therefore, tuple elements are also copied accordingly. For example: + +:: + + tuple(real, real) foo = (1, 2); + tuple(real, real) bar = (3, 4); + + var baz = foo; + baz.1 -> std_output; // 1 + baz.2 -> std_output; // 2 + + baz = bar; + baz.1 -> std_output; // 3 + baz.2 -> std_output; // 4 + + +It is possible for a two sided promotion to occur with tuples.
For example: + +:: + + boolean b = (1.0, 2) == (2, 3.0); + +Character Array to/from String +------------------------------- + +A ``string`` can be implicitly converted to an array of ``character``\ s and vice-versa (two-way type promotion). + +:: + + string str1 = "Hello"; /* str1 == "Hello" */ + character[*] chars = str1; /* chars == ['H', 'e', 'l', 'l', 'o'] */ + string str2 = chars || [' ', 'W', 'o', 'r', 'l', 'd']; /* str2 == "Hello World" */ diff --git a/gazprea-clean/spec/type_qualifiers.rst b/gazprea-clean/spec/type_qualifiers.rst new file mode 100644 index 0000000..a6eed73 --- /dev/null +++ b/gazprea-clean/spec/type_qualifiers.rst @@ -0,0 +1,66 @@ +.. _sec:typeQualifiers: + +Type Qualifiers +=============== + +*Gazprea* has two type qualifiers: ``const`` and ``var``. These +qualifiers can prefix a type to specify its mutability or entirely +replace the type to request that it be inferred. Mutability refers to a +value's ability to be an `r-value or +l-value `__. +The two qualifiers cannot be combined as they are mutually exclusive. + +.. _ssec:typeQualifiers_const: + +Const +----- + +A ``const`` value is immutable and therefore cannot be an l-value but +can be an r-value. For example: + +:: + + const integer i; + +Because a ``const`` value is not an l-value, it cannot be passed to a +``var`` argument in a ``procedure``. + +Note that ``const`` is the default *Gazprea* behaviour and is essentially a +no-op unless it is entirely replacing the type. + + +.. _ssec:typeQualifiers_var: + +Var +--- + +A ``var`` value is mutable and therefore can be an l-value or r-value. +For example: + +:: + + var integer i; + +The compiler should raise an error if an attempt is made to modify a variable +that is not explicitly declared ``var``. + +.. _ssec:typeQualifiers_infer: + +Type Inference Using Qualifiers +------------------------------- + +Type qualifiers may be used in place of a type, in which case the real +type must be inferred. 
A variable declared in this manner must be +**immediately initialised** to enable inference. For example: + +:: + + var i = 1; // integer + const i = 1; // integer + var r = 1.0; // real + const c = 'a'; // character + var t = (1, 2, 'a', [1, 2, 3]); // tuple(integer, integer, character, integer[3]) + const v = ['a', 'b', 'c', 'd']; // character[4] + +See :ref:`sec:typeInference` for a larger description of type inference, this section only +provides the syntax for inference using ``const`` and ``var``. diff --git a/gazprea-clean/spec/typedef.rst b/gazprea-clean/spec/typedef.rst new file mode 100644 index 0000000..754edad --- /dev/null +++ b/gazprea-clean/spec/typedef.rst @@ -0,0 +1,78 @@ +.. _sec:typealias: + +Typealias +========= + +Custom names for types can be defined using ``typealias``. Type aliases may only +appear at global scope, they may not appear within functions or procedures. A +type alias may use any valid identifier for the name of the type. After the type +alias has been defined any global declaration or function defined may use the +new name to refer to the old type. For instance: + +:: + + typealias integer int; + const int a = 0; + +Note that these new type names can *appear* to conflict with symbol names. +However, the compiler can use context to differentiate a type alias from a +symbol. The following is therefore legal: + +:: + + typealias character main; + typealias integer i; + + const main A = 'A'; + + procedure main() returns i { + i i = 0; // = ; + return i; + } + +In addition to base types, ``typealias`` can be used with compound types +(arrays, vectors, and strings) and aggregate types (structs and tuples). +Using ``typealias`` on tuples, or on arrays with sizes helps reusability and +consistency: + +:: + + typealias tuple(character[64], integer, real) student_id_grade; + student_id_grade chucky_cheese = ("C. 
Cheese", 123456, 77.0); + + typealias integer[2][3] two_by_three_matrix; + two_by_three_matrix m = [i in 1..2, j in 1..3 | i + j]; + +Type aliases of arrays with inferred sizes are allowed, but declarations +of variables using the type alias must be initialized appropriately. + +Because a ``typealias`` is an aliased name for a type, you can use +``typealias`` on type alias'ed types: + +:: + + typealias integer int; + typealias int also_int; + +Duplicate alias names should raise a `SymbolError` + +:: + + typealias integer ty; + typealias character ty; + +Some type aliases may be parameterized with an expression, such as with arrays, +such expressions are restricted to be composed exclusively from arithmetic +operations on scalar literals. Practically speaking, this requires constant +folding but *not* constant propagation. + +:: + + typealias integer[1 + 3 - 2] vec_of_two; + procedure main() returns integer { + vec_of_two v = 1..3; + } + +Should raise a ``SizeError`` on line 3 since the ``vec_of_two`` type has a size +of 2 and an array of size 3 is being assigned. + diff --git a/gazprea-clean/spec/types.rst b/gazprea-clean/spec/types.rst new file mode 100644 index 0000000..80bd4a8 --- /dev/null +++ b/gazprea-clean/spec/types.rst @@ -0,0 +1,18 @@ +.. _sec:types: + +Types +===== + +.. toctree:: + :maxdepth: 2 + + types/boolean + types/character + types/integer + types/real + types/tuple + types/struct + types/array + types/vector + types/string + types/matrix diff --git a/gazprea-clean/spec/types/array.rst b/gazprea-clean/spec/types/array.rst new file mode 100644 index 0000000..0816bc2 --- /dev/null +++ b/gazprea-clean/spec/types/array.rst @@ -0,0 +1,253 @@ +.. _ssec:array: + +Arrays +------ + +Arrays are ordered, homogeneous collections of elements. *Gazprea*'s array +system offers a unified syntax for +statically-sized, dynamically-sized, and multi-dimensional arrays. 
+ +An array's elements can be of any single type, including base types ( +``boolean``, +``integer``, ``real``), compound types (``tuple``), and other arrays. + +.. _sssec:array_decl: + +Declaration +~~~~~~~~~~~ + +An array type is specified by providing a shape in +square brackets (``[]``) to a type. + +#. Static vs. Dynamic Sizing + + *Gazprea* distinguishes between arrays whose size is fixed at compile time + (static) and arrays that can change size at runtime (dynamic). + + - A **static dimension** is declared using an integer literal or a + :ref:`constant expression `. + - A **dynamic dimension** is declared using an asterisk (``*``). + + :: + + // A statically-sized array of 10 integers. + var integer[10] a; + + // A dynamically-sized array of integers. + var integer[*] b; + +#. N-Dimensional Arrays + + Multi-dimensional arrays are declared by providing a comma-separated list of + dimension specifiers (the shape). Any dimension can be static or dynamic, + allowing for + the creation of fixed-size matrices, jagged arrays, and more general n-d + arrays. + + :: + + // A 3x4 matrix of real numbers. + var real[3, 4] matrix; + + // A jagged array: 5 rows, each a dynamic array of characters. + var character[5, *] jagged; + + // A dynamic list of static 3-element integer vectors. + var integer[*, 3] vectors; + + +#. Inferred Type and Size + + When initializing a variable with an array literal, its type and size can + be inferred by the compiler using ``var``. The resulting array is always + statically-sized unless _any_ initializer contains a dynamic dimension + or is a dynamically-sized array. + + :: + + // v is inferred as type integer[3]. + var v = [1, 2, 3]; + + // w is inferred as type real[2, 2]. + var w = [[1.0, 2.0], [3.0, 4.0]]; + + // x is inferred as type integer[*]. + var integer[*] dyn = [1, 2, 3, 4, 5]; + var x = [...dyn]; + +.. 
_sssec:array_constr: + +Construction +~~~~~~~~~~~~ + +An array value is constructed using a comma-separated list of expressions +within square brackets. All elements must share a common promotable type. +The element type of an unspecified array is the top-most type in the type +hierarchy that elements can be _implicitly_ promoted to. Any other unpromotable +types will result in a compile-time type error. + +:: + + [1, 2, 3] // An integer array + [1, 2.5, 3] // A real array (integer 1 is promoted) + [(1, true), (2, false)] // An array of tuples + [1, [2, 3], [4, 5, 6]] // A ragged integer array integer[3,*] + +*Gazprea* supports empty array literals (``[]``). An empty literal can only be +used to initialize a dynamically-sized array, as a static array's size must be +known. + +:: + + var integer[*] a: = []; // Legal + var integer[5] b: = []; // Illegal: size mismatch + +.. _sssec:array_spread: + +Spread Operator +~~~~~~~~~~~~~~~ + +The spread operator (``...``) provides a concise, declarative way to construct +a new array by unpacking elements from existing arrays. It can be used multiple +times within an array literal and can be combined with other elements. + +The spread operator is a syntactic feature **exclusive** to array literals. +It is +evaluated left-to-right. + +:: + + var integer[2] a = [1, 2]; + var integer[3] b = [3, 4, 5]; + + // c becomes [0, 1, 2, 3, 4, 5, 6] + var integer[7] c = [0, ...a, ...b, 6]; + +When constructing a static array, the compiler must be able to verify the final +size at compile time. Spreading a dynamic array into a static array is a +compile-time size error. See :ref:`sec:constexpr` for more details. + +.. _sssec:array_ops: + +Operations +~~~~~~~~~~ + +#. Indexing and Slicing + + - **Indexing:** Elements of an N-dimensional array are accessed using a + comma-separated list of 1-based integer indices. Negative indices count + from the end of a dimension. 
+ - **Slicing (Deep Copy):** A slice expression creates a **new, independent + array** by performing a **deep copy** of a segment of an existing array. + The resulting array has its own memory, and modifications to it will + never affect the original array. This behavior is consistent with + *Gazprea*'s rule that all assignments are deep copies. + + A slice expression is an **r-value**, meaning it produces a value and + cannot be the target of an assignment. For N-D arrays, slicing is only + permitted on the last dimension. + + :: + + var integer[5] a = [10, 20, 30, 40, 50]; + + // Legal: Create a new array 'b' from a slice of 'a'. + var integer[3] b = a[2..5]; // b is [20, 30, 40] + + // 'b' is independent of 'a'. + b[1] = 99; // 'a' remains [10, 20, 30, 40, 50] + + // Illegal: A slice is not an l-value and cannot be assigned to. + a[1..3] = [1, 2]; // COMPILE-TIME ERROR + +#. shape + + The built-in function ``shape`` returns the shape of an array as a + dynamically-sized integer array (``integer[*]``). + + For jagged arrays, ``shape`` returns the shape of the longest non-jagged + (rectangular) prefix of the array's dimensions. To get the shape of a + specific inner array, it must be indexed directly. + + :: + + var integer[10] a; + shape(a) // returns [10] + + var real[3, 4] b; + shape(b) // returns [3, 4] + + var character[5, *] c; + shape(c) // returns [5] + + var integer[2, 3, *] d; + shape(d) // returns [2, 3] + + // To get the size of an inner array of c: + shape(c[1]) // returns [N] where N is the size of the first inner array + +#. Concatenation (``||``) + + The ``||`` operator concatenates two arrays. This operation is primarily + useful for **dynamically-sized arrays**. + + :: + + var integer[*] a = [1, 2]; + a = a || [3, 4]; // a is now [1, 2, 3, 4] + + Attempting to reassign the result of a concatenation to a static array will + result in a ``SizeError`` if the new size does not match the declared size. 
+ The :ref:`spread operator ` is the preferred method for + composition. Note that working with a dynamically-sized array implies that + the size check must be performed at runtime. + +#. Element-wise Operations and Broadcasting + + Unary and binary operations (e.g., ``not``, ``+``, ``-``, ``*``) can be applied + element-wise to arrays. + + - For operations between two arrays, their dimensions must be compatible. + - For operations between an array and a scalar, the scalar is **broadcast** + across the array. + + *Gazprea* follows a simple "trailing dimensions" rule for broadcasting: an + array ``A`` can be broadcast over array ``B`` if ``A``'s dimensions are a suffix + of ``B``'s dimensions. + + :: + + var integer[3, 4] m = ...; + var integer[4] v = [1, 2, 3, 4]; + var s = 10; + + var r1 = m + s; // Legal: scalar broadcast + var r2 = m + v; // Legal: [4] is a suffix of [3, 4]. v is added to each row. + + var integer[3] v2; + var r3 = m + v2; // Illegal: [3] is not a suffix of [3, 4]. + + The equality operators ``==`` and ``!=`` are an exception. They perform a + deep, element-wise comparison and return a single ``boolean`` value. + + These element-wise operations are fully supported for dynamic arrays where the + shape is regular (e.g., ``integer[*]``, ``integer[*, 5]``). Compatibility + checks are performed at **runtime**, and a ``SizeError`` will be thrown if + the shapes are incompatible. + + However, element-wise operations are **disallowed** on any array that has a + jagged dimension (e.g., ``integer[5, *]``). This restriction exists because + the operation is ambiguous when inner arrays have different lengths. This is + a compile-time error. To perform an operation on a jagged array, the + programmer must do so explicitly by iterating over its elements. 
+ + :: + + // Legal operation on a dynamic array + var integer[*, 5] dyn; + var res = dyn + 5; + + // Illegal operation on a jagged array + var integer[5, *] jagged; + var err = jagged + 5; // Compile-time TypeError or ShapeError + diff --git a/gazprea-clean/spec/types/boolean.rst b/gazprea-clean/spec/types/boolean.rst new file mode 100644 index 0000000..aa3a00d --- /dev/null +++ b/gazprea-clean/spec/types/boolean.rst @@ -0,0 +1,80 @@ +.. _ssec:boolean: + +Boolean +------- + +A ``boolean`` is either ``true`` or ``false``. A ``boolean`` can be +represented by an ``i1`` in *MLIR*. + +.. _sssec:boolean_decl: + +Declaration +~~~~~~~~~~~ + +A ``boolean`` value is declared with the keyword ``boolean``. +If the variable is not initialized explicitly, it is set to ``false`` (zero). + +.. _sssec:boolean_lit: + +Literals +~~~~~~~~ + +The following are the only two valid ``boolean`` literals: + +- ``true`` + +- ``false`` + +.. _sssec:boolean_ops: + +Operations +~~~~~~~~~~ + +The following operations are defined on ``boolean`` values. In all +of the usage examples ``bool-expr`` means some ``boolean`` yielding +expression. + +============= ========== =========================== ================= +**Operation** **Symbol** **Usage** **Associativity** +============= ========== =========================== ================= +parenthesis ``()`` ``(bool-expr)`` N/A +negation ``not`` ``not bool-expr`` right +logical or ``or`` ``bool-expr or bool-expr`` left +logical xor ``xor`` ``bool-expr xor bool-expr`` left +logical and ``and`` ``bool-expr and bool-expr`` left +equals ``==`` ``bool-expr == bool-expr`` left +not equals ``!=`` ``bool-expr != bool-expr`` left +============= ========== =========================== ================= + +Unlike many languages the ``and`` and ``or`` operators do not `short +circuit +evaluation `__. +Therefore, both the left hand side and right hand side of an expression +must always be evaluated. + +This table specifies ``boolean`` operator precedence. 
Operators without +lines between them have the same level of precedence. + ++----------------+---------------+ +| **Precedence** | **Operation** | ++================+===============+ +| HIGHER | ``not`` | ++----------------+---------------+ +| | ``==`` | +| | | +| | ``!=`` | ++----------------+---------------+ +| | ``and`` | ++----------------+---------------+ +| | ``or`` | +| | | +| LOWER | ``xor`` | ++----------------+---------------+ + + +Type Casting and Type Promotion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To see the types that ``boolean`` may be cast and/or promoted to, see +the sections on :ref:`sec:typeCasting` and :ref:`sec:typePromotion` +respectively. diff --git a/gazprea-clean/spec/types/character.rst b/gazprea-clean/spec/types/character.rst new file mode 100644 index 0000000..8fc00a4 --- /dev/null +++ b/gazprea-clean/spec/types/character.rst @@ -0,0 +1,82 @@ +.. _ssec:character: + +Character +--------- + +A ``character`` is a signed 8-bit value. A ``character`` can be +represented by an ``i8`` in *MLIR*. + +.. _sssec:character_decl: + +Declaration +~~~~~~~~~~~ + +A ``character`` value is declared with the keyword ``character``. + +.. _sssec:character_lit: + +Literals and Escape Sequences +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A ``character`` literal is written in the same manner as *C99*: a single +character enclosed in single quotes. For example: + +:: + + 'a' + 'b' + 'A' + '1' + '.' + '*' + +As in *C99*, *Gazprea* supports character escape sequences for common +characters. 
For example: + +:: + + '\0' + '\n' + +The following escape sequences are supported by *Gazprea*: + +=============== =================== =============== +**Description** **Escape Sequence** **Value (Hex)** +=============== =================== =============== +Null ``\0`` ``0x00`` +Bell ``\a`` ``0x07`` +Backspace ``\b`` ``0x08`` +Tab ``\t`` ``0x09`` +Line Feed ``\n`` ``0x0A`` +Carriage Return ``\r`` ``0x0D`` +Quotation Mark ``\"`` ``0x22`` +Apostrophe ``\'`` ``0x27`` +Backslash ``\\`` ``0x5C`` +=============== =================== =============== + +.. _sssec:character_ops: + +Operations +~~~~~~~~~~ + +The following operations are defined between ``character`` values. + ++------------+--------------------------+------------+---------------------------+-------------------+ +| **Class** | **Operation** | **Symbol** | **Usage** | **Associativity** | ++============+==========================+============+===========================+===================+ +| Grouping | parentheses | ``()`` | ``(character)`` | N/A | ++------------+--------------------------+------------+---------------------------+-------------------+ +| Comparison | equals | ``==`` | ``character == character``| left | +| +--------------------------+------------+---------------------------+-------------------+ +| | not equals | ``!=`` | ``character != character``| left | ++------------+--------------------------+------------+---------------------------+-------------------+ + +Scalar values with type ``character`` may be concatenated onto +variables with type ``string`` or arrays with type ``character``. + +Type Casting and Type Promotion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To see the types that ``character`` may be cast and/or promoted to, see +the sections on :ref:`sec:typeCasting` and :ref:`sec:typePromotion` +respectively. 
diff --git a/gazprea-clean/spec/types/integer.rst b/gazprea-clean/spec/types/integer.rst new file mode 100644 index 0000000..094669e --- /dev/null +++ b/gazprea-clean/spec/types/integer.rst @@ -0,0 +1,120 @@ +.. _ssec:integer: + +Integer +------- + +An ``integer`` is a signed 32-bit value. An ``integer`` can be +represented by an ``i32`` in *MLIR*. + +.. _sssec:integer_decl: + +Declaration +~~~~~~~~~~~ + +An ``integer`` value is declared with the keyword ``integer``. + +.. _sssec:integer_lit: + +Literals +~~~~~~~~ + +An ``integer`` literal is specified in base 10. For example: + +:: + + 1234 + 2 + 0 + +.. _sssec:integer_ops: + +Operations +~~~~~~~~~~ + +The following operations are defined between ``integer`` values. In all +of the usage examples ``int-expr`` means some ``integer`` yielding +expression. + ++------------+--------------------------+------------+--------------------------+-------------------+ +| **Class** | **Operation** | **Symbol** | **Usage** | **Associativity** | ++============+==========================+============+==========================+===================+ +| Grouping | parentheses | ``()`` | ``(int-expr)`` | N/A | ++------------+--------------------------+------------+--------------------------+-------------------+ +| Arithmetic | addition | ``+`` | ``int-expr + int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | subtraction | ``-`` | ``int-expr - int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | multiplication | ``*`` | ``int-expr * int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | division | ``/`` | ``int-expr / int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | remainder | ``%`` | ``int-expr % int-expr`` | left | +| 
+--------------------------+------------+--------------------------+-------------------+ +| | exponentiation | ``^`` | ``int-expr ^ int-expr`` | right | +| +--------------------------+------------+--------------------------+-------------------+ +| | unary negation | ``-`` | ``- int-expr`` | right | +| +--------------------------+------------+--------------------------+-------------------+ +| | unary plus (no-op) | ``+`` | ``+ int-expr`` | right | ++------------+--------------------------+------------+--------------------------+-------------------+ +| Comparison | less than | ``<`` | ``int-expr < int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | greater than | ``>`` | ``int-expr > int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | less than or equal to | ``<=`` | ``int-expr <= int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | greater than or equal to | ``>=`` | ``int-expr >= int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | equals | ``==`` | ``int-expr == int-expr`` | left | +| +--------------------------+------------+--------------------------+-------------------+ +| | not equals | ``!=`` | ``int-expr != int-expr`` | left | ++------------+--------------------------+------------+--------------------------+-------------------+ + +Unary negation produces the additive inverse of the ``integer`` +expression. Unary plus always produces the same result as the +``integer`` expression it is applied to. Remainder mirrors the behaviour +of remainder in *C99*. + +This table specifies ``integer`` operator precedence. Operators without +lines between them have the same level of precedence. 
Note that +parentheses are not included in this list because they are used to +override precedence and create new atoms in an expression. + ++----------------+----------------+ +| **Precedence** | **Operations** | ++================+================+ +| HIGHER | ``unary +`` | +| | | +| | ``unary -`` | ++----------------+----------------+ +| | ``^`` | ++----------------+----------------+ +| | ``*`` | +| | | +| | ``/`` | +| | | +| | ``%`` | ++----------------+----------------+ +| | ``+`` | +| | | +| | ``-`` | ++----------------+----------------+ +| | ``<`` | +| | | +| | ``>`` | +| | | +| | ``<=`` | +| | | +| | ``>=`` | ++----------------+----------------+ +| | ``==`` | +| | | +| LOWER | ``!=`` | ++----------------+----------------+ + + +Type Casting and Type Promotion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To see the types that ``integer`` may be cast and/or promoted to, see +the sections on :ref:`sec:typeCasting` and :ref:`sec:typePromotion` +respectively. diff --git a/gazprea-clean/spec/types/real.rst b/gazprea-clean/spec/types/real.rst new file mode 100644 index 0000000..45d0b78 --- /dev/null +++ b/gazprea-clean/spec/types/real.rst @@ -0,0 +1,67 @@ +.. _ssec:real: + +Real +---- + +A ``real`` is an IEEE 754 32-bit floating point value. A ``real`` can be +represented by a ``f32`` in *MLIR*. + +.. _sssec:real_decl: + +Declaration +~~~~~~~~~~~ + +A ``real`` value is declared with the keyword ``real``. + +.. _sssec:real_lit: + +Literals +~~~~~~~~ + +A ``real`` literal can be specified in several ways. A leading zero is +not necessary and can be inferred from a leading decimal point. Likewise, +a trailing zero is not necessary and can be inferred from a trailing +decimal point. However, at least one digit must be present in order to be +parsed. For example: + +:: + + 42.0 + 42. + 4.2 + 0.42 + .42 + . // Illegal. 
+ +A ``real`` literal can also be created by any valid ``real`` or +``integer`` literal followed by scientific notation indicated by the +character ``e`` or ``E`` and another valid ``integer`` literal. Scientific notation +multiplies the first literal by :math:`{10}^{x}`. For example, +:math:`4.2\mathrm{e}{-3}=4.2 \times10^{-3}`. For example: + +:: + + 4.2e-1 + 4.2e+9 + 4.2E5 + 42.e+37 + .42e-7 + 42E6 + +.. _sssec:real_ops: + +Operations +~~~~~~~~~~ + +Floating point operations and precedence are equivalent to :ref:`integer operation and precedence `. + +Operations on real numbers should adhere to the IEEE 754 spec with +regards to the representation of not-a-number(NaNs), infinity(infs), and +zeros. + +Type Casting and Type Promotion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To see the types that ``real`` may be cast and/or promoted to, see +the sections on :ref:`sec:typeCasting` and :ref:`sec:typePromotion` +respectively. diff --git a/gazprea-clean/spec/types/string.rst b/gazprea-clean/spec/types/string.rst new file mode 100644 index 0000000..a89185c --- /dev/null +++ b/gazprea-clean/spec/types/string.rst @@ -0,0 +1,94 @@ +.. _ssec:string: + +String +------ + +A ``string`` is another object within *Gazprea*. Fundamentally, a ``string`` is +a dynamic-sized ``array`` of ``character``. There are several +additional behaviours that are unique to strings: +an :ref:`extra literal style `, +the :ref:`result of a concatenation ` +and :ref:`behaviour when sent to an output stream `. + +.. _sssec:string_decl: + +Declaration +~~~~~~~~~~~ + +A string may be declared with the keyword ``string``. The same rules of +:ref:`dynamic array declarations ` also apply to strings, +which means +that all lengths are inferred: + +:: + + string = ; + +.. _sssec:string_lit: + +Literals +~~~~~~~~ + +Strings can be constructed in the same way as arrays using character literals. +*Gazprea* also provides a special syntax for string literals. 
A string literal +is any sequence of character literals (including escape sequences) in between +double quotes. For instance: + +:: + + string cats_meow = "The cat said \"Meow!\"\nThat was a good day.\n" + +Although strings and character arrays look similar, they are still treated +differently by the compiler: + +:: + + character[*] carray = ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '\n']; + string vec = carray; + carray -> std_output; + vec -> std_output; + +prints: + +:: + + [h e l l o w o r l d + ] + hello world + + +.. _sssec:string_ops: + +Operations +~~~~~~~~~~ + +As character vectors, strings have all of the same operations defined on them as +the other array data types. +Remember that because a ``string`` and vector of ``character`` are fundamentally +the same, the concatenation operation may be used to concatenate values of the +two types. You may also append a slice of characters to a string using the +append method. +As well, a scalar character may be concatenated onto a string in the same way +as it would be concatenated onto an array of characters. +Note that because a ``string`` is a type of ``vector``, concatenation may also +be accomplished with ``concat`` and ``push`` methods: + +:: + + var string letters = ['a', 'b'] || "cd"; + letters.concat("ef"); + letters.push('g'); + letters -> std_output; + +prints the following: + +:: + + abcdefg + + +Type Casting and Type Promotion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To see the types that ``string`` may be cast and/or promoted to, see the +sections on :ref:`sec:typeCasting` and :ref:`sec:typePromotion` respectively. diff --git a/gazprea-clean/spec/types/tuple.rst b/gazprea-clean/spec/types/tuple.rst new file mode 100644 index 0000000..cdd4417 --- /dev/null +++ b/gazprea-clean/spec/types/tuple.rst @@ -0,0 +1,181 @@ +.. _ssec:tuple: + +Tuples +------ + +A ``tuple`` is an ordered collection of values that groups multiple, potentially +different, types into a single compound value. 
+ +The fields within a tuple can be anonymous or can be given explicit names. This +allows tuples to be used as simple, lightweight collections or as more descriptive, +self-documenting data structures. + +.. _sssec:tuple_structural: + +Structural Tuples +~~~~~~~~~~~~~~~~~ + +By default, all tuple literals create **structural types**. A tuple's type is +uniquely defined by the sequence of its field types and their corresponding names. +The order of fields is significant, so two tuples `(integer a, real b)` and +`(real a, integer b)` are not equivalent. + +**Type Identity** + +The names of the fields are a part of the type. Therefore, two tuples with +different field names are considered different types, even if their underlying +member types are the same. + +:: + + // These three variables all have different, incompatible tuple types. + var tuple (integer, real) a = (1, 2.0); + var tuple (integer x, real y) b = (x: 1, y: 2.0); + var tuple (integer a, real b) c = (a: 1, b: 2.0); + +**Literals** + +A tuple literal is constructed by grouping values together between parentheses +in a comma-separated list. Field names are optional and are specified with a +colon (`:`) after the name. + +:: + + // A literal of type (integer, character, boolean) + (1, 'a', true) + + // A literal of type (integer x, real y) + (x: 10, y: 3.14) + + // A literal of type (integer status, boolean) + (status: 200, false) + +Duplicate field names within a single tuple literal are not allowed and will +result in a compile-time error. + +**Access** + +Fields in a tuple are accessed using dot notation (`.`). Gazprea supports dual +access for named fields: + +1. **By Index:** All fields can be accessed by their 1-based integer index. +2. **By Name:** If a field is named, it can also be accessed by its name. 
+ +:: + + var point = (x: 10, y: 20); + + // Access by index + point.1 -> std_output; // Prints 10 + point.2 = 30; // Modify the second field + + // Access by name + point.x -> std_output; // Prints 10 + point.y = 40; // Modify the field named 'y' + +.. _sssec:tuple_nominal: + +Nominal Tuple Types +~~~~~~~~~~~~~~~~~~~ + +For stricter type safety, a tuple structure can be used to define a new +**nominal type** using the ``type`` keyword. A nominal type is distinct from all +other types, including structural tuples that have the exact same definition. + +**Definition and Construction** + +A nominal type is defined at the global scope. Instances of the type are created +using a constructor-like syntax where the type's name is used like a function. + +:: + + // Define a new, unique 'Point' type + type Point = (integer x, integer y); + + // Construct an instance of the Point type + var my_point = Point(x: 100, y: 200); + + // This is a type error, because Point and the structural tuple are not compatible + // var another: Point = (x: 1, y: 2); // ILLEGAL + +**Access** + +Access for nominal types works identically to structural tuples, allowing access +by index or by name. + +:: + + my_point.x = 150; + my_point.2 -> std_output; // Prints 200 + +.. _sssec:tuple_ops: + +Operations +~~~~~~~~~~ + +**Comparison** + +The equality (`==`) and inequality (`!=`) operators are defined for tuples. +Two tuples are considered equal if and only if: +1. They have a compatible type. For structural tuples, this means their type + signatures (field types, names, and order) are identical. For nominal + types, both must be of the same nominal type. +2. All corresponding fields are pairwise equal. 
+ +:: + + var p1 = (x: 1, y: 2); + var p2 = (x: 1, y: 2); + var p3 = (a: 1, b: 2); + + p1 == p2; // true + p1 == p3; // false (incompatible types) + + type Point = (integer x, integer y); + var n1 = Point(x: 1, y: 2); + var n2 = Point(x: 1, y: 2); + + n1 == n2; // true + n1 == p1; // false (incompatible types: nominal vs. structural) + +.. _sssec:tuple_casting: + +Type Casting and Promotion +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**No Implicit Promotion** + +Gazprea does not support implicit promotion or conversion between different +tuple types. If two tuple types are not identical, they are incompatible. + +:: + + var unnamed: (integer, integer) = (1, 2); + var named: (integer x, integer y); + + named = unnamed; // ILLEGAL: types are not identical. + +**Explicit Casting with `as<>`** + +The `as<>` operator must be used to explicitly convert between compatible tuple +types. This is the only mechanism to: +1. Convert between different structural tuple types. +2. Convert a nominal tuple to its underlying structural type (or vice-versa). + +The cast is only valid if the fields of the source tuple can be pairwise cast +to the fields of the destination type. + +:: + + // 1. Cast a structural literal to a named structural type + var named: (integer x, integer y) = as<(integer x, integer y)>((1, 2)); + + // 2. Cast between compatible nominal and structural types + type Point = (integer x, integer y); + var my_point = Point(x: 10, y: 20); + + // Cast from nominal to structural to call a generic procedure + var structural_point = as<(integer x, integer y)>(my_point); + + // 3. 
Cast from a structural literal to a nominal type + var another_point = as<Point>((x: 1, y: 2)); \ No newline at end of file diff --git a/gazprea_specification_issues.md b/gazprea_specification_issues.md index 3280179..17a9ee9 100644 --- a/gazprea_specification_issues.md +++ b/gazprea_specification_issues.md @@ -9,7 +9,7 @@ This report analyzes the Gazprea language specification from the perspective of a student attempting to implement the language for the first time. After comprehensively reviewing all 26 specification files, I identified **8 major categories of issues** that would significantly hinder implementation efforts: 1. **Inconsistent Terminology** - Same concepts described differently across files -2. **Missing Critical Definitions** - Key implementation details left undefined +2. **Missing Critical Definitions** - Key implementation details left undefined 3. **Contradictory Rules** - Conflicting statements about language behavior 4. **Unclear Precedence/Evaluation Order** - Ambiguous execution semantics 5. **Missing Edge Cases** - Insufficient coverage of boundary conditions @@ -30,19 +30,24 @@ These issues range from minor terminological inconsistencies to fundamental ambi #### Examples: **Matrix vs Multi-dimensional Array** -- `types/matrix.rst` describes matrices as "2D arrays" +- `types/matrix.rst` describes matrices as "2D arrays" - `types/array.rst` discusses "multi-dimensional arrays" - `type_promotion.rst` mentions "multi-dimensional array promotion" - **Problem**: Unclear if matrices are a special case of multi-dimensional arrays or a distinct type +- **Solution**: Matrices are N-D arrays with N=2. They are a simple extension + to the standard array terminology + **String Case Inconsistency** - `types/string.rst` uses both "String" and "string" - Keywords list includes "string" (lowercase) - **Problem**: Case sensitivity unclear - are these the same type? 
+- **Solution**: `string` is the correct keyword **Vector Capitalization** - Sometimes "Vector", sometimes "vector" - Method syntax suggests object-oriented features inconsistent with rest of language +- **Solution**: Eliminate vector in favour of the alternative syntax **Student Impact**: A student would waste time trying to understand if these are different features or just documentation inconsistencies. @@ -56,11 +61,15 @@ These issues range from minor terminological inconsistencies to fundamental ambi - `declarations.rst`: "All declarations must appear at the beginning of the block" - **Missing**: What exactly constitutes "beginning"? Can there be empty statements? Comments? - **Student Impact**: Cannot write a parser without knowing exactly what's allowed +- **Solution**: "Any and all declaration statements must be the first statements + in a given block." Open question: does this apply to globals? **Memory Management Model** - `built_in_functions.rst`: `format()` returns a string - **Missing**: Who manages this memory? When is it freed? - **Student Impact**: Cannot implement memory-safe code generation +- **Solution**: Strings are freed when they go out of scope. Since strings + are just variable-length arrays of characters, they are treated the same way as arrays **Constant Folding in Typedef** - `typedef.rst`: "Parameterized expressions (constant folding)" @@ -70,6 +79,7 @@ These issues range from minor terminological inconsistencies to fundamental ambi **Array Size Limits** - No mention of maximum array sizes or memory limits - **Student Impact**: Cannot implement bounds checking or prevent memory exhaustion +- **Solution**: Gazprea is, in essence, a 32-bit language ### 3. Contradictory Rules @@ -79,8 +89,9 @@ These issues range from minor terminological inconsistencies to fundamental ambi **Matrix Indexing Syntax** - `types/matrix.rst` examples show `M[i][j]` syntax -- Some examples use `M[i, j]` syntax +- Some examples use `M[i, j]` syntax - **Problem**: Which is correct? Are both supported? 
+- **Solution**: We should switch back to `M[i, j]` **Vector Methods vs Functions** - `types/vector.rst`: "Methods: `push()`, `len()`, `append()`" @@ -124,22 +135,28 @@ These issues range from minor terminological inconsistencies to fundamental ambi **Empty Array Literals** - No mention of how `[]` should be typed - **Problem**: What type does an empty array have? +- **Solution**: Whatever type it is declared to have. Empty arrays cannot be created literally. +- **Open question**: Does type inference fail here? **Negative Range Bounds** - Range operator `..` defined for positive bounds - **Missing**: What does `5..1` mean? Error or empty range? +- **Solution**: Empty range **Integer Overflow/Underflow** - `types/integer.rst`: 32-bit signed integers - **Missing**: Behavior on overflow (wrap, error, undefined)? +- **Solution**: Should produce an error **NaN Propagation in Real Arithmetic** - `types/real.rst`: IEEE 754 compliance mentioned - **Missing**: Specific rules for NaN handling in operations +- **Open question**: Is arithmetic involving NaN undefined behaviour (UB)? **Zero-Length Slices** - Array slicing syntax defined - **Missing**: What does `arr[5..4]` return? +- **Proposed solution**: An empty array **Student Impact**: Must guess at edge case behavior, leading to unpredictable implementations. @@ -221,7 +238,7 @@ These issues range from minor terminological inconsistencies to fundamental ambi #### Strong Points: 1. **Type Safety**: Clear emphasis on type checking and safety -2. **Functional Purity**: Well-defined restrictions on functions vs procedures +2. **Functional Purity**: Well-defined restrictions on functions vs procedures 3. **Operator Precedence**: Complete precedence tables provided 4. **I/O Model**: Stream-based I/O is well-specified 5. **Array Operations**: Rich set of array operations with good examples @@ -352,7 +369,7 @@ The Gazprea language specification shows thoughtful design for a functional lang The most critical issues are: 1. **Contradictory syntax rules** that make parsing impossible -2. 
**Missing semantic definitions** that prevent correct code generation +2. **Missing semantic definitions** that prevent correct code generation 3. **Inconsistent terminology** that creates confusion about language features **Priority for fixes:** @@ -367,4 +384,4 @@ With these improvements, the specification would provide a solid foundation for **Generated by**: Claude Code Analysis **Date**: July 16, 2025 -**Files Analyzed**: 26 specification files in `/gazprea/spec/` directory \ No newline at end of file +**Files Analyzed**: 26 specification files in `/gazprea/spec/` directory