# Interactive read-eval-print loop.
#
# Each input line is parsed into a syntax tree, bound into a bound-expression
# tree and, when neither stage reported a diagnostic, evaluated and printed.
# An empty line (or end of input) ends the session.  Two meta commands are
# recognised:
#   #showtree  toggles printing of the parse tree
#   #cls       clears the terminal
while True:
    try:
        line = input("» ")
    except EOFError:
        # Ctrl-D / Ctrl-Z or a closed stdin ends the session quietly instead
        # of surfacing a traceback.
        break

    if not line:
        break

    if line == "#showtree":
        SHOW_TREE = not SHOW_TREE
        print("Showing parser trees" if SHOW_TREE else "Not showing parser trees")
        continue
    if line == "#cls":
        # 'cls' only exists on Windows; other platforms use 'clear'.
        os.system('cls' if os.name == 'nt' else 'clear')
        continue

    syntax_tree = SyntaxTree.parse(line)
    binder = Binder()
    bound_expression = binder.bind_expression(syntax_tree.root)

    # Parser and binder diagnostics are reported together; evaluation only
    # happens when both stages were clean.
    diagnostics = syntax_tree.diagnostics + binder.diagnostics

    if SHOW_TREE:
        pretty_print(syntax_tree.root)

    if not diagnostics:
        evaluator = Evaluator(bound_expression)
        print(evaluator.evaluate())
    else:
        for _diagnostic in diagnostics:
            print(Fore.RED + _diagnostic)
a/bagel/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/bagel/__pycache__/__main__.cpython-310.pyc b/bagel/__pycache__/__main__.cpython-310.pyc deleted file mode 100644 index 686f4d5..0000000 Binary files a/bagel/__pycache__/__main__.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/binary_expression_syntax.cpython-310.pyc b/bagel/codeanalysis/__pycache__/binary_expression_syntax.cpython-310.pyc deleted file mode 100644 index d05ef08..0000000 Binary files a/bagel/codeanalysis/__pycache__/binary_expression_syntax.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/evaluator.cpython-310.pyc b/bagel/codeanalysis/__pycache__/evaluator.cpython-310.pyc deleted file mode 100644 index 359b515..0000000 Binary files a/bagel/codeanalysis/__pycache__/evaluator.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/expression_syntax.cpython-310.pyc b/bagel/codeanalysis/__pycache__/expression_syntax.cpython-310.pyc deleted file mode 100644 index 371e69e..0000000 Binary files a/bagel/codeanalysis/__pycache__/expression_syntax.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/lexer.cpython-310.pyc b/bagel/codeanalysis/__pycache__/lexer.cpython-310.pyc deleted file mode 100644 index fc00f64..0000000 Binary files a/bagel/codeanalysis/__pycache__/lexer.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/number_expression_syntax.cpython-310.pyc b/bagel/codeanalysis/__pycache__/number_expression_syntax.cpython-310.pyc deleted file mode 100644 index 2f28dfa..0000000 Binary files a/bagel/codeanalysis/__pycache__/number_expression_syntax.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/parenthesized_expression_syntax.cpython-310.pyc b/bagel/codeanalysis/__pycache__/parenthesized_expression_syntax.cpython-310.pyc deleted file mode 100644 index f32af5d..0000000 Binary files 
a/bagel/codeanalysis/__pycache__/parenthesized_expression_syntax.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/parser.cpython-310.pyc b/bagel/codeanalysis/__pycache__/parser.cpython-310.pyc deleted file mode 100644 index f117d04..0000000 Binary files a/bagel/codeanalysis/__pycache__/parser.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/syntaxkind.cpython-310.pyc b/bagel/codeanalysis/__pycache__/syntaxkind.cpython-310.pyc deleted file mode 100644 index dcf560e..0000000 Binary files a/bagel/codeanalysis/__pycache__/syntaxkind.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/syntaxnode.cpython-310.pyc b/bagel/codeanalysis/__pycache__/syntaxnode.cpython-310.pyc deleted file mode 100644 index cf2ffde..0000000 Binary files a/bagel/codeanalysis/__pycache__/syntaxnode.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/syntaxtoken.cpython-310.pyc b/bagel/codeanalysis/__pycache__/syntaxtoken.cpython-310.pyc deleted file mode 100644 index a0ca05a..0000000 Binary files a/bagel/codeanalysis/__pycache__/syntaxtoken.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/__pycache__/syntaxtree.cpython-310.pyc b/bagel/codeanalysis/__pycache__/syntaxtree.cpython-310.pyc deleted file mode 100644 index be2bc90..0000000 Binary files a/bagel/codeanalysis/__pycache__/syntaxtree.cpython-310.pyc and /dev/null differ diff --git a/bagel/codeanalysis/binding/__init__.py b/bagel/codeanalysis/binding/__init__.py new file mode 100644 index 0000000..6839e2a --- /dev/null +++ b/bagel/codeanalysis/binding/__init__.py @@ -0,0 +1,9 @@ +from .boundnode import BoundNode +from .boundnodekind import BoundNodeKind +from .boundexpression import BoundExpression +from .boundliteralexpression import BoundLiteralExpression +from .boundunaryoperatorkind import BoundUnaryOperatorKind +from .boundunaryexpression import BoundUnaryExpression +from .boundbinaryoperatorkind 
import BoundBinaryOperatorKind +from .boundbinaryexpression import BoundBinaryExpression +from .binder import Binder diff --git a/bagel/codeanalysis/binding/binder.py b/bagel/codeanalysis/binding/binder.py new file mode 100644 index 0000000..9cf4383 --- /dev/null +++ b/bagel/codeanalysis/binding/binder.py @@ -0,0 +1,88 @@ +from typing import List + +from ..syntax.binary_expression_syntax import BinaryExpressionSyntax +from ..syntax.expression_syntax import ExpressionSyntax +from ..syntax.literal_expression_syntax import LiteralExpressionSyntax +from ..syntax.syntaxkind import SyntaxKind +from ..syntax.unary_expression_syntax import UnaryExpressionSyntax + +from .boundexpression import BoundExpression +from .boundbinaryexpression import BoundBinaryExpression +from .boundunaryexpression import BoundUnaryExpression +from .boundliteralexpression import BoundLiteralExpression +from .boundunaryoperatorkind import BoundUnaryOperatorKind +from .boundbinaryoperatorkind import BoundBinaryOperatorKind + + +class Binder: + _diagnostics: List[str] + + def __init__(self): + self._diagnostics = [] + + @property + def diagnostics(self) -> List[str]: + return self._diagnostics + + def bind_expression(self, syntax: ExpressionSyntax) -> BoundExpression: + match syntax.kind: + case SyntaxKind.LITERALEXPRESSION: + return self.bind_literal_expression(syntax) + case SyntaxKind.UNARYEXPRESSION: + return self.bind_unary_expression(syntax) + case SyntaxKind.BINARYEXPRESSION: + return self.bind_binary_expression(syntax) + case _: + raise Exception(f"Unexpected syntax {syntax.kind}") + + def bind_literal_expression(self, syntax: LiteralExpressionSyntax) -> BoundLiteralExpression: + value = syntax.value if syntax.value != None else 0 + return BoundLiteralExpression(value) + + def bind_unary_expression(self, syntax: UnaryExpressionSyntax) -> BoundUnaryExpression: + bound_operand = self.bind_expression(syntax.operand) + bound_operator_kind = 
self.bind_unary_operator_kind(syntax.operator_token.kind, bound_operand.type) + + if bound_operator_kind is None: + self._diagnostics.append(f"Unary operator {syntax.operator_token.text} is not defined for type {bound_operand.type}.") + return bound_operand + + return BoundUnaryExpression(bound_operator_kind, bound_operand) + + def bind_binary_expression(self, syntax: BinaryExpressionSyntax) -> BoundBinaryExpression: + bound_left = self.bind_expression(syntax.left) + bound_right = self.bind_expression(syntax.right) + bound_operator_kind = self.bind_binary_operator_kind(syntax.operator_token.kind, bound_left.type, bound_right.type) + if bound_operator_kind is None: + self._diagnostics.append(f"Binary operator {syntax.operator_token.text} is not defined for types {bound_left.type} and {bound_right.type}.") + return bound_left + + return BoundBinaryExpression(bound_left, bound_operator_kind, bound_right) + + def bind_unary_operator_kind(self, kind: SyntaxKind, operand_type: int) -> BoundUnaryOperatorKind | None: + if operand_type != int: + return + + match kind: + case SyntaxKind.PLUSTOKEN: + return BoundUnaryOperatorKind.IDENTITY + case SyntaxKind.MINUSTOKEN: + return BoundUnaryOperatorKind.NEGATION + case _: + raise Exception(f"Unexpected unary operator kind {kind}") + + def bind_binary_operator_kind(self, kind: SyntaxKind, left_type: int, right_type: int) -> BoundBinaryOperatorKind | None: + if left_type != int or right_type != int: + return + + match kind: + case SyntaxKind.PLUSTOKEN: + return BoundBinaryOperatorKind.ADDITION + case SyntaxKind.MINUSTOKEN: + return BoundBinaryOperatorKind.SUBTRACTION + case SyntaxKind.STARTOKEN: + return BoundBinaryOperatorKind.MULTIPLICATION + case SyntaxKind.SLASHTOKEN: + return BoundBinaryOperatorKind.DIVISION + case _: + raise Exception(f"Unexpected binary operator kind {kind}") diff --git a/bagel/codeanalysis/binding/boundbinaryexpression.py b/bagel/codeanalysis/binding/boundbinaryexpression.py new file mode 100644 index 
class BoundBinaryExpression(BoundExpression):
    """Bound node for ``left <operator> right``."""

    _left: BoundExpression
    # Binary (not unary) operator kind, matching what __init__ receives.
    _operator_kind: BoundBinaryOperatorKind
    _right: BoundExpression

    def __init__(self, left: BoundExpression, operator_kind: BoundBinaryOperatorKind, right: BoundExpression):
        self._left = left
        self._operator_kind = operator_kind
        self._right = right

    @property
    def kind(self) -> BoundNodeKind:
        """Always ``BoundNodeKind.BINARYEXPRESSION``."""
        return BoundNodeKind.BINARYEXPRESSION

    @property
    def type(self):
        """Type of the expression, taken from the left operand."""
        return self._left.type

    @property
    def left(self) -> BoundExpression:
        """Left operand."""
        return self._left

    @property
    def operator_kind(self) -> BoundBinaryOperatorKind:
        """Operator applied to the two operands."""
        return self._operator_kind

    @property
    def right(self) -> BoundExpression:
        """Right operand."""
        return self._right
class BoundLiteralExpression(BoundExpression):
    """Bound node that wraps a constant value."""

    _value: object

    def __init__(self, value: object):
        self._value = value

    @property
    def value(self):
        """The wrapped constant."""
        return self._value

    @property
    def kind(self) -> BoundNodeKind:
        """Always ``BoundNodeKind.LITERALEXPRESSION``."""
        return BoundNodeKind.LITERALEXPRESSION

    @property
    def type(self):
        """Python type of the wrapped constant."""
        wrapped = self._value
        return type(wrapped)
type(self): + return self._operand.type + + @property + def operator_kind(self) -> BoundUnaryOperatorKind: + return self._operator_kind + + @property + def operand(self) -> BoundExpression: + return self._operand \ No newline at end of file diff --git a/bagel/codeanalysis/binding/boundunaryoperatorkind.py b/bagel/codeanalysis/binding/boundunaryoperatorkind.py new file mode 100644 index 0000000..189c275 --- /dev/null +++ b/bagel/codeanalysis/binding/boundunaryoperatorkind.py @@ -0,0 +1,6 @@ +from enum import Enum + + +class BoundUnaryOperatorKind(Enum): + IDENTITY = 0 + NEGATION = 1 diff --git a/bagel/codeanalysis/evaluator.py b/bagel/codeanalysis/evaluator.py index 8ec1f9c..cc7c767 100644 --- a/bagel/codeanalysis/evaluator.py +++ b/bagel/codeanalysis/evaluator.py @@ -1,37 +1,46 @@ -from .binary_expression_syntax import BinaryExpressionSyntax -from .expression_syntax import ExpressionSyntax -from .literal_expression_syntax import LiteralExpressionSyntax -from .parenthesized_expression_syntax import ParenthesizedExpressionSyntax -from .syntaxkind import SyntaxKind +from .binding import (BoundBinaryExpression, BoundBinaryOperatorKind, + BoundExpression, BoundLiteralExpression, + BoundUnaryExpression, BoundUnaryOperatorKind) class Evaluator: - def __init__(self, root: ExpressionSyntax): + _root: BoundExpression + + def __init__(self, root: BoundExpression): self._root = root - def evaluate(self) -> int: + def evaluate(self) -> object: return self.evaluate_expression(self._root) - def evaluate_expression(self, node: ExpressionSyntax) -> int: - if type(node) is LiteralExpressionSyntax: - return int(node.number_token.value) - - if type(node) is BinaryExpressionSyntax: - left = self.evaluate_expression(node.left) - right = self.evaluate_expression(node.right) - - if node.operator_token.kind == SyntaxKind.PlusToken: - return left + right - elif node.operator_token.kind == SyntaxKind.MinusToken: - return left - right - elif node.operator_token.kind == SyntaxKind.StarToken: - 
return left * right - elif node.operator_token.kind == SyntaxKind.SlashToken: - return left / right - else: - raise Exception(f"Unexpected binary operator {node.operator_token.kind}") - - if type(node) is ParenthesizedExpressionSyntax: - return self.evaluate_expression(node.expression) + def evaluate_expression(self, node: BoundExpression) -> object: + if isinstance(node, BoundLiteralExpression): + return node.value + + if isinstance(node, BoundUnaryExpression): + operand = int(self.evaluate_expression(node.operand)) + + match node.operator_kind: + case BoundUnaryOperatorKind.IDENTITY: + return operand + case BoundUnaryOperatorKind.NEGATION: + return -operand + case _: + raise Exception(f"Unexpected unary operator {node.operator_kind}") + + if isinstance(node, BoundBinaryExpression): + left = int(self.evaluate_expression(node.left)) + right = int(self.evaluate_expression(node.right)) + + match node.operator_kind: + case BoundBinaryOperatorKind.ADDITION: + return left + right + case BoundBinaryOperatorKind.SUBTRACTION: + return left - right + case BoundBinaryOperatorKind.MULTIPLICATION: + return left * right + case BoundBinaryOperatorKind.DIVISION: + return left / right + case _: + raise Exception(f"Unexpected binary operator {node.operator_kind}") raise Exception(f"Unexpected node {node.kind}") diff --git a/bagel/codeanalysis/lexer.py b/bagel/codeanalysis/lexer.py deleted file mode 100644 index 813f40d..0000000 --- a/bagel/codeanalysis/lexer.py +++ /dev/null @@ -1,73 +0,0 @@ -from .syntaxkind import SyntaxKind -from .syntaxtoken import SyntaxToken - - -class Lexer: - def __init__(self, text: str): - self._text = text - self._position = 0 - self._diagnostics = [] - - @property - def diagnostics(self) -> list: - return self._diagnostics - - @property - def current(self) -> str: - if self._position >= len(self._text): - return '\0' - return self._text[self._position] - - def next(self) -> None: - self._position += 1 - - def next_token(self) -> SyntaxToken: - if 
self._position >= len(self._text): - return SyntaxToken(SyntaxKind.EndOfFileToken, self._position, '\0', None) - - if self.current.isdigit(): - start = self._position - - while self.current.isdigit(): - self.next() - - length = self._position - start - text = self._text[start:length + start] - try: - value = int(text) - except ValueError: - self._diagnostics.append(f"The number {self._text} isn't a valid int.") - return SyntaxToken(SyntaxKind.NumberToken, start, text, value) - - if self.current.isspace(): - start = self._position - - while self.current.isspace(): - self.next() - - length = self._position - start - text = self._text[start:length + start] - return SyntaxToken(SyntaxKind.WhiteSpaceToken, start, text, None) - - if self.current == '+': - self._position += 1 - return SyntaxToken(SyntaxKind.PlusToken, self._position, '+', None) - elif self.current == '-': - self._position += 1 - return SyntaxToken(SyntaxKind.MinusToken, self._position, '-', None) - elif self.current == '*': - self._position += 1 - return SyntaxToken(SyntaxKind.StarToken, self._position, '*', None) - elif self.current == '/': - self._position += 1 - return SyntaxToken(SyntaxKind.SlashToken, self._position, '/', None) - elif self.current == '(': - self._position += 1 - return SyntaxToken(SyntaxKind.OpenParenthesisToken, self._position, '(', None) - elif self.current == ')': - self._position += 1 - return SyntaxToken(SyntaxKind.CloseParenthesisToken, self._position, ')', None) - - self._diagnostics.append(f"ERROR: bad character input: '{self.current}'") - self._position += 1 - return SyntaxToken(SyntaxKind.BadToken, self._position, self._text[self._position - 1:1], None) \ No newline at end of file diff --git a/bagel/codeanalysis/parser.py b/bagel/codeanalysis/parser.py deleted file mode 100644 index 8dbb22d..0000000 --- a/bagel/codeanalysis/parser.py +++ /dev/null @@ -1,94 +0,0 @@ -from .binary_expression_syntax import BinaryExpressionSyntax -from .expression_syntax import ExpressionSyntax 
-from .lexer import Lexer -from .literal_expression_syntax import LiteralExpressionSyntax -from .parenthesized_expression_syntax import ParenthesizedExpressionSyntax -from .syntaxkind import SyntaxKind -from .syntaxtoken import SyntaxToken -from .syntaxtree import SyntaxTree - - -class Parser: - def __init__(self, text: str): - self._diagnostics = [] - self._position = 0 - self._tokens = [] - - lexer = Lexer(text) - token = lexer.next_token() - - self._tokens.append(token) - - while token.kind != SyntaxKind.EndOfFileToken: - token = lexer.next_token() - - if token.kind not in [SyntaxKind.WhiteSpaceToken, SyntaxKind.BadToken]: - self._tokens.append(token) - - self._diagnostics += lexer.diagnostics - - @property - def diagnostics(self) -> list: - return self._diagnostics - - def peek(self, offset: int = 0) -> SyntaxToken: - index = self._position + offset - if index >= len(self._tokens): - return self._tokens[len(self._tokens) - 1] - - return self._tokens[index] - - @property - def current(self) -> SyntaxToken: - return self.peek() - - def next_token(self) -> SyntaxToken: - current = self.current - self._position += 1 - return current - - def match_token(self, kind: SyntaxKind) -> SyntaxToken: - if self.current.kind == kind: - return self.next_token() - - self._diagnostics.append(f"ERROR: Unexpected token <'{self.current.kind}'>, expected <{kind}>") - return SyntaxToken(kind, self.current.position, None, None) - - def parse(self) -> SyntaxTree: - expression = self.parse_expression() - end_of_file_token = self.match_token(SyntaxKind.EndOfFileToken) - return SyntaxTree(self._diagnostics, expression, end_of_file_token) - - def parse_expression(self) -> ExpressionSyntax: - return self.parse_term() - - def parse_term(self) -> ExpressionSyntax: - left = self.parse_factor() - - while self.current.kind in [SyntaxKind.PlusToken, SyntaxKind.MinusToken]: - operator_token = self.next_token() - right = self.parse_factor() - left = BinaryExpressionSyntax(left, operator_token, 
right) - - return left - - def parse_factor(self) -> ExpressionSyntax: - left = self.parse_primary_expression() - - while self.current.kind in [SyntaxKind.StarToken, SyntaxKind.SlashToken]: - operator_token = self.next_token() - right = self.parse_primary_expression() - left = BinaryExpressionSyntax(left, operator_token, right) - - return left - - def parse_primary_expression(self) -> ExpressionSyntax: - if self.current.kind == SyntaxKind.OpenParenthesisToken: - left = self.next_token() - expression = self.parse_expression() - right = self.match_token(SyntaxKind.CloseParenthesisToken) - return ParenthesizedExpressionSyntax(left, expression, right) - - number_token = self.match_token(SyntaxKind.NumberToken) - return LiteralExpressionSyntax(number_token) - diff --git a/bagel/codeanalysis/binary_expression_syntax.py b/bagel/codeanalysis/syntax/binary_expression_syntax.py similarity index 85% rename from bagel/codeanalysis/binary_expression_syntax.py rename to bagel/codeanalysis/syntax/binary_expression_syntax.py index ebe8098..5673829 100644 --- a/bagel/codeanalysis/binary_expression_syntax.py +++ b/bagel/codeanalysis/syntax/binary_expression_syntax.py @@ -4,6 +4,10 @@ class BinaryExpressionSyntax(ExpressionSyntax): + _left: ExpressionSyntax + _operator_token: SyntaxToken + _right: ExpressionSyntax + def __init__(self, left: ExpressionSyntax, operator_token: SyntaxToken, right: ExpressionSyntax): self._left = left self._operator_token = operator_token @@ -11,7 +15,7 @@ def __init__(self, left: ExpressionSyntax, operator_token: SyntaxToken, right: E @property def kind(self) -> SyntaxKind: - return SyntaxKind.BinaryExpression + return SyntaxKind.BINARYEXPRESSION def get_children(self) -> list: # can be an array too diff --git a/bagel/codeanalysis/expression_syntax.py b/bagel/codeanalysis/syntax/expression_syntax.py similarity index 100% rename from bagel/codeanalysis/expression_syntax.py rename to bagel/codeanalysis/syntax/expression_syntax.py diff --git 
class Lexer:
    """Splits source text into ``SyntaxToken`` objects, one per ``lex()`` call.

    Every token records the index of its first character.  Problems are
    appended to ``diagnostics`` rather than raised.
    """

    _text: str
    _position: int
    _diagnostics: list

    # Tokens that consist of exactly one character.
    _SINGLE_CHAR_KINDS = {
        '+': SyntaxKind.PLUSTOKEN,
        '-': SyntaxKind.MINUSTOKEN,
        '*': SyntaxKind.STARTOKEN,
        '/': SyntaxKind.SLASHTOKEN,
        '(': SyntaxKind.OPENPARENTOKEN,
        ')': SyntaxKind.CLOSEPARENTOKEN,
    }

    def __init__(self, text: str):
        self._text = text
        self._position = 0
        self._diagnostics = []

    @property
    def diagnostics(self) -> list:
        """Messages recorded while lexing."""
        return self._diagnostics

    @property
    def current(self) -> str:
        """Character at the read position, or ``'\\0'`` past the end."""
        if self._position >= len(self._text):
            return '\0'
        return self._text[self._position]

    @staticmethod
    def isletter(c: str) -> bool:
        """Return True for characters allowed in identifiers/keywords."""
        return c.isalpha() or c == '_'

    def next(self) -> None:
        """Advance the read position by one character."""
        self._position += 1

    def _read_while(self, predicate) -> str:
        """Consume characters while *predicate* holds; return the consumed text."""
        start = self._position
        # `current` yields '\0' past the end, which none of the predicates
        # used here accept, so the loop always terminates.
        while predicate(self.current):
            self.next()
        return self._text[start:self._position]

    def lex(self) -> SyntaxToken:
        """Return the next token; an EOF token once the text is exhausted."""
        start = self._position
        if start >= len(self._text):
            return SyntaxToken(SyntaxKind.EOFTOKEN, start, '\0', None)

        current = self.current

        if current.isdigit():
            text = self._read_while(str.isdigit)
            try:
                value = int(text)
            except ValueError:
                # str.isdigit() accepts characters such as '²' that int()
                # rejects; report the offending text and carry no value.
                self._diagnostics.append(f"The number {text} isn't a valid int.")
                value = None
            return SyntaxToken(SyntaxKind.NUMBERTOKEN, start, text, value)

        if current.isspace():
            text = self._read_while(str.isspace)
            return SyntaxToken(SyntaxKind.WHITESPACETOKEN, start, text, None)

        if self.isletter(current):
            text = self._read_while(self.isletter)
            kind = SyntaxFacts.get_keyword_kind(text)
            return SyntaxToken(kind, start, text, None)

        # Everything else is a single character: a known operator or
        # parenthesis, or a bad token.  The token position is the index of
        # that character, i.e. the position before advancing.
        self.next()
        kind = self._SINGLE_CHAR_KINDS.get(current)
        if kind is None:
            self._diagnostics.append(f"ERROR: bad character input: '{current}'")
            return SyntaxToken(SyntaxKind.BADTOKEN, start, current, None)
        return SyntaxToken(kind, start, current, None)
bagel/codeanalysis/parenthesized_expression_syntax.py
rename to bagel/codeanalysis/syntax/parenthesized_expression_syntax.py
index d9f1b11..91792d6 100644
--- a/bagel/codeanalysis/parenthesized_expression_syntax.py
+++ b/bagel/codeanalysis/syntax/parenthesized_expression_syntax.py
@@ -4,6 +4,10 @@
 
 
 class ParenthesizedExpressionSyntax(ExpressionSyntax):
+    _open_parenthesis_token: SyntaxToken
+    _expression: ExpressionSyntax
+    _close_parenthesis_token: SyntaxToken
+
     def __init__(self, open_parenthesis_token: SyntaxToken, expression: ExpressionSyntax,
                  close_parenthesis_token: SyntaxToken):
         self._open_parenthesis_token = open_parenthesis_token
@@ -12,7 +16,7 @@ def __init__(self, open_parenthesis_token: SyntaxToken, expression: ExpressionSy
 
     @property
     def kind(self) -> SyntaxKind:
-        return SyntaxKind.ParenthesizedExpression
+        return SyntaxKind.PARENEXPRESSION
 
     def get_children(self) -> list:
         return [self.open_parenthesis_token, self.expression, self.close_parenthesis_token]
diff --git a/bagel/codeanalysis/syntax/parser.py b/bagel/codeanalysis/syntax/parser.py
new file mode 100644
index 0000000..18a50e2
--- /dev/null
+++ b/bagel/codeanalysis/syntax/parser.py
@@ -0,0 +1,125 @@
+from .unary_expression_syntax import UnaryExpressionSyntax
+from .binary_expression_syntax import BinaryExpressionSyntax
+from .expression_syntax import ExpressionSyntax
+from .lexer import Lexer
+from .literal_expression_syntax import LiteralExpressionSyntax
+from .parenthesized_expression_syntax import ParenthesizedExpressionSyntax
+from .syntaxkind import SyntaxKind
+from .syntaxtoken import SyntaxToken
+from .syntaxtree import SyntaxTree
+from .syntaxfacts import SyntaxFacts
+
+
+class Parser:
+    """Recursive-descent parser that turns source text into a SyntaxTree."""
+
+    _diagnostics: list
+
+    def __init__(self, text: str):
+        """Lex *text* up front, keeping only the tokens the parser consumes."""
+        self._diagnostics = []
+        self._position = 0
+        self._tokens = []
+
+        skipped_kinds = (SyntaxKind.WHITESPACETOKEN, SyntaxKind.BADTOKEN)
+        lexer = Lexer(text)
+
+        # Filter every token, including the first one, so that leading
+        # whitespace or a leading bad token never reaches the parser.
+        while True:
+            token = lexer.lex()
+
+            if token.kind not in skipped_kinds:
+                self._tokens.append(token)
+
+            if token.kind == SyntaxKind.EOFTOKEN:
+                break
+
+        self._diagnostics += lexer.diagnostics
+
+    @property
+    def diagnostics(self) -> list:
+        """Lexer diagnostics followed by those reported while parsing."""
+        return self._diagnostics
+
+    def peek(self, offset: int = 0) -> SyntaxToken:
+        """Return the token *offset* places ahead, clamped to the last token."""
+        index = self._position + offset
+        if index >= len(self._tokens):
+            return self._tokens[-1]
+
+        return self._tokens[index]
+
+    @property
+    def current(self) -> SyntaxToken:
+        """The token at the current position."""
+        return self.peek()
+
+    def next_token(self) -> SyntaxToken:
+        """Return the current token and advance past it."""
+        current = self.current
+        self._position += 1
+        return current
+
+    def match_token(self, kind: SyntaxKind) -> SyntaxToken:
+        """Consume the current token if it has *kind*.
+
+        Otherwise record a diagnostic and return a synthesized token of the
+        expected kind, without advancing, so parsing can continue.
+        """
+        if self.current.kind == kind:
+            return self.next_token()
+
+        self._diagnostics.append(
+            f"ERROR: Unexpected token <'{self.current.kind}'>, expected <{kind}>")
+        return SyntaxToken(kind, self.current.position, None, None)
+
+    def parse(self) -> SyntaxTree:
+        """Parse one expression followed by the end-of-file token."""
+        expression = self.parse_expression()
+        end_of_file_token = self.match_token(SyntaxKind.EOFTOKEN)
+        return SyntaxTree(self._diagnostics, expression, end_of_file_token)
+
+    def parse_expression(self, parent_precedence: int = 0) -> ExpressionSyntax:
+        """Parse unary and binary expressions by precedence climbing.
+
+        A binary operator is consumed only when it binds tighter than
+        *parent_precedence*, so equal-precedence operators associate left.
+        """
+        unary_operator_precedence = SyntaxFacts.get_unary_operator_precedence(
+            self.current.kind)
+        if unary_operator_precedence != 0 and unary_operator_precedence >= parent_precedence:
+            operator_token = self.next_token()
+            operand = self.parse_expression(unary_operator_precedence)
+            left = UnaryExpressionSyntax(operator_token, operand)
+        else:
+            left = self.parse_primary_expression()
+
+        while True:
+            precedence = SyntaxFacts.get_binary_operator_precedence(self.current.kind)
+            if not precedence or precedence <= parent_precedence:
+                break
+
+            operator_token = self.next_token()
+            right = self.parse_expression(precedence)
+            left = BinaryExpressionSyntax(left, operator_token, right)
+
+        return left
+
+    def parse_primary_expression(self) -> ExpressionSyntax:
+        """Parse a parenthesized expression, a boolean keyword, or a number."""
+        match self.current.kind:
+            case SyntaxKind.OPENPARENTOKEN:
+                left = self.next_token()
+                expression = self.parse_expression()
+                right = self.match_token(SyntaxKind.CLOSEPARENTOKEN)
+                return ParenthesizedExpressionSyntax(left, expression, right)
+            case SyntaxKind.TRUEKEYWORD | SyntaxKind.FALSEKEYWORD:
+                keyword_token = self.next_token()
+                # Read the kind from the consumed keyword itself: after
+                # next_token() `self.current` is already the following token.
+                value = keyword_token.kind == SyntaxKind.TRUEKEYWORD
+                return LiteralExpressionSyntax(keyword_token, value)
+            case _:
+                literal_token = self.match_token(SyntaxKind.NUMBERTOKEN)
+                return LiteralExpressionSyntax(literal_token)
diff --git a/bagel/codeanalysis/syntax/syntaxfacts.py b/bagel/codeanalysis/syntax/syntaxfacts.py
new file mode 100644
index 0000000..1050eea
--- /dev/null
+++ b/bagel/codeanalysis/syntax/syntaxfacts.py
@@ -0,0 +1,36 @@
+from .syntaxkind import SyntaxKind
+
+
+class SyntaxFacts:
+    """Static lookups for operator precedence and keyword recognition."""
+
+    @staticmethod
+    def get_unary_operator_precedence(kind: SyntaxKind) -> int:
+        """Return the precedence of *kind* as a unary operator, or 0 if none."""
+        match kind:
+            case SyntaxKind.PLUSTOKEN | SyntaxKind.MINUSTOKEN:
+                return 3
+            case _:
+                return 0
+
+    @staticmethod
+    def get_binary_operator_precedence(kind: SyntaxKind) -> int:
+        """Return the precedence of *kind* as a binary operator, or 0 if none."""
+        match kind:
+            case SyntaxKind.STARTOKEN | SyntaxKind.SLASHTOKEN:
+                return 2
+            case SyntaxKind.PLUSTOKEN | SyntaxKind.MINUSTOKEN:
+                return 1
+            case _:
+                return 0
+
+    @staticmethod
+    def get_keyword_kind(text: str) -> SyntaxKind:
+        """Map *text* to its keyword kind, or IDENTIFIERTOKEN if not a keyword."""
+        match text:
+            case 'true':
+                return SyntaxKind.TRUEKEYWORD
+            case 'false':
+                return SyntaxKind.FALSEKEYWORD
+            case _:
+                return SyntaxKind.IDENTIFIERTOKEN
diff --git a/bagel/codeanalysis/syntax/syntaxkind.py b/bagel/codeanalysis/syntax/syntaxkind.py
new file mode 100644
index 0000000..c8168a5
--- /dev/null
+++ b/bagel/codeanalysis/syntax/syntaxkind.py
@@ -0,0 +1,28 @@
+import enum
+
+
+class SyntaxKind(enum.Enum):
+    """Kinds of tokens, keywords and expression nodes."""
+
+    # Tokens
+    BADTOKEN = 0
+    EOFTOKEN = 1
+    WHITESPACETOKEN = 2
+    NUMBERTOKEN = 3
+    PLUSTOKEN = 4
+    MINUSTOKEN = 5
+    STARTOKEN = 6
+    SLASHTOKEN = 7
+    OPENPARENTOKEN = 8
+    CLOSEPARENTOKEN = 9
+    IDENTIFIERTOKEN = 10
+
+    # Keywords
+    FALSEKEYWORD = 11
+    TRUEKEYWORD = 12
+
+    # Expressions
+    LITERALEXPRESSION = 13
+    UNARYEXPRESSION = 14
+    BINARYEXPRESSION = 15
+    PARENEXPRESSION = 16
diff --git a/bagel/codeanalysis/syntaxnode.py b/bagel/codeanalysis/syntax/syntaxnode.py
similarity index 79%
rename from bagel/codeanalysis/syntaxnode.py
rename to bagel/codeanalysis/syntax/syntaxnode.py
index 74e0583..25a7f46 100644
--- a/bagel/codeanalysis/syntaxnode.py
+++ b/bagel/codeanalysis/syntax/syntaxnode.py
@@ -4,7 +4,7 @@
 class SyntaxNode:
     @property
     def kind(self):
-        return SyntaxKind.BadToken
+        return SyntaxKind.BADTOKEN
 
     def get_children(self):
         return []
diff --git a/bagel/codeanalysis/syntaxtoken.py b/bagel/codeanalysis/syntax/syntaxtoken.py
similarity index 88%
rename from bagel/codeanalysis/syntaxtoken.py
rename to bagel/codeanalysis/syntax/syntaxtoken.py
index 1dcb10a..99ff48a 100644
--- a/bagel/codeanalysis/syntaxtoken.py
+++ b/bagel/codeanalysis/syntax/syntaxtoken.py
@@ -3,6 +3,11 @@
 
 
 class SyntaxToken(SyntaxNode):
+    _kind: SyntaxKind
+    _position: int
+    _text: str
+    _value: object
+
     def __init__(self, kind: SyntaxKind, position: int, text: str, value: object):
         self._kind = kind
         self._position = position
diff --git a/bagel/codeanalysis/syntaxtree.py b/bagel/codeanalysis/syntax/syntaxtree.py
similarity index 80%
rename from bagel/codeanalysis/syntaxtree.py
rename to bagel/codeanalysis/syntax/syntaxtree.py
index fc91769..f1f5375 100644
--- a/bagel/codeanalysis/syntaxtree.py
+++ b/bagel/codeanalysis/syntax/syntaxtree.py
@@ -1,8 +1,14 @@
+from __future__ import annotations
+
 from .expression_syntax import ExpressionSyntax
 from .syntaxtoken import SyntaxToken
 
 
 class SyntaxTree:
+    _diagnostics: list
+    _root: ExpressionSyntax
+    _end_of_file_token: SyntaxToken
+
     def __init__(self, diagnostics: list, root: ExpressionSyntax, end_of_file_token: SyntaxToken):
         self._diagnostics = diagnostics
         self._root = root
@@ -21,7 +27,7 @@ def end_of_file_token(self) -> SyntaxToken:
         return self._end_of_file_token
 
     @staticmethod
-    def parse(text: str) -> object:
+    def parse(text: str) -> SyntaxTree:
         from .parser import Parser
         parser = Parser(text)
         return parser.parse()
diff --git a/bagel/codeanalysis/syntax/unary_expression_syntax.py b/bagel/codeanalysis/syntax/unary_expression_syntax.py
new file mode 100644
index 0000000..7f27740
--- /dev/null
+++ b/bagel/codeanalysis/syntax/unary_expression_syntax.py
@@ -0,0 +1,30 @@
+from .expression_syntax import ExpressionSyntax
+from .syntaxkind import SyntaxKind
+from .syntaxtoken import SyntaxToken
+
+
+class UnaryExpressionSyntax(ExpressionSyntax):
+    """Syntax node for an operator token applied to a single operand."""
+
+    _operator_token: SyntaxToken
+    _operand: ExpressionSyntax
+
+    def __init__(self, operator_token: SyntaxToken, operand: ExpressionSyntax):
+        self._operator_token = operator_token
+        self._operand = operand
+
+    @property
+    def kind(self) -> SyntaxKind:
+        return SyntaxKind.UNARYEXPRESSION
+
+    def get_children(self) -> list:
+        """Return the operator token followed by the operand."""
+        return [self.operator_token, self.operand]
+
+    @property
+    def operator_token(self) -> SyntaxToken:
+        return self._operator_token
+
+    @property
+    def operand(self) -> ExpressionSyntax:
+        return self._operand
diff --git a/bagel/codeanalysis/syntaxkind.py b/bagel/codeanalysis/syntaxkind.py
deleted file mode 100644
index bea2e3c..0000000
--- a/bagel/codeanalysis/syntaxkind.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import enum
-
-
-class SyntaxKind(enum.Enum):
-    # Tokens
-    BadToken = 0
-    EndOfFileToken = 1
-    WhiteSpaceToken = 2
-    NumberToken = 3
-    PlusToken = 4
-    MinusToken = 5
-    StarToken = 6
-    SlashToken = 7
-    OpenParenthesisToken = 8
-    CloseParenthesisToken = 9
-
-    # Expressions
-    NumberExpression = 10
-    BinaryExpression = 11
-    ParenthesizedExpression = 12