diff --git a/.gitignore b/.gitignore index 2f51a1e..99f56cb 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ Terminal ios-obj-log/* build/* obj/* -rm-win-r.sh +rm-win-r.sh \ No newline at end of file diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt index 60d533c..21b9213 100644 --- a/compiler/CMakeLists.txt +++ b/compiler/CMakeLists.txt @@ -9,3 +9,6 @@ endif() # Include common headers target_include_directories(lmc PUBLIC ${CMAKE_SOURCE_DIR}/include) + +# Link against runtime library +target_link_libraries(lmc lmvm) diff --git a/compiler/ast.hpp b/compiler/ast.hpp index 1bcd1c3..1f50455 100644 --- a/compiler/ast.hpp +++ b/compiler/ast.hpp @@ -3,11 +3,9 @@ // #pragma once -#include #include #include #include -#include #include "../include/lmx_export.hpp" @@ -23,7 +21,7 @@ namespace lmx { enum ASTKind { Program, - Binary, Unary, NumLiteral, StringLiteral, BoolLiteral, + Binary, Unary, NumLiteral, StringLiteral, BoolLiteral, VectorLiteral, BlockStmt, IfStmt, VarDecl, @@ -219,6 +217,12 @@ struct StringNode final : public ExprNode { ~StringNode() override = default; }; +struct VectorNode final : public ExprNode { + std::vector> elements; + explicit VectorNode(std::vector> elems) + : ExprNode(ASTKind::VectorLiteral), elements(std::move(elems)){} +}; + struct BinaryNode final : public ExprNode { std::shared_ptr left; std::shared_ptr right; diff --git a/compiler/common.hpp b/compiler/common.hpp index 43bc517..69a6285 100644 --- a/compiler/common.hpp +++ b/compiler/common.hpp @@ -12,6 +12,8 @@ #include #include +#include "../include/error.hpp" + #include "parser.hpp" namespace lmx { @@ -23,10 +25,10 @@ namespace lmx { inline std::string read_file(const std::string& path) { std::ifstream file(path); if (!file.is_open()) { - std::cerr << "Failed to open file " << path << std::endl; + LM_ERROR("Failed to open file " + path); return {}; } - return std::string(std::istreambuf_iterator{file}, std::istreambuf_iterator{}); + return {std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; } inline std::shared_ptr compile(const std::string &path) { std::string code = read_file(path); @@ -34,7 +36,7 @@ namespace lmx { Lexer lexer(code); auto tks = lexer.tokenize(code); - Parser parser(tks); + Parser parser(tks, code, path); if (auto node = parser.parse_program(); node && !parser.error()) return node; return nullptr; } diff --git a/compiler/generator/emit.cpp b/compiler/generator/emit.cpp index 66da3d2..38a580c 100644 --- a/compiler/generator/emit.cpp +++ b/compiler/generator/emit.cpp @@ -7,6 +7,8 @@ #include #include +#include "debug.hpp" + namespace lmx { LMXOpcodeEmitter::Ret_Type LMXOpcodeEmitter::ret_type = None; template @@ -32,12 +34,39 @@ void LMXOpcodeEmitter::emit_mov_ri(std::vector &ops, uint8_t r void LMXOpcodeEmitter::emit_mov_rr(std::vector &ops, uint8_t r1, uint8_t r2) { if (r1 == r2) return; - if (ret_type == Reg) { - // 如果上一条指令的 目标寄存器 与本条指令的 源寄存器 相同 - // 不生成新指令, 修改上条指令目标为本条目标 - if (auto& last = ops.back(); last.operands[0] == r2) { - last.operands[0] = r1; - return; + if (ret_type == Reg && !ops.empty()) { + const auto& last_op = ops.back(); + bool is_reg_return_op = false; + switch (last_op.op) { + case lmx::runtime::Opcode::MOV_RI: + case lmx::runtime::Opcode::MOV_RR: + case lmx::runtime::Opcode::MOV_RC: + case lmx::runtime::Opcode::ADD: + case lmx::runtime::Opcode::SUB: + case lmx::runtime::Opcode::MUL: + case lmx::runtime::Opcode::DIV: + case lmx::runtime::Opcode::MOD: + case lmx::runtime::Opcode::POW: + case lmx::runtime::Opcode::CMP_GE: + case lmx::runtime::Opcode::CMP_LT: + case lmx::runtime::Opcode::CMP_LE: + case lmx::runtime::Opcode::CMP_GT: + case lmx::runtime::Opcode::CMP_EQ: + case lmx::runtime::Opcode::CMP_NE: + case lmx::runtime::Opcode::AND: + case lmx::runtime::Opcode::OR: + case lmx::runtime::Opcode::LOCAL_GET: + is_reg_return_op = true; + break; + default: + is_reg_return_op = false; + break; + } + if (is_reg_return_op) { + if (last_op.operands[0] == r2) { + ops.back().operands[0] = r1; + return; + } } } lmx::runtime::Op op(lmx::runtime::Opcode::MOV_RR); @@ -257,6 +286,67 @@ void LMXOpcodeEmitter::emit_dec(std::vector &ops, uint8_t r) { ops.push_back(op); } +void LMXOpcodeEmitter::emit_mov_rm(std::vector &ops, uint8_t r1, uint8_t r2, int8_t offest) { + runtime::Op op(runtime::Opcode::MOV_RM); + op.operands[0] = r1; + op.operands[1] = r2; + op.operands[2] = static_cast(offest); + ops.push_back(op); + ret_type = Reg; +} + +void LMXOpcodeEmitter::emit_mov_mi(std::vector &ops, uint8_t r1, int8_t offest1, int64_t imm) { + runtime::Op op(runtime::Opcode::MOV_MI); + op.operands[0] = r1; + op.operands[1] = static_cast(offest1); + write_imm(op.operands + 2, imm); + ops.push_back(op); + ret_type = None; +} + +void LMXOpcodeEmitter::emit_mov_mr(std::vector &ops, uint8_t r1, int8_t offest1, uint8_t r2) { + runtime::Op op(runtime::Opcode::MOV_MR); + op.operands[0] = r1; + op.operands[1] = static_cast(offest1); + op.operands[2] = r2; + ops.push_back(op); + ret_type = None; +} + +void LMXOpcodeEmitter::emit_mov_mm(std::vector &ops, uint8_t r1, int8_t offest1, uint8_t r2, int8_t offest2) { + runtime::Op op(runtime::Opcode::MOV_MM); + op.operands[0] = r1; + op.operands[1] = static_cast(offest1); + op.operands[2] = r2; + op.operands[3] = static_cast(offest2); + ops.push_back(op); + ret_type = None; +} + +void LMXOpcodeEmitter::emit_mov_mc(std::vector &ops, uint8_t r1, int8_t offest1, uint64_t idx) { + runtime::Op op(runtime::Opcode::MOV_MC); + op.operands[0] = r1; + op.operands[1] = static_cast(offest1); + write_imm(op.operands + 2, std::bit_cast(idx)); + ops.push_back(op); + ret_type = None; +} + +void LMXOpcodeEmitter::emit_push(std::vector& ops, uint8_t reg) { + runtime::Op op(runtime::Opcode::PUSH); + op.operands[0] = reg; + ops.push_back(op); + ret_type = None; +} + +void LMXOpcodeEmitter::emit_create_vector(std::vector& ops, uint8_t rd, uint8_t count) { + runtime::Op op(runtime::Opcode::CREATE_VECTOR); + op.operands[0] = rd; + op.operands[1] = count; + ops.push_back(op); + ret_type = Reg; +} + #undef getter_opt_code } // namespace lmx diff --git a/compiler/generator/emit.hpp b/compiler/generator/emit.hpp index 4c265c9..ee99792 100644 --- a/compiler/generator/emit.hpp +++ b/compiler/generator/emit.hpp @@ -75,6 +75,10 @@ class LMC_API LMXOpcodeEmitter { static void emit_or(std::vector &ops, uint8_t r1, uint8_t r2, uint8_t r3); static void emit_vmc(std::vector &ops, uint16_t idx); + + static void emit_push(std::vector& ops, uint8_t reg); + + static void emit_create_vector(std::vector& ops, uint8_t rd, uint8_t count); }; } // namespace lmx \ No newline at end of file diff --git a/compiler/generator/generator.cpp b/compiler/generator/generator.cpp index 85cdb2e..b42429e 100644 --- a/compiler/generator/generator.cpp +++ b/compiler/generator/generator.cpp @@ -11,10 +11,12 @@ #include #include +#include "debug.hpp" #include "emit.hpp" #include "opcode.hpp" #include "vmcall.hpp" #include "../common.hpp" +#include "../../runtime/builtins.hpp" namespace lmx { bool Generator::node_has_error = false; @@ -24,7 +26,7 @@ Allocator::Allocator() { } size_t Allocator::alloc() { - for (size_t i = 0; i < REG_COUNT; i++) { + for (size_t i = 1; i < REG_COUNT; i++) { if (!bitset.test(i)) { bitset.set(i); return i; @@ -44,14 +46,31 @@ void Allocator::free(size_t i) { } } -bool Allocator::is_free(size_t i) { +bool Allocator::is_free(const size_t i) const { return bitset.test(i); } +void Generator::add_builtins() const { + size_t base_index = runtime::builtins::builtin_start; + for (size_t i = 0; i < runtime::builtins::builtin_constants_count; i++) { + const auto&[name, value] = runtime::builtins::builtin_constants[i]; + cur.back()->new_var(name, false, base_index); + base_index++; + } +} + Generator::Generator() { cur.push_back(std::make_unique("global")); + + add_builtins(); + + DEBUG_LOG("Added builtin constants to compiling frame"); + DEBUG_LOG("Builtin constants in compiling frame:"); + for (const auto& [name, info] : cur.back()->locals) { + DEBUG_LOG_FMT(" %s: index=%d, mutable=%s", name.c_str(), info.second, info.first ? "true" : "false"); + } } -void Generator::write(runtime::Op& op) { +void Generator::write(const runtime::Op& op) { ops.push_back(op); } @@ -64,7 +83,9 @@ std::vector &Generator::get_ops() { } size_t Generator::gen(std::shared_ptr &n) { + DEBUG_LOG("Gen: " << cur.back()->to_string()); switch (n->kind) { + DEBUG_LOG(ITIS(n->kind, std::to_string)); case Program: return gen_program(n); case Binary: return gen_binary(n); case Unary: return gen_unary(n); @@ -75,6 +96,7 @@ size_t Generator::gen(std::shared_ptr &n) { case NumLiteral: return gen_num(n); case StringLiteral: return gen_string(n); case BoolLiteral: return gen_bool(n); + case VectorLiteral: return gen_vector(n); case BlockStmt: return gen_block(n); case IfStmt: return gen_if(n); case FuncDecl: return gen_function(n); @@ -100,7 +122,7 @@ size_t Generator::gen_program(std::shared_ptr &n) { return last_ret; } size_t Generator::gen_loop(const std::shared_ptr &shared) { - const auto node = std::static_pointer_cast(std::move(shared)); + const auto node = std::static_pointer_cast(shared); size_t loop_cond = -1; if (node->condition) loop_cond = gen(node->condition); @@ -147,7 +169,7 @@ size_t Generator::gen_break(std::shared_ptr &n) { size_t Generator::gen_module(std::shared_ptr &shared) { const auto node = std::static_pointer_cast(std::move(shared)); for (const auto& mn : modules) - if (mn == node->name) return -1; //已解析过同名模块,不再解析 + if (mn == node->name) return -1; if (node->type == ModuleNode::Types::dyn) { @@ -232,9 +254,15 @@ size_t Generator::gen_binary(std::shared_ptr& n) { expr_release = true; return expr_ret_reg; } + DEBUG_LOG("Try to gen node->right"); volatile size_t rr = gen(node->right); + if (rr == -1) { + regs.free(lr); + return -1; + } tmp = regs.alloc(); LMXOpcodeEmitter::emit_mov_rr(ops, tmp, rr); + DEBUG_LOG(ITIS(tmp, std::to_string) << ", " << ITIS(rr, std::to_string)); if (expr_release) regs.free(rr); rr = tmp; expr_ret_reg = regs.alloc(); @@ -336,7 +364,7 @@ size_t Generator::gen_function(std::shared_ptr &n) { error("the function args name `" + arg + "` was defined on last scope"); } } - new_func(node->name, args_count); //函数不做作用域区分,全部全局 + new_func(node->name, args_count); for (size_t i = 0; i < args_count ; i++) { LMXOpcodeEmitter::emit_local_set(ops, cur.size() - 1, i, REG_COUNT_INDEX_MAX - i); cur.back()->new_var(node->args[i], true, i); @@ -454,6 +482,53 @@ size_t Generator::gen_bool(std::shared_ptr &n) { return expr_ret_reg; } +size_t Generator::gen_vector(std::shared_ptr &n) { + const auto node = std::static_pointer_cast(std::move(n)); + DEBUG_LOG("gen vector"); + regs.print_regs(); + + const size_t vector_size = node->elements.size(); + DEBUG_LOG("vector_size: " << vector_size); + + std::vector elem_regs; + elem_regs.reserve(node->elements.size()); + + for (auto& elem : node->elements) { + std::shared_ptr elem_node = elem; + DEBUG_LOG("Now processing: " << elem_node->kind << ": " << elem_node); + auto reg = gen(elem_node); + if (reg == -1) { + node_has_error = true; + return -1; + } + elem_regs.push_back(reg); + DEBUG_LOG("Pushed elem_regs:"); + for (auto const& elem_in : elem_regs) DEBUG_LOG(elem_in); + regs.print_regs(); + } + + for (auto reg : elem_regs) { + LMXOpcodeEmitter::emit_push(ops, reg); + DEBUG_LOG("Pushed r" << reg << " to stack"); + } + + auto result_reg = regs.alloc(); + LMXOpcodeEmitter::emit_create_vector(ops, result_reg, vector_size); + DEBUG_LOG("CREATE_VECTOR r" << result_reg << ", " << vector_size); + + for (const auto reg : elem_regs) { + regs.free(reg); + DEBUG_LOG("Freed register: " << reg); + regs.print_regs(); + } + + expr_release = true; + + DEBUG_LOG("gen_vector finished, returning result_reg: " << result_reg); + regs.print_regs(); + return result_reg; +} + size_t Generator::gen_block(std::shared_ptr &n) { std::unordered_map save(cur.back()->locals); @@ -518,7 +593,7 @@ void Generator::write_binary_file(const std::string& path) { void Generator::print_ops(std::vector& ops) { size_t i = 0; for (auto &op: ops) { - printf("[0x%zx]\t", i++); // fix [0x%llx] the warning caused by + printf("[0x%zx]\t", i++); switch (op.op) { using enum runtime::Opcode; case MOV_RI: { @@ -538,13 +613,20 @@ void Generator::print_ops(std::vector& ops) { break; } case MOV_MI: { - + printf("MOVMI: 0x%llu, %lld\n", *reinterpret_cast(op.operands), *reinterpret_cast(op.operands + 1)); + break; } case MOV_MM: { + printf("MOVMM: 0x%llu, 0x%llu\n", *reinterpret_cast(op.operands), *reinterpret_cast(op.operands + 1)); + break; } case MOV_MR: { + printf("MOVMR: 0x%llu, %u\n", *reinterpret_cast(op.operands), op.operands[1]); + break; } case MOV_MC: { + printf("MOVMC: 0x%llu, 0x%llu\n", *reinterpret_cast(op.operands), *reinterpret_cast(op.operands + 1)); + break; } case ADD: { printf("ADD: %u, %u, %u\n", op.operands[0], op.operands[1], op.operands[2]); @@ -623,11 +705,11 @@ void Generator::print_ops(std::vector& ops) { break; } case LOCAL_GET: { - printf("LOCAL_GET: %u, [%u, 0x%x]\n", op.operands[0], op.operands[1], *(uint16_t*)(op.operands + 2)); + printf("LOCAL_GET: %u, [%u, 0x%x]\n", op.operands[0], op.operands[1], *reinterpret_cast(op.operands + 2)); break; } case LOCAL_SET: { - printf("LOCAL_SET: [%u, 0x%x], %u\n", op.operands[0], *(uint16_t*)(op.operands + 1), op.operands[3]); + printf("LOCAL_SET: [%u, 0x%x], %u\n", op.operands[0], *reinterpret_cast(op.operands + 1), op.operands[3]); break; } case FUNC_CREATE: { @@ -647,12 +729,13 @@ void Generator::print_ops(std::vector& ops) { break; } case VMC: { - printf("VMC: %d\n", *(uint16_t*)op.operands); + printf("VMC: %d\n", *reinterpret_cast(op.operands)); break; } case DEC: { printf("DEC: %u\n", op.operands[0]); } + default: ; } } std::cout << std::flush; diff --git a/compiler/generator/generator.hpp b/compiler/generator/generator.hpp index 87fe0ee..9f3db8f 100644 --- a/compiler/generator/generator.hpp +++ b/compiler/generator/generator.hpp @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include +#include "debug.hpp" #include "../ast.hpp" #include "lmx_export.hpp" @@ -33,7 +35,16 @@ class LMC_API Allocator { size_t alloc(); size_t alloc(size_t i); void free(size_t i); - bool is_free(size_t i); + [[nodiscard]] bool is_free(size_t i) const; + void print_regs() const { + std::string regs_str = "Allocated registers: "; + for (size_t i = 0; i < REG_COUNT; i++) { + if (bitset.test(i)) { + regs_str += "r" + std::to_string(i) + " "; + } + } + DEBUG_LOG(regs_str); + } }; class LMC_API Generator { @@ -70,6 +81,8 @@ class LMC_API Generator { size_t gen_bool(std::shared_ptr& n); + size_t gen_vector(std::shared_ptr& n); + size_t gen_block(std::shared_ptr& n); size_t basic_gen_block(std::shared_ptr &n); @@ -112,9 +125,20 @@ class LMC_API Generator { } else error("redefined var: `" + n + "`"); return local_count; } + uint16_t new_var(const std::string& n, bool is_mut) { return new_var(n, is_mut, ++local_count); } + + std::string to_string() const { + auto result = std::format("CallingFrame {}(\n local_count: {}\n", name, local_count); + for (auto [var_name, pack] : locals) { + auto& [mut, here] = pack; + result.append(std::format("\t{}: {}, at {}\n", var_name, (mut ? "mutable" : "immutable"), here)); + } + result.append(")"); + return result; + } }; std::vector> cur; /* ====================================== * @@ -156,10 +180,12 @@ class LMC_API Generator { */ std::unordered_map< std::string, - std::pair > - >> extern_funcs; - static inline runtime::CBasicTypes lmtype2ctype(std::string& lmt) { + std::pair< + size_t, + std::vector> + > + > extern_funcs; + static runtime::CBasicTypes lmtype2ctype(std::string& lmt) { if (lmt.empty()) return runtime::Void; if (lmt == "bool") return runtime::CBasicTypes::Bool; if (lmt == "num") return runtime::CBasicTypes::LongLong; @@ -176,11 +202,14 @@ class LMC_API Generator { public: static bool node_has_error; Allocator regs; + + void add_builtins() const; + Generator(); ~Generator() = default; std::vector ops; - void write(runtime::Op& op); + void write(const runtime::Op& op); std::vector &get_ops(); std::vector constant_pool; diff --git a/compiler/lexer.cpp b/compiler/lexer.cpp index d7d9b57..0c84d76 100644 --- a/compiler/lexer.cpp +++ b/compiler/lexer.cpp @@ -1,278 +1,249 @@ -// -// Created by geguj on 2025/12/28. -// - -#include "lexer.hpp" - -#include -#include - -namespace lmx { - -std::ostream& operator<<(std::ostream& os, const Token& t) { - os << "Token("; - switch (t.type) { - case TokenType::END_OF_FILE: os << "END_OF_FILE"; break; - case TokenType::IDENTIFIER: os << "IDENTIFIER"; break; - case TokenType::NUM_LITERAL: os << "INT_LITERAL"; break; - case TokenType::STRING_LITERAL: os << "STRING_LITERAL"; break; - case TokenType::COMMA: os << "COMMA"; break; - case TokenType::TRUE_LITERAL: os << "TRUE_LITERAL"; break; - case TokenType::FALSE_LITERAL: os << "FALSE_LITERAL"; break; - case TokenType::OPER_PLUS: os << "OPER_PLUS"; break; - case TokenType::OPER_MINUS: os << "OPER_MINUS"; break; - case TokenType::OPER_MUL: os << "OPER_MUL"; break; - case TokenType::OPER_DIV: os << "OPER_DIV"; break; - case TokenType::OPER_MOD: os << "OPER_MOD"; break; - case TokenType::EQ: os << "EQ"; break; - case TokenType::GE: os << "GE"; break; - case TokenType::GT: os << "GT"; break; - case TokenType::LE: os << "LE"; break; - case TokenType::LT: os << "LT"; break; - case TokenType::COLON: os << "COLON"; break; - case TokenType::COL_COLON: os << "COL_COLON"; break; - case TokenType::OPER_POW: os << "OPER_POW"; break; - case TokenType::ASSIGN: os << "ASSIGN"; break; - case TokenType::NOT: os << "NOT"; break; - case TokenType::NE: os << "NE"; break; - case TokenType::LPAREN: os << "LPAREN"; break; - case TokenType::RPAREN: os << "RPAREN"; break; - case TokenType::LBRACK: os << "LBRACK"; break; - case TokenType::RBRACK: os << "RBRACK"; break; - case TokenType::LBRACE: os << "LBRACE"; break; - case TokenType::RBRACE: os << "RBRACE"; break; - - case TokenType::UNKNOWN: os << "UNKNOWN"; break; - case TokenType::KW_FUNC: os << "KEYWORD_FUNC"; break; - case TokenType::KW_RETURN: os << "KEYWORD_RETURN"; break; - default: os << "UNKNOWN"; - } - os << ", " << t.text << ", " << t.line << ", " << t.col << ')'; - return os; -} - -void Lexer::advance() { - pos++; - if (src[pos] == '\n') { - line++; - col = 1; - } else col++; -} - -Token Lexer::next() { - while (isspace(src[pos])) { - advance(); - } - if (pos >= src.size()) return {TokenType::END_OF_FILE,"", line, col}; - - switch (src[pos]) { - case '+': { - advance(); - return {TokenType::OPER_PLUS, "+", line, col}; - } - case '-': { - advance(); - return {TokenType::OPER_MINUS, "-", line, col}; - } - case '*': { - advance(); - return {TokenType::OPER_MUL, "*", line, col}; - } - case '/': { - advance(); - return {TokenType::OPER_DIV, "/", line, col}; - } - case '%': { - advance(); - return {TokenType::OPER_MOD, "%", line, col}; - } - case '=': { - advance(); - if (src[pos] == '=') { - advance(); - return {TokenType::EQ, "==", line, col}; - } - return {TokenType::ASSIGN, "=", line, col}; - } - case '>': { - advance(); - if (src[pos] == '=') { - advance(); - return {TokenType::GE, ">=", line, col}; - } - return {TokenType::GT, ">", line, col}; - } - case '<': { - advance(); - if (src[pos] == '=') { - advance(); - return {TokenType::LE, "<=", line, col}; - } - return {TokenType::LT, "<", line, col}; - } - case ':': { - advance(); - if (src[pos] == ':') { - advance(); - return {TokenType::COL_COLON, "::", line, col}; - } - return {TokenType::COLON, ":", line, col}; - } - case '^': { - advance(); - return {TokenType::OPER_POW, "^", line, col}; - } - case '#': { - while (pos <= src.size() && src[pos] != '\n' ) - advance(); - advance(); - return {TokenType::COMMENT, {}, line, col}; - } - case '"': { - advance(); - std::string str; - while (src[pos] != '"') { - if (src[pos] == '\\') { - advance(); - switch (src[pos]) { - case 'n': str += '\n'; break; - case 't': str += '\t'; break; - case 'r': str += '\r'; break; - case 'b': str += '\b'; break; - case 'f': str += '\f'; break; - case 'v': str += '\v'; break; - case '0': str += '\0'; break; - default: str += src[pos]; break; - } - advance(); - continue; - } - str += src[pos]; - advance(); - } - advance(); - - return {TokenType::STRING_LITERAL, str, line, col - str.size()}; - } - case '(': { - advance(); - return {TokenType::LPAREN, "(", line, col}; - } - case ')': { - advance(); - return {TokenType::RPAREN, ")", line, col}; - } - case '{': { - advance(); - return {TokenType::LBRACE, "{", line, col}; - } - case '}': { - advance(); - return {TokenType::RBRACE, "}", line, col}; - } - case '[': { - advance(); - return {TokenType::LBRACK, "[", line, col}; - } - case ']': { - advance(); - return {TokenType::RBRACK, "]", line, col}; - } - case ',': { - advance(); - return {TokenType::COMMA, ", ", line, col}; - } - case '!': { - advance(); - if (src[pos] == '=') { - advance(); - return {TokenType::NE, "!=", line, col}; - } - return {TokenType::NOT, "!", line, col}; - } - case '|': { - advance(); - if (src[pos] == '>') { - advance(); - return {TokenType::PIPE, "|>", line, col}; - } - if (src[pos] == '|') { - advance(); - return {TokenType::OR, "||", line, col}; - } - return {TokenType::UNKNOWN, std::string(1, src[pos]), line, col}; - } - case '&': { - advance(); - if (src[pos] == '&') { - advance(); - return {TokenType::AND, "&&", line, col}; - } - return {TokenType::UNKNOWN, std::string(1, src[pos]), line, col}; - } - case '.': { - advance(); - return {TokenType::DOT, ".", line, col}; - } - default: { - if (isdigit(src[pos])) { - auto cur_line = line, cur_col = col; - std::string num; - while (isdigit(src[pos]) || src[pos] == '_') { - if (src[pos] == '_') { - advance(); - continue; - } - num += src[pos]; - advance(); - } - return {TokenType::NUM_LITERAL, num, cur_line, cur_col}; - } - if (isalpha(src[pos]) || src[pos] == '_') { - std::string id; - auto cur_line = line, cur_col = col; - while (isalnum(src[pos])|| src[pos] == '_') { - id += src[pos]; - advance(); - } - static const std::unordered_map keywords = { - {"func", TokenType::KW_FUNC}, - {"return", TokenType::KW_RETURN}, - {"if", TokenType::KW_IF}, - {"else", TokenType::KW_ELSE}, - {"let", TokenType::KW_LET}, - {"__VMC", TokenType::KW_VMC}, - {"module", TokenType::KW_MODULE}, - {"use", TokenType::KW_USE}, - {"loop", TokenType::KW_LOOP}, - {"break", TokenType::KW_BREAK}, - {"continue", TokenType::KW_CONTINUE}, - }; - if (const auto it = keywords.find(id); it != keywords.end()) { - return {it->second, id, cur_line, cur_col}; - } - return {TokenType::IDENTIFIER, id, cur_line, cur_col}; - } - } - } - - auto token = Token{TokenType::UNKNOWN, std::string(1, src[pos]), line, col}; - advance(); - return token; -} - -std::vector Lexer::tokenize(const std::string& new_src) { - src = new_src; - pos = 0; - line = 1; - col = 1; - std::vector tokens; - while (pos < src.size()) { - tokens.push_back(next()); - } - // Add EOF token at the end - tokens.push_back({TokenType::END_OF_FILE, "", line, col}); - return tokens; -} - -} +// +// Created by geguj on 2025/12/28. +// + +#include "lexer.hpp" + +#include +#include +#include + +namespace lmx { + +LMC_API std::ostream& operator<<(std::ostream& os, const Token& t) { + os << "Token("; + switch (t.type) { + case TokenType::END_OF_FILE: os << "END_OF_FILE"; break; + case TokenType::IDENTIFIER: os << "IDENTIFIER"; break; + case TokenType::NUM_LITERAL: os << "NUM_LITERAL"; break; + case TokenType::STRING_LITERAL: os << "STRING_LITERAL"; break; + case TokenType::COMMA: os << "COMMA"; break; + case TokenType::TRUE_LITERAL: os << "TRUE_LITERAL"; break; + case TokenType::FALSE_LITERAL: os << "FALSE_LITERAL"; break; + case TokenType::OPER_PLUS: os << "OPER_PLUS"; break; + case TokenType::OPER_MINUS: os << "OPER_MINUS"; break; + case TokenType::OPER_MUL: os << "OPER_MUL"; break; + case TokenType::OPER_DIV: os << "OPER_DIV"; break; + case TokenType::OPER_MOD: os << "OPER_MOD"; break; + case TokenType::EQ: os << "EQ"; break; + case TokenType::GE: os << "GE"; break; + case TokenType::GT: os << "GT"; break; + case TokenType::LE: os << "LE"; break; + case TokenType::LT: os << "LT"; break; + case TokenType::COLON: os << "COLON"; break; + case TokenType::COL_COLON: os << "COL_COLON"; break; + case TokenType::OPER_POW: os << "OPER_POW"; break; + case TokenType::ASSIGN: os << "ASSIGN"; break; + case TokenType::NOT: os << "NOT"; break; + case TokenType::NE: os << "NE"; break; + case TokenType::LPAREN: os << "LPAREN"; break; + case TokenType::RPAREN: os << "RPAREN"; break; + case TokenType::LBRACK: os << "LBRACK"; break; + case TokenType::RBRACK: os << "RBRACK"; break; + case TokenType::LBRACE: os << "LBRACE"; break; + case TokenType::RBRACE: os << "RBRACE"; break; + case TokenType::UNKNOWN: os << "UNKNOWN"; break; + case TokenType::KW_FUNC: os << "KEYWORD_FUNC"; break; + case TokenType::KW_RETURN: os << "KEYWORD_RETURN"; break; + case TokenType::KW_IF: os << "KEYWORD_IF"; break; + case TokenType::KW_ELSE: os << "KEYWORD_ELSE"; break; + case TokenType::KW_LET: os << "KEYWORD_LET"; break; + case TokenType::KW_VMC: os << "KEYWORD_VMC"; break; + case TokenType::KW_MODULE: os << "KEYWORD_MODULE"; break; + case TokenType::KW_USE: os << "KEYWORD_USE"; break; + case TokenType::KW_LOOP: os << "KEYWORD_LOOP"; break; + case TokenType::KW_BREAK: os << "KEYWORD_BREAK"; break; + case TokenType::KW_CONTINUE: os << "KEYWORD_CONTINUE"; break; + case TokenType::PIPE: os << "PIPE"; break; + case TokenType::OR: os << "OR"; break; + case TokenType::AND: os << "AND"; break; + case TokenType::DOT: os << "DOT"; break; + case TokenType::COMMENT: os << "COMMENT"; break; + default: os << "UNKNOWN"; + } + os << ", " << t.text << ", " << t.line << ", " << t.col << ')'; + return os; +} + +// 定义token类型和对应的正则表达式 +struct TokenPattern { + TokenType type; + std::regex pattern; + TokenPattern(TokenType t, const std::string& regex_str) : type(t), pattern(regex_str) {} +}; + +// 优先级从高到低排序 +static const std::vector token_patterns = { + {TokenType::COMMENT, "^#.*?$"}, + {TokenType::STRING_LITERAL, R"(^"(\.|[^\"])*")"}, + {TokenType::EQ, "^=="}, + {TokenType::NE, "^!="}, + {TokenType::GE, "^>="}, + {TokenType::LE, "^<="}, + {TokenType::OR, "^\\|\\|"}, + {TokenType::AND, "^&&"}, + {TokenType::PIPE, "^\\|>"}, + {TokenType::COL_COLON, "^::"}, + {TokenType::OPER_PLUS, "^\\+"}, + {TokenType::OPER_MINUS, "^-"}, + {TokenType::OPER_MUL, "^\\*"}, + {TokenType::OPER_DIV, "^/"}, + {TokenType::OPER_MOD, "^%"}, + {TokenType::OPER_POW, "^\\^"}, + {TokenType::ASSIGN, "^="}, + {TokenType::NOT, "^!"}, + {TokenType::GT, "^>"}, + {TokenType::LT, "^<"}, + {TokenType::COLON, "^:"}, + {TokenType::LPAREN, "^\\("}, + {TokenType::RPAREN, "^\\)"}, + {TokenType::LBRACE, "^\\{"}, + {TokenType::RBRACE, "^\\}"}, + {TokenType::LBRACK, "^\\["}, + {TokenType::RBRACK, "^\\]"}, + {TokenType::COMMA, "^,"}, + {TokenType::DOT, "^\\."}, + {TokenType::NUM_LITERAL, "^\\d[_\\d]*"}, + {TokenType::IDENTIFIER, "^[a-zA-Z_][a-zA-Z0-9_]*"}, +}; + +// 关键字映射 +static const std::unordered_map keywords = { + {"func", TokenType::KW_FUNC}, + {"return", TokenType::KW_RETURN}, + {"if", TokenType::KW_IF}, + {"else", TokenType::KW_ELSE}, + {"let", TokenType::KW_LET}, + {"__VMC", TokenType::KW_VMC}, + {"module", TokenType::KW_MODULE}, + {"use", TokenType::KW_USE}, + {"loop", TokenType::KW_LOOP}, + {"break", TokenType::KW_BREAK}, + {"continue", TokenType::KW_CONTINUE}, + {"true", TokenType::TRUE_LITERAL}, + {"false", TokenType::FALSE_LITERAL}, +}; + +Token Lexer::next() { + // 跳过空白字符 + while (pos < src.size() && isspace(src[pos])) { + if (src[pos] == '\n') { + line++; + col = 1; + } else { + col++; + } + pos++; + } + + if (pos >= src.size()) { + return {TokenType::END_OF_FILE, "", line, col}; + } + + // 提取当前位置开始的子串 + const std::string remaining = src.substr(pos); + + // 尝试匹配所有token模式 + for (const auto& pattern : token_patterns) { + std::smatch match; + if (std::regex_search(remaining, match, pattern.pattern)) { + const std::string matched_text = match.str(0); + const size_t match_length = matched_text.size(); + + // 保存当前位置信息 + const size_t token_line = line; + const size_t token_col = col; + + // 更新位置信息 + for (const char c : matched_text) { + if (c == '\n') { + line++; + col = 1; + } else { + col++; + } + } + pos += match_length; + + // 处理标识符和关键字 + if (pattern.type == TokenType::IDENTIFIER) { + auto it = keywords.find(matched_text); + if (it != keywords.end()) { + return {it->second, matched_text, token_line, token_col}; + } + } + + // 处理字符串字面量(去除引号) + if (pattern.type == TokenType::STRING_LITERAL) { + // 去除首尾引号 + const std::string unquoted = matched_text.substr(1, matched_text.size() - 2); + // 处理转义字符 + std::string processed; + for (size_t i = 0; i < unquoted.size(); i++) { + if (unquoted[i] == '\\' && i + 1 < unquoted.size()) { + i++; + switch (unquoted[i]) { + case 'n': processed += '\n'; break; + case 't': processed += '\t'; break; + case 'r': processed += '\r'; break; + case 'b': processed += '\b'; break; + case 'f': processed += '\f'; break; + case 'v': processed += '\v'; break; + case '0': processed += '\0'; break; + default: processed += unquoted[i]; break; + } + } else { + processed += unquoted[i]; + } + } + return {TokenType::STRING_LITERAL, processed, token_line, token_col}; + } + + // 处理数字字面量(去除下划线) + if (pattern.type == TokenType::NUM_LITERAL) { + std::string num_without_underscores; + for (char c : matched_text) { + if (c != '_') { + num_without_underscores += c; + } + } + return {TokenType::NUM_LITERAL, num_without_underscores, token_line, token_col}; + } + + // 跳过注释 + if (pattern.type == TokenType::COMMENT) { + return next(); + } + + return {pattern.type, matched_text, token_line, token_col}; + } + } + + // 无法识别的字符 + const char unknown_char = src[pos]; + Token token = {TokenType::UNKNOWN, std::string(1, unknown_char), line, col}; + pos++; + col++; + return token; +} + +std::vector Lexer::tokenize(const std::string& new_src) { + src = new_src; + pos = 0; + line = 1; + col = 1; + std::vector tokens; + + while (pos < src.size()) { + Token t = next(); + if (t.type != TokenType::COMMENT) { + tokens.push_back(t); + } + } + + tokens.push_back({TokenType::END_OF_FILE, "", line, col}); + + return tokens; +} + +} \ No newline at end of file diff --git a/compiler/lexer.hpp b/compiler/lexer.hpp index 995481e..733de57 100644 --- a/compiler/lexer.hpp +++ b/compiler/lexer.hpp @@ -5,6 +5,7 @@ #pragma once #include #include +#include #include "../include/lmx_export.hpp" @@ -44,7 +45,6 @@ struct LMC_API Token { class LMC_API Lexer { size_t pos{0}, line{1}, col{1}; - void advance(); std::string& src; Token next(); diff --git a/compiler/parser.cpp b/compiler/parser.cpp index 4720432..f8a8ac0 100644 --- a/compiler/parser.cpp +++ b/compiler/parser.cpp @@ -4,10 +4,8 @@ #include "parser.hpp" -#include -#include - -#include "../include/opcode.hpp" +#include "debug.hpp" +#include "../include/error.hpp" namespace lmx { @@ -60,20 +58,38 @@ bool Parser::match(TokenType t) const { } bool Parser::is_eof() const { - return pos >= tokens.size(); + DEBUG_LOG(ITIS(pos, std::to_string) << ", " << ITIS(tokens.size(), std::to_string)); + return pos >= tokens.size() - 1; } void Parser::check_eof() { while (!is_eof() && match(TokenType::END_OF_FILE)) { advance(); } - //if (!is_eof()) { - // error("Expected end of file"); - //} } -void Parser::error(const std::string& msg) { + +void Parser::print_error(const std::string& msg) { has_err = true; - std::cerr << "Error: " << msg << " at " << cur().line << ":" << cur().col << std::endl; + const auto this_line = [&] { + std::istringstream ss(code); + std::string l; + for (size_t i = 0; i < cur().col; i ++) + std::getline(ss, l); return l; + }(); + DEBUG_LOG(ITIS(code, ) << ", " << ITIS(this_line, )); + LM_ERROR(msg + + "\nat line " + + std::to_string(cur().line) + + ", column " + + std::to_string(cur().col) + + ", in " + + src + + "\n>>> " + + this_line + + "\n" + + std::string(cur().col + 3, ' ') + + "^" + ); } std::shared_ptr Parser::parse_block() { if (!match(TokenType::LBRACE)) error("expected '{'"); @@ -86,6 +102,11 @@ std::shared_ptr Parser::parse_block() { advance(); return std::make_shared(stmts); } + +void Parser::error(const std::string& msg) { + throw ParserError(msg); +} + std::shared_ptr Parser::parse_expr() { std::shared_ptr node = parse_logical_and(); std::shared_ptr type; @@ -122,10 +143,18 @@ std::shared_ptr Parser::parse_logical_or() { std::shared_ptr Parser::parse_relational() { std::shared_ptr node = expr(); while (match(TokenType::EQ) || match(TokenType::LT) || match(TokenType::GT) || - match(TokenType::LE) || match(TokenType::GE) || match(TokenType::NE) ) { + match(TokenType::LE) || match(TokenType::GE) || match(TokenType::NE)) { auto op = cur().text; + DEBUG_LOG(ITIS(op, )); advance(); - node = std::make_shared(node, parse_expr(), op); + DEBUG_LOG(ITIS(cur().text, )); + if (is_eof()) { + error("Not expected: eof"); + } + DEBUG_LOG("not eof, it's: " + cur().text + "type: " + std::to_string(static_cast(cur().type)) + + ", eof type: " + std::to_string(static_cast(TokenType::END_OF_FILE))); + auto a = parse_expr(); + node = std::make_shared(node, a, op); } return node; } @@ -150,91 +179,149 @@ std::shared_ptr Parser::parse_if() { } return std::make_shared(condition, then_block, else_block); } -std::shared_ptr Parser::parse() { - static bool in_func = false; - static bool in_loop = false; - std::shared_ptr node; - re_parse: - switch (cur().type) { - case TokenType::COMMENT: advance(); goto re_parse; - case TokenType::KW_LET: { - advance(); - if (!match(TokenType::IDENTIFIER)) error("expected identifier"); - auto name = cur().text; - advance(); - if (!match(TokenType::ASSIGN)) error("expected assignment"); - advance(); - node = std::make_shared(name, parse_expr(), false); - break; +std::shared_ptr Parser::parse_vector() { + if (!match(TokenType::LBRACK)) { + error("expected '[' at start of vector"); + return nullptr; } - case TokenType::KW_FUNC: { + advance(); + + std::vector> elements; + + if (match(TokenType::RBRACK)) { advance(); - in_func = true; - node = parse_funcdecl(true); - in_func = false; - break; + return std::make_shared(std::move(elements)); } - case TokenType::KW_RETURN: { - if (!in_func) error("expected 'return'"); - auto line = cur().line; - advance(); - if (cur().line > line) { - node = std::make_shared(nullptr); - } else { - auto e = parse_expr(); + elements.push_back(parse_expr()); - node = std::make_shared(e); + while (!match(TokenType::RBRACK) && !is_eof()) { + if (!match(TokenType::COMMA)) { + error("expected ',' between vector elements"); + while (!match(TokenType::COMMA) && !match(TokenType::RBRACK) && !is_eof()) { + advance(); + } + if (match(TokenType::COMMA)) { + advance(); + if (!match(TokenType::RBRACK)) { + elements.push_back(parse_expr()); + } + } + continue; } - break; - } - case TokenType::KW_LOOP: { - advance(); - std::shared_ptr cond = nullptr; - if (!match(TokenType::LBRACE)) - cond = parse_expr(); - in_loop = true; - auto block = parse_block(); - node = std::make_shared(cond, block); - in_loop = false; - break; - } - case TokenType::KW_BREAK: { - advance(); - if (!in_loop) error("expected 'break', but not in loop"); - node = std::make_shared(); - break; - } - case TokenType::KW_CONTINUE: { - advance(); - if (!in_loop) error("expected 'continue', but not in loop"); - node = std::make_shared(); - break; - } - case TokenType::KW_IF: { - advance(); - node = parse_if(); - break; - } - case TokenType::KW_MODULE: { - advance(); - node = parse_module(); - break; - } - case TokenType::KW_USE: { + advance(); - auto path = parse_string(); - break; + + if (match(TokenType::RBRACK)) { + break; + } + + elements.push_back(parse_expr()); } - default: { - if (match(TokenType::IDENTIFIER) && peek_match(TokenType::ASSIGN)) { - auto name = cur().text; + + if (!match(TokenType::RBRACK)) { + error("expected ']' at end of vector"); + while (!match(TokenType::RBRACK) && !is_eof()) { advance(); + } + if (match(TokenType::RBRACK)) { advance(); - node = std::make_shared(name, parse_expr()); - } else node = parse_expr(); - break; + } + return std::make_shared(std::move(elements)); } + + advance(); + return std::make_shared(std::move(elements)); +} +std::shared_ptr Parser::parse() { + static bool in_func = false; + static bool in_loop = false; + std::shared_ptr node; + try { + re_parse: + switch (cur().type) { + case TokenType::COMMENT: advance(); goto re_parse; + case TokenType::KW_LET: { + advance(); + if (!match(TokenType::IDENTIFIER)) error("expected identifier"); + auto name = cur().text; + advance(); + if (!match(TokenType::ASSIGN)) error("expected assignment"); + advance(); + node = std::make_shared(name, parse_expr(), false); + break; + } + case TokenType::KW_FUNC: { + advance(); + in_func = true; + node = parse_funcdecl(true); + in_func = false; + break; + } + case TokenType::KW_RETURN: { + if (!in_func) error("expected 'return'"); + + auto line = cur().line; + advance(); + if (cur().line > line) { + node = std::make_shared(nullptr); + } else { + auto e = parse_expr(); + + node = std::make_shared(e); + } + break; + } + case TokenType::KW_LOOP: { + advance(); + std::shared_ptr cond = nullptr; + if (!match(TokenType::LBRACE)) + cond = parse_expr(); + in_loop = true; + auto block = parse_block(); + node = std::make_shared(cond, block); + in_loop = false; + break; + } + case TokenType::KW_BREAK: { + advance(); + if (!in_loop) error("expected 'break', but not in loop"); + node = std::make_shared(); + break; + } + case TokenType::KW_CONTINUE: { + advance(); + if (!in_loop) error("expected 'continue', but not in loop"); + node = std::make_shared(); + break; + } + case TokenType::KW_IF: { + advance(); + node = parse_if(); + break; + } + case TokenType::KW_MODULE: { + advance(); + node = parse_module(); + break; + } + case TokenType::KW_USE: { + advance(); + auto path = parse_string(); + break; + } + default: { + if (match(TokenType::IDENTIFIER) && peek_match(TokenType::ASSIGN)) { + auto name = cur().text; + advance(); + advance(); + node = std::make_shared(name, parse_expr()); + } else node = parse_expr(); + break; + } + } + } catch (ParserError &e) { + print_error("ParserError: " + std::string(e.what())); } return node; } @@ -349,12 +436,12 @@ std::shared_ptr Parser::parse_funcdecl(const bool has_block = true) { } } check_type(ret_type =) - if (match(TokenType::LBRACE)) { // 一般 定义情况 + if (match(TokenType::LBRACE)) { auto node = std::make_shared(name, params, parse_block()); node->args_type = std::move(args_type); node->ret_type = std::move(ret_type); return node; - } else if (match(TokenType::ASSIGN)) { // 外部导入情况 + } else if (match(TokenType::ASSIGN)) { advance(); if (!match(TokenType::STRING_LITERAL)) { error("expected string literal"); @@ -366,7 +453,7 @@ std::shared_ptr Parser::parse_funcdecl(const bool has_block = true) { node->args_type = std::move(args_type); node->ret_type = std::move(ret_type); return node; - } else { // 仅声明情况 + } else { auto node = std::make_shared(name, params, nullptr); node->args_type = std::move(args_type); node->ret_type = std::move(ret_type); @@ -388,21 +475,31 @@ std::shared_ptr Parser::term() { while (match(TokenType::OPER_MUL) || match(TokenType::OPER_DIV) || match(TokenType::OPER_MOD) || match(TokenType::OPER_POW)) { auto op = cur().text; advance(); + if (is_eof()) error("Unexpected eof"); node = std::make_shared(node, factor(), op); } return node; } std::shared_ptr Parser::parse_program() { + DEBUG_ENTER_FUNC(); std::vector> stmts; - while (!match(TokenType::RBRACE) && !is_eof()) { - if (const auto stmt = parse()) stmts.push_back(stmt); + try { + while (!is_eof()) { + if (const auto stmt = parse()) stmts.push_back(stmt); + } + } catch (ParserError& e) { + print_error("ParserError:" + std::string(e.what())); } + DEBUG_LEAVE_FUNC(); return std::make_shared(stmts); } std::shared_ptr Parser::factor() { std::shared_ptr fact = nullptr; - if (match(TokenType::NUM_LITERAL)) { + if (match(TokenType::IDENTIFIER) && cur().text == "vec") { + advance(); + fact = parse_vector(); + } else if (match(TokenType::NUM_LITERAL)) { fact = std::make_shared(cur().text); advance(); } else if (match(TokenType::LPAREN)) { diff --git a/compiler/parser.hpp b/compiler/parser.hpp index 5bcd8ef..edb5659 100644 --- a/compiler/parser.hpp +++ b/compiler/parser.hpp @@ -5,6 +5,7 @@ #pragma once #include #include +#include #include "../include/lmx_export.hpp" #include "lexer.hpp" @@ -16,6 +17,8 @@ class LMC_API Parser { bool in_module{false}; bool has_err{false}; std::vector& tokens; + std::string code; + std::string src; size_t pos{0}; void parse_args(std::vector> &args); @@ -26,43 +29,32 @@ class LMC_API Parser { [[nodiscard]] bool is_eof() const; std::shared_ptr expr(); std::shared_ptr term(); - - std::shared_ptr factor(); - std::shared_ptr parse_func_call(); bool peek_match(TokenType type) const; - void check_eof(); + void print_error(const std::string &msg); + void error(const std::string& msg); std::shared_ptr parse_block(); - std::shared_ptr parse_string(); - std::shared_ptr parse_if(); std::shared_ptr parse_expr(); - std::shared_ptr parse_logical_and(); - std::shared_ptr parse_relational(); - std::shared_ptr parse_logical_or(); - + std::shared_ptr parse_vector(); std::shared_ptr parse_funcdecl(bool has_block); public: - explicit Parser(std::vector& tokens): tokens(tokens) {} + explicit Parser(std::vector& tokens, std::string code, std::string src = ""): tokens(tokens), code(std::move(code)), src(std::move(src)) {} std::shared_ptr parse(); - std::shared_ptr parse_module(); - std::shared_ptr parse_type(); - - std::shared_ptr parse_program(); [[nodiscard]] bool error() const {return has_err;} }; diff --git a/include/debug.hpp b/include/debug.hpp new file mode 100644 index 0000000..cdbce1b --- /dev/null +++ b/include/debug.hpp @@ -0,0 +1,128 @@ +// +// 调试输出宏定义 +// 定义DEBUG_OUTPUT宏来启用调试输出 +// Created by DaLL +// + +#pragma once + +#include +#include +#include + +// 控制台颜色代码 +#define COLOR_RESET "\033[0m" +#define COLOR_YELLOW "\033[33m" +#define COLOR_GREEN "\033[32m" +#define COLOR_CYAN "\033[36m" +#define COLOR_MAGENTA "\033[35m" +#define COLOR_RED "\033[31m" +#define COLOR_BLUE "\033[34m" +#define COLOR_WHITE "\033[37m" + +// 启用调试输出宏 - 定义这个宏来启用调试输出 +// 注释掉这一行来禁用调试输出 +// #define DEBUG_OUTPUT + +#ifdef DEBUG_OUTPUT + +// 带颜色的输出宏 +#define DEBUG_COLOR(color, msg) do { \ + std::cerr << color << "[DEBUG] " << __FILE__ << ":" << __LINE__ << " - " << msg << COLOR_RESET << std::endl; \ +} while(0) + +// 基础调试输出宏(黄色) +#define DEBUG_LOG(msg) do { \ + std::cerr << COLOR_YELLOW << "[DEBUG] " << __FILE__ << ":" << __LINE__ << " - " << msg << COLOR_RESET << std::endl; \ +} while(0) + +// 带格式的调试输出(黄色) +#define DEBUG_LOG_FMT(fmt, ...) do { \ + std::cerr << COLOR_YELLOW << "[DEBUG] " << __FILE__ << ":" << __LINE__ << " - "; \ + std::fprintf(stderr, fmt, ##__VA_ARGS__); \ + std::cerr << COLOR_RESET << std::endl; \ +} while(0) + +// 进入函数调试(青色) +#define DEBUG_ENTER_FUNC() do { \ + std::cerr << COLOR_CYAN << "[DEBUG] ENTER: " << __FUNCTION__ << " at " << __FILE__ << ":" << __LINE__ << COLOR_RESET << std::endl; \ +} while(0) + +// 离开函数调试(绿色) +#define DEBUG_LEAVE_FUNC() do { \ + std::cerr << COLOR_GREEN << "[DEBUG] LEAVE: " << __FUNCTION__ << " at " << __FILE__ << ":" << __LINE__ << COLOR_RESET << std::endl; \ +} while(0) + +// 值调试(黄色) +#define DEBUG_VAL(name, val) do { \ + std::cerr << COLOR_YELLOW << "[DEBUG] " << __FILE__ << ":" << __LINE__ << " - " << name << " = " << (val) << COLOR_RESET << std::endl; \ +} while(0) + +// 指针调试(洋红色) +#define DEBUG_PTR(name, ptr) do { \ + std::cerr << COLOR_MAGENTA << "[DEBUG] " << __FILE__ << ":" << __LINE__ << " - " << name << " = " << static_cast(ptr) << COLOR_RESET << std::endl; \ +} while(0) + +// 错误调试(红色) +#define DEBUG_ERROR(msg) do { \ + std::cerr << COLOR_RED << "[ERROR] " << __FILE__ << ":" << __LINE__ << " - " << msg << COLOR_RESET << std::endl; \ +} while(0) + +// Token列表展示(蓝色) +#define DEBUG_TOKEN_LIST(tokens) do { \ + std::cerr << COLOR_BLUE << "\n========== TOKEN LIST ==========" << COLOR_RESET << std::endl; \ + for (size_t i = 0; i < tokens.size(); i++) { \ + std::cerr << COLOR_BLUE << "[" << std::setw(3) << i << "] " << tokens[i] << COLOR_RESET << std::endl; \ + } \ + std::cerr << COLOR_BLUE << "================================" << COLOR_RESET << std::endl; \ +} while(0) + +// 字节码展示(青色) +#define DEBUG_BYTECODE(ops) do { \ + std::cerr << COLOR_CYAN << "\n========== BYTECODE ==========" << COLOR_RESET << std::endl; \ + Generator::print_ops(const_cast&>(ops)); \ + std::cerr << COLOR_CYAN << "==============================" << COLOR_RESET << std::endl; \ +} while(0) + +// 执行步骤展示(黄色) +#define DEBUG_EXEC_STEP(pc, op, desc) do { \ + std::cerr << COLOR_YELLOW << "[EXEC] PC=" << std::setw(4) << pc << " | " << desc << COLOR_RESET << std::endl; \ +} while(0) + +// 寄存器状态展示(白色) +#define DEBUG_REGS(regs) do { \ + std::cerr << COLOR_WHITE << " REGS: "; \ + for (size_t i = 0; i < REG_COUNT && i < 16; i++) { \ + if (regs[i].type == ValueType::Int) \ + std::cerr << "r" << i << "=" << regs[i].i64 << " "; \ + else if (regs[i].type == ValueType::Bool) \ + std::cerr << "r" << i << "=" << (regs[i].b ? "T" : "F") << " "; \ + else if (regs[i].type == ValueType::Float) \ + std::cerr << "r" << i << "=" << regs[i].f64 << " "; \ + } \ + std::cerr << COLOR_RESET << std::endl; \ +} while(0) + +// 分隔线 +#define DEBUG_SEPARATOR(title) do { \ + std::cerr << COLOR_YELLOW << "\n========== " << title << " ==========" << COLOR_RESET << std::endl; \ +} while(0) + +#else + +// 禁用时的空宏 +#define DEBUG_LOG(msg) do {} while(0) +#define DEBUG_LOG_FMT(fmt, ...) do {} while(0) +#define DEBUG_ENTER_FUNC() do {} while(0) +#define DEBUG_LEAVE_FUNC() do {} while(0) +#define DEBUG_VAL(name, val) do {} while(0) +#define DEBUG_PTR(name, ptr) do {} while(0) +#define DEBUG_COLOR(color, msg) do {} while(0) +#define DEBUG_ERROR(msg) do {} while(0) +#define DEBUG_TOKEN_LIST(tokens) do {} while(0) +#define DEBUG_BYTECODE(ops) do {} while(0) +#define DEBUG_EXEC_STEP(pc, op, desc) do {} while(0) +#define DEBUG_REGS(regs) do {} while(0) +#define DEBUG_SEPARATOR(title) do {} while(0) + +#endif diff --git a/include/error.hpp b/include/error.hpp new file mode 100644 index 0000000..1def5e2 --- /dev/null +++ b/include/error.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include + +namespace lmx { + +// 错误类型枚举 +enum class ErrorType { + ERR +}; + +// 错误报告函数 +// 参数: +// type: 错误类型 +// message: 错误消息 +inline void error_reporter(const ErrorType type, const std::string& message) { + if (type == ErrorType::ERR) std::cerr << "Error: " << message << std::endl; +} + +inline void LM_ERROR(const std::string& msg) { + error_reporter(ErrorType::ERR, msg); +} + +class ParserError final : public std::runtime_error { +public: + explicit ParserError( + const std::string& msg + ): std::runtime_error(msg) {} +}; + +#define ITIS(x, convert) (std::string(#x " = <") + convert(x) + std::string(">")) +} // lmx diff --git a/include/lmx_export.hpp b/include/lmx_export.hpp index af2fdad..ad7f151 100644 --- a/include/lmx_export.hpp +++ b/include/lmx_export.hpp @@ -24,5 +24,5 @@ #define LMX_VERSION 0x00000001 #define LMX_MAGIC_NUM 0x4d4c5451 -const unsigned int lmx_magic = LMX_MAGIC_NUM; -const unsigned int lmx_version = LMX_VERSION; +constexpr unsigned int lmx_magic = LMX_MAGIC_NUM; +constexpr unsigned int lmx_version = LMX_VERSION; diff --git a/include/opcode.hpp b/include/opcode.hpp index 85ccdca..ace0322 100644 --- a/include/opcode.hpp +++ b/include/opcode.hpp @@ -1,4 +1,4 @@ -// +// // Created by geguj on 2025/12/27. // @@ -30,9 +30,11 @@ enum class Opcode : uint8_t { AND, OR, VMC, // vmcall DEC, + PUSH, // 压栈指令 + CREATE_VECTOR, // 创建向量指令 }; -inline uint8_t opcode_len(Opcode op) { +inline uint8_t opcode_len(const Opcode op) { switch (op) { using enum Opcode; case MOV_RI: @@ -70,18 +72,20 @@ inline uint8_t opcode_len(Opcode op) { case DEC: return 1; case JMP: return 8; case DEBUG_LOG: return 9; + case PUSH: return 1; // 压栈指令长度:1个寄存器 + case CREATE_VECTOR: return 2; // 创建向量指令长度:1个目标寄存器 + 1个立即数(元素数量); } return 0; } struct Op { - Opcode op; + Opcode op{}; uint8_t operands[12]{}; - explicit inline Op(const Opcode op, const uint8_t* operand): op(op) { + explicit Op(const Opcode op, const uint8_t* operand): op(op) { memcpy(operands, operand, 12); } - explicit inline Op(const Opcode op): op(op) {} - explicit inline Op() = default; + explicit Op(const Opcode op): op(op) {} + explicit Op() = default; }; } // namespace lmx diff --git a/include/vmcall.hpp b/include/vmcall.hpp index f6df14c..9c5a3bb 100644 --- a/include/vmcall.hpp +++ b/include/vmcall.hpp @@ -1,27 +1,29 @@ -#pragma once -#include "../runtime/vm.hpp" - - -namespace lmx::runtime { -class VMCall { -#define VMCALL_INDEX_MAX UINT16_MAX -#define VMC_REGISTER(name) static void lm_##name(lmx::runtime::VirtualCore* self) - VMC_REGISTER(out); - VMC_REGISTER(in); - VMC_REGISTER(exit); - VMC_REGISTER(dyn_load); - VMC_REGISTER(dyn_set); - VMC_REGISTER(dyn_call); - -#undef VMC_REGISTER -#define VMC_REGISTER(name) void lmx::runtime::VMCall::lm_##name(lmx::runtime::VirtualCore* self) -public: - using VmCallType = void (*)(VirtualCore* self); - - static inline VmCallType vmcall_table[] = { - lm_out, lm_in, lm_exit, lm_dyn_load, lm_dyn_set, lm_dyn_call - }; - static inline uint16_t vmcall_count = sizeof(vmcall_table) / sizeof(VmCallType); -}; -} - +#pragma once +#include "../runtime/vm.hpp" + + +namespace lmx::runtime { +class VMCall { +#define VMCALL_INDEX_MAX UINT16_MAX +#define VMC_REGISTER(name) static void lm_##name(lmx::runtime::VirtualCore* self) + VMC_REGISTER(out); + VMC_REGISTER(in); + VMC_REGISTER(exit); + VMC_REGISTER(dyn_load); + VMC_REGISTER(dyn_set); + VMC_REGISTER(dyn_call); + VMC_REGISTER(alloc_memory); + VMC_REGISTER(store_memory); + +#undef VMC_REGISTER +#define VMC_REGISTER(name) void lmx::runtime::VMCall::lm_##name(lmx::runtime::VirtualCore* self) +public: + using VmCallType = void (*)(VirtualCore* self); + + static inline VmCallType vmcall_table[] = { + lm_out, lm_in, lm_exit, lm_dyn_load, lm_dyn_set, lm_dyn_call, lm_alloc_memory, lm_store_memory + }; + static inline uint16_t vmcall_count = sizeof(vmcall_table) / sizeof(VmCallType); +}; +} + diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 300f395..4db4ca8 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -3,7 +3,8 @@ set(CMAKE_CXX_STANDARD 23) set(CMAKE_POLICY_VERSION_MINIMUM 3.5) file(GLOB_RECURSE RUNTIME_SRC *.cpp */*.cpp) -add_library(lmvm SHARED ${RUNTIME_SRC}) +add_library(lmvm SHARED ${RUNTIME_SRC} + builtins.cpp) # Define cmake-build-debug macro for DLL export if(WIN32) diff --git a/runtime/builtins.cpp b/runtime/builtins.cpp new file mode 100644 index 0000000..685c32b --- /dev/null +++ b/runtime/builtins.cpp @@ -0,0 +1,33 @@ +#include "builtins.hpp" + +namespace lmx::runtime::builtins { + const BuiltinConstant builtin_constants[] = { + {"EARTH_GRAVITY", Value(9.80665)}, + {"MOON_GRAVITY", Value(1.625)}, + {"MARS_GRAVITY", Value(3.72076)}, + {"WATER_DENSITY", Value(1000.0)}, + {"STANDARD_PRESSURE", Value(101325.0)}, + {"STANDARD_TEMPERATURE", Value(273.15)}, + {"AIR_DENSITY", Value(1.225)}, + {"C", Value(2.99792458e8)}, + {"G", Value(6.67430e-11)}, + {"H", Value(6.62607015e-34)}, + {"KB", Value(1.380649e-23)}, + {"EPSILON_0", Value(8.8541878128e-12)}, + {"MU_0", Value(1.25663706212e-6)}, + {"AVOGADRO", Value(6.02214076e23)}, + {"R", Value(8.314462618)}, + {"FARADAY", Value(9.648533212e4)}, + {"AMU", Value(1.66053906660e-27)}, + {"MOLAR_VOLUME_IDEAL", Value(0.024465)}, + {"ROOM_PRESSURE", Value(1.0e5)}, + {"ROOM_TEMPERATURE", Value(297.15)} + }; + size_t builtin_constants_count = std::size(builtin_constants); + size_t get_builtin_constant_index(const std::string& name) { + for (size_t i = 0; i < builtin_constants_count; i++) + if (builtin_constants[i].name == name) + return i + builtin_start; + return SIZE_MAX; + } +} \ No newline at end of file diff --git a/runtime/builtins.hpp b/runtime/builtins.hpp new file mode 100644 index 0000000..aae11db --- /dev/null +++ b/runtime/builtins.hpp @@ -0,0 +1,20 @@ +#pragma once +#include "value/value.hpp" +#include "../include/lmx_export.hpp" + +namespace lmx::runtime::builtins { + +// 内置常量定义 +struct BuiltinConstant { + const char* name; + Value value; +}; + +// 内置常量数量 +inline int builtin_start = 0; +extern LMVM_API size_t builtin_constants_count; +extern LMVM_API const BuiltinConstant builtin_constants[]; + +LMVM_API size_t get_builtin_constant_index(const std::string& name); + +} // namespace lmx::runtime::builtins \ No newline at end of file diff --git a/runtime/frame/frame.hpp b/runtime/frame/frame.hpp index f2d27af..85fb886 100644 --- a/runtime/frame/frame.hpp +++ b/runtime/frame/frame.hpp @@ -8,13 +8,11 @@ namespace lmx::runtime { struct StackFrame { - // size_t local_pc{}; std::vector locals{}; - // size_t ret_addr{}; StackFrame() { locals.resize(56); } - void new_var(const uint16_t addr, Value& value) { + void new_var(const uint16_t addr, const Value& value) { if (locals.size() <= addr) locals.resize(addr + 1); locals[addr] = value; } diff --git a/runtime/libloader.cpp b/runtime/libloader.cpp index ea7c859..93808c0 100644 --- a/runtime/libloader.cpp +++ b/runtime/libloader.cpp @@ -42,14 +42,13 @@ void DynFunc::call(VirtualCore* vm) const { switch (ret_type) { case Void: vm->get_register(0) = (void*)nullptr; dcCallVoid(caller, func); break; - case Char: vm->get_register(0) = (uint8_t)dcCallChar(caller, func); break; - case Short: vm->get_register(0) = (uint64_t)dcCallShort(caller, func); break; - case Int: vm->get_register(0) = (uint64_t)dcCallInt(caller, func); break; - case LongLong: vm->get_register(0) = (uint64_t)dcCallLongLong(caller, func); break; - case Float: vm->get_register(0) = dcCallFloat(caller, func); break; + case Char: vm->get_register(0) = static_cast(dcCallChar(caller, func)); break; + case Short: vm->get_register(0) = static_cast(dcCallShort(caller, func)); break; + case Int: vm->get_register(0) = static_cast(dcCallInt(caller, func)); break; + case LongLong: vm->get_register(0) = static_cast(dcCallLongLong(caller, func)); break; + case Bool: vm->get_register(0) = static_cast(dcCallBool(caller, func)); break; case Double: vm->get_register(0) = dcCallDouble(caller, func); break; case Ptr: vm->get_register(0) = dcCallPointer(caller, func); break; - case Bool: vm->get_register(0) = (bool)dcCallBool(caller, func); break; default: dcArgPointer(caller, vm->get_register(reg).ptr); break; @@ -65,7 +64,7 @@ size_t DynFunc::max_size() const { #else 0; #endif - for (const auto& at : arg_type) + for ([[maybe_unused]] const auto& at : arg_type) size += 8; return size; } @@ -80,7 +79,7 @@ DynLib::DynLib(const std::string& name) { handle = dlopen(this->name.c_str(), RTLD_LAZY | RTLD_GLOBAL); #endif if (!handle) { - std::cerr << "error: cannot load lib: '" << this->name << '\'' << std::endl; + LM_ERROR("error: cannot load lib: '" + this->name + '\''); exit(1); } } @@ -95,7 +94,7 @@ void DynLib::set_func(const char* n, std::vector args_type, CBasicT dlsym(handle, n); #endif if (!fp) { - std::cerr << "the dynamic symbol '" << n << "' does not exist" << std::endl; + LM_ERROR("the dynamic symbol '" + std::string(n) + "' does not exist"); exit(1); } if (!funcs.contains(n)) { @@ -113,7 +112,7 @@ bool DynLib::contain(const char* n) const { const DynFunc* DynLib::find(const char* n) const { const auto it = funcs.find(n); if (it == funcs.end()) { - std::cerr << "the func `" << n << "` is not found in lib `" << name << "`" << std::endl; + LM_ERROR("the func `" + std::string(n) + "` is not found in lib `" + name + "`"); return nullptr; } return &it->second; diff --git a/runtime/loader.cpp b/runtime/loader.cpp index 6d19283..fb6ffcf 100644 --- a/runtime/loader.cpp +++ b/runtime/loader.cpp @@ -7,6 +7,7 @@ #include #include +#include "error.hpp" #include "lmx_export.hpp" #include "opcode.hpp" @@ -33,12 +34,11 @@ bool BinaryLoader::check_head() { void BinaryLoader::load() { if (!check_head()) { - std::cerr << "Loader: binary file format bad" << std::endl; + LM_ERROR("Loader: binary file format bad"); exit(-1); } while (true) { Op op; - //std::cout << (int)op.op << std::endl; op.op = static_cast(file.get()); file.read(reinterpret_cast(op.operands), opcode_len(op.op)); ops.push_back(op); diff --git a/runtime/loader.hpp b/runtime/loader.hpp index 440faa0..ac86903 100644 --- a/runtime/loader.hpp +++ b/runtime/loader.hpp @@ -10,7 +10,7 @@ struct Op; class LMVM_API BinaryLoader { std::vector ops; std::vector data; - std::remove_reference::type file; + std::remove_reference_t file; struct Header { uint32_t magic, version; }; diff --git a/runtime/object/base.hpp b/runtime/object/base.hpp index 5960d71..dc42473 100644 --- a/runtime/object/base.hpp +++ b/runtime/object/base.hpp @@ -1,6 +1,5 @@ #pragma once #include -#include namespace lmx::runtime { enum class LMXObjType: uint8_t { diff --git a/runtime/value/value.cpp b/runtime/value/value.cpp index 8ce310e..7ef4a16 100644 --- a/runtime/value/value.cpp +++ b/runtime/value/value.cpp @@ -11,10 +11,13 @@ namespace lmx::runtime { -Value::Value() : null(nullptr), type(ValueType::Ptr) { +Value::Value() : type(ValueType::Ptr), null(nullptr) { } -Value::Value(void* p) : i64(*static_cast(p)), type(ValueType::Ptr) { +Value::Value(double n) : type(ValueType::Float), f64(n) { +} + +Value::Value(void* p) : type(ValueType::Ptr), i64(*static_cast(p)) { } Value& Value::operator=(void* new_ptr) { @@ -59,7 +62,10 @@ Value& Value::operator=(const Value& rhs) { case ValueType::Float: this->f64 = rhs.f64; break; case ValueType::Str: this->str = rhs.str; break; case ValueType::Bool: this->b = rhs.b; break; - case ValueType::Ptr: this->ptr = rhs.ptr; break; + case ValueType::Ptr: + this->ptr = rhs.ptr; + this->u64 = rhs.u64; + break; case ValueType::Null: this->null = nullptr; break; case ValueType::NO_ENUM_VALUE: break; // add this line to avoid warning. } diff --git a/runtime/value/value.hpp b/runtime/value/value.hpp index 79eceb7..ffecfcd 100644 --- a/runtime/value/value.hpp +++ b/runtime/value/value.hpp @@ -37,6 +37,7 @@ struct LMVM_API Value { }; explicit Value(void* p); Value(); + explicit Value(double n); Value(const Value& other) = default; template T& get(); diff --git a/runtime/vm.cpp b/runtime/vm.cpp index 6fde9c7..cbad74d 100644 --- a/runtime/vm.cpp +++ b/runtime/vm.cpp @@ -7,216 +7,927 @@ #include #include +#include "builtins.hpp" +#include "value/value.hpp" #include "vmcall.hpp" #include "../compiler/generator/generator.hpp" +#include "../include/debug.hpp" namespace lmx::runtime { -VirtualCore::VirtualCore() : const_pool_top(nullptr), ste() { - static std::vector program; - ste.program = &program; +VirtualCore::VirtualCore() : const_pool_top(nullptr) { + ste.program = nullptr; ste.pc = 0; - ste.cur.push_back(std::make_unique()); - ste.cur.back()->locals.resize(64); + ste.stack_frames.push_back(std::make_unique()); + ste.stack_frames.back()->locals.resize(64); + insert_builtins(); } -/*VirtualCore::VirtualCore(LMXState ste) : const_pool_top(nullptr), ste(std::move(ste)) {} - -VirtualCore::VirtualCore(LMXState ste, void* const_pool_top) : - const_pool_top(const_pool_top), - ste(std::move(ste)) { -}*/ +Value* VirtualCore::get_value_from_pool(const size_t offset) const { + return static_cast(const_pool_top) + offset; +} -Value *VirtualCore::get_value_from_pool(const size_t offest) const { - return static_cast(const_pool_top) + offest; +bool VirtualCore::is_valid_register(const uint8_t reg) { + DEBUG_LOG(ITIS(reg, std::to_string) << ", " << ITIS(REG_COUNT, std::to_string)); + return reg < REG_COUNT; } -int VirtualCore::run() { - RUN_CONTINUE: - const Opcode& op = ste.program->operator[](ste.pc).op; - const auto& operands = ste.program->operator[](ste.pc).operands; - switch (op) { - using enum Opcode; - case MOV_RI: { - ste.regs[operands[0]] = *reinterpret_cast(operands + 1); - ste.pc++; - goto RUN_CONTINUE; - } - case MOV_RM: { - // todo! - ste.pc++; - goto RUN_CONTINUE; - } - case MOV_RR: { - ste.regs[operands[0]] = ste.regs[operands[1]]; - ste.pc++; - goto RUN_CONTINUE; +bool VirtualCore::validate_registers(const uint8_t* regs, size_t count) { + for (size_t i = 0; i < count; i++) { + if (!is_valid_register(regs[i])) { + return false; + } } - case MOV_RC: { - ste.regs[operands[0]] = (char*)get_constant() + *(uint64_t*)(operands + 1); - ste.pc++; - goto RUN_CONTINUE; - } - case MOV_MI: { + return true; +} - // todo! - ste.pc++; - goto RUN_CONTINUE; - } - case MOV_MM: { - // todo! - ste.pc++; - goto RUN_CONTINUE; - } - case MOV_MR: { - // todo! - ste.pc++; - goto RUN_CONTINUE; - } - case MOV_MC: { - // todo! - ste.pc++; - goto RUN_CONTINUE; - } - case ADD: { - ste.regs[operands[0]] = ste.regs[operands[1]].i64 + ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case SUB: { - ste.regs[operands[0]] = ste.regs[operands[1]].i64 - ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case MUL: { - ste.regs[operands[0]] = ste.regs[operands[1]].i64 * ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case DIV: { - ste.regs[operands[0]] = ste.regs[operands[1]].i64 / ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case MOD: { - ste.regs[operands[0]] = ste.regs[operands[1]].i64 % ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case POW: { - ste.regs[operands[0]] = std::pow(ste.regs[operands[1]].f64, ste.regs[operands[2]].f64); - ste.pc++; - goto RUN_CONTINUE; - } - case FCALL: { - ste.ret_addr_stack.push_back(ste.pc + 1); // 返回地址 - ste.pc = *reinterpret_cast(operands); // 跳转地址 - const auto args_count = operands[8]; // 传参数量 - ste.cur.push_back(std::make_unique()); //新建栈帧 - ste.cur.back()->locals.resize(args_count + 1); - for (uint8_t i = 0; i != args_count; i++) ste.cur.back()->locals[i] = ste.regs[REG_COUNT_INDEX_MAX - i]; - goto RUN_CONTINUE; - } - case FRET: { - ste.pc = ste.ret_addr_stack.back(); //返回地址 - ste.ret_addr_stack.pop_back(); - ste.cur.pop_back(); // 恢复栈帧 - goto RUN_CONTINUE; - } - case HALT: { - return 0; - } - case DEBUG_LOG: { - fprintf(stderr,"[LogInfo]: %s\n", static_cast(const_pool_top) + *reinterpret_cast(operands)); - ste.pc++; - goto RUN_CONTINUE; - } - case JMP: { - ste.pc = *reinterpret_cast(operands); - goto RUN_CONTINUE; - } - case CMP_GE: { - ste.regs[operands[0]].b = ste.regs[operands[1]].i64 >= ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case CMP_LT: { - ste.regs[operands[0]].b = ste.regs[operands[1]].i64 < ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case CMP_LE: { - ste.regs[operands[0]].b = ste.regs[operands[1]].i64 <= ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case CMP_GT: { - ste.regs[operands[0]].b = ste.regs[operands[1]].i64 > ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case CMP_EQ: { - ste.regs[operands[0]].b = ste.regs[operands[1]].i64 == ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case CMP_NE: { - ste.regs[operands[0]].b = ste.regs[operands[1]].i64 != ste.regs[operands[2]].i64; - ste.pc++; - goto RUN_CONTINUE; - } - case IF_TRUE: { - if (ste.regs[operands[0]].b) ste.pc = *reinterpret_cast(operands + 1); - else ste.pc++; - goto RUN_CONTINUE; - } - case IF_FALSE: { - if (!ste.regs[operands[0]].b) ste.pc = *reinterpret_cast(operands + 1); - else ste.pc++; - goto RUN_CONTINUE; - } - case FUNC_CREATE: { - while (ste.program->operator[](ste.pc).op != FUNC_END) {ste.pc++;} - ste.pc ++; - goto RUN_CONTINUE; - } - case FUNC_END: { - ste.pc++; - goto RUN_CONTINUE; - } - case LOCAL_GET: { - ste.regs[operands[0]] = ste.cur[operands[1]]->locals[*(uint16_t*)(operands + 2)]; - ste.pc++; - goto RUN_CONTINUE; - } - case LOCAL_SET: { - ste.cur[operands[0]]->locals[*(uint16_t*)(operands + 1)] = ste.regs[operands[3]]; - ste.pc++; - goto RUN_CONTINUE; - } - case AND: { - ste.regs[operands[0]].b = ste.regs[operands[1]].b && ste.regs[operands[2]].b; - ste.pc++; - goto RUN_CONTINUE; - } - case OR: { - ste.regs[operands[0]].b = ste.regs[operands[1]].b || ste.regs[operands[2]].b; - ste.pc++; - goto RUN_CONTINUE; +bool VirtualCore::validate_jump_address(const uint64_t address) const { + return address < ste.program->size(); +} + +bool VirtualCore::validate_stack_frame(const size_t frame_index, const size_t local_index) const { + if (frame_index >= ste.stack_frames.size()) { + return false; } - case VMC: { - VMCall::vmcall_table[*(uint16_t*)operands](this); - ste.pc++; - goto RUN_CONTINUE; + return local_index < ste.stack_frames[frame_index]->locals.size(); +} + +const char* VirtualCore::get_constant_string(const uint64_t offset) const { + if (const_pool_top == nullptr) { + return nullptr; } - case DEC: { - ste.regs[operands[0]].i64--; - ste.pc++; - goto RUN_CONTINUE; + return static_cast(const_pool_top) + offset; +} + +void VirtualCore::handle_error(const char* error_message) { + LM_ERROR(error_message); +} + +VirtualCore::~VirtualCore() { + ste.stack_frames.clear(); + ste.ret_addr_stack.clear(); + ste.program = nullptr; + const_pool_top = nullptr; + libs.clear(); +} + +void VirtualCore::log_op(const Opcode &op, const uint8_t(&operands)[12]) { + switch (op) { + using enum Opcode; + case MOV_RI: { + DEBUG_LOG_FMT("MOVRI: r%d, %lld", static_cast(operands[0]), *reinterpret_cast(operands + 1)); + break; + } + case MOV_RM: { + DEBUG_LOG_FMT("MOVRM: r%d, 0x%llx", static_cast(operands[0]), static_cast(operands[1])); + break; + } + case MOV_RR: { + DEBUG_LOG_FMT("MOVRR: r%d, r%d", static_cast(operands[0]), static_cast(operands[1])); + break; + } + case MOV_RC: { + DEBUG_LOG_FMT("MOVRC: r%d, const[%llu]", static_cast(operands[0]), *(uint64_t*)(operands + 1)); + break; + } + case MOV_MI: { + DEBUG_LOG_FMT("MOVMI: 0x%llx, %lld", static_cast(operands[0]), *reinterpret_cast(operands + 1)); + break; + } + case MOV_MM: { + DEBUG_LOG_FMT("MOVMM: 0x%llx, 0x%llx", static_cast(operands[0]), static_cast(operands[1])); + break; + } + case MOV_MR: { + DEBUG_LOG_FMT("MOVMR: 0x%llx, r%d", static_cast(operands[0]), static_cast(operands[1])); + break; + } + case MOV_MC: { + DEBUG_LOG_FMT("MOVMC: 0x%llx, const[%llu]", static_cast(operands[0]), static_cast(operands[1])); + break; + } + case ADD: { + DEBUG_LOG_FMT("ADD: r%d = r%d + r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case SUB: { + DEBUG_LOG_FMT("SUB: r%d = r%d - r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case MUL: { + DEBUG_LOG_FMT("MUL: r%d = r%d * r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case DIV: { + DEBUG_LOG_FMT("DIV: r%d = r%d / r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case MOD: { + DEBUG_LOG_FMT("MOD: r%d = r%d %% r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case POW: { + DEBUG_LOG_FMT("POW: r%d = pow(r%d, r%d)", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case FCALL: { + DEBUG_LOG_FMT("FCALL: func=0x%llx, args=%d", *reinterpret_cast(operands), static_cast(operands[8])); + break; + } + case FRET: { + DEBUG_LOG("FRET: Return from function"); + break; + } + case HALT: { + DEBUG_LOG("HALT: Terminate execution"); + break; + } + case DEBUG_LOG: { + DEBUG_LOG_FMT("DEBUG_LOG: const[%llu]", *reinterpret_cast(operands)); + break; + } + case JMP: { + DEBUG_LOG_FMT("JMP: to %llu", *reinterpret_cast(operands)); + break; + } + case CMP_GE: { + DEBUG_LOG_FMT("CMP_GE: r%d = r%d >= r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case CMP_LT: { + DEBUG_LOG_FMT("CMP_LT: r%d = r%d < r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case CMP_LE: { + DEBUG_LOG_FMT("CMP_LE: r%d = r%d <= r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case CMP_GT: { + DEBUG_LOG_FMT("CMP_GT: r%d = r%d > r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case CMP_EQ: { + DEBUG_LOG_FMT("CMP_EQ: r%d = r%d == r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case CMP_NE: { + DEBUG_LOG_FMT("CMP_NE: r%d = r%d != r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case IF_TRUE: { + DEBUG_LOG_FMT("IF_TRUE: r%d ? to %llu", static_cast(operands[0]), *reinterpret_cast(operands + 1)); + break; + } + case IF_FALSE: { + DEBUG_LOG_FMT("IF_FALSE: r%d ? to %llu", static_cast(operands[0]), *reinterpret_cast(operands + 1)); + break; + } + case FUNC_CREATE: { + DEBUG_LOG("FUNC_CREATE: Function definition"); + break; + } + case FUNC_END: { + DEBUG_LOG("FUNC_END: Function end"); + break; + } + case LOCAL_GET: { + DEBUG_LOG_FMT("LOCAL_GET: r%d = frame[%d]->locals[%d]", (int)operands[0], (int)operands[1], *(uint16_t*)(operands + 2)); + break; + } + case LOCAL_SET: { + DEBUG_LOG_FMT("LOCAL_SET: frame[%d]->locals[%d] = r%d", (int)operands[0], *(uint16_t*)(operands + 1), (int)operands[3]); + break; + } + case AND: { + DEBUG_LOG_FMT("AND: r%d = r%d && r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case OR: { + DEBUG_LOG_FMT("OR: r%d = r%d || r%d", static_cast(operands[0]), static_cast(operands[1]), static_cast(operands[2])); + break; + } + case VMC: { + DEBUG_LOG_FMT("VMC: Call vmcall[%d]", *(uint16_t*)operands); + break; + } + case DEC: { + DEBUG_LOG_FMT("DEC: r%d--", static_cast(operands[0])); + break; + } + case PUSH: { + DEBUG_LOG_FMT("PUSH: r%d", static_cast(operands[0])); + break; + } + case CREATE_VECTOR: { + DEBUG_LOG_FMT("CREATE_VECTOR: r%d, %d", static_cast(operands[0]), static_cast(operands[1])); + break; + } + default: { + DEBUG_LOG_FMT("Unknown opcode: %d", static_cast(op)); + break; + } } +} + +bool VirtualCore::run_op(const Opcode &op, const uint8_t(&operands)[12], int &result) { + switch (op) { + using enum Opcode; + case MOV_RI: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + const uint8_t dst_reg = operands[0]; + const int64_t imm_val = *reinterpret_cast(operands + 1); + ste.regs[dst_reg] = imm_val; + ste.pc++; + DEBUG_LOG_FMT("MOVRI: r%d = %lld", static_cast(dst_reg), imm_val); + break; + } + case MOV_RM: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + const uint8_t dst_reg = operands[0]; + const uint64_t mem_addr = operands[1]; + if (mem_addr == 0) { + handle_error("Null memory address in MOV_RM"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + ste.regs[dst_reg] = *reinterpret_cast(mem_addr); + ste.pc++; + DEBUG_LOG_FMT("MOVRM: r%d = mem[0x%llx]", static_cast(dst_reg), mem_addr); + break; + } + case MOV_RR: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1])) { + handle_error("Invalid register index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + const uint8_t dst_reg = operands[0]; + const uint8_t src_reg = operands[1]; + ste.regs[dst_reg] = ste.regs[src_reg]; + ste.pc++; + DEBUG_LOG_FMT("MOVRR: r%d = r%d", static_cast(dst_reg), static_cast(src_reg)); + break; + } + case MOV_RC: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + if (const_pool_top == nullptr) { + handle_error("Constant pool is null"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + const uint8_t dst_reg = operands[0]; + const uint64_t const_idx = *(uint64_t*)(operands + 1); + ste.regs[dst_reg] = (char*)get_constant() + const_idx; + ste.pc++; + DEBUG_LOG_FMT("MOVRC: r%d = const[%llu]", static_cast(dst_reg), const_idx); + break; + } + case MOV_MI: { + const uint64_t mem_addr = operands[0]; + const int64_t imm_val = *reinterpret_cast(operands + 1); + ste.heap[mem_addr] = imm_val; + ste.pc++; + DEBUG_LOG_FMT("MOVMI: mem[0x%llx] = %lld", mem_addr, imm_val); + break; + } + case MOV_MM: { + const uint64_t dst_addr = operands[0]; + const uint64_t src_addr = operands[1]; + ste.heap[dst_addr] = ste.heap[src_addr]; + ste.pc++; + DEBUG_LOG_FMT("MOVMM: mem[0x%llx] = mem[0x%llx]", dst_addr, src_addr); + break; + } + case MOV_MR: { + if (!is_valid_register(operands[1])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint64_t mem_addr = operands[0]; + const uint8_t src_reg = operands[1]; + ste.heap[mem_addr] = ste.regs[src_reg]; + ste.pc++; + DEBUG_LOG_FMT("MOVMR: mem[0x%llx] = r%d", mem_addr, static_cast(src_reg)); + break; + } + case MOV_MC: { + if (const_pool_top == nullptr) { + handle_error("Constant pool is null"); + result = 1; + return true; + } + const uint64_t mem_addr = operands[0]; + const uint64_t const_idx = operands[1]; + ste.heap[mem_addr] = (char*)get_constant() + const_idx; + ste.pc++; + DEBUG_LOG_FMT("MOVMC: mem[0x%llx] = const[%llu]", mem_addr, const_idx); + break; + } + case ADD: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t add_dst_reg = operands[0]; + const uint8_t add_src1_reg = operands[1]; + const uint8_t add_src2_reg = operands[2]; + + const ValueType::ValueType type1 = ste.regs[add_src1_reg].type; + const ValueType::ValueType type2 = ste.regs[add_src2_reg].type; + DEBUG_LOG("type1: " << type1 << ", type2: " << type2); + + if ((type1 == ValueType::Ptr && type2 == ValueType::Int) || + (type1 == ValueType::Int && type2 == ValueType::Ptr)) { + handle_error("Cannot add vector and number"); + result = 1; + return true; + } + + if (type1 != type2) { + handle_error("Cannot add different types"); + result = 1; + return true; + } + + if (type1 == ValueType::Int) { + const int64_t add_result = ste.regs[add_src1_reg].i64 + ste.regs[add_src2_reg].i64; + ste.regs[add_dst_reg] = add_result; + ste.pc++; + DEBUG_LOG_FMT("ADD: r%d = r%d + r%d = %lld", static_cast(add_dst_reg), + static_cast(add_src1_reg), static_cast(add_src2_reg), add_result); + } else if (type1 == ValueType::Ptr) { + DEBUG_LOG("Adding ptr..."); + const size_t vec1_addr = ste.regs[add_src1_reg].u64; + const size_t vec2_addr = ste.regs[add_src2_reg].u64; + + const size_t vec1_len = ste.heap[vec1_addr].i64; + const size_t vec2_len = ste.heap[vec2_addr].i64; + + if (vec1_len != vec2_len) { + handle_error("Vectors must have the same length for addition"); + result = 1; + return true; + } + + const size_t vec_result_addr = ste.heap.size(); + ste.heap.resize(vec_result_addr + 1 + vec1_len); + + ste.heap[vec_result_addr].type = ValueType::Int; + ste.heap[vec_result_addr].i64 = vec1_len; + + for (size_t i = 0; i < vec1_len; i++) { + const Value& elem1 = ste.heap[vec1_addr + 1 + i]; + const Value& elem2 = ste.heap[vec2_addr + 1 + i]; + + if (elem1.type != elem2.type) { + handle_error("Vector elements must have the same type for addition"); + result = 1; + return true; + } + + Value result_elem; + if (elem1.type == ValueType::Int) { + result_elem = elem1.i64 + elem2.i64; + } else { + handle_error("Unsupported vector element type for addition"); + result = 1; + return true; + } + + ste.heap[vec_result_addr + 1 + i] = result_elem; + } + + ste.regs[add_dst_reg].type = ValueType::Ptr; + ste.regs[add_dst_reg].u64 = vec_result_addr; + ste.pc++; + DEBUG_LOG_FMT("ADD: vector r%d + r%d, result len=%llu", + static_cast(add_src1_reg), static_cast(add_src2_reg), vec1_len); + } else { + handle_error("Unsupported type for addition"); + result = 1; + return true; + } + break; + } + case SUB: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t sub_dst_reg = operands[0]; + const uint8_t sub_src1_reg = operands[1]; + const uint8_t sub_src2_reg = operands[2]; + const int64_t sub_result = ste.regs[sub_src1_reg].i64 - ste.regs[sub_src2_reg].i64; + ste.regs[sub_dst_reg] = sub_result; + ste.pc++; + DEBUG_LOG_FMT("SUB: r%d = r%d - r%d = %lld", static_cast(sub_dst_reg), + static_cast(sub_src1_reg), static_cast(sub_src2_reg), sub_result); + break; + } + case MUL: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t mul_dst_reg = operands[0]; + const uint8_t mul_src1_reg = operands[1]; + const uint8_t mul_src2_reg = operands[2]; + const int64_t mul_result = ste.regs[mul_src1_reg].i64 * ste.regs[mul_src2_reg].i64; + ste.regs[mul_dst_reg] = mul_result; + ste.pc++; + DEBUG_LOG_FMT("MUL: r%d = r%d * r%d = %lld", static_cast(mul_dst_reg), + static_cast(mul_src1_reg), static_cast(mul_src2_reg), mul_result); + break; + } + case DIV: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t div_dst_reg = operands[0]; + const uint8_t div_src1_reg = operands[1]; + const uint8_t div_src2_reg = operands[2]; + if (ste.regs[div_src2_reg].i64 == 0) { + handle_error("Division by zero"); + result = 1; + return true; + } + const int64_t div_result = ste.regs[div_src1_reg].i64 / ste.regs[div_src2_reg].i64; + ste.regs[div_dst_reg] = div_result; + ste.pc++; + DEBUG_LOG_FMT("DIV: r%d = r%d / r%d = %lld", static_cast(div_dst_reg), + static_cast(div_src1_reg), static_cast(div_src2_reg), div_result); + break; + } + case MOD: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t mod_dst_reg = operands[0]; + const uint8_t mod_src1_reg = operands[1]; + const uint8_t mod_src2_reg = operands[2]; + if (ste.regs[mod_src2_reg].i64 == 0) { + handle_error("Modulo by zero"); + result = 1; + return true; + } + const int64_t mod_result = ste.regs[mod_src1_reg].i64 % ste.regs[mod_src2_reg].i64; + ste.regs[mod_dst_reg] = mod_result; + ste.pc++; + DEBUG_LOG_FMT("MOD: r%d = r%d %% r%d = %lld", static_cast(mod_dst_reg), + static_cast(mod_src1_reg), static_cast(mod_src2_reg), mod_result); + break; + } + case POW: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t pow_dst_reg = operands[0]; + const uint8_t pow_src1_reg = operands[1]; + const uint8_t pow_src2_reg = operands[2]; + const double pow_result = std::pow(ste.regs[pow_src1_reg].f64, ste.regs[pow_src2_reg].f64); + ste.regs[pow_dst_reg] = pow_result; + ste.pc++; + DEBUG_LOG_FMT("POW: r%d = pow(r%d, r%d) = %f", static_cast(pow_dst_reg), + static_cast(pow_src1_reg), static_cast(pow_src2_reg), pow_result); + break; + } + case FCALL: { + uint64_t target_pc = *reinterpret_cast(operands); + if (target_pc >= ste.program->size()) { + handle_error("Invalid jump address"); + result = 1; + return true; + } + const auto args_count = operands[8]; + ste.ret_addr_stack.push_back(ste.pc + 1); + ste.pc = target_pc; + ste.stack_frames.push_back(std::make_unique()); + ste.stack_frames.back()->locals.resize(args_count + 1); + for (uint8_t i = 0; i != args_count; i++) { + if (REG_COUNT_INDEX_MAX - i >= REG_COUNT) { + handle_error("Invalid register index"); + result = 1; + return true; + } + ste.stack_frames.back()->locals[i] = ste.regs[REG_COUNT_INDEX_MAX - i]; + } + DEBUG_LOG_FMT("FCALL: addr=%llu, args=%d", target_pc, static_cast(args_count)); + break; + } + case FRET: { + if (ste.ret_addr_stack.empty() || ste.stack_frames.size() <= 1) { + handle_error("Invalid return operation"); + result = 1; + return true; + } + const size_t return_addr = ste.ret_addr_stack.back(); + ste.pc = return_addr; + ste.ret_addr_stack.pop_back(); + ste.stack_frames.pop_back(); + DEBUG_LOG_FMT("FRET: return to %llu", return_addr); + break; + } + case HALT: { + DEBUG_LOG("HALT"); + DEBUG_SEPARATOR("VM EXECUTION END (SUCCESS)"); + DEBUG_LEAVE_FUNC(); + result = 0; + return true; + } + case DEBUG_LOG: { + if (const_pool_top == nullptr) { + handle_error("Constant pool is null"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + DEBUG_LOG_FMT("[LogInfo]: %s", static_cast(const_pool_top) + *reinterpret_cast(operands)); + ste.pc++; + break; + } + case JMP: { + uint64_t target_pc = *reinterpret_cast(operands); + if (target_pc >= ste.program->size()) { + handle_error("Invalid jump address"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + ste.pc = target_pc; + DEBUG_LOG_FMT("JMP: %llu", target_pc); + break; + } + case CMP_GE: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + const uint8_t dst_reg = operands[0]; + const uint8_t src1_reg = operands[1]; + const uint8_t src2_reg = operands[2]; + bool result = ste.regs[src1_reg].i64 >= ste.regs[src2_reg].i64; + ste.regs[dst_reg].b = result; + ste.pc++; + DEBUG_LOG_FMT("CMP_GE: r%d = (r%d >= r%d) = %s", static_cast(dst_reg), + static_cast(src1_reg), static_cast(src2_reg), result ? "true" : "false"); + break; + } + case CMP_LT: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t cmp_dst_reg = operands[0]; + const uint8_t cmp_src1_reg = operands[1]; + const uint8_t cmp_src2_reg = operands[2]; + bool cmp_result = ste.regs[cmp_src1_reg].i64 < ste.regs[cmp_src2_reg].i64; + ste.regs[cmp_dst_reg].b = cmp_result; + ste.pc++; + DEBUG_LOG_FMT("CMP_LT: r%d = (r%d < r%d) = %s", static_cast(cmp_dst_reg), + static_cast(cmp_src1_reg), static_cast(cmp_src2_reg), cmp_result ? "true" : "false"); + break; + } + case CMP_LE: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t cmp_dst_reg = operands[0]; + const uint8_t cmp_src1_reg = operands[1]; + const uint8_t cmp_src2_reg = operands[2]; + bool cmp_result = ste.regs[cmp_src1_reg].i64 <= ste.regs[cmp_src2_reg].i64; + ste.regs[cmp_dst_reg].b = cmp_result; + ste.pc++; + DEBUG_LOG_FMT("CMP_LE: r%d = (r%d <= r%d) = %s", static_cast(cmp_dst_reg), + static_cast(cmp_src1_reg), static_cast(cmp_src2_reg), cmp_result ? "true" : "false"); + break; + } + case CMP_GT: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + ste.regs[operands[0]].b = ste.regs[operands[1]].i64 > ste.regs[operands[2]].i64; + ste.pc++; + break; + } + case CMP_EQ: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + ste.regs[operands[0]].b = ste.regs[operands[1]].i64 == ste.regs[operands[2]].i64; + ste.pc++; + break; + } + case CMP_NE: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + ste.regs[operands[0]].b = ste.regs[operands[1]].i64 != ste.regs[operands[2]].i64; + ste.pc++; + break; + } + case IF_TRUE: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + uint64_t target_pc = *reinterpret_cast(operands + 1); + if (target_pc >= ste.program->size()) { + handle_error("Invalid jump address"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + const uint8_t cond_reg = operands[0]; + bool condition = ste.regs[cond_reg].b; + if (condition) { + ste.pc = target_pc; + DEBUG_LOG_FMT("IF_TRUE: r%d is true, jump to %llu", static_cast(cond_reg), target_pc); + } else { + ste.pc++; + DEBUG_LOG_FMT("IF_TRUE: r%d is false, continue", static_cast(cond_reg)); + } + break; + } + case IF_FALSE: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + uint64_t target_pc = *reinterpret_cast(operands + 1); + if (target_pc >= ste.program->size()) { + handle_error("Invalid jump address"); + result = 1; + return true; + } + if (!ste.regs[operands[0]].b) ste.pc = target_pc; + else ste.pc++; + break; + } + case FUNC_CREATE: { + while (ste.pc < ste.program->size() && ste.program->operator[](ste.pc).op != FUNC_END) { + ste.pc++; + } + if (ste.pc < ste.program->size()) { + ste.pc++; + } + break; + } + case FUNC_END: { + ste.pc++; + break; + } + case LOCAL_GET: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + if (operands[1] >= ste.stack_frames.size()) { + handle_error("Invalid stack frame index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + uint16_t local_index = *(uint16_t*)(operands + 2); + if (local_index >= ste.stack_frames[operands[1]]->locals.size()) { + handle_error("Invalid local variable index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + const uint8_t dst_reg = operands[0]; + const uint8_t frame_idx = operands[1]; + + ste.regs[dst_reg] = ste.stack_frames[frame_idx]->locals[local_index]; + ste.pc++; + + DEBUG_LOG_FMT("LOCAL_GET: r%d = frame[%d].locals[%d] = %s", static_cast(dst_reg), + static_cast(frame_idx), local_index, + ste.regs[dst_reg].to_string().c_str()); + break; + } + case LOCAL_SET: { + if (!is_valid_register(operands[3])) { + handle_error("Invalid register index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + if (operands[0] >= ste.stack_frames.size()) { + handle_error("Invalid stack frame index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + uint16_t local_index = *(uint16_t*)(operands + 1); + if (local_index >= ste.stack_frames[operands[0]]->locals.size()) { + handle_error("Invalid local variable index"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + result = 1; + return true; + } + const uint8_t frame_idx = operands[0]; + const uint8_t src_reg = operands[3]; + ste.stack_frames[frame_idx]->locals[local_index] = ste.regs[src_reg]; + ste.pc++; + DEBUG_LOG_FMT("LOCAL_SET: frame[%d].locals[%d] = r%d (%s)", static_cast(frame_idx), + local_index, static_cast(src_reg), ste.regs[src_reg].to_string().c_str()); + break; + } + case AND: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + ste.regs[operands[0]].b = ste.regs[operands[1]].b && ste.regs[operands[2]].b; + ste.pc++; + break; + } + case OR: { + if (!is_valid_register(operands[0]) || !is_valid_register(operands[1]) || !is_valid_register(operands[2])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + ste.regs[operands[0]].b = ste.regs[operands[1]].b || ste.regs[operands[2]].b; + ste.pc++; + break; + } + case VMC: { + uint16_t vmcall_index = *(uint16_t*)operands; + if (vmcall_index >= VMCall::vmcall_count) { + handle_error("Invalid VMCall index"); + result = 1; + return true; + } + VMCall::vmcall_table[vmcall_index](this); + ste.pc++; + break; + } + case DEC: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + ste.regs[operands[0]].i64--; + ste.pc++; + break; + } + case PUSH: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t src_reg = operands[0]; + ste.stack.push_back(ste.regs[src_reg]); + ste.pc++; + DEBUG_LOG_FMT("PUSH: r%d, stack size=%llu", static_cast(src_reg), ste.stack.size()); + break; + } + case CREATE_VECTOR: { + if (!is_valid_register(operands[0])) { + handle_error("Invalid register index"); + result = 1; + return true; + } + const uint8_t dst_reg = operands[0]; + const uint8_t count = operands[1]; + + if (ste.stack.size() < count) { + handle_error("Not enough elements on stack for CREATE_VECTOR"); + result = 1; + return true; + } + + size_t vec_addr = ste.heap.size(); + ste.heap.resize(vec_addr + 1 + count); + ste.heap[vec_addr].type = ValueType::Int; + ste.heap[vec_addr].i64 = count; + + for (size_t i = 0; i < count; i++) { + ste.heap[vec_addr + 1 + i] = ste.stack[ste.stack.size() - count + i]; + } + + for (size_t i = 0; i < count; i++) { + ste.stack.pop_back(); + } + + ste.regs[dst_reg].type = ValueType::Ptr; + ste.regs[dst_reg].u64 = vec_addr; + + ste.pc++; + DEBUG_LOG_FMT("CREATE_VECTOR: r%d = vec[%d] at heap[%llu]", static_cast(dst_reg), + static_cast(count), vec_addr); + break; + } + default: + handle_error("unknown opcode"); + ste.pc++; + break; + } + return false; +} + +int VirtualCore::run() { + DEBUG_ENTER_FUNC(); + DEBUG_SEPARATOR("VM EXECUTION START"); + + if (ste.program == nullptr) { + handle_error("Program is null"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + return 1; + } + + DEBUG_LOG_FMT("Program size: %zu instructions", ste.program->size()); + DEBUG_LOG("Starting VM execution"); + + while (ste.pc < ste.program->size()) { + const Opcode& op = (*ste.program)[ste.pc].op; + const auto& operands = (*ste.program)[ste.pc].operands; + + DEBUG_EXEC_STEP(ste.pc, op, ""); + + log_op(op, operands); + if (int result; run_op(op, operands, result)) + return result; } + DEBUG_LOG("PC out of bounds"); + DEBUG_SEPARATOR("VM EXECUTION END (ERROR)"); + DEBUG_LEAVE_FUNC(); return 1; } +void VirtualCore::insert_builtins() { + DEBUG_LOG("insert builtins..."); + + const auto old_program = ste.program; + const auto old_pc = ste.pc; + + size_t base_index = builtins::builtin_start; + for (size_t i = 0; i < builtins::builtin_constants_count; i++) { + const auto& constant = builtins::builtin_constants[i]; + + if (ste.stack_frames[0]->locals.size() <= base_index) { + ste.stack_frames[0]->locals.resize(base_index + 1); + } + + ste.stack_frames[0]->locals[base_index] = constant.value; + + DEBUG_LOG_FMT("builtin %s = %s at %zu", + constant.name, + constant.value.to_string().c_str(), + base_index); + base_index++; + } + + ste.program = old_program; + ste.pc = old_pc; +} } diff --git a/runtime/vm.hpp b/runtime/vm.hpp index e5ddbba..9cae054 100644 --- a/runtime/vm.hpp +++ b/runtime/vm.hpp @@ -17,53 +17,102 @@ namespace lmx::runtime { +// VM执行状态 struct LMVM_API LMXState { - size_t pc{0}; - std::array regs{}; + size_t pc{0}; // 程序计数器 + std::array regs{}; // 寄存器数组 + std::vector heap{}; + std::vector ret_addr_stack; // 返回地址栈 + std::vector* program{}; // 程序指令 + std::vector> stack_frames; // 栈帧 + std::vector stack; // 操作数栈 - std::vector ret_addr_stack; - //void* const_pool_top; - std::vector* program{}; - - std::vector> cur; LMXState() = default; - // 2. 删除拷贝构造函数和拷贝赋值运算符 LMXState(const LMXState&) = delete; LMXState& operator=(const LMXState&) = delete; - - // 3. 允许移动构造函数和移动赋值运算符 LMXState(LMXState&&) = delete; LMXState& operator=(LMXState&&) = delete; - LMXState(LMXState&); - // 4. 析构函数 ~LMXState() = default; }; + +// 虚拟机核心类 class LMVM_API VirtualCore { - void* const_pool_top; - LMXState ste; + void* const_pool_top; // 常量池顶部指针 + LMXState ste; // VM执行状态 + // 从常量池获取值 + [[nodiscard]] Value* get_value_from_pool(size_t offset) const; + + // 检查寄存器索引是否有效 + static bool is_valid_register(uint8_t reg) ; + + // 统一错误处理函数 + static void handle_error(const char* error_message) ; + + // 验证多个寄存器索引 + [[nodiscard]] static bool validate_registers(const uint8_t* regs, size_t count); + + // 验证跳转地址 + [[nodiscard]] bool validate_jump_address(uint64_t address) const; + + // 验证栈帧和局部变量索引 + [[nodiscard]] bool validate_stack_frame(size_t frame_index, size_t local_index) const; + + // 从常量池获取字符串 + [[nodiscard]] const char* get_constant_string(uint64_t offset) const; + + static void log_op(const Opcode &op, const uint8_t(&operands)[12]); + + bool run_op(const Opcode &op, const uint8_t(&operands)[12], int &result); - [[nodiscard]] Value *get_value_from_pool(size_t offest) const; public: - std::vector> libs; + std::vector> libs; // 加载的动态库 + VirtualCore(); + ~VirtualCore(); + + // 禁止拷贝和移动 VirtualCore(const VirtualCore&) = delete; VirtualCore& operator=(const VirtualCore&) = delete; VirtualCore(VirtualCore&&) = delete; + + // 带状态的构造函数 explicit VirtualCore(LMXState ste); + + // 带状态和常量池的构造函数 explicit VirtualCore(LMXState ste, void* const_pool_top); + + // 运行VM int run(); - [[nodiscard]] std::vector *get_program() const { return ste.program; } - void set_program(std::vector *program) { ste.pc = 0;ste.program = program; } + // 获取程序指令 + [[nodiscard]] std::vector* get_program() const { return ste.program; } + + // 设置程序指令 + void set_program(std::vector* program) { ste.pc = 0; ste.program = program; } + + // 查看寄存器值 [[nodiscard]] int64_t look_register(const size_t r) const { return ste.regs[r].i64; } + // 获取寄存器引用 [[nodiscard]] Value& get_register(const size_t r) { return ste.regs[r]; } + // 获取常量池指针 [[nodiscard]] void* get_constant() const { return const_pool_top; } + + // 设置常量池指针 void set_constant(void* const_pool) { const_pool_top = const_pool; } + // 设置寄存器指针值 void set_reg_ptr(const size_t idx, void* np) { ste.regs[idx].ptr = np; } + + // 堆内存管理方法 + [[nodiscard]] size_t heap_size() const { return ste.heap.size(); } + Value& heap_at(size_t index) { return ste.heap[index]; } + [[nodiscard]] const Value& heap_at(size_t index) const { return ste.heap[index]; } + void heap_push_back(const Value& value) { ste.heap.push_back(value); } + + void insert_builtins(); }; } diff --git a/runtime/vmcall.cpp b/runtime/vmcall.cpp index deebafe..cc8e201 100644 --- a/runtime/vmcall.cpp +++ b/runtime/vmcall.cpp @@ -1,5 +1,8 @@ #include "vmcall.hpp" +#include "vm.hpp" #include + +#include "debug.hpp" #include "libloader.hpp" #include "../compiler/generator/generator.hpp" @@ -48,7 +51,7 @@ VMC_REGISTER(dyn_set) { const auto args_type_p = static_cast(self->get_register(REG_COUNT_INDEX_MAX - 2).ptr); static_cast(self->get_register(REG_COUNT_INDEX_MAX).ptr)->set_func( self->get_register(REG_COUNT_INDEX_MAX - 1).str, - std::vector( + std::vector( args_type_p, args_type_p + self->get_register(REG_COUNT_INDEX_MAX - 3).u64), *static_cast(self->get_register(REG_COUNT_INDEX_MAX - 4).ptr) @@ -69,3 +72,53 @@ VMC_REGISTER(dyn_call) { ); } +/* + * VMC_alloc_memory(6) + * arg1 : size(0) + * + * return: memory_ptr in r0 + */ +VMC_REGISTER(alloc_memory) { + size_t slots = self->get_register(0).u64; + + size_t memory_start = self->heap_size(); + + for (size_t i = 0; i < slots; i++) { + Value value; + value.type = ValueType::Null; + value.null = nullptr; + self->heap_push_back(value); + } + + Value memory_ptr; + memory_ptr.type = ValueType::Ptr; + memory_ptr.u64 = memory_start; + self->get_register(0) = memory_ptr; + + DEBUG_LOG("VMC[6]: allocated " << slots << " slots at heap[" << memory_start << "]"); +} + +/* + * VMC_store_memory(7) + * arg1 : memory_ptr(0) + * arg2 : offset(1) + * arg3 : value(2) + * + * 存储值到指定内存地址 + */ +VMC_REGISTER(store_memory) { + const size_t memory_ptr = self->get_register(0).u64; + const size_t offset = self->get_register(1).u64; + const Value value = self->get_register(2); + + if (memory_ptr >= self->heap_size()) { + return; + } + + if (memory_ptr + offset >= self->heap_size()) { + return; + } + + self->heap_at(memory_ptr + offset) = value; +} + diff --git a/tools/lm/common/file_run.cpp b/tools/lm/common/file_run.cpp index a0537a6..9c7b2a2 100644 --- a/tools/lm/common/file_run.cpp +++ b/tools/lm/common/file_run.cpp @@ -8,6 +8,7 @@ #include "../compiler/parser.hpp" #include "../compiler/generator/generator.hpp" #include "../runtime/vm.hpp" +#include "../include/error.hpp" int binary_run(std::fstream&& file) { lmx::runtime::BinaryLoader loader{std::move(file)}; @@ -21,26 +22,27 @@ int file_run(const std::string& file_name) { uint32_t magic = 0; std::fstream file(file_name); if (!file.is_open()) { - std::cerr << "File not found: " << file_name << std::endl; + lmx::LM_ERROR("File not found: " + file_name); file.close(); return -1; } - file.read((char*)&magic, sizeof(magic)); + file.read(reinterpret_cast(&magic), sizeof(magic)); if (magic == LMX_MAGIC_NUM) return binary_run(std::move(file)); + file.seekg(0, std::ios::beg); - auto src = std::string(std::istreambuf_iterator(file), std::istreambuf_iterator()); + auto src = std::string(std::istreambuf_iterator(file), std::istreambuf_iterator()); lmx::Lexer lexer(src); auto ts = lexer.tokenize(src); - lmx::Parser parser(ts); - lmx::Generator gener; + lmx::Parser parser(ts, src, file_name); std::shared_ptr node = parser.parse_program(); if (!node || parser.error()) return -1; + + lmx::Generator gener; gener.gen(node); - if (lmx::Generator::node_has_error)return -1; + if (lmx::Generator::node_has_error) return -1; gener.ops.emplace_back(lmx::runtime::Opcode::HALT); gener.write_binary_file(file_name); - //gener.print_ops(); - //return 0; + lmx::runtime::VirtualCore vm; vm.set_program(&gener.ops); vm.set_constant(gener.constant_pool.data()); diff --git a/tools/lm/common/repl.cpp b/tools/lm/common/repl.cpp index b478140..d166615 100644 --- a/tools/lm/common/repl.cpp +++ b/tools/lm/common/repl.cpp @@ -1,54 +1,302 @@ -#include "repl.hpp" -#include "../compiler/lexer.hpp" -#include "../compiler/parser.hpp" -#include "../compiler/generator/generator.hpp" -#include "../compiler/generator/emit.hpp" -#include "../runtime/vm.hpp" -#include "../compiler/ast.hpp" -#include - -int run_repl() { - std::string expr; - lmx::Lexer l(expr); - lmx::Generator gener; - lmx::runtime::VirtualCore core; - core.set_program(&gener.ops); - - std::string prompt = ">>>"; - while (true) { - std::cout << std::flush << prompt << std::flush; - if (!std::getline(std::cin, expr)) break; - //switch (ss.view().back()) { - // case '+': case '-': case '*': case '/': case '=': case '!': case '~': case '{' :case '[': prompt = "..."; continue; - // default: break; - //} - if (expr == ":lastret") std::cout << core.look_register(0) << std::endl; - else if (expr == ":exit") break; - else if (expr == ":op") gener.print_ops(); - else if (expr == ":vars") gener.print_vars(); - else { - std::vector tks = l.tokenize(expr); - lmx::Parser parser(tks); - auto node = parser.parse(); - if (!node || parser.error()) continue; - const auto op = gener.gen(node); - if (lmx::Generator::node_has_error) continue; - gener.ops.emplace_back(lmx::runtime::Opcode::HALT); - //gener.print_ops(); - //continue; - core.set_constant(gener.constant_pool.data()); - //const auto start = std::chrono::high_resolution_clock::now(); - core.run(); - //const auto end = std::chrono::high_resolution_clock::now(); - - if (op != -1) { - gener.regs.free(op); - std::cout << "Result: " << core.look_register(op) << std::endl; - } - //std::cout << "time " << std::chrono::duration_cast(end - start) << std::endl; - if (gener.ops.back().op == lmx::runtime::Opcode::HALT) gener.ops.pop_back(); - } - } - return 0; - -} +#include "repl.hpp" +#include "../compiler/lexer.hpp" +#include "../compiler/parser.hpp" +#include "../compiler/generator/generator.hpp" +#include "../compiler/generator/emit.hpp" +#include "../runtime/vm.hpp" +#include "../compiler/ast.hpp" +#include "../../../include/debug.hpp" + +void print_ast(const std::shared_ptr& node, int indent = 0) { + if (!node) return; + + for (int i = 0; i < indent; i++) { + std::cerr << " "; + } + + switch (node->kind) { + case lmx::ASTKind::Program: + std::cerr << "Program" << std::endl; + for (const auto& child : dynamic_cast(node.get())->children) { + print_ast(child, indent + 1); + } + break; + case lmx::ASTKind::NumLiteral: + std::cerr << "NumLiteral(" << dynamic_cast(node.get())->num << ")" << std::endl; + break; + case lmx::ASTKind::StringLiteral: + std::cerr << "StringLiteral(\"" << dynamic_cast(node.get())->str << "\")" << std::endl; + break; + case lmx::ASTKind::BoolLiteral: + std::cerr << "BoolLiteral(" << (dynamic_cast(node.get())->b ? "true" : "false") << ")" << std::endl; + break; + case lmx::ASTKind::VectorLiteral: { + const auto elem_vec = (dynamic_cast(node.get())->elements); + std::cerr << "VectorLiteral(" << std::endl; + for (auto const& k : elem_vec) { + print_ast(k, indent + 1); + } + std::cerr << ")" << std::endl; + break; + } + case lmx::ASTKind::VarDecl: + { + auto decl = dynamic_cast(node.get()); + std::cerr << "VarDecl(" << decl->name << ", mutable=" << (decl->is_mut ? "true" : "false") << ")" << std::endl; + if (decl->value) { + print_ast(decl->value, indent + 1); + } + } + break; + case lmx::ASTKind::VarRef: + std::cerr << "VarRef(" << dynamic_cast(node.get())->name << ")" << std::endl; + break; + case lmx::ASTKind::FuncDecl: + { + auto func = dynamic_cast(node.get()); + std::cerr << "FuncDecl(" << func->name << ")" << std::endl; + if (func->body) { + print_ast(func->body, indent + 1); + } + } + break; + case lmx::ASTKind::FuncCallExpr: + { + auto call = dynamic_cast(node.get()); + std::cerr << "FuncCallExpr(" << call->name << ")" << std::endl; + for (const auto& arg : call->args) { + print_ast(arg, indent + 1); + } + } + break; + case lmx::ASTKind::Binary: + { + auto bin = dynamic_cast(node.get()); + std::cerr << "Binary(" << bin->op << ")" << std::endl; + print_ast(bin->left, indent + 1); + print_ast(bin->right, indent + 1); + } + break; + case lmx::ASTKind::Unary: + { + auto unary = dynamic_cast(node.get()); + std::cerr << "Unary(" << unary->op << ")" << std::endl; + print_ast(unary->operand, indent + 1); + } + break; + case lmx::ASTKind::BlockStmt: + { + std::cerr << "BlockStmt" << std::endl; + for (const auto& child : dynamic_cast(node.get())->children) { + print_ast(child, indent + 1); + } + } + break; + case lmx::ASTKind::IfStmt: + { + auto if_stmt = dynamic_cast(node.get()); + std::cerr << "IfStmt" << std::endl; + std::cerr << " Condition:" << std::endl; + print_ast(if_stmt->condition, indent + 2); + std::cerr << " Then:" << std::endl; + print_ast(if_stmt->thenBlock, indent + 2); + if (if_stmt->elseBlock) { + std::cerr << " Else:" << std::endl; + print_ast(if_stmt->elseBlock, indent + 2); + } + } + break; + case lmx::ASTKind::Return: + { + auto ret = dynamic_cast(node.get()); + std::cerr << "Return" << std::endl; + if (ret->expr) { + print_ast(ret->expr, indent + 1); + } + } + break; + case lmx::ASTKind::VMCall: + { + auto vmcall = dynamic_cast(node.get()); + std::cerr << "VMCall(" << vmcall->idx << ")" << std::endl; + for (const auto& arg : vmcall->args) { + print_ast(arg, indent + 1); + } + } + break; + case lmx::ASTKind::Module: + { + auto module = dynamic_cast(node.get()); + std::cerr << "Module(" << module->name << ")" << std::endl; + } + break; + case lmx::ASTKind::Use: + { + auto use = dynamic_cast(node.get()); + std::cerr << "Use(" << use->path->str << ")" << std::endl; + } + break; + case lmx::ASTKind::Loop: + { + auto loop = dynamic_cast(node.get()); + std::cerr << "Loop" << std::endl; + std::cerr << " Condition:" << std::endl; + print_ast(loop->condition, indent + 2); + std::cerr << " Body:" << std::endl; + print_ast(loop->body, indent + 2); + } + break; + case lmx::ASTKind::Break: + std::cerr << "Break" << std::endl; + break; + case lmx::ASTKind::Continue: + std::cerr << "Continue" << std::endl; + break; + default: + std::cerr << "Unknown AST node type" << std::endl; + break; + } +} + +int run_repl() { + std::string expr; + lmx::Lexer l(expr); + lmx::Generator generator; + lmx::runtime::VirtualCore core; + core.set_program(&generator.ops); + + const std::string prompt = std::string(COLOR_MAGENTA) + ">>> " + COLOR_RESET; + while (true) { + std::cout << prompt << std::flush; + if (!std::getline(std::cin, expr)) break; + if (expr == ":lastret") std::cout << core.look_register(0) << std::endl; + else if (expr == ":exit") break; + else if (expr == ":op") generator.print_ops(); + else if (expr == ":vars") generator.print_vars(); + else { + // Reset error flag before processing each input + lmx::Generator::node_has_error = false; + + // Tokenize and display tokens + std::vector tks = l.tokenize(expr); + DEBUG_TOKEN_LIST(tks); + + // Parse and display AST + lmx::Parser parser(tks, expr, ""); + auto node = parser.parse(); + if (!node || parser.error()) continue; + + DEBUG_SEPARATOR("ABSTRACT SYNTAX TREE"); + std::cerr << COLOR_MAGENTA; + print_ast(node); + std::cerr << COLOR_RESET; + DEBUG_SEPARATOR("AST END"); + + // Generate bytecode and display it + const auto op = generator.gen(node); + if (lmx::Generator::node_has_error) continue; + + DEBUG_SEPARATOR("GENERATED BYTECODE"); + std::cerr << COLOR_CYAN; + generator.print_ops(); + std::cerr << COLOR_RESET; + DEBUG_SEPARATOR("BYTECODE END"); + + // Execute + generator.ops.emplace_back(lmx::runtime::Opcode::HALT); + core.set_constant(generator.constant_pool.data()); + int result = core.run(); + + if (op != -1 && result == 0) { + generator.regs.free(op); + auto& value = core.get_register(op); + if (value.type == lmx::runtime::ValueType::Null) { + } else if (value.type == lmx::runtime::ValueType::Ptr) { + size_t vector_ptr = value.i64; + size_t vector_size = core.heap_at(vector_ptr).i64; + std::cout << "vec["; + for (size_t i = 0; i < vector_size; i++) { + if (i > 0) std::cout << ", "; + auto& elem = core.heap_at(vector_ptr + 1 + i); + if (elem.type == lmx::runtime::ValueType::Ptr) { + size_t elem_ptr = elem.u64; + size_t elem_size = core.heap_at(elem_ptr).i64; + std::cout << "vec["; + for (size_t j = 0; j < elem_size; j++) { + if (j > 0) std::cout << ", "; + auto& nested_elem = core.heap_at(elem_ptr + 1 + j); + switch (nested_elem.type) { + case lmx::runtime::ValueType::Int: + std::cout << nested_elem.i64; + break; + case lmx::runtime::ValueType::Str: + std::cout << "\"" << nested_elem.str << "\""; + break; + case lmx::runtime::ValueType::Bool: + std::cout << (nested_elem.b ? "true" : "false"); + break; + case lmx::runtime::ValueType::Float: + std::cout << nested_elem.f64; + break; + case lmx::runtime::ValueType::Ptr: + std::cout << "ptr(" << nested_elem.u64 << ")"; + break; + case lmx::runtime::ValueType::Null: + std::cout << "null"; + break; + default: + std::cout << ""; + break; + } + } + std::cout << "]"; + } else { + switch (elem.type) { + case lmx::runtime::ValueType::Int: + std::cout << elem.i64; + break; + case lmx::runtime::ValueType::Str: + std::cout << "\"" << elem.str << "\""; + break; + case lmx::runtime::ValueType::Bool: + std::cout << (elem.b ? "true" : "false"); + break; + case lmx::runtime::ValueType::Float: + std::cout << elem.f64; + break; + case lmx::runtime::ValueType::Null: + std::cout << "null"; + break; + default: + std::cout << ""; + break; + } + } + } + std::cout << "]" << std::endl; + } else { + switch (value.type) { + case lmx::runtime::ValueType::Int: + std::cout << value.i64 << std::endl; + break; + case lmx::runtime::ValueType::Float: + std::cout << value.f64 << std::endl; + break; + case lmx::runtime::ValueType::Bool: + std::cout << (value.b ? "true" : "false") << std::endl; + break; + case lmx::runtime::ValueType::Str: + std::cout << "\"" << value.str << "\"" << std::endl; + break; + default: + std::cout << value.to_string() << std::endl; + break; + } + } + } + if (generator.ops.back().op == lmx::runtime::Opcode::HALT) generator.ops.pop_back(); + } + } + return 0; + +} diff --git a/tools/lm/main.cpp b/tools/lm/main.cpp index 21ae4e2..b2015d7 100644 --- a/tools/lm/main.cpp +++ b/tools/lm/main.cpp @@ -18,9 +18,9 @@ std::filesystem::path get_executable_path() { #ifdef _WIN32 char buffer[MAX_PATH]; - DWORD length = GetModuleFileNameA(nullptr, buffer, MAX_PATH); + const DWORD length = GetModuleFileNameA(nullptr, buffer, MAX_PATH); if (length > 0) { - return std::filesystem::path(buffer); + return {buffer}; } #elif defined(__APPLE__)