diff --git a/src/SourceFile.cpp b/src/SourceFile.cpp index a062bdb2e..765e0a1b5 100644 --- a/src/SourceFile.cpp +++ b/src/SourceFile.cpp @@ -840,7 +840,7 @@ void SourceFile::visualizerOutput(std::string outputName, const std::string &out svgFilePath.replace_extension("svg"); dotFilePath.make_preferred(); svgFilePath.make_preferred(); - SystemUtil::exec("dot -T svg -o" + svgFilePath.string() + " " + dotFilePath.string()); + SystemUtil::exec("dot", {"-T", "svg", "-o", svgFilePath.string(), dotFilePath.string()}); std::cout << "done.\nSVG file can be found at: " << svgFilePath << "\n"; } else { // Dump to console diff --git a/src/driver/Driver.cpp b/src/driver/Driver.cpp index 9f99b96fb..c007264be 100644 --- a/src/driver/Driver.cpp +++ b/src/driver/Driver.cpp @@ -194,7 +194,7 @@ void Driver::runBinary() const { // Run executable std::filesystem::path executablePath = cliOptions.outputPath; executablePath.make_preferred(); - const auto [output, exitCode] = SystemUtil::exec(executablePath.string()); + const auto [output, exitCode] = SystemUtil::exec(executablePath.string(), {}); if (exitCode != EXIT_SUCCESS) throw CliError(NON_ZERO_EXIT_CODE, "Your Spice executable exited with non-zero exit code " + std::to_string(exitCode)); } diff --git a/src/exception/SemanticError.cpp b/src/exception/SemanticError.cpp index 653f3d3fb..e723d14ce 100644 --- a/src/exception/SemanticError.cpp +++ b/src/exception/SemanticError.cpp @@ -136,6 +136,8 @@ std::string SemanticError::getMessagePrefix(SemanticErrorType errorType) { return "Duplicate import name"; case IMPORTED_FILE_NOT_EXISTING: return "Imported source file not existing"; + case INVALID_IMPORT_PATH: + return "Invalid import path"; case CIRCULAR_DEPENDENCY: return "Circular import detected"; case ACCESS_TO_NON_EXISTING_MEMBER: diff --git a/src/exception/SemanticError.h b/src/exception/SemanticError.h index d1571b652..87ea37340 100644 --- a/src/exception/SemanticError.h +++ b/src/exception/SemanticError.h @@ -67,6 +67,7 @@ enum SemanticErrorType : uint8_t { PRINTF_ARG_COUNT_ERROR, DUPLICATE_IMPORT_NAME, IMPORTED_FILE_NOT_EXISTING, + INVALID_IMPORT_PATH, CIRCULAR_DEPENDENCY, ACCESS_TO_NON_EXISTING_MEMBER, INVALID_MEMBER_ACCESS, diff --git a/src/importcollector/ImportCollector.cpp b/src/importcollector/ImportCollector.cpp index 828c3b764..74e2fb99f 100644 --- a/src/importcollector/ImportCollector.cpp +++ b/src/importcollector/ImportCollector.cpp @@ -26,6 +26,18 @@ std::any ImportCollector::visitEntry(EntryNode *node) { } std::any ImportCollector::visitImportDef(ImportDefNode *node) { + // Validate the import path before resolving it against any base directory. Import paths must be relative and must + // not contain parent-directory ('..') components. Otherwise a malicious source file could escape the std/bootstrap/ + // project roots (e.g. 'import "../../../etc/x"' or 'import "std/../../../etc/x"') and read arbitrary files, or worse, + // smuggle shell metacharacters into a filename that later ends up in a linker invocation. + const std::filesystem::path importPathRelative(node->importPath); + if (importPathRelative.has_root_path()) + throw SemanticError(node, INVALID_IMPORT_PATH, "Import paths must be relative, but '" + node->importPath + "' is not"); + for (const std::filesystem::path &part : importPathRelative) + if (part == "..") + throw SemanticError(node, INVALID_IMPORT_PATH, + "Import paths must not contain parent-directory ('..') components: '" + node->importPath + "'"); + const bool isStd = node->importPath.starts_with("std/"); const bool isBootstrap = node->importPath.starts_with("bootstrap/"); diff --git a/src/linker/ExternalLinkerInterface.cpp b/src/linker/ExternalLinkerInterface.cpp index a95bb2476..dbc405bea 100644 --- a/src/linker/ExternalLinkerInterface.cpp +++ b/src/linker/ExternalLinkerInterface.cpp @@ -3,6 +3,7 @@ #include "ExternalLinkerInterface.h" #include +#include #include #include @@ -43,12 +44,12 @@ void ExternalLinkerInterface::prepare() { addLinkerFlag("-ltsan"); break; case Sanitizer::MEMORY: - addLinkerFlag("-L$(clang -print-resource-dir)/lib/x86_64-unknown-linux-gnu"); + addLinkerFlag("-L" + SystemUtil::getClangResourceDir() + "/lib/x86_64-unknown-linux-gnu"); addLinkerFlag("-lclang_rt.msan"); requestLibMathLinkage(); break; case Sanitizer::TYPE: - addLinkerFlag("-L$(clang -print-resource-dir)/lib/x86_64-unknown-linux-gnu"); + addLinkerFlag("-L" + SystemUtil::getClangResourceDir() + "/lib/x86_64-unknown-linux-gnu"); addLinkerFlag("-lclang_rt.tysan"); break; } @@ -96,32 +97,36 @@ void ExternalLinkerInterface::cleanup() const { void ExternalLinkerInterface::link() const { assert(!outputPath.empty()); - // Build the linker command - std::stringstream commandBuilder; + // Find the linker invoker and linker const auto [linkerInvokerName, linkerInvokerPath] = SystemUtil::findLinkerInvoker(); - commandBuilder << linkerInvokerPath; const auto [linkerName, linkerPath] = SystemUtil::findLinker(cliOptions); const bool isGccInvoker = std::string_view(linkerInvokerName) == "gcc"; + + // Build the linker argument vector. Each entry is passed verbatim to the linker invoker (no shell), so file paths + // can never be re-interpreted as shell syntax - this is what prevents command injection via attacker-controlled + // object-file or output paths. + std::vector args; // GCC 16 dropped '-fuse-ld=ld'; skip when using GCC with the default BFD linker if (!isGccInvoker || std::string_view(linkerName) != "ld") - commandBuilder << " -fuse-ld=" << linkerPath; + args.push_back("-fuse-ld=" + linkerPath); // '--target=' is clang-only; GCC uses target-specific toolchain prefixes instead if (!isGccInvoker) - commandBuilder << " --target=" << cliOptions.targetTriple.str(); + args.push_back("--target=" + cliOptions.targetTriple.str()); // Append linker flags for (const std::string &linkerFlag : linkerFlags) - commandBuilder << " " << linkerFlag; + args.push_back(linkerFlag); if (linkLibMath) - commandBuilder << " -lm"; + args.emplace_back("-lm"); // Append output path - commandBuilder << " -o " << outputPath.string(); + args.emplace_back("-o"); + args.push_back(outputPath.string()); // Append object files for (const std::filesystem::path &objectFilePath : linkedFiles) - commandBuilder << " " << objectFilePath.string(); - const std::string command = commandBuilder.str(); + args.push_back(objectFilePath.string()); // Print status message if (cliOptions.printDebugOutput) { + const std::string command = SystemUtil::renderCommandForDisplay(linkerInvokerPath, args); std::cout << "\nLinking with: " << linkerInvokerName << " (invoker) / " << linkerName << " (linker)"; // GCOV_EXCL_LINE std::cout << "\nLinker command: " << command; // GCOV_EXCL_LINE std::cout << "\nEmitting executable to path: " << outputPath.string() << "\n"; // GCOV_EXCL_LINE @@ -130,11 +135,12 @@ void ExternalLinkerInterface::link() const { // Call the linker Timer timer; timer.start(); - const auto [output, exitCode] = SystemUtil::exec(command); + const auto [output, exitCode] = SystemUtil::exec(linkerInvokerPath, args); timer.stop(); // Check for linker error if (exitCode != 0) { // GCOV_EXCL_LINE + const std::string command = SystemUtil::renderCommandForDisplay(linkerInvokerPath, args); // GCOV_EXCL_LINE const std::string errorMessage = "Linker exited with non-zero exit code\nLinker command: " + command; // GCOV_EXCL_LINE throw LinkerError(LINKER_ERROR, errorMessage); // GCOV_EXCL_LINE } // GCOV_EXCL_LINE @@ -154,34 +160,35 @@ void ExternalLinkerInterface::link() const { void ExternalLinkerInterface::archive() const { assert(!outputPath.empty()); - // Build the archiver command - std::stringstream commandBuilder; + // Find the archiver const auto [archiverName, archiverPath] = SystemUtil::findArchiver(); - commandBuilder << archiverPath; - commandBuilder << " rcs "; // r = insert files into archive; c = create archive if not existing, s = create archive index - commandBuilder << outputPath.string(); + + // Build the archiver argument vector (passed verbatim, no shell involved) + std::vector args; + args.emplace_back("rcs"); // r = insert files into archive; c = create archive if not existing, s = create archive index + args.push_back(outputPath.string()); for (const std::filesystem::path &path : linkedFiles) - commandBuilder << " " << path.string(); - const std::string command = commandBuilder.str(); + args.push_back(path.string()); // Print status message if (cliOptions.printDebugOutput) { - std::cout << "\nArchiving with: " << archiverName; // GCOV_EXCL_LINE - std::cout << "\nArchiver command: " << command; // GCOV_EXCL_LINE - std::cout << "\nEmitting static library to path: " << outputPath.string() << "\n"; // GCOV_EXCL_LINE + std::cout << "\nArchiving with: " << archiverName; // GCOV_EXCL_LINE + std::cout << "\nArchiver command: " << SystemUtil::renderCommandForDisplay(archiverPath, args); // GCOV_EXCL_LINE + std::cout << "\nEmitting static library to path: " << outputPath.string() << "\n"; // GCOV_EXCL_LINE } // Call the archiver Timer timer; timer.start(); - const auto [output, exitCode] = SystemUtil::exec(command); + const auto [output, exitCode] = SystemUtil::exec(archiverPath, args); timer.stop(); // Check for linker error if (exitCode != 0) { // GCOV_EXCL_LINE + const std::string command = SystemUtil::renderCommandForDisplay(archiverPath, args); // GCOV_EXCL_LINE const std::string errorMessage = "Archiver exited with non-zero exit code\nArchiver command: " + command; // GCOV_EXCL_LINE throw LinkerError(LINKER_ERROR, errorMessage); // GCOV_EXCL_LINE - } // GCOV_EXCL_LINE + } // Print linker result if appropriate if (cliOptions.printDebugOutput && !output.empty()) // GCOV_EXCL_LINE diff --git a/src/util/SystemUtil.cpp b/src/util/SystemUtil.cpp index 94ef2aacd..b3c0e8511 100644 --- a/src/util/SystemUtil.cpp +++ b/src/util/SystemUtil.cpp @@ -3,7 +3,10 @@ #include "SystemUtil.h" #include +#include #include // IWYU pragma: keep (usage in Windows-only code) +#include +#include #include #if OS_UNIX #include @@ -16,13 +19,22 @@ #include #include #include +#include +#include +#include +#include #include namespace spice::compiler { /** - * Execute external command. Used to execute compiled binaries + * Execute a command string through the system shell (popen/_popen). + * + * WARNING: This variant passes 'command' to a shell, so any shell metacharacters it contains are interpreted. It must + * only be used with fully trusted, internally-constructed command strings (e.g. the test runner). For anything whose + * arguments are influenced by user input, use the argument-vector overload exec(program, args, ...) instead, which + * never involves a shell. * * @param command Command to execute * @param redirectStdErrToStdOut Redirect StdErr to StdOut @@ -55,6 +67,90 @@ ExecResult SystemUtil::exec(const std::string &command, bool redirectStdErrToStd return {result.str(), transformStatusToExitCode(status)}; } +/** + * Execute an external program without involving a shell. + * + * Each argument is passed verbatim to the program via 'execve'/'CreateProcess' (through LLVM's process abstraction), + * so file paths and other arguments can never be interpreted as shell syntax. This is the safe variant that must be + * used for any command whose arguments are influenced by (untrusted) user input, e.g. the linker invocation. + * + * @param program Name or path of the program to execute (resolved against PATH if it is a bare name) + * @param args Arguments passed to the program (without argv[0], which is added automatically) + * @param redirectStdErrToStdOut Capture stderr into the returned output alongside stdout + * @return Result struct with the captured output and the process exit code + */ +ExecResult SystemUtil::exec(const std::string &program, const std::vector &args, bool redirectStdErrToStdOut) { + // Resolve the program to a concrete path. This replaces the shell's PATH lookup and keeps execution independent of + // any shell metacharacter parsing. Absolute/relative paths are returned unchanged by findProgramByName. + const llvm::ErrorOr programPath = llvm::sys::findProgramByName(program); + const std::string resolvedProgram = programPath ? *programPath : program; + + // Assemble the argument vector. LLVM expects argv[0] to be the program itself. + std::vector execArgs; + execArgs.reserve(args.size() + 1); + execArgs.emplace_back(resolvedProgram); + for (const std::string &arg : args) + execArgs.emplace_back(arg); + + // Redirect the child's stdout (and optionally stderr) into a temporary file so we can capture it + llvm::SmallString<128> outputFilePath; + if (const std::error_code ec = llvm::sys::fs::createTemporaryFile("spice-exec", "out", outputFilePath)) // GCOV_EXCL_LINE + throw CompilerError(IO_ERROR, "Could not create temporary file for command output: " + ec.message()); // GCOV_EXCL_LINE + + // Redirects layout is [stdin, stdout, stderr]; std::nullopt inherits the parent's descriptor, a path redirects to a + // file. LLVM merges stdout and stderr when both reference the same path. + const llvm::StringRef outputRef = outputFilePath.str(); + const std::array, 3> redirects = { + std::nullopt, // stdin: inherit + std::optional(outputRef), // stdout -> temp file + redirectStdErrToStdOut ? std::optional(outputRef) : std::nullopt, // stderr + }; + + // Execute and wait. No shell is involved. + std::string errorMsg; + bool executionFailed = false; + const int exitCode = + llvm::sys::ExecuteAndWait(resolvedProgram, execArgs, std::nullopt, redirects, 0, 0, &errorMsg, &executionFailed); + + if (executionFailed) { // GCOV_EXCL_LINE + llvm::sys::fs::remove(outputFilePath.str()); // GCOV_EXCL_LINE + throw CompilerError(IO_ERROR, "Failed to execute '" + program + "': " + errorMsg); // GCOV_EXCL_LINE + } // GCOV_EXCL_LINE + + // Read back the captured output and clean up the temporary file + std::string output = FileUtil::getFileContent(outputFilePath.str().str()); + llvm::sys::fs::remove(outputFilePath.str()); + + return {output, exitCode}; +} + +/** + * Render a program invocation as a human-readable command string. Used for debug output and error messages only - + * the actual execution happens via an argument vector (no shell), so this rendering is never executed. + */ +std::string SystemUtil::renderCommandForDisplay(const std::string &program, const std::vector &args) { + std::stringstream command; + command << program; + for (const std::string &arg : args) + command << " " << arg; + return command.str(); +} + +/** + * Query clang for its resource directory. Required to locate the sanitizer runtime libraries. Previously this was done + * via a '$(clang -print-resource-dir)' shell substitution embedded in a linker flag; since linking no longer goes + * through a shell, the substitution is resolved here instead. + */ +std::string SystemUtil::getClangResourceDir() { + const std::vector args{"-print-resource-dir"}; + auto [dir, exitCode] = SystemUtil::exec("clang", args); + if (exitCode != 0) // GCOV_EXCL_LINE + throw LinkerError(LINKER_ERROR, "Could not determine clang resource directory for sanitizer runtime linkage"); // GCOV_EXCL_LINE + while (!dir.empty() && std::isspace(static_cast(dir.back()))) + dir.pop_back(); + return dir; +} + /** * Checks if a certain command is available on the computer * @@ -62,15 +158,8 @@ ExecResult SystemUtil::exec(const std::string &command, bool redirectStdErrToStd * @return Present or not */ bool SystemUtil::isCommandAvailable(const std::string &cmd) { -#if OS_UNIX - const std::string checkCmd = "which " + cmd + " > /dev/null 2>&1"; -#elif OS_WINDOWS - const std::string checkCmd = "where " + cmd + " > nul 2>&1"; -#else -#error "Unsupported platform" -#endif - const int status = std::system(checkCmd.c_str()); - return transformStatusToExitCode(status) == EXIT_SUCCESS; + // Look the program up on PATH without spawning a shell ('which'/'where') + return static_cast(llvm::sys::findProgramByName(cmd)); } /** @@ -94,7 +183,7 @@ ExternalBinaryFinderResult SystemUtil::findLinkerInvoker() { return ExternalBinaryFinderResult{linkerInvokerName, path + linkerInvokerName}; #elif OS_WINDOWS for (const char *linkerInvokerName : {"clang", "gcc"}) - if (isCommandAvailable(std::string(linkerInvokerName) + " -v")) + if (isCommandAvailable(linkerInvokerName)) return ExternalBinaryFinderResult{linkerInvokerName, linkerInvokerName}; #else #error "Unsupported platform" @@ -128,7 +217,7 @@ ExternalBinaryFinderResult SystemUtil::findLinker([[maybe_unused]] const CliOpti return ExternalBinaryFinderResult{linkerName, path + linkerName}; #elif OS_WINDOWS for (const char *linkerName : {"lld", "ld"}) - if (isCommandAvailable(std::string(linkerName) + " -v")) + if (isCommandAvailable(linkerName)) return ExternalBinaryFinderResult{linkerName, linkerName}; #else #error "Unsupported platform" @@ -151,7 +240,7 @@ ExternalBinaryFinderResult SystemUtil::findArchiver() { return ExternalBinaryFinderResult{archiverName, path + archiverName}; #elif OS_WINDOWS for (const char *archiverName : {"llvm-lib", "lib"}) - if (isCommandAvailable(std::string(archiverName) + " -v")) + if (isCommandAvailable(archiverName)) return ExternalBinaryFinderResult{archiverName, archiverName}; #else #error "Unsupported platform" diff --git a/src/util/SystemUtil.h b/src/util/SystemUtil.h index 137d36f38..fdb45f4a7 100644 --- a/src/util/SystemUtil.h +++ b/src/util/SystemUtil.h @@ -4,6 +4,7 @@ #include #include +#include namespace spice::compiler { @@ -24,6 +25,10 @@ struct ExternalBinaryFinderResult { class SystemUtil { public: static ExecResult exec(const std::string &command, bool redirectStdErrToStdOut = false); + static ExecResult exec(const std::string &program, const std::vector &args, + bool redirectStdErrToStdOut = false); + static std::string renderCommandForDisplay(const std::string &program, const std::vector &args); + static std::string getClangResourceDir(); static bool isCommandAvailable(const std::string &cmd); static bool isGraphvizInstalled(); static ExternalBinaryFinderResult findLinkerInvoker(); diff --git a/test/test-files/typechecker/imports/error-import-path-traversal/exception.out b/test/test-files/typechecker/imports/error-import-path-traversal/exception.out new file mode 100644 index 000000000..e86e9e104 --- /dev/null +++ b/test/test-files/typechecker/imports/error-import-path-traversal/exception.out @@ -0,0 +1,5 @@ +[Error|Semantic] ./source.spice:1:1: +Invalid import path: Import paths must not contain parent-directory ('..') components: '../secret' + +1 import "../secret" as s; + ^^^^^^^^^^^^^^^^^^^^^^^^ \ No newline at end of file diff --git a/test/test-files/typechecker/imports/error-import-path-traversal/source.spice b/test/test-files/typechecker/imports/error-import-path-traversal/source.spice new file mode 100644 index 000000000..e4b299f5e --- /dev/null +++ b/test/test-files/typechecker/imports/error-import-path-traversal/source.spice @@ -0,0 +1,3 @@ +import "../secret" as s; + +f main() {}