diff --git a/.eslintrc.js b/.eslintrc.js
index 57152f3b..859a558c 100644
--- a/.eslintrc.js
+++ b/.eslintrc.js
@@ -85,5 +85,16 @@ module.exports = {
'no-undef': 'off', // TypeScript 已处理变量未定义检查
},
},
+ {
+ files: ['check-requires.js'], // 编译检查工具,不参与 dist 生成
+ parserOptions: {
+ project: null, // 不使用 TypeScript 项目配置
+ ecmaVersion: 'latest',
+ sourceType: 'script', // 使用 script 模式(因为文件开头有 #!)
+ },
+ rules: {
+ 'filenames/match-regex': 'off', // 允许工具文件使用不同的命名
+ },
+ },
],
}
diff --git a/.gitignore b/.gitignore
index e4b64010..8d27ac29 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,4 +47,7 @@ deps/uast4python
/workspace/
deps/uast4py
src/uast
-src/report
\ No newline at end of file
+deps-runtime/
+.cursorrules
+test/javascript/test-report
+deps
diff --git a/README.md b/README.md
index 3bd2feed..71b09688 100644
--- a/README.md
+++ b/README.md
@@ -1,78 +1,31 @@
-
+# 项目简介
+统一多语言程序分析是一个面向工业界大规模落地的静态程序分析工具。它为多语言定义了统一的抽象语法树(UAST)中间表示,基于UAST和模拟执行技术,实现了高精度的数据流、指针分析和污点分析,并同时提供了命令式和声明式两种规则扩展能力供用户灵活、低成本使用,可广泛应用于代码安全等场景。
-#### [简体中文](README_ZH.md) / [English](README.md)
+# 核心功能
++ 定义了统一的多语言的抽象语法树(UAST)中间表示,并提供了各语言向UAST转换的工具
++ 高精度的数据流、指针分析和污点分析
++ 可扩展的规则定制化能力,包括命令式规则和声明式查询语言
++ 内置常见的安全检测规则,面向安全检测场景可开箱即用
-**YASA** (**Y**et **A**nother **S**tatic **A**nalyzer, pronounced “**YA-sa**”) is an open-source static program analysis project. Its core innovation lies in a unified intermediate representation called the **U**nified **A**bstract **S**yntax **T**ree (UAST), designed to support multiple programming languages. Built on top of UAST, YASA provides a highly accurate static analysis framework. Users can extend its capabilities by writing custom checkers to perform various program analysis tasks—such as AST queries, data flow analysis, and function call graph analysis—and expose functionality through SDK, declarative query language (QL), or MCP.
+# 项目架构
+
-As a project originally developed within a security team, YASA also comes with built-in taint analysis capabilities, implemented as a checker, to detect security vulnerabilities.
+**QL**:Query Language
-## Components
-
+**UAST**:Unified Abstract Syntax Tree **统一抽象语法树**
-### YASA-UAST: Unified Abstract Syntax Tree
-[YASA-UAST](https://github.com/antgroup/YASA-UAST) is an intermediate representation structure for multi-language program analysis. The UAST-Parser parses code from different programming languages into a unified abstract syntax format. Through UAST, source code in different languages can be converted into a standardized tree structure, enabling unified analysis and processing across multiple languages.
+**YASA**:Yet Another Static Analyzer
-### YASA-Engine: Unified multi-language Analysis Engine
-The unified multi-language analysis engine is the core component of a modern program analysis platform. It aims to achieve efficient and precise analysis of multiple programming languages through a unified analysis framework and methodology. Also, with the help of AI capabilities, it addresses issues such as broken chains in traditional program analysis and high adaptation costs for new scenarios. (The AI part is not open-sourced yet.)
+# 技术优势
++ 【统一】统一多语言抽象语法树(UAST)中间表示,可低成本支持新语言,便于跨语言的分析
++ 【高精度】多语言程序模拟执行技术,还原了真实的程序运行上下文,可提供更高的分析精度
++ 【工业界落地】经过蚂蚁内部大规模落地和业界首个程序分析评价体系开源项目[xast](https://xastbenchmark.github.io/)的“双重认证”,多语言场景下的分析完整度、准确度和性能都有较高保障
++ 【低使用成本】命令式程序分析结合声明式查询语句
+ - 兼容[Github codeql](https://github.com/github/codeql)的语法和规则,用户使用门槛低
+ - 提供了灵活的规则定制能力
-### YASA-UQL: Unified Declarative Rule Query Language
-Supports declarative unified query rule writing for multiple languages, compatible with CodeQL syntax, lowering the barrier to rule writing while unifying rule sets across languages.
+# 支持的语言
+Java、JS、Go、Python......
-### YASA-MCP: Unified multi-language Program Analysis MCP
-Provides atomic analysis APIs for LLM, offering program analysis services that are LLM-friendly.
+其他语言的支持为开源社区共建“留白”
-### YASA-SDK: Unified multi-language Program Analysis SDK
-Provides SDK packages supporting multiple programming languages for traditional applications. It provides a variety of granular program analysis APIs, making integration easier and enabling efficient and user-friendly program analysis services within applications.
-
-### xAST
-[xAST](https://github.com/alipay/ant-application-security-testing-benchmark) is an open-source evaluation system for SAST/IAST/DAST tool capabilities. In YASA-Engine, it serves as the regression target for post-change testing, and during the process of multi-language adaptation, it provides positive guidance on language syntax support.
-
-## Technical Advantages
-### Low Cost for New Language Support
-- YASA is directly modeled and analyzed based on UAST. When adapting to a new language, once it is parsed into UAST, the general-layer analyzer's capabilities can be used. After supporting the new language's package structure, the new language's analysis is already supported.
-
-
-
-
-### High Analysis Accuracy, Measurable, and Unified Multi-Languages
-- YASA is based on unified multi-language symbolic interpretation capabilities, offering high precision and scalability in static code analysis. It naturally supports field-sensitive, context-sensitive, object-sensitive, path-sensitive, and flow-sensitive capabilities in the field of static analysis.
-
-- During YASA's development, we used [xAST](https://github.com/alipay/ant-application-security-testing-benchmark) to evaluate and verify our capabilities, achieving "measurable capabilities." We compared YASA's performance with other open-source program analysis tools under the xAST evaluation system:
-
-
-
-### Open and Friendly
-- Introduced the unified declarative rule query language YASA-UQL, compatible with CodeQL syntax, and pioneered a unified QL rule library for multiple languages, making program analysis more engineer-friendly.
-
-- Launched YASA MCP (LLM-friendly) and SDK (App development-friendly).
-
-## Quick Start
-
-[Getting Started](https://www.yuque.com/u22090306/bebf6g/evyf4chw26deq8xq)
-
-[Installation and Deployment](https://www.yuque.com/u22090306/bebf6g/gm7b32tcn9vosgll)
-
-## Join Us
-Welcome to submit issues if you encounter any problems!
-
-For code contributions, please refer to [CONTRIBUTION](CONTRIBUTION.md)
-
-## Resource Links
-[Official Documentation](https://www.yuque.com/u22090306/bebf6g)
-
-[Learning Resources](https://www.yuque.com/u22090306/bebf6g/sr0y5fqg0kcua5nf)
-
-[Community Activities](https://www.yuque.com/u22090306/bebf6g/fn1rauxwtp7z0l1u)
-
-## Open Source License
-Apache License 2.0 - Details in LICENSE Apache-2.0.
-
-## Acknowledgments
-Thanks to all developers who have contributed to the YASA project! Special thanks to the open-source community for their support and feedback, enabling us to jointly advance the development of program analysis technology.
-
-YASA - Making code analysis more precise, easier, and smarter.
-
-## Contact Us
-[Official Website](https://cybersec.antgroup.com/station)
-
-
diff --git a/build.sh b/build.sh
old mode 100644
new mode 100755
index b5d6d689..242d4725
--- a/build.sh
+++ b/build.sh
@@ -27,43 +27,99 @@ info() {
info "开始构建流程..."
+# 步骤 0: 清理历史结果
+info "步骤 0/8: 清理历史结果 (rm -rf dist)"
+if ! rm -rf dist > /dev/null; then
+ alert "清理历史结果失败"
+fi
+success "清理历史结果完成"
+
# 步骤 1: 安装依赖
-info "步骤 1/6: 安装依赖 (npm install)"
-if ! npm install; then
+info "步骤 1/8: 安装依赖 (npm install --package-lock=false)"
+if ! npm install --package-lock=false > /dev/null; then
alert "npm install 失败"
fi
success "依赖安装完成"
# 步骤 2: 类型检查
-info "步骤 2/6: 类型检查 (npx tsc --noEmit)"
-if ! npx tsc --noEmit; then
+info "步骤 2/8: 类型检查 (npx tsc --noEmit)"
+# tsc prints its diagnostics on stdout, so sending stdout to /dev/null would
+# hide the very errors the user is asked to fix. Capture all output instead
+# and replay it on stderr only when the check fails; successful runs stay quiet.
+set +e
+TSC_CHECK_OUTPUT=$(npx tsc --noEmit 2>&1)
+TSC_CHECK_EXIT_CODE=$?
+set -e
+if [ $TSC_CHECK_EXIT_CODE -ne 0 ]; then
+  printf '%s\n' "$TSC_CHECK_OUTPUT" >&2
alert "类型检查失败,请修复 TypeScript 错误"
fi
success "类型检查通过"
-# 步骤 3: 运行所有测试
-info "步骤 3/6: 运行所有测试 (npm run test-all)"
-if ! npm run test-all; then
+# 步骤 3: 检查 require() 调用
+info "步骤 3/8: 检查 require() 调用 (node check-requires.js)"
+if ! node check-requires.js > /dev/null; then
+ alert "require() 检查失败,请修复模块引用错误"
+fi
+success "require() 检查通过"
+
+# 步骤 4: 运行所有测试
+info "步骤 4/8: 运行所有测试 (npm run test-all)"
+if ! npm run test-all > /dev/null; then
alert "测试失败,请修复测试错误"
fi
success "所有测试通过"
-# 步骤 4: 编译 TypeScript
-info "步骤 4/6: 编译 TypeScript (npx tsc)"
-if ! npx tsc; then
- alert "TypeScript 编译失败"
+# 步骤 5: 生成构建版本信息
+info "步骤 5/8: 生成构建版本信息"
+BUILD_DATE=$(date +%Y%m%d)
+COMMIT_HASH=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
+
+# 创建 dist 目录(如果不存在)
+mkdir -p dist
+
+# 生成版本信息文件(编译后代码会读取此文件)
+cat > dist/build-version.json < /dev/null
+TSC_EXIT_CODE=$?
+set -e
+if [ $TSC_EXIT_CODE -ne 0 ]; then
+ alert "TypeScript 编译失败,请查看上方的错误信息"
fi
success "TypeScript 编译完成"
-# 步骤 5: 打包二进制
-info "步骤 5/6: 打包二进制 (npx pkg)"
-if ! npx pkg . --options max-old-space-size=13312; then
- alert "打包失败"
+# 确保版本文件在编译后仍然存在(因为 tsc 可能会清理 dist)
+mkdir -p dist
+cat > dist/build-version.json < /dev/null
+PKG_EXIT_CODE=$?
+set -e
+if [ $PKG_EXIT_CODE -ne 0 ]; then
+ alert "打包失败 (退出码: $PKG_EXIT_CODE),请查看上方的错误信息"
fi
success "打包完成"
-# 步骤 6: 删除 dist 文件
-info "步骤 6/6: 删除 dist 文件"
+# 步骤 8: 删除 dist 文件
+info "步骤 8/8: 删除 dist 文件"
if [ -d "dist" ]; then
rm -rf dist
success "dist 文件已删除"
@@ -72,4 +128,3 @@ else
fi
info "构建流程全部完成!"
-
diff --git a/check-requires.js b/check-requires.js
new file mode 100755
index 00000000..799ff911
--- /dev/null
+++ b/check-requires.js
@@ -0,0 +1,237 @@
+#!/usr/bin/env node
+
+const fs = require('fs')
+const path = require('path')
+const { globSync } = require('fast-glob')
+
+/**
+ * Checks whether require() calls are valid.
+ * Only files that TypeScript would compile (per tsconfig.json) are checked.
+ */
+
+// Configuration: the project root is the directory containing this script,
+// and tsconfig.json is expected to sit directly inside it.
+const PROJECT_ROOT = __dirname
+const TSCONFIG_PATH = path.join(PROJECT_ROOT, 'tsconfig.json')
+
+/**
+ * 使用 TypeScript 编译器获取实际会编译的文件列表
+ */
+function getTypeScriptFiles() {
+ const { execSync } = require('child_process')
+
+ try {
+ // 使用 tsc --listFiles 获取实际编译的文件列表
+ const output = execSync('npx tsc --listFiles --noEmit', {
+ cwd: PROJECT_ROOT,
+ encoding: 'utf-8',
+ stdio: ['pipe', 'pipe', 'pipe'],
+ })
+
+ // 解析输出,提取文件路径
+ const files = output
+ .split('\n')
+ .map(line => line.trim())
+ .filter(line => line && !line.startsWith('TS') && !line.includes('node_modules'))
+ .filter(line => {
+ // 只保留源文件,排除 .d.ts 和输出文件
+ return line.endsWith('.ts') || line.endsWith('.js')
+ })
+ .map(line => {
+ // 转换为绝对路径
+ if (path.isAbsolute(line)) {
+ return line
+ }
+ return path.resolve(PROJECT_ROOT, line)
+ })
+ .filter(file => {
+ // 排除输出目录中的文件
+ return !file.includes('/dist/') && !file.includes('\\dist\\')
+ })
+
+ return [...new Set(files)] // 去重
+ } catch (error) {
+ // 如果 tsc 执行失败,回退到使用 tsconfig.json
+ const tsconfigContent = fs.readFileSync(TSCONFIG_PATH, 'utf-8')
+ const tsconfig = JSON.parse(tsconfigContent)
+ const include = tsconfig.include || ['src/**/*.ts', 'src/**/*.js']
+ const exclude = tsconfig.exclude || ['node_modules', 'test/**/*', 'dist']
+
+ // 使用 globSync 查找文件(同步方式)
+ return globSync(include, {
+ cwd: PROJECT_ROOT,
+ ignore: exclude,
+ absolute: true,
+ })
+ }
+}
+
+// Running statistics, updated by checkFile() and reported by main():
+// files containing at least one require(), total require() calls seen,
+// number of unresolved calls, and the details of each unresolved call.
+let totalFiles = 0
+let totalRequires = 0
+let invalidRequires = 0
+const invalidList = []
+
+/**
+ * 解析 require 语句,提取模块路径
+ */
+function extractRequires(content, filePath) {
+ const requires = []
+
+ // 匹配 require('xxx') 或 require("xxx")
+ const requireRegex = /require\s*\(\s*['"]([^'"]+)['"]\s*\)/g
+ let match
+
+ while ((match = requireRegex.exec(content)) !== null) {
+ const modulePath = match[1]
+ const lineNumber = content.substring(0, match.index).split('\n').length
+
+ requires.push({
+ module: modulePath,
+ line: lineNumber,
+ column: match.index - content.lastIndexOf('\n', match.index) - 1,
+ })
+ }
+
+ return requires
+}
+
+/**
+ * 检查模块是否存在
+ */
+function checkModuleExists(modulePath, currentFile) {
+ // 相对路径
+ if (modulePath.startsWith('.')) {
+ const currentDir = path.dirname(currentFile)
+ const resolvedPath = path.resolve(currentDir, modulePath)
+
+ // 检查 .js, .ts, .json, 或目录下的 index.js/index.ts
+ const extensions = ['.js', '.ts', '.json', '']
+ for (const ext of extensions) {
+ const fullPath = resolvedPath + ext
+ if (fs.existsSync(fullPath) && fs.statSync(fullPath).isFile()) {
+ return { exists: true, path: fullPath }
+ }
+ }
+
+ // 检查目录下的 index 文件
+ if (fs.existsSync(resolvedPath) && fs.statSync(resolvedPath).isDirectory()) {
+ for (const ext of ['.js', '.ts', '.json']) {
+ const indexPath = path.join(resolvedPath, 'index' + ext)
+ if (fs.existsSync(indexPath)) {
+ return { exists: true, path: indexPath }
+ }
+ }
+ // 目录存在但没有 index 文件,也算存在(可能是 package.json 的 main)
+ return { exists: true, path: resolvedPath, note: 'directory without index' }
+ }
+
+ return { exists: false, path: resolvedPath }
+ }
+
+ // node_modules 中的模块
+ // 检查是否是内置模块
+ const builtinModules = require('module').builtinModules
+ if (builtinModules.includes(modulePath)) {
+ return { exists: true, path: 'builtin', note: 'Node.js builtin module' }
+ }
+
+ // 检查 node_modules
+ let checkPath = currentFile
+ while (checkPath !== path.dirname(checkPath)) {
+ const nodeModulesPath = path.join(checkPath, 'node_modules', modulePath)
+ if (fs.existsSync(nodeModulesPath)) {
+ return { exists: true, path: nodeModulesPath }
+ }
+ checkPath = path.dirname(checkPath)
+ }
+
+ // 检查项目根目录的 node_modules
+ const rootNodeModules = path.join(PROJECT_ROOT, 'node_modules', modulePath)
+ if (fs.existsSync(rootNodeModules)) {
+ return { exists: true, path: rootNodeModules }
+ }
+
+ return { exists: false, path: modulePath }
+}
+
+/**
+ * 检查单个文件
+ */
+function checkFile(filePath) {
+ try {
+ const content = fs.readFileSync(filePath, 'utf-8')
+ const requires = extractRequires(content, filePath)
+
+ if (requires.length === 0) {
+ return
+ }
+
+ totalFiles++
+ totalRequires += requires.length
+
+ for (const req of requires) {
+ const checkResult = checkModuleExists(req.module, filePath)
+
+ if (!checkResult.exists) {
+ invalidRequires++
+ invalidList.push({
+ file: path.relative(PROJECT_ROOT, filePath),
+ line: req.line,
+ column: req.column,
+ module: req.module,
+ resolved: checkResult.path,
+ })
+ }
+ }
+ } catch (error) {
+ console.error(`Error reading file ${filePath}:`, error.message)
+ }
+}
+
+/**
+ * 主函数
+ */
+function main() {
+ // 使用 TypeScript 编译器获取实际会编译的文件
+ const files = getTypeScriptFiles()
+
+ // 检查每个文件
+ for (const file of files) {
+ checkFile(file)
+ }
+
+ // 输出结果(简化版,类似编译器错误输出)
+ if (invalidRequires > 0) {
+ // 按文件分组
+ const grouped = {}
+ for (const item of invalidList) {
+ if (!grouped[item.file]) {
+ grouped[item.file] = []
+ }
+ grouped[item.file].push(item)
+ }
+
+ // 输出错误,格式类似 TypeScript 编译器
+ for (const [file, items] of Object.entries(grouped).sort()) {
+ for (const item of items) {
+ console.error(`${file}(${item.line},${item.column + 1}): error: Cannot find module '${item.module}'`)
+ }
+ }
+
+ console.error(`\nFound ${invalidRequires} error(s).`)
+ process.exit(1)
+ }
+
+ // 成功时输出简要信息
+ console.log(`Checked ${totalRequires} require() call(s) in ${totalFiles} file(s). All valid.`)
+ process.exit(0)
+}
+
+// Run the check. Any exception escaping main() is logged and turned into
+// exit status 1.
+try {
+ main()
+} catch (error) {
+ console.error('执行出错:', error)
+ process.exit(1)
+}
+
diff --git a/install_deps.sh b/install_deps.sh
index b3efd94c..292384dc 100644
--- a/install_deps.sh
+++ b/install_deps.sh
@@ -4,7 +4,7 @@
detect_platform() {
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
ARCH=$(uname -m)
-
+
case "$OS" in
linux)
case "$ARCH" in
@@ -37,7 +37,6 @@ check_directory() {
# Download binary files
download_binaries() {
PLATFORM=$1
-
echo "[INFO] Downloading latest release binaries for platform: $PLATFORM..."
# Create target directories
@@ -91,4 +90,4 @@ main() {
echo "[INFO] Build completed successfully."
}
-main "$@"
\ No newline at end of file
+main "$@"
diff --git a/package.json b/package.json
index be1f393d..e629d26a 100644
--- a/package.json
+++ b/package.json
@@ -5,8 +5,8 @@
"main": "./dist/main.js",
"bin": "./dist/main.js",
"dependencies": {
- "@ant-yasa/uast-parser-java-js": "^0.1.49",
- "@ant-yasa/uast-spec": "^0.1.26",
+ "@ant-yasa/uast-parser-java-js": "^0.2.9",
+ "@ant-yasa/uast-spec": "^0.2.9",
"@babel/core": "^7.14.6",
"@babel/parser": "^7.16.4",
"@babel/plugin-proposal-decorators": "^7.14.5",
@@ -33,7 +33,7 @@
"node-uuid": "^1.4.8",
"semver": "^7.3.8",
"tmp": "^0.2.3",
- "tsx": "^4.20.6",
+ "tsx": "^4.21.0",
"underscore": "^1.9.1",
"xml2js": "^0.6.2"
},
@@ -49,6 +49,7 @@
"@typescript-eslint/parser": "^5.62.0",
"async": "^3.2.0",
"babel-preset-es2015": "^6.24.1",
+ "baseline-browser-mapping": "^2.9.11",
"chai": "^4.2.0",
"eslint": "^8.57.1",
"eslint-config-airbnb-base": "^15.0.0",
@@ -67,7 +68,7 @@
"gulp-rename": "^2.0.0",
"gulp-uglify": "^3.0.2",
"gulp-uglify-es": "^3.0.0",
- "javascript-obfuscator": "^2.19.0",
+ "javascript-obfuscator": "^4.1.0",
"jscpd": "^4.0.5",
"jsdoc": "^4.0.2",
"mocha": "^8.4.0",
@@ -86,7 +87,9 @@
"test-java": "npx mocha --require tsx/cjs test/java/test-java-benchmark.ts",
"test-go": "npx mocha --require tsx/cjs test/go/test-go-benchmark.ts",
"test-python": "npx mocha --require tsx/cjs test/python/test-python-benchmark.ts",
- "test-all": "npm run test-js && npm run test-java && npm run test-go && npm run test-python",
+ "test-callchain": "npx mocha --require tsx/cjs test/callchain/test-callchain-benchmark.ts",
+ "test-callargs": "npx mocha --require tsx/cjs test/callargs/test-callargs.ts",
+ "test-all": "npm run test-callargs && npm run test-js && npm run test-java && npm run test-go && npm run test-python && npm run test-callchain",
"build": "bash build.sh",
"lint": "eslint .",
"lint:duplicates": "jscpd .",
@@ -168,7 +171,9 @@
],
"assets": [
"dist/**/*",
- "resource/**/*"
+ "resource/**/*",
+ "deps/uast4go/uast4go",
+ "deps/uast4py/uast4py"
],
"targets": [
"node18-macos-arm64",
diff --git a/resource/checker/checker-config.json b/resource/checker/checker-config.json
index 90519bfa..2638516a 100644
--- a/resource/checker/checker-config.json
+++ b/resource/checker/checker-config.json
@@ -140,6 +140,11 @@
"checkerPath": "checker/taint/python/tornado-taint-checker.ts",
"description": "python Tornado框架 entrypoint采集以及框架source添加"
},
+ {
+ "checkerId": "taint_flow_python_script_input",
+ "checkerPath": "checker/taint/python/script-taint-checker.ts",
+ "description": "Python脚本命令行参数source添加,支持argparse/sys.argv/input/os.environ/getopt等"
+ },
{
"checkerId": "taint_flow_test",
"checkerPath": "checker/taint/test-taint-checker.ts",
@@ -151,11 +156,6 @@
"checkerPath": "checker/taint/go/restful-entrypoint-collect-checker.ts",
"description": "go-restful entrypoint采集以及框架source添加"
},
- {
- "checkerId": "echo-entrypoint-collect-checker",
- "checkerPath": "checker/taint/go/echo-entrypoint-collect-checker.ts",
- "description": "echo entrypoint采集以及框架source添加"
- },
{
"checkerId": "beego-entrypoint-collect-checker",
"checkerPath": "checker/taint/go/beego-entrypoint-collect-checker.ts",
@@ -170,5 +170,29 @@
"checkerId": "get_ast_source_code",
"checkerPath": "checker/sdk/get-ast-source-code-checker.ts",
"description": "获取AST对应的源码"
+ },
+ {
+ "checkerId": "callchain_java",
+ "checkerPath": "checker/callchain/java/java-callchain-checker.ts",
+ "description": "Java callchain checker,只检测sink匹配并输出调用链路",
+ "demoRuleConfigPath": "resource/example-rule-config/rule_config_java.json"
+ },
+ {
+ "checkerId": "callchain_go",
+ "checkerPath": "checker/callchain/go/go-callchain-checker.ts",
+ "description": "Go callchain checker,只检测sink匹配并输出调用链路",
+ "demoRuleConfigPath": "resource/example-rule-config/rule_config_go.json"
+ },
+ {
+ "checkerId": "callchain_js",
+ "checkerPath": "checker/callchain/js/js-callchain-checker.ts",
+ "description": "JavaScript callchain checker,只检测sink匹配并输出调用链路",
+ "demoRuleConfigPath": "resource/example-rule-config/rule_config_js.json"
+ },
+ {
+ "checkerId": "callchain_python",
+ "checkerPath": "checker/callchain/python/python-callchain-checker.ts",
+ "description": "Python callchain checker,只检测sink匹配并输出调用链路",
+ "demoRuleConfigPath": "resource/example-rule-config/rule_config_python.json"
}
]
diff --git a/resource/checker/checker-pack-config.json b/resource/checker/checker-pack-config.json
index 3d7b95fb..21387ea4 100644
--- a/resource/checker/checker-pack-config.json
+++ b/resource/checker/checker-pack-config.json
@@ -6,7 +6,6 @@
"taint_flow_go_input",
"cobra.Command-builtIn",
"go-restful-entryPoints-collect-checker",
- "echo-entrypoint-collect-checker",
"beego-entrypoint-collect-checker",
"gorilla-mux-entrypoint-collect-checker",
"gRpc-entryPoint-collect-checker",
@@ -24,6 +23,7 @@
"taint_flow_gin_input_inner",
"cobra.Command-builtIn",
"go-restful-entryPoints-collect-checker",
+ "beego-entrypoint-collect-checker",
"gorilla-mux-entrypoint-collect-checker",
"gRpc-entryPoint-collect-checker",
"go-main-entryPoints-collection",
@@ -125,7 +125,33 @@
"callgraph"
],
"description": "java的sdk-蚂蚁内部使用的规则包"
+ },
+ {
+ "checkerPackId": "callchain-java",
+ "checkerIds": [
+ "callchain_java"
+ ],
+ "description": "Java callchain checker - 只检测sink匹配"
+ },
+ {
+ "checkerPackId": "callchain-go",
+ "checkerIds": [
+ "callchain_go"
+ ],
+ "description": "Go callchain checker - 只检测sink匹配"
+ },
+ {
+ "checkerPackId": "callchain-js",
+ "checkerIds": [
+ "callchain_js"
+ ],
+ "description": "JavaScript callchain checker - 只检测sink匹配"
+ },
+ {
+ "checkerPackId": "callchain-python",
+ "checkerIds": [
+ "callchain_python"
+ ],
+ "description": "Python callchain checker - 只检测sink匹配"
}
-
-
]
diff --git a/test.js b/resource/checker/lib-arg-to-this-sid-blacklist.json
similarity index 100%
rename from test.js
rename to resource/checker/lib-arg-to-this-sid-blacklist.json
diff --git a/resource/example-rule-config/rule_config_go.json b/resource/example-rule-config/rule_config_go.json
index 71dde301..2770fc3d 100644
--- a/resource/example-rule-config/rule_config_go.json
+++ b/resource/example-rule-config/rule_config_go.json
@@ -468,15 +468,6 @@
}
],
"FuncCallArgTaintSource": [
- {
- "args": [
- "0"
- ],
- "calleeType": "echo.Context",
- "fsig": "Bind",
- "scopeFile": "all",
- "scopeFunc": "all"
- },
{
"args": [
"0"
diff --git a/resource/example-rule-config/rule_config_python.json b/resource/example-rule-config/rule_config_python.json
index 1b866057..93de362d 100644
--- a/resource/example-rule-config/rule_config_python.json
+++ b/resource/example-rule-config/rule_config_python.json
@@ -1,11 +1,6 @@
[
{
- "checkerIds": [
- "taint_flow_python_input",
- "taint_flow_python_input_inner",
- "taint_flow_python_django_input",
- "taint_flow_python_tornado_input"
- ],
+ "checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input"],
"sources": {
"FuncCallReturnValueTaintSource": [
{
diff --git a/resource/java/class-hierarchy-and-modeling.json b/resource/java/class-hierarchy-and-modeling.json
index 102068b7..6124f838 100644
--- a/resource/java/class-hierarchy-and-modeling.json
+++ b/resource/java/class-hierarchy-and-modeling.json
@@ -157,15 +157,17 @@
"sun.net.www.http.KeepAliveCache"
]
},
- "java.util.concurrent.Executor": {
- "modelingFilePath": "./builtins/executor-builtins",
+ "java.util.concurrent.ExecutorService": {
+ "modelingFilePath": "./builtins/executorservice-builtins",
"subTypeList": [
"java.util.concurrent.AbstractExecutorService",
- "java.util.concurrent.ExecutorService",
"java.util.concurrent.ForkJoinPool",
"java.util.concurrent.ScheduledExecutorService",
"java.util.concurrent.ScheduledThreadPoolExecutor",
- "java.util.concurrent.ThreadPoolExecutor"
+ "java.util.concurrent.ThreadPoolExecutor",
+ "com.alipay.sofa.common.thread.SofaScheduledThreadPoolExecutor",
+ "com.alipay.sofa.common.thread.SofaThreadPoolExecutor",
+ "com.alipay.sofa.common.thread.SofaThreadPoolTaskExecutor"
]
},
"java.util.Timer": {
@@ -183,5 +185,29 @@
"java.util.concurrent.atomic.AtomicReference": {
"modelingFilePath": "./builtins/atomicreference-builtins",
"subTypeList": []
+ },
+ "java.lang.Class": {
+ "modelingFilePath": "./builtins/class-builtins",
+ "subTypeList": []
+ },
+ "java.lang.Object": {
+ "modelingFilePath": "./builtins/object-builtins",
+ "subTypeList": []
+ },
+ "java.util.stream.Stream": {
+ "modelingFilePath": "./builtins/stream-builtins",
+ "subTypeList": []
+ },
+ "com.baomidou.mybatisplus.core.conditions.query.QueryWrapper": {
+ "modelingFilePath": "./builtins/querywrapper-builtins",
+ "subTypeList": []
+ },
+ "com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper": {
+ "modelingFilePath": "./builtins/lambdaquerywrapper-builtins",
+ "subTypeList": []
+ },
+ "java.util.Arrays": {
+ "modelingFilePath": "./builtins/arrays-builtins",
+ "subTypeList": []
}
}
diff --git a/resource/python/python-default-rule.json b/resource/python/python-default-rule.json
index 9b8d13fc..69bd25b1 100644
--- a/resource/python/python-default-rule.json
+++ b/resource/python/python-default-rule.json
@@ -1,15 +1,17 @@
[
{
- "checkerIds": [
- "taint_flow_python_input",
- "taint_flow_python_input_inner"
- ],
+ "checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner"],
"sources": {
"TaintSource": [
{
"path": "flask.request",
"scopeFile": "all",
"scopeFunc": "all"
+ },
+ {
+ "path": "request",
+ "scopeFile": "all",
+ "scopeFunc": "all"
}
]
},
@@ -17,9 +19,7 @@
"FuncCallTaintSink": [
{
"fsig": "os.system",
- "args": [
- 0
- ]
+ "args": [0]
}
]
}
diff --git a/resource/tag-propagation/lib-arg-to-this-sid-blacklist.json b/resource/tag-propagation/lib-arg-to-this-sid-blacklist.json
new file mode 100644
index 00000000..ca47f165
--- /dev/null
+++ b/resource/tag-propagation/lib-arg-to-this-sid-blacklist.json
@@ -0,0 +1,28 @@
+{
+ "sidKeywords": [
+ "log",
+ "logger",
+ "logging",
+ "console",
+ "print",
+ "println",
+ "printf",
+ "debug",
+ "trace",
+ "info",
+ "warn",
+ "error",
+ "fatal",
+ "format",
+ "formatter",
+ "stringify",
+ "serialize",
+ "dump",
+ "inspect",
+ "metric",
+ "metrics",
+ "telemetry",
+ "audit",
+ "report"
+ ]
+}
diff --git a/resource/tag-propagation/lib-func-tag-propagation-rule.json b/resource/tag-propagation/lib-func-tag-propagation-rule.json
index d645142f..e8a44f2b 100644
--- a/resource/tag-propagation/lib-func-tag-propagation-rule.json
+++ b/resource/tag-propagation/lib-func-tag-propagation-rule.json
@@ -19,29 +19,45 @@
},
{
"func": {
- "calleeType": "CompilerConfiguration",
- "fsig": "addCompilationCustomizers"
+ "calleeType": "BeanUtils",
+ "fsig": "copyProperties",
+ "argNum": 2
},
"source": {
- "type": "ARG"
+ "type": "ARG",
+ "index": 0
},
"target": {
- "type": "THIS"
+ "type": "ARG",
+ "index": 1
}
},
{
"func": {
- "calleeType": "BeanUtils",
- "fsig": "copyProperties",
- "argNum": 2
+ "calleeType": "ProcessBuilder",
+ "fsig": "redirectError",
+ "argNum": 1
},
"source": {
+ "type": "THIS"
+ },
+ "target": {
"type": "ARG",
"index": 0
+ }
+ },
+ {
+ "func": {
+ "calleeType": "ProcessBuilder",
+ "fsig": "redirectOutput",
+ "argNum": 1
+ },
+ "source": {
+ "type": "THIS"
},
"target": {
"type": "ARG",
- "index": 1
+ "index": 0
}
}
]
diff --git a/src/checker/antql/rules/antql-getbaseclass.ts b/src/checker/antql/rules/antql-getbaseclass.ts
index 88cfa487..85055637 100644
--- a/src/checker/antql/rules/antql-getbaseclass.ts
+++ b/src/checker/antql/rules/antql-getbaseclass.ts
@@ -1,7 +1,7 @@
import type { Finding } from '../../../engine/analyzer/common/common-types'
const LocationUtil = require('../util/location-util')
-const QidUnifyUtil = require('../util/qid-unify-util')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
const Config = require('../../../config')
const Checker = require('../../common/checker')
const InteractiveOutputStrategy = require('../../common/output/interactive-output-strategy')
@@ -86,8 +86,8 @@ class AntqlGetBaseClass extends Checker {
// 获取基类
const superSymbol = info?.val?.super
if (superSymbol && (superSymbol?.vtype === 'object' || superSymbol?.vtype === 'class')) {
- const superClassId = QidUnifyUtil.unify(superSymbol)
- const classId = QidUnifyUtil.unify(info?.val)
+ const superClassId = QidUnifyUtil.qidUnifyForQL(superSymbol)
+ const classId = QidUnifyUtil.qidUnifyForQL(info?.val)
const nodeLoc = LocationUtil.convertUastLocationToString(node.loc, Config.prefixPath)
diff --git a/src/checker/antql/rules/antql-getdefinition.ts b/src/checker/antql/rules/antql-getdefinition.ts
index 2fdd17e4..122d2471 100644
--- a/src/checker/antql/rules/antql-getdefinition.ts
+++ b/src/checker/antql/rules/antql-getdefinition.ts
@@ -3,7 +3,7 @@ import type { Finding } from '../../../engine/analyzer/common/common-types'
const LocationUtil = require('../util/location-util')
const EntrypointUtil = require('../util/entrypoint-util')
const Config = require('../../../config')
-const QidUnifyUtil = require('../util/qid-unify-util')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
const logger = require('../../../util/logger')(__filename)
const Checker = require('../../common/checker')
const InteractiveOutputStrategy = require('../../common/output/interactive-output-strategy')
@@ -85,7 +85,8 @@ class AntQLGetDefinition extends Checker {
const fullCallGraphEntrypoint = fullCallGraphFileEntryPoint.getEntryPointsUsingCallGraphByLoc(
LocationUtil.convertQLLocationStringListToUastLocation([this.input], Config.prefixPath),
analyzer.ainfo?.callgraph,
- analyzer.fileManager
+ analyzer.fileManager,
+ analyzer
)
const uniqueEntries = EntrypointUtil.mergeEntryPoints(fullCallGraphEntrypoint, analyzer.entryPoints)
analyzer.entryPoints = Array.from(uniqueEntries.values())
@@ -107,7 +108,7 @@ class AntQLGetDefinition extends Checker {
const qlLocationString = LocationUtil.findUastLocationInList(node?.loc, [this.input], Config.prefixPath)
if (qlLocationString) {
const finding: Finding = {
- output: QidUnifyUtil.unify(info.val),
+ output: QidUnifyUtil.qidUnifyForQL(info.val),
}
this.resultManager.newFinding(finding, InteractiveOutputStrategy.outputStrategyId)
}
diff --git a/src/checker/antql/rules/antql-hasflow.ts b/src/checker/antql/rules/antql-hasflow.ts
index ed61469c..84fdf94c 100644
--- a/src/checker/antql/rules/antql-hasflow.ts
+++ b/src/checker/antql/rules/antql-hasflow.ts
@@ -111,10 +111,7 @@ class AntQLHasFlow extends TaintChecker {
for (const sourceLoc in this.sourceSymbol) {
const symbol = this.sourceSymbol[sourceLoc]
if (symbol !== '') {
- symbol._has_tags = undefined
- symbol.hasTagRec = undefined
- symbol._tags = undefined
- symbol.trace = undefined
+ symbol.taint.clear()
symbol.value = {}
// symbol.misc_ = {}
}
@@ -152,7 +149,8 @@ class AntQLHasFlow extends TaintChecker {
const fullCallGraphEntrypoint = fullCallGraphFileEntryPoint.getEntryPointsUsingCallGraphByLoc(
LocationUtil.convertQLLocationStringListToUastLocation(this.sourceLocs, Config.prefixPath),
analyzer.ainfo?.callgraph,
- analyzer.fileManager
+ analyzer.fileManager,
+ analyzer
)
const uniqueEntries = EntrypointUtil.mergeEntryPoints(fullCallGraphEntrypoint, analyzer.entryPoints)
analyzer.entryPoints = Array.from(uniqueEntries.values())
@@ -184,13 +182,14 @@ class AntQLHasFlow extends TaintChecker {
*/
markTaintSource(unit: any, { node, kind }: { node: any; kind: string }): void {
SourceUtil.setTaint(unit, kind)
+ const existingTrace = unit.taint.getFirstTrace()
if (
- unit.trace &&
- Array.isArray(unit.trace) &&
- (unit.trace[0]?.tag !== 'SOURCE: ' ||
- (typeof unit.trace[0]?.str === 'string' && !unit.trace[0].str.includes('SOURCE: ')))
+ existingTrace &&
+ Array.isArray(existingTrace) &&
+ (existingTrace[0]?.tag !== 'SOURCE: ' ||
+ (typeof existingTrace[0]?.str === 'string' && !existingTrace[0].str.includes('SOURCE: ')))
) {
- unit.trace = undefined
+ unit.taint.clearTrace()
} else {
const startLine = node?.loc?.start?.line
const endLine = node?.loc?.end?.line
@@ -203,10 +202,7 @@ class AntQLHasFlow extends TaintChecker {
affectedNodeName: AstUtil.prettyPrint(node),
}
- if (!unit.trace) {
- unit.trace = []
- }
- unit.trace.push(trace)
+ unit.taint.addTraceToAllTags(trace)
}
}
@@ -289,7 +285,7 @@ class AntQLHasFlow extends TaintChecker {
): any {
const finding = BasicRuleHandler.getFinding(this.getCheckerId(), this.desc, currentNode)
// const finding = this.mng.newFinding(this.getCheckerId(), currentNode, currentNode.loc, sourceNode, fclos.id)
- if (finding && sourceNode.hasTagRec) {
+ if (finding && sourceNode.taint?.isTaintedRec) {
const sourceTrace = FindingUtil.getTrace(sourceNode, tag)
if (sourceTrace.length > 0) {
let flag = false
diff --git a/src/checker/antql/rules/antql-hasfunctioncall.ts b/src/checker/antql/rules/antql-hasfunctioncall.ts
index b99961ca..4254c794 100644
--- a/src/checker/antql/rules/antql-hasfunctioncall.ts
+++ b/src/checker/antql/rules/antql-hasfunctioncall.ts
@@ -5,7 +5,7 @@ const LocationUtil = require('../util/location-util')
const EntrypointUtil = require('../util/entrypoint-util')
const Config = require('../../../config')
const SymbolUtil = require('../util/symbol-util')
-const QidUnifyUtil = require('../util/qid-unify-util')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
const Checker = require('../../common/checker')
const InteractiveOutputStrategy = require('../../common/output/interactive-output-strategy')
@@ -21,7 +21,7 @@ class AntQLHasFunctionCall extends Checker {
output: string[]
- symbolMap: Map
+ antQLSymbolMap: Map
input!: string
@@ -37,7 +37,7 @@ class AntQLHasFunctionCall extends Checker {
this.kit = mng.kit
this.status = false
this.output = []
- this.symbolMap = new Map()
+ this.antQLSymbolMap = new Map()
}
/**
@@ -77,19 +77,19 @@ class AntQLHasFunctionCall extends Checker {
}
if (this.input.includes('*') || this.input.includes('**')) {
- const qidList = Array.from(this.symbolMap.keys())
+ const qidList = Array.from(this.antQLSymbolMap.keys())
const output: string[] = []
for (const qid of qidList) {
if (SymbolUtil.matchPattern(qid, this.input)) {
- const locations = this.symbolMap.get(qid)
+ const locations = this.antQLSymbolMap.get(qid)
if (locations) {
output.push(...locations)
}
}
}
finding.output = output.join(',')
- } else if (this.symbolMap.has(this.input)) {
- const locations = this.symbolMap.get(this.input)
+ } else if (this.antQLSymbolMap.has(this.input)) {
+ const locations = this.antQLSymbolMap.get(this.input)
finding.output = locations ? locations.join(',') : ''
}
this.resultManager.newFinding(finding, InteractiveOutputStrategy.outputStrategyId)
@@ -116,7 +116,8 @@ class AntQLHasFunctionCall extends Checker {
const fullCallGraphEntrypoint = fullCallGraphFileEntryPoint.getEntryPointsUsingCallGraphByKeyWords(
[keyword],
analyzer.ainfo?.callgraph,
- analyzer.fileManager
+ analyzer.fileManager,
+ analyzer
)
const uniqueEntries = EntrypointUtil.mergeEntryPoints(fullCallGraphEntrypoint, analyzer.entryPoints)
const prepareEntryPoints: EntryPoint[] = []
@@ -143,14 +144,14 @@ class AntQLHasFunctionCall extends Checker {
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void {
const { fclos } = info
- const checkQid = QidUnifyUtil.unify(fclos)
+ const checkQid = QidUnifyUtil.qidUnifyForQL(fclos)
if (checkQid) {
const nodeLoc = LocationUtil.convertUastLocationToString(node.loc, Config.prefixPath)
- if (!this.symbolMap.has(checkQid)) {
- this.symbolMap.set(checkQid, [])
+ if (!this.antQLSymbolMap.has(checkQid)) {
+ this.antQLSymbolMap.set(checkQid, [])
}
- const locations = this.symbolMap.get(checkQid)
+ const locations = this.antQLSymbolMap.get(checkQid)
if (locations && !locations.includes(nodeLoc)) {
locations.push(nodeLoc)
}
diff --git a/src/checker/antql/rules/antql-hasproperty.ts b/src/checker/antql/rules/antql-hasproperty.ts
index 325ddd02..a3d42260 100644
--- a/src/checker/antql/rules/antql-hasproperty.ts
+++ b/src/checker/antql/rules/antql-hasproperty.ts
@@ -3,7 +3,7 @@ import type { EntryPoint } from '../../../engine/analyzer/common/entrypoint'
const LocationUtil = require('../util/location-util')
const EntrypointUtil = require('../util/entrypoint-util')
-const QidUnifyUtil = require('../util/qid-unify-util')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
const Config = require('../../../config')
const SymbolUtil = require('../util/symbol-util')
const logger = require('../../../util/logger')(__filename)
@@ -22,7 +22,7 @@ class AntQLHasProperty extends Checker {
output: string[]
- symbolMap: Map
+ antQLSymbolMap: Map
alreadyExecutedEntries: Map
@@ -38,7 +38,7 @@ class AntQLHasProperty extends Checker {
this.kit = mng.kit
this.status = false
this.output = []
- this.symbolMap = new Map()
+ this.antQLSymbolMap = new Map()
this.alreadyExecutedEntries = new Map()
}
@@ -72,18 +72,18 @@ class AntQLHasProperty extends Checker {
}
if (this.input.includes('*') || this.input.includes('**')) {
const output: string[] = []
- const qidList = Array.from(this.symbolMap.keys())
+ const qidList = Array.from(this.antQLSymbolMap.keys())
for (const qid of qidList) {
if (SymbolUtil.matchPattern(qid, this.input)) {
- const locations = this.symbolMap.get(qid)
+ const locations = this.antQLSymbolMap.get(qid)
if (locations) {
output.push(...locations)
}
}
}
finding.output = output.join(',')
- } else if (this.symbolMap.has(this.input)) {
- const locations = this.symbolMap.get(this.input)
+ } else if (this.antQLSymbolMap.has(this.input)) {
+ const locations = this.antQLSymbolMap.get(this.input)
finding.output = locations ? locations.join(',') : ''
}
this.status = false
@@ -111,7 +111,8 @@ class AntQLHasProperty extends Checker {
const fullCallGraphEntrypoint = fullCallGraphFileEntryPoint.getEntryPointsUsingCallGraphByKeyWords(
[keyword],
analyzer.ainfo?.callgraph,
- analyzer.fileManager
+ analyzer.fileManager,
+ analyzer
)
const uniqueEntries = EntrypointUtil.mergeEntryPoints(fullCallGraphEntrypoint, analyzer.entryPoints)
// analyzer.entryPoints = Array.from(uniqueEntries.values())
@@ -152,13 +153,13 @@ class AntQLHasProperty extends Checker {
* @param info
*/
private checkIsIdentifier(node: any, res: any, scope: any, info: any): void {
- const checkQid = QidUnifyUtil.unify(res)
+ const checkQid = QidUnifyUtil.qidUnifyForQL(res)
if (checkQid) {
const nodeLoc = LocationUtil.convertUastLocationToString(node.loc, Config.prefixPath)
- if (!this.symbolMap.has(checkQid)) {
- this.symbolMap.set(checkQid, [])
+ if (!this.antQLSymbolMap.has(checkQid)) {
+ this.antQLSymbolMap.set(checkQid, [])
}
- const locations = this.symbolMap.get(checkQid)
+ const locations = this.antQLSymbolMap.get(checkQid)
if (locations && !locations.includes(nodeLoc)) {
locations.push(nodeLoc)
}
diff --git a/src/checker/antql/util/entrypoint-util.ts b/src/checker/antql/util/entrypoint-util.ts
index f8606b1e..369d39d9 100644
--- a/src/checker/antql/util/entrypoint-util.ts
+++ b/src/checker/antql/util/entrypoint-util.ts
@@ -28,7 +28,7 @@ function mergeEntryPoints(entryPoints: EntryPoint[], analyzerEntryPoints: EntryP
* @param entryPoint
*/
function getEntryPointUniqueKey(entryPoint: EntryPoint): string {
- const loc = entryPoint?.entryPointSymVal?.ast?.loc
+ const loc = entryPoint?.entryPointSymVal?.ast?.node?.loc
if (loc) {
return `${loc?.sourcefile}:${loc?.start?.line}:${loc?.start?.column}:${loc?.end?.line}:${loc?.end?.column}`
}
diff --git a/src/checker/antql/util/qid-unify-util.ts b/src/checker/antql/util/qid-unify-util.ts
deleted file mode 100644
index 6ac4f6a4..00000000
--- a/src/checker/antql/util/qid-unify-util.ts
+++ /dev/null
@@ -1,147 +0,0 @@
-interface SymbolLike {
- qid?: string
- vtype?: string
- sid?: string
- [key: string]: any
-}
-
-/**
- * 统一各语言的qid
- */
-class QidUnifyUtil {
- symbol: SymbolLike | undefined
-
- value: string
-
- /**
- * 需要传符号值
- * @param symbol
- */
- constructor(symbol?: SymbolLike) {
- this.symbol = symbol
- this.value = symbol?.qid || ''
- }
-
- /**
- * 统一路径形式,将开头的"/"去掉,并将每一层目录替换成".", 即 /tp/2.func ==> tp.2.func
- */
- removePath(): QidUnifyUtil {
- this.value = this.value?.replace(/^\//, '').replace(/\//g, '.')
- return this
- }
-
- /**
- * python中找不到import时,会以"syslib_from."开头
- */
- removeSyslibFrom(): QidUnifyUtil {
- if (this.value.startsWith('syslib_from.')) {
- this.value = this.value.replace('syslib_from.', '')
- }
- return this
- }
-
- /**
- * js-chair框架会将agg替换成Egg.Application,将ctx替换成Egg.Context,替换回来
- */
- removeChair(): QidUnifyUtil {
- this.value = this.value.replace('Egg.Application', 'app')
- this.value = this.value.replace('Egg.Context', 'ctx')
- return this
- }
-
- /**
- * 去除所有的括号及括号内内容(包括嵌套)——更通用的情况
- */
- removeParentheses(): QidUnifyUtil {
- let result = ''
- let level = 0
- for (const char of this.value) {
- if (char === '(') {
- level++
- } else if (char === ')') {
- if (level > 0) level--
- } else if (level === 0) {
- result += char
- }
- }
- this.value = result
- return this
- }
-
- /**
- * remove *_scope.写法,即1.calculate.calculate_scope..process ==> 1.calculate.process
- */
- removeBlock(): QidUnifyUtil {
- if (!this.value.includes(' 0 ? temp[i - 1] : 'NaN'
- if (curStr === `${preStr}_scope`) {
- continue
- }
- // 移除掉多余的
- if (curStr.startsWith('.,去掉
- */
- removeInstance(): QidUnifyUtil {
- this.value = this.value.replace('', '')
- return this
- }
-
- /**
- * 统一去掉
- */
- removeGlobal(): QidUnifyUtil {
- this.value = this.value.replace('.', '')
- return this
- }
-
- /**
- * 获取当前的值
- */
- get(): string {
- return this.value
- }
-
- /**
- * 静态方法,用于调用上面所有的方法,一步到位统一符号值qid
- * @param symbol
- */
- static unify(symbol?: SymbolLike): string {
- let unifyID = symbol?.qid || ''
- if (symbol?.vtype !== 'primitive' && symbol?.vtype !== 'uninitialized') {
- unifyID = new QidUnifyUtil(symbol)
- .removePath()
- .removeSyslibFrom()
- .removeChair()
- .removeParentheses()
- .removeBlock()
- .removeInstance()
- .removeGlobal()
- .get()
- }
- return unifyID
- }
-}
-
-module.exports = QidUnifyUtil
diff --git a/src/checker/callchain/callchain-checker.ts b/src/checker/callchain/callchain-checker.ts
new file mode 100644
index 00000000..78b1c357
--- /dev/null
+++ b/src/checker/callchain/callchain-checker.ts
@@ -0,0 +1,326 @@
+import type { CallInfo } from '../../engine/analyzer/common/call-args'
+
+const _ = require('lodash')
+const Checker = require('../common/checker')
+const AstUtil = require('../../util/ast-util')
+const SourceLine = require('../../engine/analyzer/common/source-line')
+const entryPointConfig = require('../../engine/analyzer/common/current-entrypoint')
+const RulesBasicHandler = require('../common/rules-basic-handler')
+const Config = require('../../config')
+const QidUnifyUtil = require('../../util/qid-unify-util')
+const CallchainOutputStrategy = require('../common/output/callchain-output-strategy')
+
+/**
+ * Base class for call-chain checkers.
+ * It only detects sink matches and reports the call chains that reach them;
+ * it does not check for taint flow.
+ */
+class CallchainChecker extends Checker {
+  /**
+   * Creates the checker and initialises its sink bookkeeping fields.
+   * @param resultManager forwarded to the base Checker constructor
+   * @param checkerId forwarded to the base Checker constructor
+   */
+  constructor(resultManager: any, checkerId: any) {
+    super(resultManager, checkerId)
+    this.sinkRuleArray = undefined
+    this.matchSinkRuleResultMap = new Map()
+  }
+
+  /**
+   * Extracts the source file of a function closure as a relative path.
+   * @param fclos value exposing `ast.node.loc.sourcefile` or `loc.sourcefile`
+   * @returns the relative path, or '' when no source file is recorded
+   */
+  extractFilePath(fclos: any): string {
+    const sourcefile = fclos?.ast?.node?.loc?.sourcefile || fclos?.loc?.sourcefile
+    if (!sourcefile) return ''
+    return this.toRelativePath(sourcefile)
+  }
+
+  /**
+   * Resolves the relative source file of an AST node by walking up its
+   * `parent` links until a node carrying `loc.sourcefile` is found.
+   * @param node AST node to start from
+   * @param fallbackFclos closure consulted when no ancestor records a source file
+   * @returns the relative path, or '' when nothing is recorded
+   */
+  private resolveNodeSourcefile(node: any, fallbackFclos: any): string {
+    let srcNode = node
+    while (srcNode && !srcNode?.loc?.sourcefile) {
+      srcNode = srcNode.parent
+    }
+    if (srcNode?.loc?.sourcefile) {
+      return this.toRelativePath(srcNode.loc.sourcefile)
+    }
+    return this.extractFilePath(fallbackFclos)
+  }
+
+  /**
+   * Builds the call-chain description from `state.callstack`.
+   * Each callstack entry yields the CallstackElement fields (type, nodeHash,
+   * fullName) plus readable extras (function, file, line, column); the sink
+   * call site (CallExpression node) is appended last.
+   * @param callstack closures on the call stack; falsy entries are skipped
+   * @param sinkNode AST node of the sink call; nothing is appended when falsy
+   * @param sinkFclos closure of the sink callee; may be missing
+   * @returns the call-chain entries in callstack order, sink last
+   */
+  buildCallstackInfo(callstack: any[], sinkNode: any, sinkFclos: any): any[] {
+    const result: any[] = []
+
+    // 1. Function call chain from the entry point to the sink (kept aligned with the sarif CallstackElement)
+    for (const fclos of callstack || []) {
+      if (!fclos) continue
+
+      const astNode = fclos.ast?.node
+      const loc = astNode?.loc
+      const qid = fclos.qid || ''
+
+      result.push({
+        // Standard CallstackElement fields
+        type: 0,
+        nodeHash: astNode?._meta?.nodehash || null,
+        fullName: qid ? QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(qid) : null,
+        // Extra readable information
+        function: astNode?.id?.name || fclos.name || fclos.sid || '',
+        file: this.extractFilePath(fclos),
+        line: loc?.start?.line,
+        column: loc?.start?.column,
+      })
+    }
+
+    // 2. The sink call site (CallExpression node)
+    if (sinkNode) {
+      // The sink closure is optional, so every access to it is guarded.
+      const sinkQid = sinkFclos?.qid
+      result.push({
+        // Standard CallstackElement fields
+        type: 1,
+        nodeHash: sinkNode?._meta?.nodehash || null,
+        // Sink call-site information
+        fullName: (sinkQid && QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(sinkQid)) || null,
+        function: sinkNode?.id?.name || sinkFclos?.name || sinkFclos?.sid || '',
+        file: this.resolveNodeSourcefile(sinkNode, sinkFclos),
+        line: sinkNode.loc?.start?.line,
+        column: sinkNode.loc?.start?.column,
+      })
+    }
+
+    return result
+  }
+
+  /**
+   * Converts an absolute path into a path relative to the configured main
+   * directory by stripping `Config.maindirPrefix`, else `Config.maindir`.
+   * @param sourcefile path to convert
+   * @returns the stripped path, the input unchanged when no prefix matches, or '' for empty input
+   */
+  toRelativePath(sourcefile: string): string {
+    if (!sourcefile) return ''
+    if (Config.maindirPrefix && sourcefile.startsWith(Config.maindirPrefix)) {
+      return sourcefile.substring(Config.maindirPrefix.length)
+    }
+    if (Config.maindir && sourcefile.startsWith(Config.maindir)) {
+      return sourcefile.substring(Config.maindir.length)
+    }
+    return sourcefile
+  }
+
+  /**
+   * Describes the sink call site (CallExpression node).
+   * @param node AST node of the sink call
+   * @param fclos closure consulted for the file when the node has none
+   * @returns `{ code, file, line, column }`, or `{}` when `node` is falsy
+   */
+  buildSinkCallSiteInfo(node: any, fclos: any): any {
+    if (!node) return {}
+
+    const { loc } = node
+    return {
+      // Raw callee text, capped at 100 characters
+      code: AstUtil.getRawCode(node.callee || node).slice(0, 100),
+      file: this.resolveNodeSourcefile(node, fclos),
+      line: loc?.start?.line,
+      column: loc?.start?.column,
+    }
+  }
+
+  /**
+   * Converts `state.callsites` into readable call-site entries with relative
+   * paths and appends the sink call site.
+   * @param callsites recorded call sites; each is read for `code`, a node hash and `loc`
+   * @param sinkNode AST node of the sink call; nothing is appended when falsy
+   * @returns the call-site entries, or [] when `callsites` is empty or missing
+   */
+  buildCallsitesInfo(callsites: any[], sinkNode: any): any[] {
+    if (!callsites || callsites.length === 0) {
+      return []
+    }
+    const result = callsites.map((site: any) => {
+      const siteFile = site?.loc?.sourcefile
+      return {
+        code: site?.code,
+        // NOTE(review): call sites were documented as carrying `nodehash` while this code read
+        // `nodeHash`; both spellings are accepted until the producer is confirmed.
+        nodeHash: site?.nodeHash ?? site?.nodehash,
+        file: siteFile ? this.toRelativePath(siteFile) : this.extractFilePath(site),
+        // A call site without location data yields undefined instead of throwing
+        line: site?.loc?.start?.line,
+        column: site?.loc?.start?.column,
+      }
+    })
+    // Append the sink call site (CallExpression node)
+    if (sinkNode) {
+      const sinkFile = sinkNode.loc?.sourcefile
+      result.push({
+        code: AstUtil.getRawCode(sinkNode).slice(0, 100),
+        nodeHash: sinkNode._meta?.nodehash,
+        file: sinkFile ? this.toRelativePath(sinkFile) : this.extractFilePath(sinkNode),
+        line: sinkNode.loc?.start?.line,
+        column: sinkNode.loc?.start?.column,
+      })
+    }
+    return result
+  }
+
+  /**
+   * Fills in the detail fields of a call-chain finding (sinkRule,
+   * sinkAttribute, sinkInfo, entrypoint, trace, callstackInfo, callsitesInfo).
+   * @param finding object produced by buildCallchainFindingObject; may be null
+   * @returns the finding when its callstack and callsites are both non-empty
+   *   and of equal length, otherwise null
+   */
+  buildCallchainFindingDetail(finding: any): any {
+    if (!finding) return null
+
+    const { node: callNode, ruleName: sinkRule, fclos, callstack, callsites } = finding
+    if (callNode) {
+      const trace = SourceLine.getNodeTrace(fclos, callNode)
+      trace.tag = 'SINK: '
+      trace.affectedNodeName = AstUtil.getRawCode(callNode?.callee || callNode).slice(0, 100)
+
+      // ruleName is the fsig, optionally followed by the attribute separator and the attribute.
+      // Only the first separator splits, so an attribute containing the separator stays intact.
+      if (typeof sinkRule === 'string') {
+        const separator = '\nSINK Attribute: '
+        const [rulePart, ...attributeParts] = sinkRule.split(separator)
+        finding.sinkRule = rulePart
+        if (attributeParts.length > 0) {
+          finding.sinkAttribute = attributeParts.join(separator)
+        }
+      }
+
+      finding.sinkInfo = {
+        sinkRule: finding.sinkRule,
+        sinkAttribute: finding.sinkAttribute,
+        callSite: this.buildSinkCallSiteInfo(callNode, fclos),
+      }
+
+      // Keep only the non-object fields of the current entry point
+      finding.entrypoint = _.pickBy(
+        _.clone(entryPointConfig.getCurrentEntryPoint()),
+        (value: any) => !_.isObject(value)
+      )
+
+      finding.trace = [trace]
+      finding.callstackInfo = this.buildCallstackInfo(callstack, callNode, fclos)
+      finding.callsitesInfo = this.buildCallsitesInfo(callsites, callNode)
+    }
+
+    const isChainAligned =
+      callsites?.length > 0 && callstack?.length > 0 && callstack.length === callsites.length
+    return isChainAligned ? finding : null
+  }
+
+  /**
+   * Builds a call-chain finding and fills in its detail fields.
+   * @param checkerId id of the reporting checker
+   * @param checkerDesc description of the reporting checker
+   * @param node AST node of the sink call
+   * @param fclos closure of the sink callee
+   * @param ruleName matched sink rule name, optionally carrying the attribute suffix
+   * @param callstack call stack captured at the sink
+   * @param callsites call sites captured at the sink
+   * @returns the detailed finding, or null when it cannot be built
+   */
+  buildCallchainFinding(
+    checkerId: any,
+    checkerDesc: any,
+    node: any,
+    fclos: any,
+    ruleName: any,
+    callstack: any,
+    callsites: any
+  ): any {
+    const callchainFinding = this.buildCallchainFindingObject(
+      checkerId,
+      checkerDesc,
+      node,
+      fclos,
+      ruleName,
+      callstack,
+      callsites
+    )
+    return this.buildCallchainFindingDetail(callchainFinding)
+  }
+
+  /**
+   * Creates the bare call-chain finding object.
+   * @param checkerId id of the reporting checker
+   * @param checkerDesc description of the reporting checker
+   * @param node AST node of the sink call
+   * @param fclos closure of the sink callee
+   * @param ruleName matched sink rule name, optionally carrying the attribute suffix
+   * @param callstack call stack captured at the sink
+   * @param callsites call sites captured at the sink
+   * @returns the finding with the arguments attached, or null when no base finding is produced
+   */
+  buildCallchainFindingObject(
+    checkerId: any,
+    checkerDesc: any,
+    node: any,
+    fclos: any,
+    ruleName: any,
+    callstack: any,
+    callsites: any
+  ): any {
+    const callchainFinding = RulesBasicHandler.getFinding(checkerId, checkerDesc, node)
+    // Guard against a missing base finding instead of assigning onto it
+    if (!callchainFinding) return null
+    return Object.assign(callchainFinding, { node, fclos, ruleName, callstack, callsites })
+  }
+
+  /**
+   * Builds the finding for a matched sink rule and reports it if it is new.
+   * @param node AST node of the sink call
+   * @param callInfo call information of the sink call (not used here)
+   * @param fclos closure of the sink callee
+   * @param rule matched sink rule; `fsig` and the optional `attribute` form the rule name
+   * @param state analysis state providing `callstack` and `callsites`
+   * @returns true when a finding was reported, otherwise undefined
+   */
+  findArgsAndAddNewFinding(node: any, callInfo: CallInfo | undefined, fclos: any, rule: any, state: any) {
+    let ruleName = rule.fsig
+    if (typeof rule.attribute !== 'undefined') {
+      ruleName += `\nSINK Attribute: ${rule.attribute}`
+    }
+    const callchainFinding = this.buildCallchainFinding(
+      this.getCheckerId(),
+      this.desc,
+      node,
+      fclos,
+      ruleName,
+      state?.callstack,
+      state?.callsites
+    )
+
+    // buildCallchainFinding returns null for an unusable call chain; never report that
+    if (!callchainFinding) return
+    if (!CallchainOutputStrategy.isNewFinding(this.resultManager, callchainFinding)) return
+    this.resultManager.newFinding(callchainFinding, CallchainOutputStrategy.outputStrategyId)
+    return true
+  }
+}
+
+module.exports = CallchainChecker
diff --git a/src/checker/callchain/go/go-callchain-checker.ts b/src/checker/callchain/go/go-callchain-checker.ts
new file mode 100644
index 00000000..7da7a2c2
--- /dev/null
+++ b/src/checker/callchain/go/go-callchain-checker.ts
@@ -0,0 +1,232 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+
+const _ = require('lodash')
+const CallchainChecker = require('../callchain-checker')
+const { matchSinkAtFuncCallWithCalleeType } = require('../../taint/common-kit/sink-util')
+const GoEntryPoint = require('../../../engine/analyzer/golang/common/entrypoint-collector/go-default-entrypoint')
+const FullCallGraphFileEntryPoint = require('../../common/full-callgraph-file-entrypoint')
+const completeEntryPoint = require('../../taint/common-kit/entry-points-util')
+const AstUtil = require('../../../util/ast-util')
+const FileUtil = require('../../../util/file-util')
+
+/**
+ * Go call-chain checker.
+ * Only detects sink matches and outputs call chains; it does not check for taint.
+ */
+class GoCallchainChecker extends CallchainChecker {
+  entryPoints: any[]
+
+  /**
+   * Registers the checker under the id 'callchain_go' with no entry points collected yet.
+   * @param resultManager result manager forwarded to CallchainChecker
+   */
+  constructor(resultManager: any) {
+    super(resultManager, 'callchain_go')
+    this.entryPoints = []
+  }
+
+  /**
+   * Start-of-analysis trigger: collects entry points and stores them on analyzer.mainEntryPoints.
+   * @param analyzer analyzer providing topScope, the checker manager and the call graph
+   * @param scope not used here
+   * @param node not used here
+   * @param state not used here
+   * @param info not used here
+   */
+  triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any) {
+    const { topScope } = analyzer
+    const Config = require('../../../config')
+    const EntryPoint = require('../../../engine/analyzer/common/entrypoint')
+    const Constant = require('../../../util/constant')
+    const logger = require('../../../util/logger')(__filename)
+
+    try {
+      logger.info('[GoCallchainChecker] triggerAtStartOfAnalyze called')
+
+      // Read the rule configuration directly from the checker manager's Rules; the first rule listing this checker id is merged
+      const BasicRuleHandler = analyzer.getCheckerManager().Rules
+      if (BasicRuleHandler && BasicRuleHandler.getRules) {
+        const allRules = BasicRuleHandler.getRules()
+        if (Array.isArray(allRules) && allRules.length > 0) {
+          for (const rule of allRules) {
+            if (rule.checkerIds && rule.checkerIds.includes(this.getCheckerId())) {
+              _.merge(this.checkerRuleConfigContent, rule)
+              break
+            }
+          }
+        }
+      }
+
+      // Mirrors the prepareEntryPoints logic of GoDefaultTaintChecker
+      // 1. Add the main entry points (unless the mode is ONLY_CUSTOM)
+      if (Config.entryPointMode !== 'ONLY_CUSTOM') {
+        // Add the main entries
+        let mainEntryPoints = GoEntryPoint.getMainEntryPoints(topScope.context.packages)
+        if (!_.isEmpty(mainEntryPoints)) {
+          if (Array.isArray(mainEntryPoints)) {
+            mainEntryPoints = _.uniqBy(mainEntryPoints, (value: any) => value.ast?.fdef)
+          } else {
+            mainEntryPoints = [mainEntryPoints]
+          }
+          mainEntryPoints.forEach((main: any) => {
+            if (main) {
+              const entryPoint = completeEntryPoint(main)
+              this.entryPoints.push(entryPoint)
+            }
+          })
+        }
+
+        // Use the call-graph boundary as entry points
+        if (Config.cgAlgo === 'CHA' && analyzer.typeResolver) {
+          FullCallGraphFileEntryPoint.makeFullCallGraphByType(analyzer, analyzer.typeResolver)
+        } else {
+          FullCallGraphFileEntryPoint.makeFullCallGraph(analyzer)
+        }
+        const fullCallGraphEntrypoint = FullCallGraphFileEntryPoint.getAllEntryPointsUsingCallGraph(
+          analyzer.ainfo?.callgraph,
+          analyzer
+        )
+        this.entryPoints.push(...fullCallGraphEntrypoint)
+      }
+    } catch (err: any) {
+      logger.error(`[GoCallchainChecker] Error in entrypoint collection: ${err.message}`)
+      logger.error(`[GoCallchainChecker] Stack: ${err.stack}`)
+    }
+
+    // 2. Use the entry points specified in the user rules (skipped in SELF_COLLECT mode)
+    const { entrypoints: ruleConfigEntryPoints } = this.checkerRuleConfigContent
+    if (!_.isEmpty(ruleConfigEntryPoints) && Config.entryPointMode !== 'SELF_COLLECT') {
+      logger.info(`[GoCallchainChecker] Processing ${ruleConfigEntryPoints.length} custom entrypoints`)
+      for (const entrypoint of ruleConfigEntryPoints) {
+        logger.info(`[GoCallchainChecker] Looking for: ${entrypoint.filePath}#${entrypoint.functionName}`)
+        let entryPointSymVal
+        if (entrypoint.funcReceiverType) {
+          entryPointSymVal = AstUtil.satisfy(
+            topScope.context.packages,
+            (n: any) =>
+              n.vtype === 'fclos' &&
+              FileUtil.extractAfterSubstring(n?.ast?.node?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
+              n?.parent?.ast?.node?.type === 'ClassDefinition' &&
+              n?.parent?.ast?.node?.id?.name === entrypoint.funcReceiverType &&
+              n?.ast?.node?.id?.name === entrypoint.functionName,
+            (node: any, prop: any) => prop === '_field',
+            null,
+            false
+          )
+        } else {
+          // Try several ways of matching the path: exact relative path, suffix, or path segment
+          entryPointSymVal = AstUtil.satisfy(
+            topScope.context.packages,
+            (n: any) => {
+              const sourcefile = n?.ast?.node?.loc?.sourcefile
+              const extracted = FileUtil.extractAfterSubstring(sourcefile, Config.maindirPrefix)
+              const matches =
+                n.vtype === 'fclos' &&
+                (extracted === entrypoint.filePath ||
+                  sourcefile?.endsWith(entrypoint.filePath) ||
+                  sourcefile?.includes(`/${entrypoint.filePath}`)) &&
+                n?.ast?.node?.id?.name === entrypoint.functionName
+
+              if (matches) {
+                logger.info(`[GoCallchainChecker] Found match: ${sourcefile} -> ${n?.ast?.node?.id?.name}`)
+              }
+              return matches
+            },
+            (node: any, prop: any) => prop === '_field',
+            null,
+            false
+          )
+        }
+
+        if (_.isEmpty(entryPointSymVal)) {
+          logger.warn(
+            `[GoCallchainChecker] match entryPoint fail for ${entrypoint.filePath}#${entrypoint.functionName}`
+          )
+          continue
+        }
+
+        logger.info(
+          `[GoCallchainChecker] Found ${Array.isArray(entryPointSymVal) ? entryPointSymVal.length : 1} match(es)`
+        )
+        const symValArray = Array.isArray(entryPointSymVal)
+          ? _.uniqBy(entryPointSymVal, (value: any) => value.ast?.fdef)
+          : [entryPointSymVal]
+        for (const main of symValArray) {
+          if (main) {
+            const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
+            entryPoint.scopeVal = main.parent
+            entryPoint.argValues = []
+            entryPoint.entryPointSymVal = main
+            entryPoint.filePath = entrypoint.filePath
+            entryPoint.functionName = entrypoint.functionName
+            entryPoint.attribute = entrypoint.attribute
+            entryPoint.funcReceiverType = main.funcReceiverType
+            this.entryPoints.push(entryPoint)
+          }
+        }
+      }
+    }
+
+    logger.info(`[GoCallchainChecker] Total entryPoints: ${this.entryPoints.length}`)
+    analyzer.mainEntryPoints = this.entryPoints
+  }
+
+  /**
+   * Function-call trigger: checks the call about to be made against the sink rules.
+   * @param analyzer not used here
+   * @param scope scope forwarded to the sink matcher
+   * @param node AST node of the call
+   * @param state analysis state forwarded to the finding builder
+   * @param info provides `fclos` and `callInfo` of the call
+   */
+  triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
+    const { fclos, callInfo } = info
+    this.checkSinkMatch(node, fclos, callInfo, scope, state)
+  }
+
+  /**
+   * Matches the call against the FuncCallTaintSink rules and reports a finding on a match.
+   * @param node AST node of the call; its callee drives the AST-based fallback
+   * @param fclos closure of the callee; nothing is checked when undefined
+   * @param callInfo call information; nothing is checked when missing
+   * @param scope scope forwarded to the sink matcher
+   * @param state analysis state forwarded to the finding builder
+   */
+  checkSinkMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state: any) {
+    if (fclos === undefined) {
+      return
+    }
+    const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
+
+    if (!rules || !callInfo) return
+
+    const nodeCallee = node.callee || node
+
+    let rule = matchSinkAtFuncCallWithCalleeType(node, fclos, rules, scope, callInfo)
+    rule = rule.length > 0 ? rule[0] : null
+
+    // If nothing matched, fall back to matching on the AST node (handles missing type information)
+    if (!rule && nodeCallee?.type === 'MemberAccess') {
+      const objectName = nodeCallee.object?.name
+      const propertyName = nodeCallee.property?.name
+
+      if (objectName && propertyName) {
+        for (const tspec of rules) {
+          // Match when fsig equals the method name or the "object.method" form
+          if (tspec.fsig === propertyName || tspec.fsig === `${objectName}.${propertyName}`) {
+            // For the callchain checker, the calleeType check is ignored when type information is missing,
+            // because only the sink match matters and strict type checking is not needed
+            rule = tspec
+            break
+          }
+        }
+      }
+    }
+
+    if (rule) {
+      this.findArgsAndAddNewFinding(node, callInfo, fclos, rule, state)
+    }
+  }
+}
+
+module.exports = GoCallchainChecker
diff --git a/src/checker/callchain/java/java-callchain-checker.ts b/src/checker/callchain/java/java-callchain-checker.ts
new file mode 100644
index 00000000..a2ff0a37
--- /dev/null
+++ b/src/checker/callchain/java/java-callchain-checker.ts
@@ -0,0 +1,307 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+import type { Invocation } from '../../../resolver/common/value/invocation'
+
+const _ = require('lodash')
+const CallchainChecker = require('../callchain-checker')
+const RulesBasicHandler = require('../../common/rules-basic-handler')
+const CallchainOutputStrategy = require('../../common/output/callchain-output-strategy')
+const { matchSinkAtFuncCallWithCalleeType, checkInvocationMatchSink } = require('../../taint/common-kit/sink-util')
+const SpringEntryPoint = require('../../../engine/analyzer/java/spring/entrypoint-collector/spring-default-entrypoint')
+const Loader = require('../../../util/loader')
+const CommonUtil = require('../../../util/common-util')
+const Constant = require('../../../util/constant')
+const {
+ valueUtil: {
+ ValueUtil: { Scoped },
+ },
+} = require('../../../engine/analyzer/common')
+
+/**
+ * Java callchain checker
+ * Only detects sink matches and outputs call chains without checking for taint
+ */
+class JavaCallchainChecker extends CallchainChecker {
+ // Entry points gathered by triggerAtStartOfAnalyze.
+ entryPoints: any[]
+
+ /**
+ * Create the checker with an empty entry-point list.
+ * Passes 'callchain_java' to the CallchainChecker base constructor (presumably the checker id).
+ * @param resultManager forwarded unchanged to the base constructor
+ */
+ constructor(resultManager: any) {
+ super(resultManager, 'callchain_java')
+ this.entryPoints = []
+ }
+
+ /**
+ * Start-of-analysis trigger: merges this checker's rule config and builds the entry-point list.
+ * Spring entry points are collected unless Config.entryPointMode is 'ONLY_CUSTOM'; entry points
+ * listed in the rule config are resolved unless it is 'SELF_COLLECT'. The resulting list is
+ * assigned to analyzer.entryPoints.
+ * @param analyzer analyzer instance; topScope and getCheckerManager() are read, entryPoints is assigned
+ * @param scope not used by this method
+ * @param node not used by this method
+ * @param state not used by this method
+ * @param info not used by this method
+ */
+ triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any) {
+ const { topScope } = analyzer
+ const AstUtil = require('../../../util/ast-util')
+ const Config = require('../../../config')
+ const EntryPoint = require('../../../engine/analyzer/common/entrypoint')
+ const logger = require('../../../util/logger')(__filename)
+
+ // Read the rule configuration directly from analyzer.getCheckerManager().Rules
+ const BasicRuleHandler = analyzer.getCheckerManager().Rules
+ if (BasicRuleHandler && BasicRuleHandler.getRules) {
+ const allRules = BasicRuleHandler.getRules()
+ if (Array.isArray(allRules) && allRules.length > 0) {
+ for (const rule of allRules) {
+ if (rule.checkerIds && rule.checkerIds.includes(this.getCheckerId())) {
+ // Only the first rule entry that lists this checker id is merged (see the break below).
+ _.merge(this.checkerRuleConfigContent, rule)
+ break
+ }
+ }
+ }
+ }
+
+ // Prepare entrypoints - logic copied in full from JavaTaintChecker
+ const { entrypoints: ruleConfigEntryPoints } = this.checkerRuleConfigContent
+
+ // 1. Auto-collect Spring entrypoints (unless the mode is ONLY_CUSTOM)
+ if (Config.entryPointMode !== 'ONLY_CUSTOM') {
+ logger.info('YASA will collect Entrypoint and Source for callchain')
+ const { selfCollectSpringEntryPoints } = SpringEntryPoint.getSpringEntryPointAndSource(topScope.context.packages)
+
+ if (!_.isEmpty(selfCollectSpringEntryPoints)) {
+ selfCollectSpringEntryPoints.forEach((main: any) => {
+ if (main) {
+ const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
+ entryPoint.scopeVal = main.parent
+ entryPoint.argValues = []
+ entryPoint.entryPointSymVal = main
+ entryPoint.filePath = main.filePath
+ entryPoint.functionName = main.functionName
+ entryPoint.attribute = main.attribute
+ entryPoint.funcReceiverType = main.funcReceiverType
+ this.entryPoints.push(entryPoint)
+ }
+ })
+ }
+ }
+
+ // 2. Handle custom entrypoints from the rule config (unless the mode is SELF_COLLECT)
+ if (!_.isEmpty(ruleConfigEntryPoints) && Config.entryPointMode !== 'SELF_COLLECT') {
+ for (const entrypoint of ruleConfigEntryPoints) {
+ // First try to look the function up by packageName (original logic)
+ if (entrypoint.packageName) {
+ let targetPackage = entrypoint.packageName
+ // A single leading '.' on the package name is dropped before it is split into path segments.
+ targetPackage = targetPackage.startsWith('.') ? targetPackage.slice(1) : targetPackage
+ const arr = Loader.getPackageNameProperties(targetPackage)
+ let packageManagerT = topScope.context.packages
+ // Walk down the package tree one segment at a time; a missing segment leaves undefined.
+ arr.forEach((path: any) => {
+ packageManagerT = packageManagerT?.members?.get(path)
+ })
+
+ if (packageManagerT && packageManagerT.vtype !== 'undefine') {
+ const func = entrypoint.functionName
+ const entryPointSymVal = CommonUtil.getFclosFromScope(packageManagerT, func)
+ if (entryPointSymVal?.vtype === 'fclos') {
+ // Entry points resolved by package name get a placeholder ('mock') scope rather than their parent.
+ const scopeVal = Scoped('', {
+ vtype: 'scope',
+ sid: 'mock',
+ qid: 'mock',
+ field: {},
+ parent: null,
+ })
+
+ const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
+ entryPoint.scopeVal = scopeVal
+ entryPoint.argValues = []
+ entryPoint.functionName = entrypoint.functionName
+ entryPoint.filePath = entrypoint.filePath
+ entryPoint.attribute = entrypoint.attribute
+ entryPoint.packageName = entrypoint.packageName
+ entryPoint.entryPointSymVal = entryPointSymVal
+ this.entryPoints.push(entryPoint)
+ // Resolved via packageName: skip the filePath fallback below.
+ continue
+ }
+ }
+ }
+
+ // If the packageName lookup failed, fall back to a lookup by filePath
+ // NOTE(review): includes() already covers every case endsWith() accepts, so the first test is redundant.
+ const entryPointSymVal = AstUtil.satisfy(
+ topScope.context.packages,
+ (n: any) =>
+ n.vtype === 'fclos' &&
+ (n?.ast?.node?.loc?.sourcefile?.endsWith(entrypoint.filePath) ||
+ n?.ast?.node?.loc?.sourcefile?.includes(entrypoint.filePath)) &&
+ n?.ast?.node?.id?.name === entrypoint.functionName,
+ (node: any, prop: any) => prop === '_field',
+ null,
+ false
+ )
+
+ if (!_.isEmpty(entryPointSymVal)) {
+ // The search result may be a single value or an array; only the first hit is used.
+ const symVal = Array.isArray(entryPointSymVal) ? entryPointSymVal[0] : entryPointSymVal
+ const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
+ entryPoint.scopeVal = symVal.parent
+ entryPoint.argValues = []
+ entryPoint.functionName = entrypoint.functionName
+ entryPoint.filePath = entrypoint.filePath
+ // Unlike the packageName branch, a missing attribute defaults to 'HTTP' here.
+ entryPoint.attribute = entrypoint.attribute || 'HTTP'
+ entryPoint.packageName = entrypoint.packageName
+ entryPoint.entryPointSymVal = symVal
+ this.entryPoints.push(entryPoint)
+ }
+ }
+ }
+
+ // The analyzer's list is assigned (replaced), not appended to.
+ analyzer.entryPoints = this.entryPoints
+ }
+
+  /**
+   * FunctionCall trigger: forwards the call site, together with `info` and the analyzer,
+   * to checkSinkMatch.
+   * @param analyzer analyzer instance, forwarded to checkSinkMatch
+   * @param scope scope of the call
+   * @param node call-site AST node
+   * @param state analysis state
+   * @param info call details; its `fclos` and `callInfo` properties are read
+   */
+  triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
+    const calleeClosure = info.fclos
+    const callDetails = info.callInfo
+    this.checkSinkMatch(node, calleeClosure, callDetails, scope, state, info, analyzer)
+  }
+
+  /**
+   * Find the sink rules matching this call and emit one callchain finding per matched rule.
+   * While RulesBasicHandler reports preprocessing as ready, the assembled rule list is cached on
+   * the instance and match results are cached per `node._meta.nodehash`.
+   * @param node call-site AST node
+   * @param fclos resolved callee value
+   * @param callInfo call argument info passed to the matcher
+   * @param scope current scope
+   * @param state analysis state; `callstack` and `callsites` go into the finding
+   * @param info not used by this method
+   * @param analyzer analyzer instance, used for sink loading and call-graph matching
+   * @returns always true
+   */
+  checkSinkMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state: any, info: any, analyzer: any) {
+    // Pick the rule list: assembled once and reused when preprocessing is ready, rebuilt otherwise.
+    let sinkRules
+    if (!RulesBasicHandler.getPreprocessReady()) {
+      sinkRules = this.assembleFunctionCallSinkRule()
+    } else {
+      if (!this.sinkRuleArray) {
+        this.sinkRuleArray = this.assembleFunctionCallSinkRule()
+        this.sinkArray = analyzer?.loadAllSink()
+      }
+      sinkRules = this.sinkRuleArray
+    }
+
+    // Type-aware match first; appendCgRules then adds call-graph matches when nothing matched.
+    const computeMatches = () => {
+      const found = matchSinkAtFuncCallWithCalleeType(node, fclos, sinkRules, scope, callInfo)
+      this.appendCgRules(found, node, scope, sinkRules, analyzer)
+      return found
+    }
+
+    // Results are only cached when preprocessing is ready and the node carries a hash.
+    const cacheable = RulesBasicHandler.getPreprocessReady() && node?._meta?.nodehash
+    let rules
+    if (!cacheable) {
+      rules = computeMatches()
+    } else if (this.matchSinkRuleResultMap.has(node._meta.nodehash)) {
+      rules = this.matchSinkRuleResultMap.get(node._meta.nodehash)
+    } else {
+      rules = computeMatches()
+      this.matchSinkRuleResultMap.set(node._meta.nodehash, rules)
+    }
+
+    for (const rule of rules) {
+      let ruleName = rule.fsig
+      if (rule.attribute !== undefined) {
+        ruleName = ruleName + `\nSINK Attribute: ${rule.attribute}`
+      }
+      const finding = this.buildCallchainFinding(
+        this.getCheckerId(),
+        this.desc,
+        node,
+        fclos,
+        ruleName,
+        state.callstack,
+        state.callsites
+      )
+      // Findings the output strategy does not consider new are dropped.
+      if (CallchainOutputStrategy.isNewFinding(this.resultManager, finding)) {
+        this.resultManager.newFinding(finding, CallchainOutputStrategy.outputStrategyId)
+      }
+    }
+
+    return true
+  }
+
+  /**
+   * When `rules` is still empty, append the rules matched through the call graph to it.
+   * The array is mutated in place; nothing happens when it already has entries.
+   * @param rules rules matched so far (mutated)
+   * @param node call-site AST node
+   * @param scope current scope
+   * @param sinkRules candidate sink rules
+   * @param analyzer analyzer instance
+   */
+  appendCgRules(rules: any[], node: any, scope: any, sinkRules: any[], analyzer: any) {
+    const alreadyMatched = rules.length > 0
+    if (!alreadyMatched) {
+      const viaCallGraph = this.findMatchedRuleByCallGraph(node, scope, sinkRules, analyzer)
+      for (const extraRule of viaCallGraph) {
+        rules.push(extraRule)
+      }
+    }
+  }
+
+  /**
+   * Collect the sink rules that match any invocation the analyzer reports for this node.
+   * A rule is pushed once for every invocation it matches, so duplicates are possible.
+   * @param node call-site AST node
+   * @param scope current scope
+   * @param sinkRules candidate sink rules
+   * @param analyzer analyzer instance; must provide findNodeInvocations
+   * @returns matched rules, or an empty array when an input is missing or nothing is found
+   */
+  findMatchedRuleByCallGraph(node: any, scope: any, sinkRules: any[], analyzer: any) {
+    const matchedSinks: any[] = []
+
+    const canQuery = node && scope && sinkRules && analyzer && analyzer.findNodeInvocations
+    if (canQuery) {
+      const invocations: Invocation[] = analyzer.findNodeInvocations(scope, node)
+      if (invocations) {
+        for (const invocation of invocations) {
+          for (const sink of sinkRules) {
+            if (checkInvocationMatchSink(invocation, sink, analyzer.typeResolver)) {
+              matchedSinks.push(sink)
+            }
+          }
+        }
+      }
+    }
+
+    return matchedSinks
+  }
+
+  /**
+   * Build one flat list from the FuncCallTaintSink and ObjectTaintFuncCallSink rule arrays.
+   * Every rule object is tagged in place with `_sinkType` set to the name of its source list.
+   * @returns FuncCallTaintSink rules followed by ObjectTaintFuncCallSink rules
+   */
+  assembleFunctionCallSinkRule() {
+    const collected: any[] = []
+    // Order matters: FuncCallTaintSink entries come first.
+    for (const sinkType of ['FuncCallTaintSink', 'ObjectTaintFuncCallSink']) {
+      const configured = this.checkerRuleConfigContent.sinks?.[sinkType]
+      if (!Array.isArray(configured)) continue
+      for (const sinkRule of configured) {
+        sinkRule._sinkType = sinkType
+      }
+      collected.push(...configured)
+    }
+
+    return collected
+  }
+}
+
+module.exports = JavaCallchainChecker
diff --git a/src/checker/callchain/js/js-callchain-checker.ts b/src/checker/callchain/js/js-callchain-checker.ts
new file mode 100644
index 00000000..dd8b41b9
--- /dev/null
+++ b/src/checker/callchain/js/js-callchain-checker.ts
@@ -0,0 +1,288 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+
+const _ = require('lodash')
+const CallchainChecker = require('../callchain-checker')
+const { matchSinkAtFuncCall } = require('../../taint/common-kit/sink-util')
+const config = require('../../../config')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
+const Config = require('../../../config')
+
+/**
+ * JavaScript callchain checker
+ * Only detects sink matches and outputs call chains without checking for taint
+ */
+class JsCallchainChecker extends CallchainChecker {
+ // Entry points gathered by triggerAtStartOfAnalyze.
+ entryPoints: any[]
+
+ /**
+ * Create the checker with an empty entry-point list.
+ * Passes 'callchain_js' to the CallchainChecker base constructor (presumably the checker id).
+ * @param resultManager forwarded unchanged to the base constructor
+ */
+ constructor(resultManager: any) {
+ super(resultManager, 'callchain_js')
+ this.entryPoints = []
+ }
+
+  /**
+   * Start-of-analysis trigger: merges this checker's rule config and builds the entry-point list.
+   * Entry points from the rule config are resolved unless config.entryPointMode is 'SELF_COLLECT';
+   * call-graph and file entry points are added unless it is 'ONLY_CUSTOM'. The collected entries
+   * are appended to analyzer.entryPoints.
+   * @param analyzer analyzer instance; topScope, getCheckerManager(), ainfo and entryPoints are used
+   * @param scope not used by this method
+   * @param node not used by this method
+   * @param state not used by this method
+   * @param info not used by this method
+   */
+  triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any) {
+    const { topScope } = analyzer
+    const loader = require('../../../util/loader')
+    const commonUtil = require('../../../util/common-util')
+    const EntryPoint = require('../../../engine/analyzer/common/entrypoint')
+    const constValue = require('../../../util/constant')
+    const { handleException } = require('../../../engine/analyzer/common/exception-handler')
+
+    // Read the rule configuration directly from analyzer.getCheckerManager().Rules
+    const BasicRuleHandler = analyzer.getCheckerManager().Rules
+    if (BasicRuleHandler && BasicRuleHandler.getRules) {
+      const allRules = BasicRuleHandler.getRules()
+      if (Array.isArray(allRules) && allRules.length > 0) {
+        for (const rule of allRules) {
+          if (rule.checkerIds && rule.checkerIds.includes(this.getCheckerId())) {
+            // Only the first rule entry that lists this checker id is merged.
+            _.merge(this.checkerRuleConfigContent, rule)
+            break
+          }
+        }
+      }
+    }
+
+    // Logic copied in full from JsTaintChecker.prepareEntryPoints
+    const { entrypoints: ruleConfigEntryPoints } = this.checkerRuleConfigContent
+    if (config.entryPointMode !== 'SELF_COLLECT') {
+      // Custom entry mode: take the entrypoints from the rule config (sources are loaded based on the entry)
+      const prepareEntryPointList = []
+      if (!_.isEmpty(ruleConfigEntryPoints)) {
+        prepareEntryPointList.push(...ruleConfigEntryPoints)
+      }
+      if (!_.isEmpty(prepareEntryPointList)) {
+        for (const entrypoint of prepareEntryPointList) {
+          try {
+            let filepath = entrypoint.filePath
+            filepath = filepath.startsWith('/') ? filepath.slice(1) : filepath
+            const arr = loader.getFilePathProperties(filepath, { caseStyle: 'lower' })
+            // Walk from the top scope down the path segments; a missing segment leaves undefined.
+            let fieldT = topScope
+            arr.forEach((path: any) => {
+              fieldT = fieldT?.members?.get(path)
+            })
+            if (!fieldT || fieldT.vtype === 'undefine') {
+              // Path walk failed: fall back to the first module whose key contains the file path.
+              for (const [mod, modVal] of topScope.context.modules.members.entries()) {
+                if (mod.includes(entrypoint.filePath) && modVal.ast?.node?.type === 'CompileUnit') {
+                  fieldT = modVal
+                  break
+                }
+              }
+            }
+
+            if (entrypoint.functionName) {
+              const func = entrypoint.functionName
+              const valExport = fieldT
+              const entryPointSymVal = commonUtil.getFclosFromScope(valExport, func)
+              if (entryPointSymVal?.vtype !== 'fclos') {
+                continue
+              }
+
+              const entryPoint = new EntryPoint(constValue.ENGIN_START_FUNCALL)
+              entryPoint.scopeVal = entryPointSymVal.parent
+              entryPoint.functionName = entrypoint.functionName
+              entryPoint.filePath = entrypoint.filePath
+              entryPoint.attribute = entrypoint.attribute
+              entryPoint.entryPointSymVal = entryPointSymVal
+              this.entryPoints.push(entryPoint)
+            } else {
+              // fieldT is still undefined when neither lookup found the file; skip the entry
+              // instead of dereferencing it and reporting a TypeError through handleException.
+              if (!fieldT?.ast?.node || fieldT.ast.node.type !== 'CompileUnit') continue
+              const entryPoint = new EntryPoint(constValue.ENGIN_START_FILE_BEGIN)
+              entryPoint.scopeVal = fieldT
+              entryPoint.argValues = undefined
+              entryPoint.functionName = undefined
+              entryPoint.filePath = fieldT?.ast?.node?.loc?.sourcefile
+              entryPoint.attribute = entrypoint.attribute
+              entryPoint.packageName = undefined
+              entryPoint.entryPointSymVal = fieldT
+              this.entryPoints.push(entryPoint)
+            }
+          } catch (e: any) {
+            handleException(
+              e,
+              '[js-callchain-checker]An Error Occurred in custom entrypoint',
+              '[js-callchain-checker]An Error Occurred in custom entrypoint'
+            )
+          }
+        }
+      }
+    }
+
+    // Use call-graph boundaries plus files as entrypoints
+    if (config.entryPointMode !== 'ONLY_CUSTOM') {
+      const fullCallGraphFileEntryPoint = require('../../common/full-callgraph-file-entrypoint')
+      fullCallGraphFileEntryPoint.makeFullCallGraph(analyzer)
+      const fullCallGraphEntrypoint = fullCallGraphFileEntryPoint.getAllEntryPointsUsingCallGraph(
+        analyzer.ainfo?.callgraph,
+        analyzer
+      )
+      const fullFileEntrypoint = fullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer)
+      this.entryPoints.push(...fullCallGraphEntrypoint)
+      this.entryPoints.push(...fullFileEntrypoint)
+    }
+
+    // Entries are appended to whatever the analyzer already holds.
+    analyzer.entryPoints.push(...this.entryPoints)
+  }
+
+  /**
+   * FunctionCall trigger: runs checkSinkMatch and then checkByFieldMatch on the call site.
+   * @param analyzer analyzer instance (not used by this method)
+   * @param scope scope of the call (not used by this method)
+   * @param node call-site AST node
+   * @param state analysis state, forwarded to both checks
+   * @param info call details; its `fclos` and `callInfo` properties are read
+   */
+  triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
+    const calleeClosure = info.fclos
+    const callDetails = info.callInfo
+    this.checkSinkMatch(node, calleeClosure, callDetails, state)
+    this.checkByFieldMatch(node, calleeClosure, callDetails, state)
+  }
+
+  /**
+   * Match a call against the configured FuncCallTaintSink rules and add a finding on a hit.
+   * matchSinkAtFuncCall is tried first; if it yields nothing, the function name is compared
+   * with each rule fsig, either in full or as its last dotted segment.
+   * @param node call-site AST node
+   * @param fclos resolved callee value; the method returns immediately when it is undefined
+   * @param callInfo call argument info; the method returns when it is missing
+   * @param state analysis state, passed on when a finding is added
+   */
+  checkSinkMatch(node: any, fclos: any, callInfo: CallInfo | undefined, state: any) {
+    if (fclos === undefined) return
+
+    const sinkSpecs = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
+    if (!sinkSpecs || !callInfo) return
+
+    const calleeNode = node.callee || node
+
+    const directMatches = matchSinkAtFuncCall(node, fclos, sinkSpecs, callInfo)
+    let matched = directMatches.length > 0 ? directMatches[0] : null
+
+    // Nothing matched: retry by bare function name (covers destructured imports and similar cases).
+    if (!matched) {
+      const calledName = fclos?.name || fclos?.ast?.node?.id?.name || calleeNode?.name
+
+      if (calledName) {
+        for (const spec of sinkSpecs) {
+          const signature = spec.fsig
+          if (!signature) continue
+          // e.g. an fsig of child_process.exec matches a call named exec
+          if (signature === calledName || signature.endsWith(`.${calledName}`)) {
+            matched = spec
+            break
+          }
+        }
+      }
+    }
+
+    if (!matched) return
+    this.findArgsAndAddNewFinding(node, callInfo, fclos, matched, state)
+  }
+
+ /**
+ * Match the call against FuncCallTaintSink rules by comparing the rule's `object.method`
+ * signature with the qualified name that getObj derives from the callee.
+ * A finding is added for every rule that passes both the method-name and the object checks.
+ * @param node call-site AST node; rules are only evaluated when it has a `callee`
+ * @param fclos resolved callee value, handed to getObj
+ * @param callInfo call argument info, passed on when a finding is added
+ * @param state analysis state, passed on when a finding is added
+ */
+ checkByFieldMatch(node: any, fclos: any, callInfo: CallInfo | undefined, state: any) {
+ const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
+ if (_.isEmpty(rules)) {
+ return
+ }
+
+ // NOTE(review): the callback never returns true (the success path falls off the end), so
+ // `some` visits every rule and behaves like forEach - confirm this is intended.
+ rules.some((rule: any) => {
+ if (typeof rule.fsig !== 'string') {
+ return false
+ }
+ // Split the fsig at its last '.': RuleObj is the part before it, ruleCallName the part after.
+ // Without any '.', both are the whole fsig.
+ const paths = rule.fsig.split('.')
+ const lastIndex = rule.fsig.lastIndexOf('.')
+ let RuleObj = rule.fsig.substring(0, lastIndex)
+ if (lastIndex === -1) {
+ RuleObj = rule.fsig
+ }
+ const ruleCallName = paths[paths.length - 1]
+ let callName
+ const { callee } = node
+ if (!callee) return false
+ if (callee.type === 'MemberAccess') {
+ callName = callee.property.name
+ } else {
+ // Identifier
+ callName = callee.name
+ }
+ const CallFull = this.getObj(fclos)
+ if (typeof CallFull === 'undefined') {
+ return false
+ }
+ const lastIndexofCall = CallFull.lastIndexOf('.')
+ // A rule method name of '*' accepts any call name.
+ if (ruleCallName !== '*' && ruleCallName !== callName) {
+ if (lastIndexofCall >= 0) {
+ // Fallback: derive callName once more, this time from the resolved full name
+ callName = CallFull.substring(lastIndexofCall + 1)
+ if (ruleCallName !== callName && rule.fsig.includes('.')) {
+ return false
+ }
+ }
+ }
+
+ // CallObj is the resolved name without its last segment (or the whole name when it has no '.').
+ let CallObj = CallFull
+ if (lastIndexofCall >= 0) {
+ CallObj = CallFull.substring(0, lastIndexofCall)
+ }
+ if (CallObj !== RuleObj) {
+ // Retry after cutting everything from the last '(' onwards, then accept a match on a
+ // dotted suffix or dotted prefix of the remaining name.
+ const idx = CallObj.lastIndexOf('(')
+ const result = idx !== -1 ? CallObj.slice(0, idx) : CallObj
+ if (result !== RuleObj) {
+ if (!result.endsWith(`.${RuleObj}`) && !result.startsWith(`${RuleObj}.`)) {
+ return false
+ }
+ }
+ }
+ this.findArgsAndAddNewFinding(node, callInfo, fclos, rule, state)
+ })
+ }
+
+  /**
+   * Derive the qualified name used for field-based sink matching from a callee value.
+   * Uses `qid` when present (after rewriting a few Egg.* prefixes and cutting a leading
+   * file-path part), otherwise follows `_this`, otherwise falls back to `sid`.
+   * @param fclos callee value; may be undefined or null
+   * @returns the result of QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix, or undefined when
+   * `fclos` is undefined or null
+   */
+  getObj(fclos: any): any {
+    // checkByFieldMatch calls this without checking fclos, and the recursion below can pass a
+    // null `_this`; reading `.sid` on either used to throw a TypeError.
+    if (fclos === undefined || fclos === null) {
+      return undefined
+    }
+    if (typeof fclos.qid === 'undefined' && typeof fclos._this === 'undefined') {
+      return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(fclos.sid)
+    }
+    if (typeof fclos.qid !== 'undefined') {
+      let qid = fclos.qid?.replace('Egg.Context', 'this.ctx')
+      qid = qid?.replace('Egg.Application', 'this.app')
+      qid = qid?.replace('this.app.service', 'this.ctx.service')
+      qid = qid?.replace('Egg.Request', 'this.ctx.request')
+      const sourcefile = fclos.ast?.node?.loc?.sourcefile
+      if (sourcefile && sourcefile.startsWith(Config.maindirPrefix)) {
+        // `prefix` is the source file path relative to Config.maindirPrefix.
+        const prefix = sourcefile.substring(Config.maindirPrefix.length)
+        const lastDotIndex = prefix.lastIndexOf('.')
+        const result = lastDotIndex >= 0 ? prefix.substring(0, lastDotIndex) : prefix
+        if (result) {
+          // NOTE(review): the cut uses prefix.length (extension included), not result.length -
+          // confirm that the qid really starts with the path including its extension.
+          qid = qid?.substring(prefix.length + 1)
+        }
+      }
+      return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(qid)
+    }
+    if (!(fclos === fclos._this)) {
+      return this.getObj(fclos._this)
+    }
+    return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(fclos.sid)
+  }
+}
+
+module.exports = JsCallchainChecker
diff --git a/src/checker/callchain/python/python-callchain-checker.ts b/src/checker/callchain/python/python-callchain-checker.ts
new file mode 100644
index 00000000..37805d6f
--- /dev/null
+++ b/src/checker/callchain/python/python-callchain-checker.ts
@@ -0,0 +1,256 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+
+const _ = require('lodash')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
+const CallchainChecker = require('../callchain-checker')
+const { matchSinkAtFuncCall, matchRegex } = require('../../taint/common-kit/sink-util')
+const {
+ findPythonFcEntryPointAndSource,
+ buildFclosIndex,
+ lookupFclos,
+} = require('../../../engine/analyzer/python/common/entrypoint-collector/python-entrypoint')
+const Constant = require('../../../util/constant')
+const EntryPoint = require('../../../engine/analyzer/common/entrypoint')
+const Config = require('../../../config')
+const { extractRelativePath } = require('../../../util/file-util')
+const logger = require('../../../util/logger')(__filename)
+const { loadPythonDefaultRule } = require('../../taint/python/python-taint-abstract-checker')
+
+/**
+ * Python callchain checker
+ * Only detects sink matches and outputs call chains without checking for taint
+ */
+class PythonCallchainChecker extends CallchainChecker {
+ // Entry points gathered by prepareEntryPoints.
+ entryPoints: any[]
+
+ /**
+ * Create the checker with an empty entry-point list.
+ * Passes 'callchain_python' to the CallchainChecker base constructor (presumably the checker id).
+ * @param resultManager forwarded unchanged to the base constructor
+ */
+ constructor(resultManager: any) {
+ super(resultManager, 'callchain_python')
+ this.entryPoints = []
+ }
+
+  /**
+   * Start-of-analysis trigger (implementation copied from PythonTaintChecker): prepares the
+   * entry points and appends them to analyzer.entryPoints.
+   * @param analyzer analyzer instance; topScope.context and entryPoints are used
+   * @param scope not used by this method
+   * @param node not used by this method
+   * @param state not used by this method
+   * @param info not used by this method
+   */
+  triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any) {
+    const { modules, files } = analyzer.topScope.context
+    this.prepareEntryPoints(analyzer, Config.maindir, modules, files)
+    analyzer.entryPoints.push(...this.entryPoints)
+  }
+
+ /**
+ * Prepare entry points (logic copied in full from PythonTaintChecker).
+ * Gathers function-call and file entry point requests from auto-collection and from the rule
+ * config, resolves them to symbol values and pushes the results onto this.entryPoints.
+ * As a side effect, default and collected taint sources are appended to
+ * this.checkerRuleConfigContent.sources.TaintSource.
+ * @param analyzer analyzer instance; passed to the collector and used for symbolTable lookups
+ * @param dir directory passed to the entry-point collector and the fclos index builder
+ * @param moduleManager module container used to build the fclos index
+ * @param fileManager lookup from full file path to the file's symbol-table key
+ */
+ prepareEntryPoints(analyzer: any, dir: any, moduleManager: any, fileManager: any) {
+ const funCallEntryPoints: any[] = []
+ const fileEntryPoints: any[] = []
+ const { entrypoints: ruleConfigEntryPoints } = this.checkerRuleConfigContent
+
+ if (Config.entryPointMode !== 'ONLY_CUSTOM') {
+ // NOTE(review): assumes loadPythonDefaultRule() returns a non-empty array - confirm.
+ const pythonDefaultRule = loadPythonDefaultRule()
+ if (pythonDefaultRule[0].checkerIds.includes(this.getCheckerId())) {
+ // Make sure sources.TaintSource exists and is an array before appending the defaults.
+ this.checkerRuleConfigContent.sources = this.checkerRuleConfigContent.sources || {}
+ this.checkerRuleConfigContent.sources.TaintSource = this.checkerRuleConfigContent.sources.TaintSource || []
+ this.checkerRuleConfigContent.sources.TaintSource = Array.isArray(
+ this.checkerRuleConfigContent.sources.TaintSource
+ )
+ ? this.checkerRuleConfigContent.sources.TaintSource
+ : [this.checkerRuleConfigContent.sources.TaintSource]
+ this.checkerRuleConfigContent.sources.TaintSource.push(...pythonDefaultRule[0].sources.TaintSource)
+ }
+ const { pyFcEntryPointArray, pyFcEntryPointSourceArray } = findPythonFcEntryPointAndSource(
+ dir,
+ fileManager,
+ analyzer
+ )
+ if (pyFcEntryPointArray) {
+ funCallEntryPoints.push(...pyFcEntryPointArray)
+ }
+ if (pyFcEntryPointSourceArray) {
+ // Same normalisation as above, repeated before appending the collected sources.
+ this.checkerRuleConfigContent.sources = this.checkerRuleConfigContent.sources || {}
+ this.checkerRuleConfigContent.sources.TaintSource = this.checkerRuleConfigContent.sources.TaintSource || []
+ this.checkerRuleConfigContent.sources.TaintSource = Array.isArray(
+ this.checkerRuleConfigContent.sources.TaintSource
+ )
+ ? this.checkerRuleConfigContent.sources.TaintSource
+ : [this.checkerRuleConfigContent.sources.TaintSource]
+ this.checkerRuleConfigContent.sources.TaintSource.push(...pyFcEntryPointSourceArray)
+ }
+ }
+ if (Config.entryPointMode !== 'SELF_COLLECT' && !_.isEmpty(ruleConfigEntryPoints)) {
+ // Configured entries with a functionName become function-call requests, the rest file requests.
+ // These request objects carry no symbol value yet; they are resolved further down.
+ for (const entrypoint of ruleConfigEntryPoints) {
+ if (entrypoint.functionName) {
+ const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
+ entryPoint.filePath = entrypoint.filePath
+ entryPoint.functionName = entrypoint.functionName
+ entryPoint.attribute = entrypoint.attribute
+ funCallEntryPoints.push(entryPoint)
+ } else {
+ const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN)
+ entryPoint.filePath = entrypoint.filePath
+ entryPoint.attribute = entrypoint.attribute
+ fileEntryPoints.push(entryPoint)
+ }
+ }
+ }
+
+ // Build the fclos index: one traversal instead of repeated lookups
+ const fclosIndex = buildFclosIndex(moduleManager, dir, extractRelativePath)
+
+ for (const funCallEntryPoint of funCallEntryPoints) {
+ // Look up through the index (described as an O(1) operation by the original author)
+ let valFuncs = lookupFclos(fclosIndex, funCallEntryPoint.filePath, funCallEntryPoint.functionName)
+
+ if (_.isEmpty(valFuncs)) {
+ logger.info('match entryPoint fail')
+ continue
+ }
+
+ // De-duplicate by the function definition node
+ valFuncs = _.uniqBy(valFuncs, (value: any) => value.ast?.fdef)
+
+ // One entry point is emitted per distinct matching function value.
+ for (const valFunc of valFuncs) {
+ const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
+ entryPoint.filePath = funCallEntryPoint.filePath
+ entryPoint.functionName = funCallEntryPoint.functionName
+ entryPoint.attribute = funCallEntryPoint.attribute
+ entryPoint.entryPointSymVal = valFunc
+ this.entryPoints.push(entryPoint)
+ }
+ }
+
+ for (const fileEntryPoint of fileEntryPoints) {
+ // String.replace with a string pattern only replaces the first '//' occurrence.
+ const fullFilePath = `${Config.maindir}${fileEntryPoint.filePath}`.replace('//', '/')
+ const fileUuid = fileManager[fullFilePath]
+ const file = analyzer.symbolTable.get(fileUuid)
+ // Files that are not found, or are not a CompileUnit, are skipped silently.
+ if (file?.ast?.node?.type === 'CompileUnit') {
+ const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN)
+ entryPoint.scopeVal = file
+ entryPoint.argValues = undefined
+ entryPoint.functionName = undefined
+ entryPoint.filePath = file?.ast?.node?.loc?.sourcefile
+ entryPoint.attribute = fileEntryPoint.attribute
+ entryPoint.packageName = undefined
+ entryPoint.entryPointSymVal = file
+ this.entryPoints.push(entryPoint)
+ }
+ }
+ }
+
+  /**
+   * FunctionCall trigger: runs checkByNameMatch and then checkByFieldMatch on the call site.
+   * @param analyzer analyzer instance (not used by this method)
+   * @param scope scope of the call (not used by this method)
+   * @param node call-site AST node
+   * @param state analysis state, forwarded to both checks
+   * @param info call details; its `fclos` and `callInfo` properties are read
+   */
+  triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
+    const calleeClosure = info.fclos
+    const callDetails = info.callInfo
+    this.checkByNameMatch(node, calleeClosure, callDetails, state)
+    this.checkByFieldMatch(node, calleeClosure, callDetails, state)
+  }
+
+  /**
+   * Match the call against the FuncCallTaintSink rules with matchSinkAtFuncCall and add a
+   * finding for the first matching rule.
+   * @param node call-site AST node
+   * @param fclos resolved callee value; the method returns immediately when it is undefined
+   * @param callInfo call argument info; the method returns when it is missing
+   * @param state analysis state, passed on when a finding is added
+   */
+  checkByNameMatch(node: any, fclos: any, callInfo: CallInfo | undefined, state: any) {
+    if (fclos === undefined) return
+
+    const sinkSpecs = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
+    if (!sinkSpecs || !callInfo) return
+
+    const matches = matchSinkAtFuncCall(node, fclos, sinkSpecs, callInfo)
+    const firstMatch = matches.length > 0 ? matches[0] : null
+    if (!firstMatch) return
+
+    this.findArgsAndAddNewFinding(node, callInfo, fclos, firstMatch, state)
+  }
+
+ /**
+ * Match the call against FuncCallTaintSink rules by comparing each rule's fsig with the
+ * qualified name that getObj derives from the callee. Stops at the first rule that matches.
+ * @param node call-site AST node, passed on when a finding is added
+ * @param fclos resolved callee value, handed to getObj
+ * @param callInfo call argument info, passed on when a finding is added
+ * @param state analysis state, passed on when a finding is added
+ */
+ checkByFieldMatch(node: any, fclos: any, callInfo: CallInfo | undefined, state: any) {
+ const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
+ if (_.isEmpty(rules)) {
+ return
+ }
+ rules.some((rule: any): boolean => {
+ // Rules whose fsig is not a string are skipped, so the else branch below can only be
+ // reached with an empty-string fsig.
+ if (typeof rule.fsig !== 'string') {
+ return false
+ }
+ const callFull = this.getObj(fclos)
+ if (typeof callFull === 'undefined') {
+ return false
+ }
+ if (rule.fsig) {
+ // Exact comparison between the rule signature and the resolved qualified name.
+ if (rule.fsig === callFull) {
+ this.findArgsAndAddNewFinding(node, callInfo, fclos, rule, state)
+ return true
+ }
+ } else {
+ if (!rule.fregex) {
+ return false
+ }
+ // NOTE(review): getObj presumably returns a string, in which case callFull.type is always
+ // undefined and this regex branch never matches - confirm the intended check.
+ if (callFull.type === 'MemberAccess' && matchRegex(rule.fregex, fclos.qid)) {
+ this.findArgsAndAddNewFinding(node, callInfo, fclos, rule, state)
+ return true
+ }
+ }
+ return false
+ })
+ }
+
+  /**
+   * Derive the qualified name used for field-based sink matching from a callee value.
+   * Everything up to and including the first '>.' marker is dropped from the name.
+   * A string `qid` is preferred; otherwise `_this` is followed; `sid` is the fallback.
+   * @param fclos callee value; may be undefined or null
+   * @returns the derived name, or undefined when none can be derived
+   */
+  getObj(fclos: any): any {
+    // The recursion below can pass a null `_this`; without this guard the `fclos._this`
+    // read further down throws a TypeError on null.
+    if (fclos === undefined || fclos === null) {
+      return undefined
+    }
+
+    // Drop everything up to and including the first '>.' marker, when present.
+    const afterMarker = (text: any) => {
+      const index = text.indexOf('>.')
+      return index !== -1 ? text.substring(index + 2) : text
+    }
+
+    // Only `sid` is available: return it stripped, without the qid unification step.
+    if (
+      typeof fclos.sid !== 'undefined' &&
+      typeof fclos.qid === 'undefined' &&
+      typeof fclos._this === 'undefined'
+    ) {
+      return afterMarker(fclos.sid)
+    }
+    if (typeof fclos.qid === 'string') {
+      return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(afterMarker(fclos.qid))
+    }
+    if (!(fclos === fclos._this)) {
+      return this.getObj(fclos._this)
+    }
+    if (typeof fclos.sid === 'string') {
+      const result = afterMarker(fclos.sid)
+      if (result) {
+        return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(result)
+      }
+    }
+    return undefined
+  }
+
+}
+
+module.exports = PythonCallchainChecker
diff --git a/src/checker/callgraph/callgraph-checker.ts b/src/checker/callgraph/callgraph-checker.ts
index 5cac34ae..2867b3a0 100644
--- a/src/checker/callgraph/callgraph-checker.ts
+++ b/src/checker/callgraph/callgraph-checker.ts
@@ -1,5 +1,4 @@
// used for dump call graph
-import type { IConfig } from '../../config'
import type TypeRelatedInfoResolver from '../../resolver/common/type-related-info-resolver'
const _ = require('lodash')
@@ -8,9 +7,6 @@ const kitCallgraph = require('../common/checker-kit')
const configCallgraph = require('../../config')
const CheckerCallgraph = require('../common/checker')
const CallgraphOutputStrategyCallgraph = require('../common/output/callgraph-output-strategy')
-
-let ConfigCallgraph: IConfig
-let loggerCallgraph: any
/**
* CallgraphChecker represents calling relationships between procedures.
* CallgraphChecker has nodes and edges.
@@ -37,8 +33,6 @@ class CallgraphChecker extends CheckerCallgraph {
super(mng, 'callgraph')
this.mng = mng
this.kit = kitCallgraph
- loggerCallgraph = kitCallgraph.logger(__filename)
- ConfigCallgraph = this.kit.Config
}
/**
@@ -82,28 +76,47 @@ class CallgraphChecker extends CheckerCallgraph {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void {
- const { fclos, argvalues, ainfo } = info
- const fdecl = fclos.fdef
- if (fclos === undefined || fdecl?.type !== 'FunctionDefinition') {
+ const { fclos, ainfo } = info
+ if (!fclos) {
+ return
+ }
+ const fdecl = fclos.ast?.fdef
+ if (fdecl && fdecl.type !== 'FunctionDefinition') {
return
}
const stack = state.callstack
+ if (!stack) {
+ return
+ }
const to = fclos
- const toAST = fclos && fclos.fdef
- const call_site_node = node
+ const toAST = fclos.ast?.fdef
+ const callSiteNode = node
const from = stack[stack.length - 1] || { name: '<__entry_point__>', sid: '<__entry_point__>', vtype: 'fclos' }
- const fromAST = from.fdef
+ const fromAST = from.ast?.fdef
if (fromAST && fromAST.type !== 'FunctionDefinition' && from.vtype !== 'fclos') {
return
}
const callgraph = (ainfo.callgraph = ainfo.callgraph || new this.kit.Graph())
- const fromNode = callgraph.addNode(this.prettyPrint(from, fromAST, call_site_node), {
- funcDef: fromAST,
- funcSymbol: from,
+
+ // 获取 AST 的 nodehash 和符号值的 UUID
+ const fromASTNodehash = fromAST?._meta?.nodehash || null
+ const fromFuncSymbolUuid = from?.uuid || null
+ const toASTNodehash = toAST?._meta?.nodehash || null
+ const toFuncSymbolUuid = to?.uuid || null
+
+ const fromNode = callgraph.addNode(this.prettyPrint(from, fromAST, callSiteNode), {
+ funcDefNodehash: fromASTNodehash,
+ funcSymbolUuid: fromFuncSymbolUuid,
+ })
+
+ // 存储 callSite 的 nodehash
+ const callSiteNodehash = callSiteNode?._meta?.nodehash || null
+ const toNode = callgraph.addNode(this.prettyPrint(to, toAST, callSiteNode), {
+ funcDefNodehash: toASTNodehash,
+ funcSymbolUuid: toFuncSymbolUuid,
})
- const toNode = callgraph.addNode(this.prettyPrint(to, toAST, call_site_node), { funcDef: toAST, funcSymbol: to })
- callgraph.addEdge(fromNode, toNode, { callSite: call_site_node })
+ callgraph.addEdge(fromNode, toNode, { callSiteNodehash })
}
/**
@@ -116,8 +129,13 @@ class CallgraphChecker extends CheckerCallgraph {
*/
triggerAtEndOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void {
const finding = analyzer.ainfo.callgraph
- finding.type = this.getCheckerId()
- this.mng.newFinding(finding, CallgraphOutputStrategyCallgraph.outputStrategyId)
+ if (finding) {
+ finding.type = this.getCheckerId()
+ // 在 finding 中存储 astManager 和 symbolTable 的引用,供 dumpGraph 使用
+ ;(finding as any).astManager = analyzer.astManager
+ ;(finding as any).symbolTable = analyzer.symbolTable
+ this.mng.newFinding(finding, CallgraphOutputStrategyCallgraph.outputStrategyId)
+ }
}
/**
@@ -129,33 +147,32 @@ class CallgraphChecker extends CheckerCallgraph {
prettyPrint(fclos: any, fdef: any, callSiteNode: any): string {
let ret: string = ''
let name: string
- if (!fdef || !fdef.name || fdef.name === '') {
+ // 临时补丁,防止stc 漏洞uk变化
+ if (!fdef || !fdef.name || fdef.name.includes(' f.id)
- if (fclos) {
- ret = fclos.id
- }
+ let fclosArray = fclos.value
+ if (fclosArray && !Array.isArray(fclosArray)) {
+ fclosArray = Object.entries(fclosArray)
+ }
+ const f = _.find(fclosArray, (f1: any) => f1.sid)
+ if (f) {
+ ret = f.sid
}
} else if (fclos.vtype && fclos.type !== 'MemberAccess') {
// 针对[]byte(xx)场景,fclos是一个symbol value,且fclos.qid是ArrayType这个identifier节点,而非string,因此这里if条件需做限定
if (fclos.name) {
ret = fclos.name
- } else if (
- (typeof fclos.id !== 'string' && fclos.id?.name) ||
- (typeof fclos.sid !== 'string' && fclos.sid?.name)
- ) {
- ret = fclos.id?.name || fclos.sid?.name
+ } else if (typeof fclos.sid !== 'string' && fclos.sid?.name) {
+ ret = fclos.sid?.name
}
let { parent } = fclos
while (parent) {
if (['object', 'modScope', 'fclos', 'symbol'].indexOf(parent.vtype) === -1) break
- name = parent.id || parent.name || parent.sid
+ name = parent.name || parent.sid
if (!name) break
ret = `${name}.${ret}`
parent = parent.parent
@@ -174,15 +191,19 @@ class CallgraphChecker extends CheckerCallgraph {
}
} else {
// pretty print fdef
- name = fdef.name || ''
+ name =
+ fdef.name ||
+ ``
// try to attach namespace
- if (fclos && fclos.__proto__.constructor.name !== 'BVT') {
+ if (fclos && fclos.__proto__.constructor.name !== 'BVTValue') {
if (fclos.vtype === 'class') {
// e.g. javascript function class
name = `new ${name}`
- } else if (fclos.parent?.vtype === 'class' || fclos.parent?.fdef?.type === 'ClassDefinition') {
- const nsDef = fclos.parent.fdef
- const nsName = nsDef?.name || ''
+ } else if (fclos.parent?.vtype === 'class' || fclos.parent?.ast.fdef?.type === 'ClassDefinition') {
+ const nsDef = fclos.parent.ast.fdef
+ const nsName =
+ nsDef?.name ||
+ ``
if (name === '_CTOR_') {
name = `new ${nsName}`
} else {
@@ -202,7 +223,7 @@ class CallgraphChecker extends CheckerCallgraph {
ret = `${ret.slice(0, 500)}...`
}
// attach loc
- if (fdef) {
+ if (fdef && fdef?.loc) {
ret += this.printLoc(fdef)
}
return ret
@@ -219,8 +240,8 @@ class CallgraphChecker extends CheckerCallgraph {
const splits = sourcefile.split('/')
sourcefile = splits[splits.length - 1]
}
- const startLine = ast && ast.loc.start.line
- const endLine = ast && ast.loc.end.line
+ const startLine = ast && ast?.loc?.start?.line
+ const endLine = ast && ast?.loc?.end?.line
return ` \\n[${sourcefile} : ${startLine}_${endLine}]`
}
diff --git a/src/checker/common/full-callgraph-file-entrypoint.ts b/src/checker/common/full-callgraph-file-entrypoint.ts
index 5b40902b..36748110 100644
--- a/src/checker/common/full-callgraph-file-entrypoint.ts
+++ b/src/checker/common/full-callgraph-file-entrypoint.ts
@@ -11,6 +11,7 @@ const options = require('../../config')
const { Graph } = require('../../util/graph')
const logger = require('../../util/logger')(__filename)
const sourceLine = require('../../engine/analyzer/common/source-line')
+const { performanceTracker } = require('../../util/performance-tracker')
/**
*
@@ -23,8 +24,8 @@ function printLoc(ast: any): string {
const splits = sourcefile.split('/')
sourcefile = splits[splits.length - 1]
}
- const startLine = ast && ast.loc.start.line
- const endLine = ast && ast.loc.end.line
+ const startLine = ast && ast?.loc?.start.line
+ const endLine = ast && ast?.loc?.end.line
return ` \\n[${sourcefile} : ${startLine}_${endLine}]`
}
@@ -60,13 +61,16 @@ function prettyPrint(
// pretty print fdef
name = fdef.name || ''
// try to attach namespace
- if (fclos && fclos.__proto__.constructor.name !== 'BVT') {
+ if (fclos && fclos.__proto__.constructor.name !== 'BVTValue') {
if (fclos.vtype === 'class') {
// e.g. javascript function class
name = `new ${name}`
- } else if (fclos.parent?.vtype === 'class' || fclos.parent?.fdef?.type === 'ClassDefinition') {
- const nsDef = fclos.parent.fdef
- const nsName = nsDef?.name || ''
+ } else if (fclos.parent?.vtype === 'class' || fclos.parent?.ast.fdef?.type === 'ClassDefinition') {
+ const nsDef = fclos.parent.ast.fdef
+ let nsName = nsDef?.name || ''
+ if (fclos.parent.qid) {
+ nsName = fclos.parent.qid
+ }
if (name === '_CTOR_') {
name = `new ${nsName}`
} else {
@@ -86,18 +90,33 @@ function prettyPrint(
ret = `${ret.slice(0, 500)}...`
}
// attach loc
- if (fdef) {
+ if (fdef && fdef?.loc) {
ret += printLoc(fdef)
}
return ret
}
+/**
+ * Resolve a callgraph node's funcDef / funcSymbol from its stored nodehash / UUID (a missing node yields undefined)
+ * @param node callgraph node; may be undefined, in which case both results are undefined
+ * @param astManager used via get(nodehash) when the node carries funcDefNodehash; otherwise node.opts.funcDef is returned
+ * @param symbolTable used via get(uuid) when the node carries funcSymbolUuid; otherwise node.opts.funcSymbol is returned
+ * @returns object holding the resolved funcDef and funcSymbol
+ */
+function restoreNodeFromReferences(node: any, astManager?: any, symbolTable?: any): { funcDef: any; funcSymbol: any } {
+ const funcDef =
+ node?.opts?.funcDefNodehash && astManager ? astManager.get(node.opts.funcDefNodehash) : node?.opts?.funcDef
+ const funcSymbol =
+ node?.opts?.funcSymbolUuid && symbolTable ? symbolTable.get(node.opts.funcSymbolUuid) : node?.opts?.funcSymbol
+ return { funcDef, funcSymbol }
+}
+
/**
* generate full callGraph by funcSymbolTable
* @param analyzer
*/
function makeFullCallGraph(analyzer: any): void {
- analyzer.performanceTracker.start(`makeFullCallGraph(BySymbolInterpret)`)
+ performanceTracker.start(`startAnalyze.makeFullCallGraph(BySymbolInterpret)`)
config.loadDefaultRule = false
config.loadExternalRule = false
config.makeAllCG = true
@@ -108,20 +127,24 @@ function makeFullCallGraph(analyzer: any): void {
const backupCheckerManager = analyzer.checkerManager
analyzer.checkerManager = newCheckerManager
analyzer.ainfo.callgraph = analyzer.ainfo.callgraph || new Graph()
- if (analyzer.ainfo.callgraph && Object.keys(analyzer.funcSymbolTable).length > 0) {
+ if (analyzer.ainfo.callgraph && Object.keys(analyzer.topScope.context.funcs).length > 0) {
const alreadyCheckList: any[] = [] // 分析过的callnode一定会出现在nodes中
for (const node of analyzer.ainfo.callgraph.nodes.values()) {
- if (node.opts?.funcSymbol) {
- alreadyCheckList.push(node.opts?.funcSymbol)
+ // 从 UUID 还原 funcSymbol
+ if (node.opts?.funcSymbolUuid) {
+ const funcSymbol = analyzer.symbolTable.get(node.opts.funcSymbolUuid)
+ if (funcSymbol) {
+ alreadyCheckList.push(funcSymbol)
+ }
}
}
let totalCount = 0
- Object.entries(analyzer.funcSymbolTable).forEach(([key, funcSymbol]) => {
+ Object.entries(analyzer.topScope.context.funcs).forEach(([key, funcSymbol]) => {
const funcSymbolAny = funcSymbol as any
if (
!alreadyCheckList.includes(funcSymbolAny) &&
- funcSymbolAny.fdef &&
- funcSymbolAny.fdef.type === 'FunctionDefinition'
+ funcSymbolAny.ast.fdef &&
+ funcSymbolAny.ast.fdef.type === 'FunctionDefinition'
) {
totalCount += 1
}
@@ -131,7 +154,7 @@ function makeFullCallGraph(analyzer: any): void {
let already30Percent = false
let already70Percent = false
logger.info('makeAllCG-start')
- Object.entries(analyzer.funcSymbolTable).forEach(([key, funcSymbol]) => {
+ Object.entries(analyzer.topScope.context.funcs).forEach(([key, funcSymbol]) => {
analyzedCount += 1
if (analyzedCount > totalCount * 0.1 && !already10Percent) {
logger.info('\tmakeAllCG-10%')
@@ -149,15 +172,13 @@ function makeFullCallGraph(analyzer: any): void {
const funcSymbolAny2 = funcSymbol as any
if (
!alreadyCheckList.includes(funcSymbolAny2) &&
- funcSymbolAny2.fdef &&
- funcSymbolAny2.fdef.type === 'FunctionDefinition'
+ funcSymbolAny2.ast.fdef &&
+ funcSymbolAny2.ast.fdef.type === 'FunctionDefinition'
) {
alreadyCheckList.push(funcSymbolAny2)
- const argValues: any[] = []
analyzer.executeCall(
- funcSymbolAny2.fdef,
+ funcSymbolAny2.ast.fdef,
funcSymbolAny2,
- argValues,
analyzer.initState(funcSymbolAny2.parent),
funcSymbolAny2.parent
)
@@ -167,7 +188,7 @@ function makeFullCallGraph(analyzer: any): void {
}
analyzer.checkerManager = backupCheckerManager
config.makeAllCG = false
- analyzer.performanceTracker.end(`makeFullCallGraph(BySymbolInterpret)`)
+ performanceTracker.end(`startAnalyze.makeFullCallGraph(BySymbolInterpret)`)
}
/**
@@ -176,16 +197,34 @@ function makeFullCallGraph(analyzer: any): void {
* @param resolver
*/
function makeFullCallGraphByType(analyzer: any, resolver: TypeRelatedInfoResolver) {
- if (!resolver) {
+ if (!resolver || (resolver.resolveFinish && analyzer?.ainfo?.callgraph)) {
return
}
- analyzer.performanceTracker.start('makeFullCallGraphByType')
+ performanceTracker.start('startAnalyze.makeFullCallGraphByType')
if (!resolver.resolveFinish) {
resolver.resolve(analyzer)
}
+ // Helper function to extract only location and name from AST to reduce memory usage
+ const extractFuncDefInfo = (ast: any): { loc?: any; name?: any; id?: any } | null => {
+ if (!ast) return null
+ return {
+ loc: ast.loc,
+ name: ast.name,
+ id: ast.id, // Store id for functionName access
+ }
+ }
+
+ // Helper function to extract only location from callSite AST to reduce memory usage
+ const extractCallSiteInfo = (callSite: any): { loc?: any } | null => {
+ if (!callSite) return null
+ return {
+ loc: callSite.loc,
+ }
+ }
+
const graph = new Graph()
Object.entries(analyzer.funcSymbolTable).forEach(([, funcSymbol]) => {
const funcSymbolAny = funcSymbol as any
@@ -201,7 +240,10 @@ function makeFullCallGraphByType(analyzer: any, resolver: TypeRelatedInfoResolve
invocation.calleeType,
invocation.fsig
),
- { funcDef: invocation.fromScopeAst, funcSymbol: invocation.fromScope }
+ {
+ funcDef: extractFuncDefInfo(invocation.fromScopeAst),
+ funcSymbol: invocation.fromScope,
+ }
)
const toNode = graph.addNode(
prettyPrint(
@@ -213,31 +255,45 @@ function makeFullCallGraphByType(analyzer: any, resolver: TypeRelatedInfoResolve
invocation.fsig
),
{
- funcDef: invocation.toScopeAst,
+ funcDef: extractFuncDefInfo(invocation.toScopeAst),
funcSymbol: invocation.toScope,
}
)
- graph.addEdge(fromNode, toNode, { callSite: invocation.callSite })
+ graph.addEdge(fromNode, toNode, { callSite: extractCallSiteInfo(invocation.callSite) })
}
}
}
})
analyzer.ainfo.callgraph = graph
- analyzer.performanceTracker.end('makeFullCallGraphByType')
+ performanceTracker.end('startAnalyze.makeFullCallGraphByType')
}
/**
* 从CallGraph中拿取边界作为全func类型的Entrypoint
* @param callGraph
+ * @param analyzer
*/
-function getAllEntryPointsUsingCallGraph(callGraph: any): any[] {
+function getAllEntryPointsUsingCallGraph(callGraph: any, analyzer?: any): any[] {
const entryPoints = {
fclosEntryPoints: new Map(),
}
+ const astManager = analyzer?.astManager
+ const symbolTable = analyzer?.symbolTable
+
for (const f of callGraph.nodes.keys()) {
const thisNode = callGraph.nodes.get(f)
- if (!thisNode.opts?.funcDef) {
+ // 从 nodehash 和 UUID 还原 funcDef 和 funcSymbol
+ const thisNodeFuncDef =
+ thisNode.opts?.funcDefNodehash && astManager
+ ? astManager.get(thisNode.opts.funcDefNodehash)
+ : thisNode.opts?.funcDef
+ const thisNodeFuncSymbol =
+ thisNode.opts?.funcSymbolUuid && symbolTable
+ ? symbolTable.get(thisNode.opts.funcSymbolUuid)
+ : thisNode.opts?.funcSymbol
+
+ if (!thisNodeFuncDef) {
continue
}
let hasCalled = false
@@ -245,21 +301,27 @@ function getAllEntryPointsUsingCallGraph(callGraph: any): any[] {
// 需要准确比较ast上的loc,因为函数符号值由于有new等问题不一定是同一个
const targetNode = callGraph.nodes.get(callGraph.edges.get(ek).targetNodeId)
if (thisNode && targetNode && !callGraph.edges.get(ek)?.sourceNodeId.includes('entry_point')) {
+ // 从 nodehash 还原 targetNode 的 funcDef
+ const targetNodeFuncDef =
+ targetNode.opts?.funcDefNodehash && astManager
+ ? astManager.get(targetNode.opts.funcDefNodehash)
+ : targetNode.opts?.funcDef
+
if (
- targetNode.opts?.funcDef?.loc?.sourcefile &&
- targetNode.opts?.funcDef?.loc?.start?.line &&
- targetNode.opts?.funcDef?.loc?.end?.line &&
- targetNode.opts?.funcDef?.loc?.sourcefile === thisNode.opts?.funcDef?.loc?.sourcefile &&
- targetNode.opts?.funcDef?.loc?.start?.line === thisNode.opts?.funcDef?.loc?.start?.line &&
- targetNode.opts?.funcDef?.loc?.end?.line === thisNode.opts?.funcDef?.loc?.end?.line
+ targetNodeFuncDef?.loc?.sourcefile &&
+ targetNodeFuncDef?.loc?.start?.line &&
+ targetNodeFuncDef?.loc?.end?.line &&
+ targetNodeFuncDef?.loc?.sourcefile === thisNodeFuncDef?.loc?.sourcefile &&
+ targetNodeFuncDef?.loc?.start?.line === thisNodeFuncDef?.loc?.start?.line &&
+ targetNodeFuncDef?.loc?.end?.line === thisNodeFuncDef?.loc?.end?.line
) {
hasCalled = true
break
}
}
}
- if (!hasCalled) {
- entryPoints.fclosEntryPoints.set(thisNode.id, thisNode.opts.funcSymbol)
+ if (!hasCalled && thisNodeFuncSymbol) {
+ entryPoints.fclosEntryPoints.set(thisNode.id, thisNodeFuncSymbol)
}
}
const newEntryPointList: any[] = []
@@ -267,10 +329,10 @@ function getAllEntryPointsUsingCallGraph(callGraph: any): any[] {
const entryPoint = new EntryPoint(constValue.ENGIN_START_FUNCALL)
entryPoint.scopeVal = entry.parent
entryPoint.argValues = []
- entryPoint.functionName = entry.fdef?.id?.name
- entryPoint.filePath = entry.fdef?.loc?.sourcefile?.startsWith(config.maindirPrefix)
- ? entry.fdef?.loc?.sourcefile?.substring(config.maindirPrefix.length)
- : entry.fdef?.loc?.sourcefile
+ entryPoint.functionName = entry.ast.fdef?.id?.name
+ entryPoint.filePath = entry.ast.fdef?.loc?.sourcefile?.startsWith(config.maindirPrefix)
+ ? entry.ast.fdef?.loc?.sourcefile?.substring(config.maindirPrefix.length)
+ : entry.ast.fdef?.loc?.sourcefile
entryPoint.attribute = 'fullCallGraphMade'
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = entry
@@ -281,19 +343,20 @@ function getAllEntryPointsUsingCallGraph(callGraph: any): any[] {
/**
* 若为弱类型脚本语言,则加入所有文件作为EntryPoint
- * @param fileManager
+ * @param analyzer
*/
-function getAllFileEntryPointsUsingFileManager(fileManager: any): any[] {
+function getAllFileEntryPointsUsingFileManager(analyzer: any): any[] {
const entryPoints: any[] = []
if (options.language === 'python' || options.language === 'javascript') {
- if (fileManager) {
- Object.values(fileManager).forEach((file: any) => {
- if (!file.ast || file.ast.type !== 'CompileUnit') return
+ if (analyzer?.fileManager) {
+ Object.values(analyzer?.fileManager).forEach((fileUUid: any) => {
+ const file = analyzer.symbolTable.get(fileUUid)
+ if (!file.ast.node || file.ast.node.type !== 'CompileUnit') return
const entryPoint = new EntryPoint(constValue.ENGIN_START_FILE_BEGIN)
entryPoint.scopeVal = file
entryPoint.argValues = undefined
entryPoint.functionName = undefined
- entryPoint.filePath = file?.ast?.loc?.sourcefile
+ entryPoint.filePath = file?.ast?.node?.loc?.sourcefile
entryPoint.attribute = 'fullfileManagerMade'
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = file
@@ -309,30 +372,45 @@ function getAllFileEntryPointsUsingFileManager(fileManager: any): any[] {
* @param keywords need an array
* @param callGraph
* @param fileManager
+ * @param analyzer
*/
-function getEntryPointsUsingCallGraphByKeyWords(keywords: string[], callGraph: any, fileManager: any): any[] {
+function getEntryPointsUsingCallGraphByKeyWords(
+ keywords: string[],
+ callGraph: any,
+ fileManager: any,
+ analyzer?: any
+): any[] {
const newEntryPointList: any[] = []
if (!callGraph || !keywords || !Array.isArray(keywords)) {
return newEntryPointList
}
+ const astManager = analyzer?.astManager
+ const symbolTable = analyzer?.symbolTable
for (const keyword of keywords) {
const alreadyCalculate: any[] = []
- const nodes = getNodeInCallGraphByKeyword(keyword, callGraph.nodes)
+ const nodes = getNodeInCallGraphByKeyword(keyword, callGraph.nodes, astManager)
for (const node of nodes) {
// const node = getNodeInCallGraphByKeyword(keyword, callGraph.nodes)
if (node) {
- const fclosNodes = getFclosEntryPointsUsingCallGraphByTargetNode(node.id, callGraph, alreadyCalculate)
+ const fclosNodes = getFclosEntryPointsUsingCallGraphByTargetNode(
+ node.id,
+ callGraph,
+ alreadyCalculate,
+ astManager,
+ symbolTable
+ )
if (fclosNodes && Array.isArray(fclosNodes) && fclosNodes.length > 0) {
for (const f of fclosNodes) {
- const entry = f.opts.funcSymbol
+ const { funcSymbol: entry } = restoreNodeFromReferences(f, astManager, symbolTable)
+ if (!entry) continue
const entryPoint = new EntryPoint(constValue.ENGIN_START_FUNCALL)
entryPoint.scopeVal = entry.parent
entryPoint.argValues = []
- entryPoint.functionName = entry.fdef?.id?.name
- entryPoint.filePath = entry.fdef?.loc?.sourcefile?.startsWith(config.maindirPrefix)
- ? entry.fdef?.loc?.sourcefile?.substring(config.maindirPrefix.length)
- : entry.fdef?.loc?.sourcefile
+ entryPoint.functionName = entry.ast.fdef?.id?.name
+ entryPoint.filePath = entry.ast.fdef?.loc?.sourcefile?.startsWith(config.maindirPrefix)
+ ? entry.ast.fdef?.loc?.sourcefile?.substring(config.maindirPrefix.length)
+ : entry.ast.fdef?.loc?.sourcefile
entryPoint.attribute = 'FuncEntryPointByLoc'
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = entry
@@ -344,13 +422,13 @@ function getEntryPointsUsingCallGraphByKeyWords(keywords: string[], callGraph: a
for (const file of Object.values(fileManager)) {
// const file = fileManager[loc.sourcefile]
- const content = sourceLine.getCodeBySourceFile((file as any)?.ast?.loc?.sourcefile)
+ const content = sourceLine.getCodeBySourceFile((file as any)?.ast?.node?.loc?.sourcefile)
if (file && content.includes(keyword)) {
const entryPoint = new EntryPoint(constValue.ENGIN_START_FILE_BEGIN)
entryPoint.scopeVal = file
entryPoint.argValues = undefined
entryPoint.functionName = undefined
- entryPoint.filePath = (file as any)?.ast?.sourcefile || (file as any)?.ast?.loc?.sourcefile
+ entryPoint.filePath = (file as any)?.ast?.node?.sourcefile || (file as any)?.ast?.node?.loc?.sourcefile
entryPoint.attribute = 'FileEntryPointByLoc'
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = file
@@ -366,30 +444,41 @@ function getEntryPointsUsingCallGraphByKeyWords(keywords: string[], callGraph: a
* @param locs need an array
* @param callGraph
* @param fileManager
+ * @param analyzer
*/
-function getEntryPointsUsingCallGraphByLoc(locs: any[], callGraph: any, fileManager: any): any[] {
+function getEntryPointsUsingCallGraphByLoc(locs: any[], callGraph: any, fileManager: any, analyzer?: any): any[] {
const newEntryPointList: any[] = []
if (!callGraph || !locs || !Array.isArray(locs)) {
return newEntryPointList
}
+ const astManager = analyzer?.astManager
+ const symbolTable = analyzer?.symbolTable
+
for (const loc of locs) {
- if (!loc.sourcefile || !loc.start?.line || !loc.end.line) {
+ if (!loc.sourcefile || !loc.start?.line || !loc.end?.line) {
continue
}
const alreadyCalculate: any[] = []
- const node = getNodeInCallGraphByLoc(loc, callGraph.nodes)
+ const node = getNodeInCallGraphByLoc(loc, callGraph.nodes, astManager)
if (node) {
- const fclosNodes = getFclosEntryPointsUsingCallGraphByTargetNode(node.id, callGraph, alreadyCalculate)
+ const fclosNodes = getFclosEntryPointsUsingCallGraphByTargetNode(
+ node.id,
+ callGraph,
+ alreadyCalculate,
+ astManager,
+ symbolTable
+ )
if (fclosNodes && Array.isArray(fclosNodes) && fclosNodes.length > 0) {
for (const f of fclosNodes) {
- const entry = f.opts.funcSymbol
+ const { funcSymbol: entry } = restoreNodeFromReferences(f, astManager, symbolTable)
+ if (!entry) continue
const entryPoint = new EntryPoint(constValue.ENGIN_START_FUNCALL)
entryPoint.scopeVal = entry.parent
entryPoint.argValues = []
- entryPoint.functionName = entry.fdef?.id?.name
- entryPoint.filePath = entry.fdef?.loc?.sourcefile?.startsWith(config.maindirPrefix)
- ? entry.fdef?.loc?.sourcefile?.substring(config.maindirPrefix.length)
- : entry.fdef?.loc?.sourcefile
+ entryPoint.functionName = entry.ast.fdef?.id?.name
+ entryPoint.filePath = entry.ast.fdef?.loc?.sourcefile?.startsWith(config.maindirPrefix)
+ ? entry.ast.fdef?.loc?.sourcefile?.substring(config.maindirPrefix.length)
+ : entry.ast.fdef?.loc?.sourcefile
entryPoint.attribute = 'FuncEntryPointByLoc'
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = entry
@@ -403,7 +492,7 @@ function getEntryPointsUsingCallGraphByLoc(locs: any[], callGraph: any, fileMana
entryPoint.scopeVal = file
entryPoint.argValues = undefined
entryPoint.functionName = undefined
- entryPoint.filePath = (file as any)?.ast?.sourcefile || (file as any)?.ast?.loc?.sourcefile
+ entryPoint.filePath = (file as any)?.ast?.node?.sourcefile || (file as any)?.ast?.node?.loc?.sourcefile
entryPoint.attribute = 'FileEntryPointByLoc'
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = file
@@ -419,11 +508,15 @@ function getEntryPointsUsingCallGraphByLoc(locs: any[], callGraph: any, fileMana
* @param key
* @param callGraph
* @param alreadyCalculate
+ * @param astManager
+ * @param symbolTable
*/
function getFclosEntryPointsUsingCallGraphByTargetNode(
key: any,
callGraph: any,
- alreadyCalculate: any[]
+ alreadyCalculate: any[],
+ astManager?: any,
+ symbolTable?: any
): any[] | null {
if (
!key ||
@@ -444,8 +537,10 @@ function getFclosEntryPointsUsingCallGraphByTargetNode(
continue
}
if (circularDetected.includes(n)) {
- if (callGraph.nodes.get(n)?.opts?.funcDef) {
- res.push(callGraph.nodes.get(n))
+ const node = callGraph.nodes.get(n)
+ const { funcDef } = restoreNodeFromReferences(node, astManager, symbolTable)
+ if (funcDef) {
+ res.push(node)
}
continue
}
@@ -454,8 +549,10 @@ function getFclosEntryPointsUsingCallGraphByTargetNode(
let hasFind = false
for (const ek of callGraph.edges.keys()) {
// 需要准确比较ast上的loc,因为函数符号值由于有new等问题不一定是同一个
- const targetNodeAST = callGraph.nodes.get(callGraph.edges.get(ek).targetNodeId).opts?.funcDef
- const thisNodeAST = callGraph.nodes.get(n).opts?.funcDef
+ const targetNode = callGraph.nodes.get(callGraph.edges.get(ek).targetNodeId)
+ const thisNode = callGraph.nodes.get(n)
+ const { funcDef: targetNodeAST } = restoreNodeFromReferences(targetNode, astManager, symbolTable)
+ const { funcDef: thisNodeAST } = restoreNodeFromReferences(thisNode, astManager, symbolTable)
if (
thisNodeAST &&
targetNodeAST &&
@@ -473,8 +570,10 @@ function getFclosEntryPointsUsingCallGraphByTargetNode(
}
}
if (!hasFind) {
- if (callGraph.nodes.get(n)?.opts?.funcDef) {
- res.push(callGraph.nodes.get(n))
+ const node = callGraph.nodes.get(n)
+ const { funcDef } = restoreNodeFromReferences(node, astManager, symbolTable)
+ if (funcDef) {
+ res.push(node)
}
}
}
@@ -485,8 +584,9 @@ function getFclosEntryPointsUsingCallGraphByTargetNode(
*
* @param loc
* @param nodes
+ * @param astManager
*/
-function getNodeInCallGraphByLoc(loc: any, nodes: any): any {
+function getNodeInCallGraphByLoc(loc: any, nodes: any, astManager?: any): any {
let tempStartLine = -1
let tempEndLine = Number.MAX_VALUE
let tempKey
@@ -495,10 +595,12 @@ function getNodeInCallGraphByLoc(loc: any, nodes: any): any {
}
for (const key of nodes.keys()) {
if (key.includes('\\n[')) {
- const filename = nodes.get(key)?.opts?.funcDef?.loc?.sourcefile
- const startLine = nodes.get(key)?.opts?.funcDef?.loc?.start?.line
- const endLine = nodes.get(key)?.opts?.funcDef?.loc?.end?.line
- if (loc.sourcefile === filename && loc.start.line >= startLine && loc.end.line <= endLine) {
+ const node = nodes.get(key)
+ const { funcDef } = restoreNodeFromReferences(node, astManager)
+ const filename = funcDef?.loc?.sourcefile
+ const startLine = funcDef?.loc?.start?.line
+ const endLine = funcDef?.loc?.end?.line
+ if (loc.sourcefile === filename && loc.start?.line >= startLine && loc.end?.line <= endLine) {
if (startLine > tempStartLine && endLine < tempEndLine) {
tempStartLine = startLine
tempEndLine = endLine
@@ -515,19 +617,21 @@ function getNodeInCallGraphByLoc(loc: any, nodes: any): any {
* 判断函数中是否包含关键字
* @param keyword
* @param nodes
+ * @param astManager
*/
-function getNodeInCallGraphByKeyword(keyword: string, nodes: any): any[] {
+function getNodeInCallGraphByKeyword(keyword: string, nodes: any, astManager?: any): any[] {
const result: any[] = []
if (keyword === '') {
return result
}
for (const key of nodes.keys()) {
if (key.includes('\\n[')) {
- const funcDef = nodes.get(key)?.opts?.funcDef
+ const node = nodes.get(key)
+ const { funcDef } = restoreNodeFromReferences(node, astManager)
if (funcDef) {
const content = sourceLine.getCodeByLocation(funcDef?.loc)
if (content.includes(keyword)) {
- result.push(nodes.get(key))
+ result.push(node)
}
}
}
diff --git a/src/checker/common/output/callchain-output-strategy.ts b/src/checker/common/output/callchain-output-strategy.ts
new file mode 100644
index 00000000..f1da9d4c
--- /dev/null
+++ b/src/checker/common/output/callchain-output-strategy.ts
@@ -0,0 +1,140 @@
+import type { IResultManager } from '../../../engine/analyzer/common/result-manager'
+import type { IConfig } from '../../../config'
+
+const _ = require('lodash')
+const path = require('path')
+const OutputStrategy = require('../../../engine/analyzer/common/output-strategy')
+const Config = require('../../../config')
+const FileUtil = require('../../../util/file-util')
+const logger = require('../../../util/logger')(__filename)
+const { handleException } = require('../../../engine/analyzer/common/exception-handler')
+
+/**
+ * Output strategy for callchain checker
+ * Outputs findings as JSON with entrypoint, sinkInfo, and callstack
+ */
+class CallchainOutputStrategy extends OutputStrategy {
+ static outputStrategyId = 'callchain'
+
+ /**
+ * Set the default report file name (callchain-report.json)
+ */
+ constructor() {
+ super()
+ this.outputFilePath = 'callchain-report.json'
+ }
+
+ /**
+ * Write the callchain findings as a JSON report under Config.reportDir and log the resulting path
+ * @param resultManager - source of findings (read via getFindings()); nothing is written when it is falsy
+ * @param outputFilePath - report file name, joined onto Config.reportDir
+ * @param config - NOTE(review): not read here; the module-level Config is used instead — confirm this is intended
+ * @param printf - only referenced by the commented-out console output below
+ */
+ outputFindings(resultManager: IResultManager, outputFilePath: string, config: IConfig, printf: any): void {
+ let reportFilePath
+ if (resultManager) {
+ const allFindings = resultManager.getFindings()
+ const callchainFindings = allFindings[CallchainOutputStrategy.outputStrategyId]
+ if (callchainFindings) {
+ // if (printf) {
+ // this.outputCallchainResultToConsole(callchainFindings, printf)
+ // }
+ const results = this.buildCallchainJSON(callchainFindings)
+ reportFilePath = path.join(Config.reportDir, outputFilePath)
+ FileUtil.writeJSONfile(reportFilePath, results)
+ logger.info(`callchain report is written to ${reportFilePath}`)
+ }
+ }
+ }
+
+ /**
+ * Print a human-readable summary of the callchain findings through printf
+ * @param callchainFindings - findings to print; a "No callchain findings detected." line is printed when empty or falsy
+ * @param printf - output function, called once per printed line
+ */
+ outputCallchainResultToConsole(callchainFindings: any[], printf: any): void {
+ if (!callchainFindings || callchainFindings.length === 0) {
+ printf('No callchain findings detected.')
+ return
+ }
+ printf(`\nTotal callchain findings: ${callchainFindings.length}\n`)
+ callchainFindings.forEach((finding: any, index: number) => {
+ printf(`\n[${index + 1}] Sink matched: ${finding.sinkRule}`)
+ if (finding.sinkAttribute) {
+ printf(` Attribute: ${finding.sinkAttribute}`)
+ }
+ printf(` Entry point: ${finding.entrypoint?.functionName || 'N/A'}`)
+ printf(` Location: ${finding.sourcefile}:${finding.line}`)
+ if (finding.callstackInfo && finding.callstackInfo.length > 0) {
+ printf(` Call stack depth: ${finding.callstackInfo.length}`)
+ finding.callstackInfo.forEach((frame: any, i: number) => {
+ printf(` [${i}] ${frame.function || 'anonymous'} at ${frame.file || '?'}:${frame.line || '?'}`)
+ })
+ }
+ })
+ }
+
+ /**
+ * Report whether a finding is new: false for a falsy finding or when a stored finding matches on every compared field
+ * @param resultManager - its findings[outputStrategyId] list is scanned; a missing list means the finding is new
+ * @param finding - candidate finding; true is also returned if the comparison throws (the error goes to handleException)
+ */
+ static isNewFinding(resultManager: IResultManager, finding: any): boolean {
+ try {
+ if (!finding) {
+ return false
+ }
+ const category = resultManager?.findings[CallchainOutputStrategy.outputStrategyId]
+ if (!category) return true
+ for (const issue of category) {
+ if (
+ issue.line === finding.line &&
+ issue.node === finding.node &&
+ issue.issuecause === finding.issuecause &&
+ issue.entry_fclos === finding.entry_fclos &&
+ issue.entrypoint?.attribute === finding.entrypoint?.attribute &&
+ issue.entrypoint?.filePath === finding.entrypoint?.filePath &&
+ issue.entrypoint?.functionName === finding.entrypoint?.functionName &&
+ issue.sinkRule === finding.sinkRule
+ ) {
+ return false
+ }
+ }
+ } catch (e) {
+ handleException(
+ e,
+ 'Error: an error occurred in CallchainOutputStrategy.isNewFinding',
+ 'Error: an error occurred in CallchainOutputStrategy.isNewFinding'
+ )
+ }
+ return true
+ }
+
+ /**
+ * Build the report object { version, totalFindings, findings } with entrypoint, sinkInfo, callstack, and callsites per entry
+ * @param callchainFindings - findings to convert; missing fields default to {} (entrypoint, sinkInfo) or [] (callstack, callsites)
+ */
+ buildCallchainJSON(callchainFindings: any[]): any {
+ const findings: any[] = []
+
+ _.values(callchainFindings).forEach((finding: any) => {
+ const entry: any = {
+ entrypoint: finding.entrypoint || {},
+ sinkInfo: finding.sinkInfo || {},
+ callstack: finding.callstackInfo || [],
+ callsites: finding.callsitesInfo || [],
+ }
+
+ findings.push(entry)
+ })
+
+ return {
+ version: '1.0',
+ totalFindings: findings.length,
+ findings,
+ }
+ }
+}
+
+module.exports = CallchainOutputStrategy
diff --git a/src/checker/common/output/callgraph-output-strategy.ts b/src/checker/common/output/callgraph-output-strategy.ts
index 8165bed8..d1533618 100644
--- a/src/checker/common/output/callgraph-output-strategy.ts
+++ b/src/checker/common/output/callgraph-output-strategy.ts
@@ -2,7 +2,6 @@ import type { IResultManager } from '../../../engine/analyzer/common/result-mana
import type { IConfig } from '../../../config'
const path = require('path')
-const fs = require('fs-extra')
const OutputStrategy = require('../../../engine/analyzer/common/output-strategy')
const logger = require('../../../util/logger')(__filename)
const { createWriteStream } = require('fs')
@@ -23,105 +22,101 @@ class CallgraphOutputStrategy extends OutputStrategy {
/**
* 流式写入 CG 内容到文件,避免内存溢出
- * @param cgContent
- * @param filePath
+ * Serializes each node/edge with native JSON.stringify plus a replacer and writes the text through a buffered stream
+ * @param cgContent - call graph content holding the nodes and edges maps
+ * @param filePath - path of the output file
*/
- private writeCgContentToStream(cgContent: { nodes: Record; edges: Record }, filePath: string): void {
- const writeStream = createWriteStream(filePath, { encoding: 'utf8' })
-
- // 流式序列化单个值到流中(应用过滤器:排除 parent,将 undefined 转为 '')
- const writeValue = (value: any): void => {
- if (value === undefined) {
- writeStream.write('""')
- return
- }
- if (value === null) {
- writeStream.write('null')
- return
- }
- if (typeof value === 'string') {
- writeStream.write(JSON.stringify(value))
- return
+ private writeCgContentToStream(
+ cgContent: { nodes: Record; edges: Record },
+ filePath: string
+ ): void {
+ const writeStream = createWriteStream(filePath, { encoding: 'utf8', highWaterMark: 64 * 1024 })
+ const bufferSize = 1024 * 1024 // flush threshold (~1MB), compared against the accumulated string length
+ const chunks: string[] = []
+ let currentSize = 0
+
+ // Write all buffered chunks to the stream in one call, then reset the buffer
+ const flush = (): void => {
+ if (chunks.length > 0) {
+ writeStream.write(chunks.join(''))
+ chunks.length = 0
+ currentSize = 0
}
- if (typeof value === 'number' || typeof value === 'boolean') {
- writeStream.write(String(value))
- return
+ }
+
+ const append = (str: string): void => {
+ chunks.push(str)
+ currentSize += str.length
+ if (currentSize >= bufferSize) {
+ flush()
}
- if (Array.isArray(value)) {
- writeStream.write('[')
- value.forEach((item, index) => {
- if (index > 0) {
- writeStream.write(',')
- }
- writeValue(item)
- })
- writeStream.write(']')
- return
+ }
+
+ // Replacer for JSON.stringify: drops the parent property and turns undefined into an empty string
+ const replacer = (key: string, value: any): any => {
+ // Drop the parent property
+ if (key === 'parent') {
+ return undefined
}
- if (typeof value === 'object') {
- writeStream.write('{')
- let first = true
- for (const [key, val] of Object.entries(value)) {
- // 排除 parent 属性
- if (key === 'parent') {
- continue
- }
- if (!first) {
- writeStream.write(',')
- }
- first = false
- writeStream.write(JSON.stringify(key))
- writeStream.write(':')
- // 将 undefined 转为 ''
- writeValue(val === undefined ? '' : val)
- }
- writeStream.write('}')
- return
+ // Turn undefined into an empty string
+ if (value === undefined) {
+ return ''
}
- writeStream.write('""')
+ return value
}
// 写入开始
- writeStream.write('{')
+ append('{')
- // 写入 nodes
- writeStream.write('"nodes":{')
+ // Write nodes: each node is serialized on its own with native JSON.stringify
+ append('"nodes":{')
const nodeKeys = Object.keys(cgContent.nodes)
- nodeKeys.forEach((key, index) => {
- if (index > 0) {
- writeStream.write(',')
+ if (nodeKeys.length > 0) {
+ for (let i = 0; i < nodeKeys.length; i++) {
+ if (i > 0) {
+ append(',')
+ }
+ const key = nodeKeys[i]
+ const nodeValue = cgContent.nodes[key]
+ // Serialize the node with native JSON.stringify and the replacer above
+ const serializedNode = JSON.stringify(nodeValue, replacer)
+ append(`${JSON.stringify(key)}:${serializedNode}`)
}
- writeStream.write(JSON.stringify(key))
- writeStream.write(':')
- writeValue(cgContent.nodes[key])
- })
- writeStream.write('}')
+ }
+ append('}')
- // 写入 edges
- writeStream.write(',"edges":{')
+ // Write edges: each edge is serialized on its own with native JSON.stringify
+ append(',"edges":{')
const edgeKeys = Object.keys(cgContent.edges)
- edgeKeys.forEach((key, index) => {
- if (index > 0) {
- writeStream.write(',')
+ if (edgeKeys.length > 0) {
+ for (let i = 0; i < edgeKeys.length; i++) {
+ if (i > 0) {
+ append(',')
+ }
+ const key = edgeKeys[i]
+ const edgeValue = cgContent.edges[key]
+ // Serialize the edge with native JSON.stringify and the replacer above
+ const serializedEdge = JSON.stringify(edgeValue, replacer)
+ append(`${JSON.stringify(key)}:${serializedEdge}`)
}
- writeStream.write(JSON.stringify(key))
- writeStream.write(':')
- writeValue(cgContent.edges[key])
- })
- writeStream.write('}')
+ }
+ append('}')
// 写入结束
- writeStream.write('}')
+ append('}')
+
+ // Flush whatever is still buffered, then close the stream
+ flush()
writeStream.end()
}
/**
* output callgraph findings
*
- * @param resultManager
- * @param outputFilePath
- * @param config
- * @param printf
+ * @param resultManager - 结果管理器
+ * @param outputFilePath - 输出文件路径
+ * @param config - 配置对象
+ * @param printf - 打印函数(未使用)
*/
outputFindings(resultManager: IResultManager, outputFilePath: string, config: IConfig, printf: any): void {
const allFindings = resultManager.getFindings()
@@ -132,7 +127,10 @@ class CallgraphOutputStrategy extends OutputStrategy {
if (config.dumpCG || config.dumpAllCG) {
const callgraph = findings
if (Array.isArray(callgraph) && callgraph.length > 0) {
- const cgContent = callgraph[0].dumpGraph()
+ // 从 finding 中获取 astManager 和 symbolTable(在 triggerAtEndOfAnalyze 中已设置)
+ const astManager = (callgraph[0] as any).astManager
+ const symbolTable = (callgraph[0] as any).symbolTable
+ const cgContent = callgraph[0].dumpGraph(astManager, symbolTable)
if (cgContent) {
const cgFilePath = path.join(config.reportDir, outputFilePath)
diff --git a/src/checker/common/output/taint-output-strategy.ts b/src/checker/common/output/taint-output-strategy.ts
index 3e95de66..4666327b 100644
--- a/src/checker/common/output/taint-output-strategy.ts
+++ b/src/checker/common/output/taint-output-strategy.ts
@@ -9,6 +9,7 @@ const OutputStrategy = require('../../../engine/analyzer/common/output-strategy'
const Config = require('../../../config')
const FileUtil = require('../../../util/file-util')
const TaintFindingUtil = require('../../taint/common-kit/taint-finding-util')
+const { getOutputTrace } = require('../../taint/common-kit/taint-trace-output')
const SourceLine = require('../../../engine/analyzer/common/source-line')
const FindingUtil = require('../../../util/finding-util')
const logger = require('../../../util/logger')(__filename)
@@ -23,6 +24,42 @@ const {
const AstUtil = require('../../../util/ast-util')
const { handleException } = require('../../../engine/analyzer/common/exception-handler')
+/**
+ * 比较单个 trace item 是否相等(file、line、tag、affectedNodeName)
+ */
+function isTraceItemEqual(item1: any, item2: any): boolean {
+ if (item1?.file !== item2?.file) return false
+ const line1 = item1?.line
+ const line2 = item2?.line
+ if (Array.isArray(line1) && Array.isArray(line2)) {
+ if (!_.isEqual(line1, line2)) return false
+ } else if (line1 !== line2) {
+ return false
+ }
+ if (item1?.tag !== item2?.tag) return false
+ if (item1?.affectedNodeName !== item2?.affectedNodeName) return false
+ return true
+}
+
+/**
+ * 比较两个 trace 数组是否相等
+ * 如果大小一样,且每一项的 file、line、tag、affectedNodeName 都一样,则返回 true
+ * @param trace1
+ * @param trace2
+ */
+function isTraceEqual(trace1: any[] | undefined, trace2: any[] | undefined): boolean {
+ if (!Array.isArray(trace1) || !Array.isArray(trace2)) {
+ return false
+ }
+ if (trace1.length !== trace2.length) {
+ return false
+ }
+ for (let i = 0; i < trace1.length; i++) {
+ if (!isTraceItemEqual(trace1[i], trace2[i])) return false
+ }
+ return true
+}
+
/**
*
*/
@@ -51,7 +88,9 @@ class TaintOutputStrategy extends OutputStrategy {
const taintFindings = allFindings[TaintOutputStrategy.outputStrategyId]
let callgraphFindings
if (taintFindings) {
- TaintFindingUtil.outputCheckerResultToConsole(taintFindings, printf)
+ if (printf) {
+ TaintFindingUtil.outputCheckerResultToConsole(taintFindings, printf)
+ }
callgraphFindings = allFindings[CallgraphOutputStrategy.outputStrategyId]
const results = this.getTaintFlowAsSarif(taintFindings, callgraphFindings)
reportFilePath = path.join(Config.reportDir, outputFilePath)
@@ -80,10 +119,25 @@ class TaintOutputStrategy extends OutputStrategy {
issue.entrypoint.attribute === finding.entrypoint.attribute
) {
if (issue.argNode && finding.argNode) {
- if (_.isEqual(issue.argNode.trace, finding.argNode.trace)) {
+ if (isTraceEqual(issue.argNode.taint.getFirstTrace(), finding.argNode.taint.getFirstTrace())) {
return false
}
- } else if (_.isEqual(issue.trace, finding.trace)) {
+ } else if (isTraceEqual(issue.trace, finding.trace)) {
+ return false
+ } else if (isTraceEqual(getOutputTrace(issue), getOutputTrace(finding))) {
+ // callstack-only output may collapse distinct internal traces into the same
+ // user-visible chain; suppress duplicate visible findings in that mode.
+ return false
+ } else if (
+ finding.trace && finding.trace.length === 2 &&
+ finding.trace[0]?.tag === 'SOURCE: ' && finding.trace[1]?.tag === 'SINK: ' &&
+ issue.trace && issue.trace.length > 2 &&
+ issue.trace[0]?.tag === 'SOURCE: ' &&
+ isTraceItemEqual(finding.trace[0], issue.trace[0]) &&
+ isTraceItemEqual(finding.trace[1], issue.trace[issue.trace.length - 1])
+ ) {
+ // TaintRecord._clone 拷贝 trace 数组导致部分 finding 的 trace 退化为仅 SOURCE+SINK(len=2),
+ // 当已有同 SOURCE 且同 SINK 的更长 trace finding 时,跳过退化 finding。
return false
}
}
@@ -106,9 +160,10 @@ class TaintOutputStrategy extends OutputStrategy {
getTaintFlowAsSarif(taintFindings: TaintFinding[], callgraphFindings: any): any {
const results: any[] = []
_.values(taintFindings).forEach((finding: TaintFinding) => {
+ const outputTrace = getOutputTrace(finding)
// prepare trace
const locations: any[] = []
- finding.trace?.forEach((item: any) => {
+ outputTrace?.forEach((item: any) => {
const affectedNodeName = item?.affectedNodeName
if (item.node) {
const snippetText = SourceLine.formatSingleTrace(item)
@@ -147,7 +202,7 @@ class TaintOutputStrategy extends OutputStrategy {
finding.node?._meta?.nodehash
)
- const callstackElements = prepareCallstackElements(finding.callstack)
+ const callstackElements = prepareCallstackElements(finding.callstack, finding.node)
results.push(
prepareResult(
@@ -185,16 +240,18 @@ class TaintOutputStrategy extends OutputStrategy {
const res: any = {}
const { id, opts } = node
res.id = id
- const funcDef = opts?.funcDef
+ // 从 nodehash 还原 funcDef
+ let funcDef = opts?.funcDef
+ if (opts?.funcDefNodehash && (callgraph as any).astManager) {
+ funcDef = (callgraph as any).astManager.get(opts.funcDefNodehash)
+ }
if (funcDef) {
res.location = prepareLocation(
- funcDef.loc.start.line,
- funcDef.loc.start.column,
- funcDef.loc.end.line,
- funcDef.loc.end.column,
- funcDef.loc.sourcefile,
- '',
- funcDef._meta?.nodehash || ''
+ funcDef.loc.start?.line,
+ funcDef.loc.start?.column,
+ funcDef.loc.end?.line,
+ funcDef.loc.end?.column,
+ funcDef.loc.sourcefile
)
}
return res
@@ -202,16 +259,18 @@ class TaintOutputStrategy extends OutputStrategy {
edges: callgraph.getEdgesAsArray().map((node: any) => {
const res: any = {}
const { id, sourceNodeId, targetNodeId, opts } = node
- const callSite = opts?.callSite
+ // 从 callSiteNodehash 还原 callSite
+ let callSite = opts?.callSite
+ if (opts?.callSiteNodehash && (callgraph as any).astManager) {
+ callSite = (callgraph as any).astManager.get(opts.callSiteNodehash)
+ }
if (callSite?.loc) {
res.location = prepareLocation(
- callSite.loc.start.line,
- callSite.loc.start.column,
- callSite.loc.end.line,
- callSite.loc.end.column,
- callSite.loc.sourcefile,
- '',
- callSite._meta?.nodehash || ''
+ callSite.loc.start?.line,
+ callSite.loc.start?.column,
+ callSite.loc.end?.line,
+ callSite.loc.end?.column,
+ callSite.loc.sourcefile
)
}
res.id = id
diff --git a/src/checker/common/ql-uast-convert/converter.ts b/src/checker/common/ql-uast-convert/converter.ts
index 42bb445c..1d617745 100644
--- a/src/checker/common/ql-uast-convert/converter.ts
+++ b/src/checker/common/ql-uast-convert/converter.ts
@@ -106,7 +106,7 @@ function traverseAndCollectNodes(
): void {
if (!node) return
const currentNodePath = buildPath(node)
- if (node.loc && node.loc.start.line <= targetEndLine && node.loc.end.line >= targetStartLine) {
+ if (node.loc && node.loc.start?.line <= targetEndLine && node.loc.end?.line >= targetStartLine) {
collectedNodes.push({ node, level, path: currentNodePath })
}
for (const key in node) {
@@ -119,7 +119,6 @@ function traverseAndCollectNodes(
'type',
'ast',
'loc',
- 'sort',
'_tags',
'uninit',
'callnode',
@@ -163,8 +162,8 @@ function isDirectParent(childPath: any[], potentialParentPath: any[]): boolean {
* @param flag
*/
function findClosestNode(ast: any, loc: any, flag: string): any {
- const targetStartLine = loc.start.line
- const targetEndLine = loc.end.line
+ const targetStartLine = loc.start?.line
+ const targetEndLine = loc.end?.line
let closestNode: any = null
let closestNodeLevel = -1
const collectedNodes: any[] = []
@@ -177,8 +176,8 @@ function findClosestNode(ast: any, loc: any, flag: string): any {
!collectedNodes.some(
({ node: otherNode, path: otherPath }) =>
otherNode !== node &&
- otherNode.loc.start.line <= targetEndLine &&
- otherNode.loc.end.line >= targetStartLine &&
+ otherNode.loc.start?.line <= targetEndLine &&
+ otherNode.loc.end?.line >= targetStartLine &&
isDirectParent(otherPath, path)
))
) {
@@ -188,10 +187,10 @@ function findClosestNode(ast: any, loc: any, flag: string): any {
})
if (flag === 'source') {
closestNode._meta.isSource = true
- closestNode._meta.sourcePos = `${loc.filename}:${loc.start.line}:${loc.start.column}:${loc.end.line}:${loc.end.column}`
+ closestNode._meta.sourcePos = `${loc.filename}:${loc.start?.line}:${loc.start?.column}:${loc.end?.line}:${loc.end?.column}`
} else if (flag === 'sink') {
closestNode._meta.isSink = true
- closestNode._meta.sinkPos = `${loc.filename}:${loc.start.line}:${loc.start.column}:${loc.end.line}:${loc.end.column}`
+ closestNode._meta.sinkPos = `${loc.filename}:${loc.start?.line}:${loc.start?.column}:${loc.end?.line}:${loc.end?.column}`
}
return closestNode
}
@@ -258,7 +257,7 @@ function introduceFlowConfig(options: any, ast: any, filename: string): void {
}
}
-// filemanager = {filename : scope(filescope) }
+// filemanager = {filename : scope(filescope).uuid }
// 从source的文件出发
/**
*
@@ -274,10 +273,11 @@ function calcEntryPointAndRun(options: any, fileManager: any, analyzer: any): vo
for (const filename in fileManager) {
for (const sourcefile in options.FlowConfig.sourcefiles) {
if (filename.endsWith(sourcefile)) {
- const filescope = fileManager[filename]
+ const fileUuid = fileManager[filename]
+ const filescope = analyzer.symbolTable.get(fileUuid)
let entryPoints = AstUtilConverter.satisfy(
filescope,
- (n: any) => n.vtype === 'fclos' && n.ast,
+ (n: any) => n.vtype === 'fclos' && n.ast.node,
null,
null,
true
@@ -287,20 +287,20 @@ function calcEntryPointAndRun(options: any, fileManager: any, analyzer: any): vo
return
}
if (Array.isArray(entryPoints)) {
- entryPoints = _.uniqBy(entryPoints, (value: any) => value.fdef)
+ entryPoints = _.uniqBy(entryPoints, (value: any) => value.ast.fdef)
} else {
entryPoints = [entryPoints]
}
const state = analyzer.initState(filescope)
entryPoints.forEach((main: any) => {
- const nd = AstUtilConverter.satisfy(main.ast, (n: any) => n?._meta?.isSource === true)
+ const nd = AstUtilConverter.satisfy(main.ast?.node, (n: any) => n?._meta?.isSource === true)
if (nd) {
const argValues: any[] = []
- for (const key in main?.ast?.parameters) {
- argValues.push(analyzer.processInstruction(filescope, main.ast.parameters[key], state))
+ for (const key in main?.ast?.node?.parameters) {
+ argValues.push(analyzer.processInstruction(filescope, main.ast.node.parameters[key], state))
}
- logger.info(`entryPoint ${main?.ast?.loc?.sourcefile}:${main.id}`)
- analyzer.executeCall(main.ast, main, argValues, state, filescope)
+ logger.info(`entryPoint ${main?.ast?.node?.loc?.sourcefile}:${main.id}`)
+ analyzer.executeCall(main.ast?.node, main, state, filescope, { callArgs: { args: argValues.map((v, i) => ({ index: i, value: v, kind: 'positional' as const })) } })
}
})
}
diff --git a/src/checker/common/rules-basic-handler.ts b/src/checker/common/rules-basic-handler.ts
index 86d3d69c..a0ae42e7 100644
--- a/src/checker/common/rules-basic-handler.ts
+++ b/src/checker/common/rules-basic-handler.ts
@@ -1,4 +1,10 @@
import type { TaintFinding } from '../../engine/analyzer/common/common-types'
+import {
+ getLegacyArgValues,
+ getCallArgsFromInfo,
+ getBoundCallFromInfo,
+ type CallInfo,
+} from '../../engine/analyzer/common/call-args'
const _ = require('lodash')
const config = require('../../config')
@@ -7,13 +13,71 @@ const { handleException } = require('../../engine/analyzer/common/exception-hand
const logger = require('../../util/logger')(__filename)
interface Rule {
+ selectors?: Array<{ type?: string; index?: number | '*'; name?: string }>
args?: (string | number)[]
+ positions?: (string | number)[]
+ paramNames?: string[]
+ keywordNames?: string[]
+ includeReceiver?: boolean
[key: string]: any
}
+/**
+ * 将 rule 中的多种选择器格式统一为 { type, index?, name? } 数组
+ */
+function normalizeSelectors(
+ rule: Rule
+): Array<{ type: 'position' | 'keyword' | 'all'; index?: number; name?: string }> {
+ const selectors: Array<{ type: 'position' | 'keyword' | 'all'; index?: number; name?: string }> = []
+
+ if (Array.isArray(rule.selectors)) {
+ for (const selector of rule.selectors) {
+ if (selector?.type === 'position' && selector.index === '*') {
+ selectors.push({ type: 'all' })
+ } else if (selector?.type === 'position' && Number.isInteger(selector.index)) {
+ selectors.push({ type: 'position', index: selector.index as number })
+ } else if (selector?.type === 'keyword' && typeof selector.name === 'string' && selector.name !== '') {
+ selectors.push({ type: 'keyword', name: selector.name })
+ }
+ }
+ }
+
+ const positions = Array.isArray(rule.positions) ? rule.positions : Array.isArray(rule.args) ? rule.args : []
+ for (const item of positions) {
+ if (item === '*') {
+ selectors.push({ type: 'all' })
+ continue
+ }
+ const parsed = parseInt(String(item), 10)
+ if (!Number.isNaN(parsed)) {
+ selectors.push({ type: 'position', index: parsed })
+ }
+ }
+
+ if (Array.isArray(rule.keywordNames)) {
+ for (const item of rule.keywordNames) {
+ if (typeof item === 'string' && item !== '') {
+ selectors.push({ type: 'keyword', name: item })
+ }
+ }
+ }
+
+ if (rule.includeReceiver === true) {
+ selectors.push({ type: 'position', index: -1 })
+ }
+
+ return selectors
+}
+
let rules: any[]
let preprocessReady: boolean = false
+function normalizeTraceStrategy(strategy: any): string | undefined {
+ if (strategy === 'folded') return 'callstack-only'
+ if (strategy === 'callstack-only' || strategy === 'full') return strategy
+ return undefined
+}
+
/**
*
* @param ruleConfigPath
@@ -43,42 +107,108 @@ function getRules(ruleConfigPath: string): any[] {
/**
*
- * @param argvalues
+ * @param callInfo
* @param fclos
* @param rule
*/
-function prepareArgs(argvalues: any[], fclos: any, rule: Rule): any[] {
- let { args } = rule
- let res = argvalues.concat()
- args = (args || []).map((item: string | number) => {
- if (item !== '*') {
- return parseInt(String(item))
- }
- return item
- })
- if (!args.some((v: string | number) => v === '*')) {
- args = args.filter((v: string | number) => typeof v === 'number')
- res = argvalues.filter((value: any, index: number) => {
- return (args as number[]).indexOf(index) !== -1
- })
- }
-
- // check whether receiver is tainted
- if (args.some((v: string | number) => v === -1)) {
- res.push(fclos.getThis())
+function prepareArgs(callInfo: CallInfo | undefined, fclos: any, rule: Rule): any[] {
+ const res: any[] = []
+ const callArgs = getCallArgsFromInfo(callInfo)
+ const boundCall = getBoundCallFromInfo(callInfo)
+ const legacyArgvalues = getLegacyArgValues(callInfo)
+ const selectors = normalizeSelectors(rule)
+ const paramNames = Array.isArray(rule.paramNames) ? rule.paramNames.filter((item: string) => typeof item === 'string') : []
+ const explicitArgs =
+ callArgs?.args && Array.isArray(callArgs.args)
+ ? callArgs.args
+ : legacyArgvalues.map((value: any, index: number) => ({ index, value }))
+
+ const appendResult = (value: any) => {
+ if (typeof value === 'undefined') return
+ if (!res.includes(value)) {
+ res.push(value)
+ }
+ }
+
+ for (const selector of selectors) {
+ if (selector.type === 'all') {
+ explicitArgs.forEach((arg: any) => appendResult(arg.value))
+ continue
+ }
+ if (selector.type === 'position') {
+ if (selector.index === -1) {
+ appendResult(callArgs?.receiver || fclos?.getThisObj?.())
+ } else if (typeof selector.index === 'number' && selector.index >= 0) {
+ explicitArgs.filter((arg: any) => arg.index === selector.index).forEach((arg: any) => appendResult(arg.value))
+ }
+ continue
+ }
+ if (selector.type === 'keyword') {
+ explicitArgs
+ .filter((arg: any) => arg.name && arg.name === selector.name)
+ .forEach((arg: any) => appendResult(arg.value))
+ }
}
+
+ // 兼容路径:通过形参名匹配
+ if (paramNames.length > 0 && boundCall?.params?.length) {
+ boundCall.params
+ .filter((param: any) => paramNames.includes(param.name) && param.provided)
+ .forEach((param: any) => appendResult(param.value))
+ }
+
+ if (paramNames.includes('self') || paramNames.includes('cls')) {
+ appendResult(callArgs?.receiver || fclos?.getThisObj?.())
+ }
+
return res
}
+/**
+ * prepare args by type
+ * @param callInfo
+ * @param fclos
+ * @param rule
+ */
+function prepareArgsByType(callInfo: CallInfo | undefined, fclos: any, rule: Rule): any[] {
+ const resultArray: any[] = []
+ const argvalues = getLegacyArgValues(callInfo)
+
+ if (!Array.isArray(argvalues) || !rule || !Array.isArray(rule.argTypes)) {
+ return resultArray
+ }
+ const { argTypes } = rule
+ for (const argvalue of argvalues) {
+ if (!argvalue.rtype || !argvalue.rtype.definiteType || argvalue.rtype.vagueType) {
+ continue
+ }
+ for (const argType of argTypes) {
+ if (argvalue.rtype.definiteType.name === argType || argvalue.rtype.definiteType.name.endsWith(`.${argType}`)) {
+ resultArray.push(argvalue)
+ break
+ }
+ }
+ }
+
+ return resultArray
+}
+
/**
*
*/
function initRules(): void {
- const configPath = require.resolve('../../config')
- logger.info(`rules-basic-handler [CONFIG] Loaded from: ${configPath}`)
-
if (config.ruleConfigFile && config.ruleConfigFile !== '') {
rules = FileUtil.loadJSONfile(FileUtil.getAbsolutePath(config.ruleConfigFile))
+ // Extract taint trace output strategy from ruleConfig
+ if (Array.isArray(rules)) {
+ for (const rule of rules) {
+ const traceStrategy = normalizeTraceStrategy(rule.outputAtTaint?.traceStrategy)
+ if (traceStrategy) {
+ config.taintTraceOutputStrategy = traceStrategy
+ break
+ }
+ }
+ }
} else {
logger.info('Attention: no ruleConfig found')
}
@@ -181,7 +311,7 @@ function getFinding(type: string, description: string, node: any, argNode?: any)
type,
desc: description,
node,
- line: node.loc.start.line,
+ line: node.loc.start?.line,
}
if (argNode) {
finding.argNode = argNode
@@ -200,6 +330,7 @@ module.exports = {
setPreprocessReady,
getPreprocessReady,
prepareArgs,
+ prepareArgsByType,
matchPackageValueSink,
getFinding,
}
diff --git a/src/checker/sanitizer/sanitizer-checker.ts b/src/checker/sanitizer/sanitizer-checker.ts
index 226f446a..35805fae 100644
--- a/src/checker/sanitizer/sanitizer-checker.ts
+++ b/src/checker/sanitizer/sanitizer-checker.ts
@@ -1,3 +1,5 @@
+import type { CallInfo } from '../../engine/analyzer/common/call-args'
+
const _ = require('lodash')
const BasicRuleHandler = require('../common/rules-basic-handler')
const SanitizerTag = require('../common/value/sanitizer-tag')
@@ -32,6 +34,10 @@ const callstackSanitizers = new Set()
*
*/
class SanitizerChecker extends Checker {
+ static sanitizerMap: Map | undefined = undefined
+
+ static matchSanitizerResultMap = new Map()
+
/**
*
* @param mng
@@ -71,7 +77,7 @@ class SanitizerChecker extends Checker {
* @param info
*/
triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void {
- const { fclos, ret, argvalues } = info
+ const { fclos, ret, callInfo } = info
const sanitizers = SanitizerChecker.findAllSanitizers()
if (sanitizers) {
SanitizerChecker.checkAddOrDeleteFunctionCallSanitizer(
@@ -79,7 +85,7 @@ class SanitizerChecker extends Checker {
node,
fclos,
ret,
- argvalues,
+ callInfo,
scope,
info?.callstack
)
@@ -96,7 +102,7 @@ class SanitizerChecker extends Checker {
* @param info
*/
triggerAtNewExprAfter(analyzer: any, scope: any, node: any, state: any, info: any): void {
- const { fclos, ret, argvalues } = info
+ const { fclos, ret, callInfo } = info
const sanitizers = SanitizerChecker.findAllSanitizers()
if (sanitizers) {
SanitizerChecker.checkAddOrDeleteFunctionCallSanitizer(
@@ -104,7 +110,7 @@ class SanitizerChecker extends Checker {
node,
fclos,
ret,
- argvalues,
+ callInfo,
scope,
info?.callstack
)
@@ -140,7 +146,7 @@ class SanitizerChecker extends Checker {
* @param node
* @param fclos
* @param ret
- * @param argvalues
+ * @param callInfo
* @param scope
* @param callstack
*/
@@ -149,7 +155,7 @@ class SanitizerChecker extends Checker {
node: any,
fclos: any,
ret: any,
- argvalues: any[],
+ callInfo: CallInfo,
scope: any,
callstack: any
): void {
@@ -157,7 +163,7 @@ class SanitizerChecker extends Checker {
return
}
- const matchedSanitizers = SanitizerChecker.findMatchedSanitizerOfFunctionCall(sanitizers, node, fclos, scope)
+ const matchedSanitizers = SanitizerChecker.findMatchedSanitizerOfFunctionCall(sanitizers, node, fclos, scope, callInfo)
if (!matchedSanitizers) {
return
}
@@ -173,7 +179,7 @@ class SanitizerChecker extends Checker {
}
break
case SANITIZER.SANITIZER_SCENARIO.VALIDATE_BY_FUNCTIONCALL:
- const args = BasicRuleHandler.prepareArgs(argvalues, fclos, matchedSanitizer)
+ const args = BasicRuleHandler.prepareArgs(callInfo, fclos, matchedSanitizer)
if (args) {
for (const arg of args) {
SanitizerChecker.addSanitizerInSymbolValue(matchedSanitizer, node, arg, callstack)
@@ -296,19 +302,36 @@ class SanitizerChecker extends Checker {
}
/**
- * find all sanitizers from rule
- * @returns {*}
+ * load and store all sanitizers from rule
*/
- static findAllSanitizers(): any[] {
- const sanitizers: any[] = []
+ static loadAndStoreAllSanitizersFromRule() {
+ if (!BasicRuleHandler.getPreprocessReady() || SanitizerChecker.sanitizerMap) {
+ return
+ }
+ SanitizerChecker.sanitizerMap = new Map()
if (Array.isArray(BasicRuleHandler.getRules()) && BasicRuleHandler.getRules().length > 0) {
for (const rule of BasicRuleHandler.getRules()) {
if (Array.isArray(rule.sanitizers)) {
- sanitizers.push(...rule.sanitizers)
+ for (const sanitizer of rule.sanitizers) {
+ SanitizerChecker.sanitizerMap.set(sanitizer.id, sanitizer)
+ }
}
}
}
- return sanitizers
+ }
+
+ /**
+ * find all sanitizers from rule
+ * @returns {*}
+ */
+ static findAllSanitizers(): any[] {
+ if (!SanitizerChecker.sanitizerMap) {
+ SanitizerChecker.loadAndStoreAllSanitizersFromRule()
+ }
+ if (!SanitizerChecker.sanitizerMap) {
+ return []
+ }
+ return Array.from(SanitizerChecker.sanitizerMap.values())
}
/**
@@ -322,17 +345,19 @@ class SanitizerChecker extends Checker {
return result
}
- if (Array.isArray(BasicRuleHandler.getRules()) && BasicRuleHandler.getRules().length > 0) {
- for (const rule of BasicRuleHandler.getRules()) {
- if (Array.isArray(rule.sanitizers)) {
- for (const sanitizer of rule.sanitizers) {
- if (sanitizerIds.includes(sanitizer.id)) {
- result.push(sanitizer)
- }
- }
- }
+ if (!SanitizerChecker.sanitizerMap) {
+ SanitizerChecker.loadAndStoreAllSanitizersFromRule()
+ }
+ if (!SanitizerChecker.sanitizerMap) {
+ return []
+ }
+
+ for (const sanitizerId of sanitizerIds) {
+ if (SanitizerChecker.sanitizerMap.has(sanitizerId)) {
+ result.push(SanitizerChecker.sanitizerMap.get(sanitizerId))
}
}
+
return result
}
@@ -374,23 +399,23 @@ class SanitizerChecker extends Checker {
for (const obj of sanitizerTag.callstack) {
const callstackElement = new SanitizerCallstackElement()
callstackElement.id = index
- if (obj.ast?.loc?.sourcefile) {
- callstackElement.fileName = shortenSourceFile(obj.ast?.loc?.sourcefile, Config.maindir_prefix)
+ if (obj.ast?.node?.loc?.sourcefile) {
+ callstackElement.fileName = shortenSourceFile(obj.ast?.node?.loc?.sourcefile, Config.maindir_prefix)
}
- if (obj.ast?.loc?.start?.line) {
- callstackElement.beginLine = obj.ast?.loc?.start?.line
+ if (obj.ast?.node?.loc?.start?.line) {
+ callstackElement.beginLine = obj.ast?.node?.loc?.start?.line
}
- if (obj.ast?.loc?.end?.line) {
- callstackElement.endLine = obj.ast?.loc?.end?.line
+ if (obj.ast?.node?.loc?.end?.line) {
+ callstackElement.endLine = obj.ast?.node?.loc?.end?.line
}
- if (obj.ast?.loc?.start?.column) {
- callstackElement.beginColumn = obj.ast?.loc?.start?.column
+ if (obj.ast?.node?.loc?.start?.column) {
+ callstackElement.beginColumn = obj.ast?.node?.loc?.start?.column
}
- if (obj.ast?.loc?.end?.column) {
- callstackElement.endColumn = obj.ast?.loc?.end?.column
+ if (obj.ast?.node?.loc?.end?.column) {
+ callstackElement.endColumn = obj.ast?.node?.loc?.end?.column
}
- if (obj.ast) {
- callstackElement.codeSnippet = prettyPrint(obj.fdef ? obj.fdef : obj.ast)
+ if (obj.ast.node) {
+ callstackElement.codeSnippet = prettyPrint(obj.ast.fdef ? obj.ast.fdef : obj.ast.node)
}
callstackElements.push(callstackElement)
index += 1
@@ -412,7 +437,15 @@ class SanitizerChecker extends Checker {
* @param scope
* @returns {*[]}
*/
- static findMatchedSanitizerOfFunctionCall(sanitizers: any[], node: any, fclos: any, scope: any): any[] {
+ static findMatchedSanitizerOfFunctionCall(sanitizers: any[], node: any, fclos: any, scope: any, callInfo: CallInfo): any[] {
+ if (!BasicRuleHandler.getPreprocessReady()) {
+ return []
+ }
+
+ if (node?._meta?.nodehash && SanitizerChecker.matchSanitizerResultMap.has(node._meta.nodehash)) {
+ return SanitizerChecker.matchSanitizerResultMap.get(node._meta.nodehash)
+ }
+
const matchedSanitizers: any[] = []
const sanitizersWithoutCalleeType = sanitizers.filter(
@@ -420,7 +453,7 @@ class SanitizerChecker extends Checker {
sanitizer.sanitizerType === SANITIZER.SANITIZER_TYPE.FUNCTION_CALL_SANITIZER &&
(!sanitizer.calleeType || sanitizer.calleeType.length === 0)
)
- const matchedSanitizersWithoutCalleeType = matchSinkAtFuncCall(node, fclos, sanitizersWithoutCalleeType)
+ const matchedSanitizersWithoutCalleeType = matchSinkAtFuncCall(node, fclos, sanitizersWithoutCalleeType, callInfo)
if (matchedSanitizersWithoutCalleeType) {
matchedSanitizers.push(...matchedSanitizersWithoutCalleeType)
}
@@ -435,12 +468,17 @@ class SanitizerChecker extends Checker {
node,
fclos,
sanitizersWithCalleeType,
- scope
+ scope,
+ callInfo
)
if (matchedSanitizersWithCalleeType) {
matchedSanitizers.push(...matchedSanitizersWithCalleeType)
}
+ if (node?._meta?.nodehash) {
+ SanitizerChecker.matchSanitizerResultMap.set(node._meta.nodehash, matchedSanitizers)
+ }
+
return matchedSanitizers
}
@@ -502,7 +540,7 @@ class SanitizerChecker extends Checker {
if (!sanitizer || !sanitizer.id || !val) {
return
}
- if (this.checkSanitizerTagExist(val._tags, sanitizer, node)) {
+ if (this.checkSanitizerTagExist(val.taint.getTags(), sanitizer, node)) {
return
}
@@ -558,7 +596,7 @@ class SanitizerChecker extends Checker {
(sanitizer: any) => sanitizer.sanitizerScenario === SANITIZER.SANITIZER_SCENARIO.CONFIG_BY_FUNCTIONCALL
)
const fConfig = (nd: any) => {
- const tags = nd?._tags
+ const tags = nd?.taint ? nd.taint.getTags() : undefined
return tags && SanitizerChecker.findMatchedSanitizerTag(Configs, tags)?.length > 0
}
@@ -566,13 +604,13 @@ class SanitizerChecker extends Checker {
if (sanitizerNd) {
if (Array.isArray(sanitizerNd)) {
for (const n of sanitizerNd) {
- const matchedConfigSanitizerTags = SanitizerChecker.findMatchedSanitizerTag(sanitizers, n._tags)
+ const matchedConfigSanitizerTags = SanitizerChecker.findMatchedSanitizerTag(sanitizers, n.taint ? n.taint.getTags() : undefined)
if (matchedConfigSanitizerTags) {
matchedSanitizerTagsForAllTrace.push(...matchedConfigSanitizerTags)
}
}
} else {
- const matchedConfigSanitizerTags = SanitizerChecker.findMatchedSanitizerTag(sanitizers, sanitizerNd._tags)
+ const matchedConfigSanitizerTags = SanitizerChecker.findMatchedSanitizerTag(sanitizers, sanitizerNd.taint ? sanitizerNd.taint.getTags() : undefined)
if (matchedConfigSanitizerTags) {
matchedSanitizerTagsForAllTrace.push(...matchedConfigSanitizerTags)
}
@@ -586,8 +624,9 @@ class SanitizerChecker extends Checker {
sanitizer.sanitizerScenario === SANITIZER.SANITIZER_SCENARIO.VALIDATE_BY_BINARYOPERATION
)
const fFlow = (nd: any) => {
- const tags = nd?._tags
- return _.isFunction(tags?.has) && tags.has(attribute)
+ const tagTraceMap = nd?.taint ? nd.taint.getTagTracesMap() : undefined
+ if (!tagTraceMap) return false
+ return tagTraceMap.has(attribute)
}
const filter = defaultFilter
const satisfyCallback = (nd: any, from: any, parentMap: any) => {
@@ -610,7 +649,7 @@ class SanitizerChecker extends Checker {
} while (currentNd)
}
for (const parentNd of parentNdList) {
- const matchedFlowSanitizerTags = SanitizerChecker.findMatchedSanitizerTag(flowSanitizers, parentNd._tags)
+ const matchedFlowSanitizerTags = SanitizerChecker.findMatchedSanitizerTag(flowSanitizers, parentNd.taint ? parentNd.taint.getTags() : undefined)
if (matchedFlowSanitizerTags) {
matchedSanitizerTags.push(...matchedFlowSanitizerTags)
}
diff --git a/src/checker/sdk/get-file-ast-checker.ts b/src/checker/sdk/get-file-ast-checker.ts
index e4b53b2e..89c895be 100644
--- a/src/checker/sdk/get-file-ast-checker.ts
+++ b/src/checker/sdk/get-file-ast-checker.ts
@@ -16,6 +16,8 @@ class GetFileAstChecker extends Checker {
fileManager: Record
+ symbolTable: any
+
/**
*
* @param mng
@@ -56,18 +58,24 @@ class GetFileAstChecker extends Checker {
const finding: Finding = {
output: '',
}
- if (this.fileManager[this.input]) {
- finding.output = JSON.stringify(this.fileManager[this.input].ast, (key: string, value: any) => {
- // 如果属性名是 'parent',则返回 undefined 表示排除
- if (key === 'parent') {
- return undefined
- }
- if (value === undefined) {
- return ''
- }
- return value
- })
- this.resultManager.newFinding(finding, InteractiveOutputStrategy.outputStrategyId)
+ let fileValue = this.fileManager[this.input]
+ if (fileValue) {
+ if (typeof fileValue === 'string' && fileValue.startsWith('symuuid_')) {
+ fileValue = this.symbolTable.get(this.fileManager[this.input])
+ }
+ if (fileValue?.ast?.node) {
+ finding.output = JSON.stringify(fileValue.ast?.node, (key: string, value: any) => {
+ // 如果属性名是 'parent',则返回 undefined 表示排除
+ if (key === 'parent') {
+ return undefined
+ }
+ if (value === undefined) {
+ return ''
+ }
+ return value
+ })
+ this.resultManager.newFinding(finding, InteractiveOutputStrategy.outputStrategyId)
+ }
}
this.status = false
}
@@ -82,6 +90,7 @@ class GetFileAstChecker extends Checker {
*/
triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void {
this.fileManager = analyzer.fileManager
+ this.symbolTable = analyzer.symbolTable
}
}
module.exports = GetFileAstChecker
diff --git a/src/checker/taint/common-kit/entry-points-util.ts b/src/checker/taint/common-kit/entry-points-util.ts
index bec25dd4..42861775 100644
--- a/src/checker/taint/common-kit/entry-points-util.ts
+++ b/src/checker/taint/common-kit/entry-points-util.ts
@@ -11,11 +11,13 @@ interface EntryPointConfig {
interface MainFunction {
parent?: any
ast?: {
- loc?: {
- sourcefile?: string
- }
- id?: {
- name?: string
+ node?: {
+ loc?: {
+ sourcefile?: string
+ }
+ id?: {
+ name?: string
+ }
}
}
filePath?: string
@@ -35,19 +37,23 @@ if (Array.isArray(Rules.getRules()) && Rules.getRules().length > 0) {
/**
* 填充entryPoint信息
* @param main
+ * @param isPreProcess whether this is an operation that must be executed to simulate the service context rather than a real API
* @returns {EntryPoint}
*/
-function completeEntryPoint(main: MainFunction): typeof EntryPoint {
+function completeEntryPoint(main: MainFunction, isPreProcess = false): typeof EntryPoint {
const entryPoint = new EntryPoint(constValue.ENGIN_START_FUNCALL)
entryPoint.scopeVal = main.parent
entryPoint.argValues = []
entryPoint.entryPointSymVal = main
- entryPoint.filePath = main.filePath || main.ast?.loc?.sourcefile?.substring(config.maindirPrefix.length)
- entryPoint.functionName = main.functionName || main.ast?.id?.name
+ entryPoint.filePath = main.filePath || (config.maindirPrefix
+ ? main.ast?.node?.loc?.sourcefile?.substring(config.maindirPrefix.length) // drop the first maindirPrefix.length characters
+ : main.ast?.node?.loc?.sourcefile) // no prefix configured: keep the source file path unchanged
+ entryPoint.functionName = main.functionName || main.ast?.node?.id?.name
entryPoint.attribute = 'HTTP'
entryPoint.parent ??= main.parent
// TODO
entryPoint.funcReceiverType = main.funcReceiverType
+ entryPoint.isPreProcess = isPreProcess
return entryPoint
}
diff --git a/src/checker/taint/common-kit/sink-util.ts b/src/checker/taint/common-kit/sink-util.ts
index 98a80b22..b0a98893 100644
--- a/src/checker/taint/common-kit/sink-util.ts
+++ b/src/checker/taint/common-kit/sink-util.ts
@@ -1,8 +1,24 @@
+import { Invocation } from '../../../resolver/common/value/invocation'
+import TypeRelatedInfoResolver from '../../../resolver/common/type-related-info-resolver'
+import type { ClassHierarchy } from '../../../resolver/common/value/class-hierarchy'
+import { getExplicitArgCount, type CallInfo } from '../../../engine/analyzer/common/call-args'
+
const _ = require('lodash')
const { matchField: matchFieldSinkUtil } = require('../../common/rules-basic-handler')
const AstUtilSinkUtil = require('../../../util/ast-util')
const { handleException: handleExceptionSinkUtil } = require('../../../engine/analyzer/common/exception-handler')
+// Module-wide statistic: number of sinks actually matched
+let matchedSinkCount = 0
+/** Returns the current value of the matched-sink counter. */
+function getMatchedSinkCount(): number {
+ return matchedSinkCount
+}
+/** Resets the matched-sink counter to zero. */
+function resetMatchedSinkCount(): void {
+ matchedSinkCount = 0
+}
+
interface SinkRule {
argNum?: number
fsig?: string
@@ -16,15 +32,16 @@ interface SinkRule {
* @param node
* @param fclos
* @param sinks
- * @param argvalues
+ * @param callInfo
* @returns {Array}
*/
-function matchSinkAtFuncCall(node: any, fclos: any, sinks: SinkRule[], argvalues: any[]): SinkRule[] {
+function matchSinkAtFuncCall(node: any, fclos: any, sinks: SinkRule[], callInfo: CallInfo): SinkRule[] {
+ const argCount = getExplicitArgCount(callInfo)
const callExpr = node.callee || node
const res: SinkRule[] = []
if (sinks && sinks.length > 0) {
for (const tspec of sinks) {
- if (tspec.argNum !== undefined && tspec.argNum >= 0 && argvalues && tspec.argNum !== argvalues.length) {
+ if (tspec.argNum !== undefined && tspec.argNum >= 0 && tspec.argNum !== argCount) {
continue
}
@@ -32,10 +49,12 @@ function matchSinkAtFuncCall(node: any, fclos: any, sinks: SinkRule[], argvalues
const marray = tspec.fsig.split('.')
if (matchFieldSinkUtil(callExpr, marray, marray.length - 1)) {
res.push(tspec)
+ matchedSinkCount++ // 统计实际匹配的 sink
}
} else if (tspec.fregex) {
- if (callExpr.type === 'MemberAccess' && matchRegex(tspec.fregex, fclos._qid)) {
+ if (callExpr.type === 'MemberAccess' && matchRegex(tspec.fregex, fclos.qid)) {
res.push(tspec)
+ matchedSinkCount++ // 统计实际匹配的 sink
}
}
}
@@ -56,19 +75,20 @@ function matchSinkAtFuncCallWithCalleeType(
fclos: any,
rules: SinkRule[],
scope: any,
- argvalues: any[]
+ callInfo: CallInfo
): SinkRule[] {
+ const argCount = getExplicitArgCount(callInfo)
const callExpr = node.callee || node
const res: SinkRule[] = []
if (rules && rules.length > 0) {
- if (fclos.vtype === 'union' && !_.isEmpty(fclos.field)) {
- fclos.field.forEach((subFClos: any) => {
- res.push(...matchSinkAtFuncCallWithCalleeType(node, subFClos, rules, scope, argvalues))
+ if (fclos.vtype === 'union' && !_.isEmpty(fclos.value)) {
+ fclos.value.forEach((subFClos: any) => {
+ res.push(...matchSinkAtFuncCallWithCalleeType(node, subFClos, rules, scope, callInfo))
})
return res
}
for (const tspec of rules) {
- if (tspec.argNum !== undefined && tspec.argNum >= 0 && argvalues && tspec.argNum !== argvalues.length) {
+ if (tspec.argNum !== undefined && tspec.argNum >= 0 && tspec.argNum !== argCount) {
continue
}
@@ -91,7 +111,7 @@ function matchSinkAtFuncCallWithCalleeType(
AstUtilSinkUtil.prettyPrint(fclos.rtype?.definiteType).endsWith(`.${tspec.calleeType}`) ||
tspec.calleeType === '*') &&
(AstUtilSinkUtil.prettyPrint(fclos.rtype?.vagueType).replace(/"/g, '') === tspec.fsig ||
- fclos._sid === tspec.fsig)
+ fclos.sid === tspec.fsig)
) {
// import cn.hutool.http.HttpRequest; HttpRequest.post
res.push(tspec)
@@ -112,7 +132,7 @@ function matchSinkAtFuncCallWithCalleeType(
AstUtilSinkUtil.prettyPrint(fclos.rtype?.definiteType) === tspec.calleeType ||
AstUtilSinkUtil.prettyPrint(fclos.rtype?.definiteType).endsWith(`.${tspec.calleeType}`) ||
tspec.calleeType === '*') &&
- AstUtilSinkUtil.prettyPrint(fclos.ast) === tspec.fsig
+ AstUtilSinkUtil.prettyPrint(fclos.ast?.node) === tspec.fsig
) {
res.push(tspec)
}
@@ -121,7 +141,7 @@ function matchSinkAtFuncCallWithCalleeType(
// 用于匹配形如 squirrel.Delete(*).Where形式的sink点,*为通配符
callExpr.type === 'MemberAccess' &&
tspec.calleeType === '' &&
- matchRegex(tspec.fregex, fclos._qid)
+ matchRegex(tspec.fregex, fclos.qid)
) {
res.push(tspec)
}
@@ -149,8 +169,59 @@ function matchRegex(pattern: string, testStr: string): boolean {
}
}
+/**
+ * Checks whether an invocation matches a sink rule; returns true on a match and false otherwise.
+ * @param invocation invocation to test; its fsig, calleeType and callSiteLiteral are compared with the rule
+ * @param sink sink rule to match against; a rule with an empty fsig never matches
+ * @param typeResolver resolver whose class hierarchy is consulted when the callee type does not match directly; may be falsy
+ */
+function checkInvocationMatchSink(invocation: Invocation, sink: SinkRule, typeResolver: TypeRelatedInfoResolver): boolean {
+ if (!invocation || !sink) {
+ return false
+ }
+ // a rule without a function signature cannot match anything
+ if (!sink.fsig || sink.fsig === '') {
+ return false
+ }
+ if (!sink.calleeType || sink.calleeType === '') { // rule has no callee type: compare the signature or call-site text only
+ if (invocation.callSiteLiteral === sink.fsig || invocation.fsig === sink.fsig) {
+ return true
+ }
+ } else { // rule names a callee type
+ if (invocation.fsig === sink.fsig && invocation.calleeType && invocation.calleeType !== '') {
+ if (invocation.calleeType === sink.calleeType || invocation.calleeType.endsWith(`.${sink.calleeType}`)) {
+ return true
+ } else if (typeResolver) { // no direct type match: try the base types and sub types of the invocation's callee type
+ const classHierarchy: ClassHierarchy | undefined = typeResolver.classHierarchyMap.get(invocation.calleeType)
+ if (classHierarchy) {
+ const baseTypes: string[] = typeResolver.findBaseTypes(classHierarchy)
+ for (const baseType of baseTypes) {
+ if (baseType === sink.calleeType || baseType?.endsWith(`.${sink.calleeType}`)) {
+ return true
+ }
+ }
+ const subTypes: string[] = typeResolver.findSubTypes(classHierarchy)
+ for (const subType of subTypes) {
+ if (subType === sink.calleeType || subType?.endsWith(`.${sink.calleeType}`)) {
+ return true
+ }
+ }
+ }
+ }
+ }
+ if (invocation.callSiteLiteral === `${sink.calleeType}.${sink.fsig}` || invocation.callSiteLiteral?.endsWith(`.${sink.calleeType}.${sink.fsig}`)) { // textual fallback on the call-site literal
+ return true
+ }
+ }
+
+ return false
+}
+
module.exports = {
matchSinkAtFuncCall,
matchSinkAtFuncCallWithCalleeType,
matchRegex,
+ checkInvocationMatchSink,
+ getMatchedSinkCount,
+ resetMatchedSinkCount,
}
diff --git a/src/checker/taint/common-kit/source-util.ts b/src/checker/taint/common-kit/source-util.ts
index 1eb2fa15..653abba1 100644
--- a/src/checker/taint/common-kit/source-util.ts
+++ b/src/checker/taint/common-kit/source-util.ts
@@ -1,10 +1,31 @@
+import { buildNewValueInstance } from '../../../util/clone-util'
+import { getLegacyArgValues, type CallInfo } from '../../../engine/analyzer/common/call-args'
+
const _ = require('lodash')
const AstUtil = require('../../../util/ast-util')
const { prepareArgs, matchField } = require('../../common/rules-basic-handler')
const BasicRuleHandler = require('../../common/rules-basic-handler')
const { Scope } = require('../../../engine/analyzer/common')
-const ValueUtil = require('../../../engine/analyzer/common/value/valueUtil')
-const varUtil = require('../../../util/variable-util')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
+
+import { SymbolValue } from '../../../engine/analyzer/common/value/symbolic'
+
+// Module-wide statistic: number of sources actually marked
+let markedSourceCount = 0
+
+/**
+ * Returns the current value of the marked-source counter.
+ */
+function getMarkedSourceCount(): number {
+ return markedSourceCount
+}
+
+/**
+ * Resets the marked-source counter to zero.
+ */
+function resetMarkedSourceCount(): void {
+ markedSourceCount = 0
+}
/**
*
@@ -12,15 +33,14 @@ const varUtil = require('../../../util/variable-util')
* @param tagType
*/
function setTaint(res: any, tagType: any): void {
- res._tags = res._tags || new Set()
+ // taint is already created in the Unit constructor
if (Array.isArray(tagType)) {
for (const item of tagType) {
- res._tags.add(item)
+ res.taint.addTag(item) // one tag per array element
}
} else if (tagType) {
- res._tags.add(tagType)
+ res.taint.addTag(tagType) // single truthy tag
}
- res.hasTagRec = true
}
/**
@@ -35,22 +55,24 @@ function markTaintSource(unit: any, { path, kind }: { path: any; kind: any }): v
return
}
setTaint(unit, kind)
- if (unit.trace && Array.isArray(unit.trace) && unit.trace[0]?.tag !== 'SOURCE: ') {
- unit.trace = undefined
+ markedSourceCount++ // 统计实际标记的 source
+ // 如果已有 trace 但首项不是 SOURCE,清空 trace
+ const existingTrace = unit.taint.getFirstTrace()
+ if (existingTrace && Array.isArray(existingTrace) && existingTrace[0]?.tag !== 'SOURCE: ') {
+ unit.taint.clearTrace()
}
- if (!unit.trace) {
+ if (!unit.taint.hasTraces()) {
const start_line = path?.loc?.start?.line
const end_line = path?.loc?.end?.line
const tline = start_line === end_line ? start_line : _.range(start_line, end_line + 1)
- unit.trace = [
- {
- file: path?.loc?.sourcefile,
- line: tline,
- node: path,
- tag: 'SOURCE: ',
- affectedNodeName: AstUtil.prettyPrint(path),
- },
- ]
+ const traceItem = {
+ file: path?.loc?.sourcefile,
+ line: tline,
+ node: path,
+ tag: 'SOURCE: ',
+ affectedNodeName: AstUtil.prettyPrint(path),
+ }
+ unit.taint.setAllTraces([traceItem])
}
}
@@ -102,33 +124,34 @@ function introduceTaintAtFuncCallReturnValue(
*
* @param scope
* @param node
- * @param res
+ * @param callInfo
* @param funcCallArgTaintSource
*/
-function introduceFuncArgTaintByRuleConfig(scope: any, node: any, res: any, funcCallArgTaintSource: any): void {
+function introduceFuncArgTaintByRuleConfig(scope: any, node: any, callInfo: CallInfo | undefined, funcCallArgTaintSource: any): void {
if (!BasicRuleHandler.getPreprocessReady()) {
return
}
+ const argvalues = getLegacyArgValues(callInfo)
const rules = funcCallArgTaintSource
if (rules && Array.isArray(rules) && rules.length > 0) {
const call = node
for (const tspec of rules) {
if (tspec.fsig) {
const marray = tspec.fsig.split('.')
- if (call.callee?.type === 'MemberAccess' && _.isArray(res)) {
+ if (call.callee?.type === 'MemberAccess' && _.isArray(argvalues)) {
if (
(matchField(call.callee?.property, marray, marray.length - 1) ||
matchField(call.callee, marray, marray.length - 1)) &&
(AstUtil.prettyPrint(scope?.rtype) === tspec.calleeType || tspec.calleeType === '*')
) {
- const args = prepareArgs(res, undefined, tspec)
+ const args = prepareArgs(callInfo, undefined, tspec)
for (let i = 0; i < args.length; i++) {
markTaintSource(args[i], { path: node, kind: tspec.kind })
}
}
} else if (call.callee?.type === 'Identifier') {
if (call.callee.name === tspec.fsig) {
- const args = prepareArgs(res, undefined, tspec)
+ const args = prepareArgs(callInfo, undefined, tspec)
for (let i = 0; i < args.length; i++) {
markTaintSource(args[i], { path: node, kind: tspec.kind })
}
@@ -142,43 +165,74 @@ function introduceFuncArgTaintByRuleConfig(scope: any, node: any, res: any, func
/**
*
+ * @param analyzer analyzer forwarded to buildNewValueInstance when a copy of res has to be created
+ * @param scope scope forwarded to buildNewValueInstance when a copy of res has to be created
* @param node
* @param res
* @param sourceScopeVal
*/
-function introduceTaintAtIdentifier(node: any, res: any, sourceScopeVal: any): any {
+function introduceTaintAtIdentifier(analyzer: any, scope: any, node: any, res: any, sourceScopeVal: any): any {
if (!BasicRuleHandler.getPreprocessReady()) {
- return
+ return res
}
- if (varUtil.isEmpty(res._tags)) {
- // source定义方式,增加文件域和函数域的匹配,主要用于形参场景。identifier的source添加基本都用于插件-->形参场景
- const nodeStart = node.loc?.start?.line
- const nodeEnd = node.loc?.end?.line
- if (sourceScopeVal && sourceScopeVal.length > 0) {
- for (const val of sourceScopeVal) {
- const paths = val.path
- if (res._sid === paths || res._qid === paths || node.name === paths) {
- const valStart = val.locStart
- const valEnd = val.locEnd
- if (typeof valStart === 'undefined' || typeof valEnd === 'undefined') {
+ const nodeStart = node.loc?.start?.line
+ const nodeEnd = node.loc?.end?.line
+ const sourceFile = typeof node.loc?.sourcefile === 'string' ? node.loc.sourcefile : undefined // file-scoped rules only match nodes with a string source file
+ const alreadyTainted = res.taint?.hasTags()
+ let target = res
+ // target stays res unless an already-tainted value has to be copied for a global identifier source
+ if (sourceScopeVal && sourceScopeVal.length > 0) {
+ for (const val of sourceScopeVal) {
+ const paths = val.path
+ if (
+ res.sid === paths ||
+ res.qid === paths ||
+ QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(res.qid) === paths ||
+ node.name === paths
+ ) {
+ const valStart = val.locStart
+ const valEnd = val.locEnd
+ if (typeof valStart === 'undefined' || typeof valEnd === 'undefined') {
+ continue
+ }
+ let shouldMark = false
+ if (valStart === 'all' && valEnd === 'all' && val.scopeFile === 'all' && val.scopeFunc === 'all') {
+ shouldMark = true
+ } else if (valStart === 'all' && valEnd === 'all' && val.scopeFile !== 'all' && val.scopeFunc === 'all') {
+ if (sourceFile?.includes(val.scopeFile)) {
+ shouldMark = true
+ }
+ } else if (sourceFile?.includes(val.scopeFile) && nodeStart >= valStart && nodeEnd <= valEnd) {
+ shouldMark = true
+ }
+
+ if (shouldMark) {
+ // Only global identifier sources (e.g. request) need to be copied.
+ // Local sources such as route-handler parameters (locStart/locEnd are concrete line numbers) do not need a copy.
+ const isGlobalIdentifierSource = valStart === 'all' && valEnd === 'all'
+ // Local source: skip when the value already carries a tag of this kind, to avoid marking it again after a phi merge
+ const kinds = Array.isArray(val.kind) ? val.kind : [val.kind]
+ if (!isGlobalIdentifierSource && kinds.some((k: string) => target.taint?.containsTag(k))) {
continue
}
- if (valStart === 'all' && valEnd === 'all' && val.scopeFile === 'all' && val.scopeFunc === 'all') {
- markTaintSource(res, { path: node, kind: val.kind })
- } else if (valStart === 'all' && valEnd === 'all' && val.scopeFile !== 'all' && val.scopeFunc === 'all') {
- if (typeof node.loc.sourcefile === 'string') {
- if (node.loc.sourcefile.includes(val.scopeFile)) {
- markTaintSource(res, { path: node, kind: val.kind })
- }
- }
- } else if (node.loc.sourcefile.includes(val.scopeFile) && nodeStart >= valStart && nodeEnd <= valEnd) {
- markTaintSource(res, { path: node, kind: val.kind })
+ if (alreadyTainted && target === res && isGlobalIdentifierSource) {
+ target = buildNewValueInstance(
+ analyzer,
+ res,
+ node,
+ scope,
+ () => false,
+ () => false,
+ 1,
+ { skipTagTraceMap: true }
+ )
}
+ markTaintSource(target, { path: node, kind: val.kind })
}
}
}
}
- return res
+ return target
}
/**
@@ -219,34 +273,38 @@ function matchAndMark(paths: any, scp: any, rule: any, mark_cb: any, createIfNot
const path = paths.shift()
if (path === '*') {
- for (const i in scp.field) {
- const u = scp.field[i]
- matchAndMark(paths, u, rule, mark_cb, createIfNotExists)
+ if (scp.members) {
+ for (const i of scp.members.keys()) {
+ const u = scp.members.get(i)
+ matchAndMark(paths, u, rule, mark_cb, createIfNotExists)
+ }
}
} else if (path === '**') {
mark_cb(scp, rule)
- for (const i in scp.field) {
- const u = scp.field[i]
- matchAndMark(['**'], u, rule, mark_cb, createIfNotExists)
+ if (scp.members) {
+ for (const i of scp.members.keys()) {
+ const u = scp.members.get(i)
+ matchAndMark(['**'], u, rule, mark_cb, createIfNotExists)
+ }
}
} else if (path === 'this') {
- const val = scp.getThis()
+ const val = scp.getThisObj()
if (!val) return
matchAndMark(paths, val, rule, mark_cb, createIfNotExists)
} else {
const scpBackup = scp
- scp = Scope.getDefScope(scp, ValueUtil.SymbolValue({ type: 'Identifier', name: path }))
+ scp = Scope.getDefScope(scp, new SymbolValue(scp.qid || '', { sid: path, type: 'Identifier', name: path }))
if (!scp) {
scp = scpBackup
}
let val = scp?.getFieldValue(path, createIfNotExists)
if (!val) {
- if (scp._sid !== '') {
+ if (scp.sid !== '') {
while (scp.hasOwnProperty('parent') && scp.parent) {
scp = scp.parent
}
- if (scp?._sid === '') {
- scp = scp.moduleManager
+ if (scp?.sid === '') {
+ scp = scp.context.modules
}
// 确保scp的值不是undefined
if (scp && typeof scp.getFieldValue === 'function') {
@@ -265,18 +323,20 @@ function matchAndMark(paths: any, scp: any, rule: any, mark_cb: any, createIfNot
/**
* introduce identifier taint globally, no limitation for file and function, usually for benchmark testing
+ * @param analyzer
+ * @param scope
* @param node
* @param res
* @param sourceScopeVal
*/
-function introduceTaintAtIdentifierDirect(node: any, res: any, sourceScopeVal: any): void {
+function introduceTaintAtIdentifierDirect(analyzer: any, scope: any, node: any, res: any, sourceScopeVal: any): any {
if (!BasicRuleHandler.getPreprocessReady()) {
- return
+ return res
}
if (sourceScopeVal) {
for (const rule of sourceScopeVal) {
const paths = rule.path
- if (res._sid === paths) {
+ if (res.sid === paths) {
markTaintSource(res, { path: node, kind: rule.kind })
}
}
@@ -345,7 +405,7 @@ function introduceFuncArgTaintBySelfCollection(
rule: any,
sourceKind: any
): void {
- const parameters = entryPoint.fdef?.parameters
+ const parameters = entryPoint.ast.fdef?.parameters
const interestedParas = getArrayElementsByRule(parameters, rule)
interestedParas.forEach((para) => {
const argv = analyzer.processInstruction(entryPoint, para, state)
@@ -363,4 +423,6 @@ module.exports = {
introduceFuncArgTaintBySelfCollection,
introduceFuncArgTaintByRuleConfig,
setTaint,
+ getMarkedSourceCount,
+ resetMarkedSourceCount,
}
diff --git a/src/checker/taint/go/util.ts b/src/checker/taint/common-kit/taint-entrypoint-util.ts
similarity index 75%
rename from src/checker/taint/go/util.ts
rename to src/checker/taint/common-kit/taint-entrypoint-util.ts
index a1c64881..bee036f2 100644
--- a/src/checker/taint/go/util.ts
+++ b/src/checker/taint/common-kit/taint-entrypoint-util.ts
@@ -1,14 +1,14 @@
import type Unit from '../../../engine/analyzer/common/value/unit'
-const IntroduceTaint = require('../common-kit/source-util')
-const completeEntryPoint = require('../common-kit/entry-points-util')
+const IntroduceTaint = require('./source-util')
+const completeEntryPoint = require('./entry-points-util')
/**
*
* @param list
*/
export function flattenUnionValues(list: Array): Array {
- return list.flatMap((unit) => {
+ return list.filter(Boolean).flatMap((unit) => {
switch (unit.vtype) {
case 'union':
return flattenUnionValues(unit.value)
@@ -30,22 +30,24 @@ export function flattenUnionValues(list: Array): Array {
* @param processedRouteRegistry
* @param entryPointUnitValue
* @param source
+ * @param taintKind
*/
export function processEntryPointAndTaintSource(
analyzer: any,
state: any,
processedRouteRegistry: Set,
entryPointUnitValue: Unit,
- source: string
+ source: string,
+ taintKind: string
) {
flattenUnionValues([entryPointUnitValue])
.filter((val) => val.vtype === 'fclos')
.forEach((entryPointFuncValue) => {
- if (entryPointFuncValue?.ast.loc) {
- const hash = JSON.stringify(entryPointFuncValue.ast.loc)
+ if (entryPointFuncValue?.ast?.node?.loc) {
+ const hash = JSON.stringify(entryPointFuncValue.ast.node.loc)
if (!processedRouteRegistry.has(hash)) {
processedRouteRegistry.add(hash)
- IntroduceTaint.introduceFuncArgTaintBySelfCollection(entryPointFuncValue, state, analyzer, source, 'GO_INPUT')
+ IntroduceTaint.introduceFuncArgTaintBySelfCollection(entryPointFuncValue, state, analyzer, source, taintKind)
const entryPoint = completeEntryPoint(entryPointFuncValue)
analyzer.entryPoints.push(entryPoint)
}
diff --git a/src/checker/taint/common-kit/taint-finding-util.ts b/src/checker/taint/common-kit/taint-finding-util.ts
index 90a14cfa..dfde9109 100644
--- a/src/checker/taint/common-kit/taint-finding-util.ts
+++ b/src/checker/taint/common-kit/taint-finding-util.ts
@@ -9,6 +9,7 @@ const Statistics = require('../../../util/statistics').default
const { shortenSourceFile } = require('../../../util/finding-util')
const Config = require('../../../config')
const logger = require('../../../util/logger')(__filename)
+const { getOutputTrace } = require('./taint-trace-output')
/**
* output taint flow result to console
@@ -111,21 +112,22 @@ function formatTaintFinding(finding: TaintFinding): Record {
// the line of the issue
if (finding.node) {
const { loc } = finding.node
- const line_str = loc.start.line == loc.end.line ? loc.start.line : `[${loc.start.line}, ${loc.end.line}]`
+ const line_str = loc.start?.line == loc.end?.line ? loc.start?.line : `[${loc.start?.line}, ${loc.end?.line}]`
let code = AstUtil.prettyPrint(finding.node)
if (code.startsWith('{\n "type'))
// non-pretty-printed ast
- code = SourceLine.formatTraces([{ file: finding.sourcefile, line: loc.start.line }])
+ code = SourceLine.formatTraces([{ file: finding.sourcefile, line: loc.start?.line }])
res.line = `Line ${line_str}: ${code}`
} else {
logger.warn('finding.node is null')
}
// the trace of the origin of the issue
- if (finding.trace) {
- for (const item of finding.trace) {
+ const outputTrace = getOutputTrace(finding)
+ if (outputTrace) {
+ for (const item of outputTrace) {
if (item.file) item.shortfile = shortenSourceFile(item.file)
}
- const trace = SourceLine.formatTraces(finding.trace)
+ const trace = SourceLine.formatTraces(outputTrace)
res.trace = trace
}
// the trace of an example attack
diff --git a/src/checker/taint/common-kit/taint-trace-output.ts b/src/checker/taint/common-kit/taint-trace-output.ts
new file mode 100644
index 00000000..0d6a3ab2
--- /dev/null
+++ b/src/checker/taint/common-kit/taint-trace-output.ts
@@ -0,0 +1,63 @@
+import type { TaintFinding } from '../../../engine/analyzer/common/common-types'
+
+const Config = require('../../../config')
+function normalizeTraceStrategy(strategy: string | undefined): string { // maps the configured strategy to the name used below
+ if (strategy === 'folded') return 'callstack-only' // 'folded' is treated as 'callstack-only'
+ return strategy || 'callstack-only' // an unset or empty strategy falls back to 'callstack-only'
+}
+
+function isLineInScope(line: any, scope: { startLine: number; endLine: number }): boolean { // is the line inside [startLine, endLine]?
+ if (Array.isArray(line)) { // an array of lines is in scope when at least one numeric entry is
+ return line.some(
+ (singleLine) => typeof singleLine === 'number' && singleLine >= scope.startLine && singleLine <= scope.endLine
+ )
+ }
+ return typeof line === 'number' && line >= scope.startLine && line <= scope.endLine // non-numeric values are never in scope
+}
+
+function getOutputTrace(finding: TaintFinding): any[] | undefined { // selects the trace steps to output for a finding
+ const strategy = normalizeTraceStrategy(Config.taintTraceOutputStrategy)
+ const rawTrace = finding.trace
+ if (!Array.isArray(rawTrace)) return rawTrace // a non-array trace is passed through unchanged
+ if (strategy !== 'callstack-only') return rawTrace // any other strategy outputs the full trace
+ if (rawTrace.length === 0) return rawTrace
+ // collect the line ranges of every call-stack function and of the entry point
+ const scopes: Array<{ file: string; startLine: number; endLine: number }> = []
+
+ if (Array.isArray(finding.callstack)) {
+ for (const fclos of finding.callstack) {
+ const loc = fclos?.ast?.node?.loc
+ if (loc?.sourcefile && loc.start?.line != null && loc.end?.line != null) {
+ scopes.push({
+ file: loc.sourcefile,
+ startLine: loc.start.line,
+ endLine: loc.end.line,
+ })
+ }
+ }
+ }
+
+ const entryLoc = finding.entrypointLoc
+ if (entryLoc?.sourcefile && entryLoc.start?.line != null && entryLoc.end?.line != null) {
+ scopes.push({
+ file: entryLoc.sourcefile,
+ startLine: entryLoc.start.line,
+ endLine: entryLoc.end.line,
+ })
+ }
+ // without any usable range there is nothing to filter by
+ if (scopes.length === 0) return rawTrace
+ // SOURCE and SINK steps are always kept; other steps only when they fall inside a collected range
+ const filtered = rawTrace.filter((step: any) => {
+ if (step?.tag === 'SOURCE: ' || step?.tag === 'SINK: ') return true
+ const stepFile = step?.loc?.sourcefile || step?.file
+ const stepLine = step?.loc?.start?.line ?? step?.line
+ return scopes.some(
+ (scope) => stepFile === scope.file && isLineInScope(stepLine, scope)
+ )
+ })
+ // never return an empty trace: fall back to the unfiltered one
+ return filtered.length > 0 ? filtered : rawTrace
+}
+
+export { getOutputTrace }
diff --git a/src/checker/taint/go/beego-entrypoint-collect-checker.ts b/src/checker/taint/go/beego-entrypoint-collect-checker.ts
index 6e97f315..1ea87a7e 100644
--- a/src/checker/taint/go/beego-entrypoint-collect-checker.ts
+++ b/src/checker/taint/go/beego-entrypoint-collect-checker.ts
@@ -1,5 +1,6 @@
import type Unit from '../../../engine/analyzer/common/value/unit'
-import { flattenUnionValues, processEntryPointAndTaintSource } from './util'
+import { getLegacyArgValues } from '../../../engine/analyzer/common/call-args'
+import { flattenUnionValues, processEntryPointAndTaintSource } from '../common-kit/taint-entrypoint-util'
const config = require('../../../config')
@@ -46,33 +47,32 @@ class BeegoEntrypointCollectChecker extends Checker {
* @param node
* @param state
* @param info
- * @param info.fclos
- * @param info.argvalues
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
+ const argvalues = getLegacyArgValues(callInfo)
if (config.entryPointMode === 'ONLY_CUSTOM') return
if (fclos.vtype === 'symbol') {
if (fclos.type === 'Identifier') {
- if (fclos._qid.startsWith('github.com/beego/beego/v2/server/web/filter/apiauth.APISecretAuth')) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[0], '0')
- } else if (fclos._qid.startsWith('github.com/beego/beego/v2/server/web/filter/auth.NewBasicAuthenticator')) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[0], '0, 1')
- } else if (fclos._qid.startsWith('github.com/beego/beego/v2/server/web')) {
+ if (fclos._qid.includes('github.com/beego/beego/v2/server/web/filter/apiauth.APISecretAuth')) {
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[0], '0', 'GO_INPUT')
+ } else if (fclos._qid.includes('github.com/beego/beego/v2/server/web/filter/auth.NewBasicAuthenticator')) {
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[0], '0, 1', 'GO_INPUT')
+ } else if (fclos._qid.includes('github.com/beego/beego/v2/server/web')) {
this.handleHttpServerMethod(analyzer, scope, state, fclos.name, argvalues)
}
} else if (fclos.type === 'MemberAccess') {
if (controllerQids.has(fclos.object._qid) && fclos.property.name === 'Mapping') {
const controllerMethodVal = argvalues[1]
- if (controllerMethodVal?.ast.loc) {
- const hash = JSON.stringify(controllerMethodVal.ast.loc)
+ if (controllerMethodVal?.ast?.node?.loc) {
+ const hash = JSON.stringify(controllerMethodVal.ast.node.loc)
if (!processedRouteRegistry.has(hash)) {
processedRouteRegistry.add(hash)
const entryPoint = completeEntryPoint(controllerMethodVal)
analyzer.entryPoints.push(entryPoint)
}
}
- } else if (fclos._qid.startsWith('github.com/beego/beego/v2/server/web.NewNamespace')) {
+ } else if (fclos._qid.includes('github.com/beego/beego/v2/server/web.NewNamespace')) {
this.handleNamespaceMethod(analyzer, scope, state, fclos.property.name, argvalues)
}
}
@@ -88,7 +88,8 @@ class BeegoEntrypointCollectChecker extends Checker {
* @param info
*/
triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues, ret } = info
+ const { fclos, ret, callInfo } = info
+ const argvalues = getLegacyArgValues(callInfo)
if (config.entryPointMode === 'ONLY_CUSTOM') return
if (fclos.vtype === 'symbol' && fclos.type === 'MemberAccess') {
if (controllerQids.has(fclos.object._qid)) {
@@ -138,10 +139,10 @@ class BeegoEntrypointCollectChecker extends Checker {
const { rvalue } = info
const { left } = node
if (
- analyzer.processInstruction(scope, left.object, state)?._qid === 'github.com/beego/beego/v2/server/web.BConfig' &&
+ analyzer.processInstruction(scope, left.object, state)?._qid?.includes('github.com/beego/beego/v2/server/web.BConfig') &&
left.property?.name === 'RecoverFunc'
) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, rvalue, '0')
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, rvalue, '0', 'GO_INPUT')
}
}
@@ -166,8 +167,10 @@ class BeegoEntrypointCollectChecker extends Checker {
isControllerMethod(name: string, value: any): boolean {
if (!name[0] || name[0] < 'A' || name[0] > 'Z') return false
if (!value || value.vtype !== 'fclos') return false
- if (value.fdef.returnType.type !== 'VoidType') return false
- return value.fdef.parameters.length === 0
+ const fdef = value.ast?.fdef || value.ast?.node // prefer ast.fdef, falling back to ast.node
+ if (!fdef) return false
+ if (fdef.returnType?.type !== 'VoidType') return false // only functions declared with a VoidType return qualify
+ return fdef.parameters?.length === 0 // and only when the parameter list is present and empty
}
/**
@@ -183,38 +186,39 @@ class BeegoEntrypointCollectChecker extends Checker {
switch (name) {
case 'AutoRouter':
case 'NSAutoRouter':
- this.handleAutoControllerArgVal(analyzer, argvalues[0])
+ if (argvalues[0]) this.handleAutoControllerArgVal(analyzer, argvalues[0])
break
case 'AutoPrefix':
case 'NSAutoPrefix':
- this.handleAutoControllerArgVal(analyzer, argvalues[1])
+ if (argvalues[1]) this.handleAutoControllerArgVal(analyzer, argvalues[1])
break
case 'InsertFilter':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[2], '0')
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[2], '0', 'GO_INPUT')
break
case 'InsertFilterChain':
flattenUnionValues([argvalues[1]])
.filter((unit) => unit.vtype === 'fclos')
.forEach((fclos) => {
- const retVal = analyzer.processAndCallFuncDef(scope, (fclos as any).fdef, fclos, state)
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, retVal, '0')
+ const fdef = (fclos as any).ast?.fdef || (fclos as any).ast?.node
+ const retVal = analyzer.processAndCallFuncDef(scope, fdef, fclos, state)
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, retVal, '0', 'GO_INPUT')
})
break
case 'Handler':
flattenUnionValues([argvalues[1]]).forEach((handlerVal) => {
- const serveHttp = handlerVal.field?.ServeHTTP
+ const serveHttp = handlerVal.value?.ServeHTTP
if (serveHttp) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, serveHttp, '1')
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, serveHttp, '1', 'GO_INPUT')
}
})
break
case 'Include':
case 'NSInclude':
flattenUnionValues(argvalues).forEach((mappingController) => {
- const urlMapping = mappingController.field?.URLMapping
+ const urlMapping = mappingController.value?.URLMapping
if (urlMapping) {
controllerQids.add(mappingController._qid)
- analyzer.processAndCallFuncDef(scope, urlMapping.fdef, urlMapping, state)
+ analyzer.processAndCallFuncDef(scope, urlMapping.ast?.fdef || urlMapping.ast?.node, urlMapping, state)
}
})
break
@@ -237,20 +241,22 @@ class BeegoEntrypointCollectChecker extends Checker {
flattenUnionValues([argvalues[1]])
.filter((unit) => unit.vtype === 'fclos')
.forEach((unboundMethodVal: any) => {
+ const thisVal = unboundMethodVal._this
const instance = analyzer.buildNewObject(
- unboundMethodVal.__this.cdef,
+ thisVal?.cdef || thisVal?.ast?.cdef,
[],
- unboundMethodVal.__this,
+ thisVal,
state,
null,
scope
)
- const boundMethodVal = instance.field?.[unboundMethodVal.fdef.id.name]
- if (boundMethodVal?.ast.loc) {
- const hash = JSON.stringify(boundMethodVal.ast.loc)
+ const fdef = unboundMethodVal.ast?.fdef || unboundMethodVal.ast?.node
+ const boundMethodVal = instance.value?.[fdef?.id?.name]
+ if (boundMethodVal?.ast?.node?.loc) {
+ const hash = JSON.stringify(boundMethodVal.ast.node.loc)
if (!processedRouteRegistry.has(hash)) {
processedRouteRegistry.add(hash)
- controllerQids.add(boundMethodVal.__this._qid)
+ controllerQids.add(boundMethodVal._this?._qid)
const entryPoint = completeEntryPoint(boundMethodVal)
analyzer.entryPoints.push(entryPoint)
}
@@ -264,12 +270,12 @@ class BeegoEntrypointCollectChecker extends Checker {
.forEach((stringVal) => {
const methodName = stringVal.value.slice(1, -1).split(':')[1]
flattenUnionValues([argvalues[1]]).forEach((controllerVal) => {
- const controllerMethodVal = controllerVal.field?.[methodName]
- if (controllerMethodVal?.ast.loc) {
- const hash = JSON.stringify(controllerMethodVal.ast.loc)
+ const controllerMethodVal = controllerVal.value?.[methodName]
+ if (controllerMethodVal?.ast?.node?.loc) {
+ const hash = JSON.stringify(controllerMethodVal.ast.node.loc)
if (!processedRouteRegistry.has(hash)) {
processedRouteRegistry.add(hash)
- controllerQids.add(controllerMethodVal.__this._qid)
+ if (controllerMethodVal._this?._qid) controllerQids.add(controllerMethodVal._this._qid)
const entryPoint = completeEntryPoint(controllerMethodVal)
analyzer.entryPoints.push(entryPoint)
}
@@ -293,14 +299,14 @@ class BeegoEntrypointCollectChecker extends Checker {
case 'NSOptions':
case 'NSPatch':
case 'NSPut':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[1], '0')
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[1], '0', 'GO_INPUT')
break
case 'NSCond':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[0], '0')
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[0], '0', 'GO_INPUT')
break
case 'NSBefore':
case 'NSAfter':
- argvalues.forEach((val) => processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, val, '0'))
+ argvalues.forEach((val) => processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, val, '0', 'GO_INPUT'))
break
case 'ErrorController':
this.handleErrorControllerArgVal(analyzer, argvalues[0])
@@ -323,10 +329,10 @@ class BeegoEntrypointCollectChecker extends Checker {
case 'Filter':
argvalues
.slice(1)
- .forEach((val) => processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, val, '0'))
+ .forEach((val) => processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, val, '0', 'GO_INPUT'))
break
case 'Cond':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[0], '0')
+ processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[0], '0', 'GO_INPUT')
break
default:
break
@@ -340,15 +346,15 @@ class BeegoEntrypointCollectChecker extends Checker {
*/
handleErrorControllerArgVal(analyzer: any, controllerArgVal: Unit) {
flattenUnionValues([controllerArgVal])
- .flatMap((v) => Object.entries(v.field))
+ .flatMap((v) => Object.entries(v.value))
.filter(([fieldName, fieldVal]) => this.isControllerMethod(fieldName, fieldVal) && fieldName.startsWith('Error'))
.map(([, controllerMethodVal]) => controllerMethodVal as Unit)
.forEach((controllerMethodVal) => {
- if (controllerMethodVal?.ast.loc) {
- const hash = JSON.stringify(controllerMethodVal.ast.loc)
+ if (controllerMethodVal?.ast?.node?.loc) {
+ const hash = JSON.stringify(controllerMethodVal.ast.node.loc)
if (!processedRouteRegistry.has(hash)) {
processedRouteRegistry.add(hash)
- controllerQids.add(controllerMethodVal.__this._qid)
+ if (controllerMethodVal._this?._qid) controllerQids.add(controllerMethodVal._this._qid)
const entryPoint = completeEntryPoint(controllerMethodVal)
analyzer.entryPoints.push(entryPoint)
}
@@ -363,15 +369,15 @@ class BeegoEntrypointCollectChecker extends Checker {
*/
handleAutoControllerArgVal(analyzer: any, controllerArgVal: Unit) {
flattenUnionValues([controllerArgVal])
- .flatMap((v) => Object.entries(v.field))
+ .flatMap((v) => Object.entries(v.value))
.filter(([fieldName, fieldVal]) => this.isControllerMethod(fieldName, fieldVal))
.map(([, controllerMethodVal]) => controllerMethodVal as Unit)
.forEach((controllerMethodVal) => {
- if (controllerMethodVal?.ast.loc) {
- const hash = JSON.stringify(controllerMethodVal.ast.loc)
+ if (controllerMethodVal?.ast?.node?.loc) {
+ const hash = JSON.stringify(controllerMethodVal.ast.node.loc)
if (!processedRouteRegistry.has(hash)) {
processedRouteRegistry.add(hash)
- controllerQids.add(controllerMethodVal.__this._qid)
+ if (controllerMethodVal._this?._qid) controllerQids.add(controllerMethodVal._this._qid)
const entryPoint = completeEntryPoint(controllerMethodVal)
analyzer.entryPoints.push(entryPoint)
}
diff --git a/src/checker/taint/go/cobra-command-checker.ts b/src/checker/taint/go/cobra-command-checker.ts
index 996dbaea..4342ae51 100644
--- a/src/checker/taint/go/cobra-command-checker.ts
+++ b/src/checker/taint/go/cobra-command-checker.ts
@@ -2,11 +2,11 @@ import type { EntryPoint } from '../../../engine/analyzer/common/entrypoint'
const _ = require('lodash')
const completeEntryPoint = require('../common-kit/entry-points-util')
-const configCobra = require('../../../config')
-const CheckerCobra = require('../../common/checker')
+const config = require('../../../config')
+const Checker = require('../../common/checker')
const processedBuiltInRegistry = new Set()
-const cobraCommandQid = 'github.com/spf13/cobra.Command'
+const cobraCommandQid = /github\.com\/spf13\/cobra\.Command/
const preAction: string[] = ['PreRun', 'PreRunE']
const postAction: string[] = ['RunE', 'Run']
@@ -14,7 +14,7 @@ const postAction: string[] = ['RunE', 'Run']
* cobra.Command bulitIn checker
* 为第三方库方法cobra.command做建模,添加entryPoints
*/
-class cobraCommandChecker extends CheckerCobra {
+class cobraCommandChecker extends Checker {
/**
* constructor
* @param resultManager
@@ -34,9 +34,9 @@ class cobraCommandChecker extends CheckerCobra {
* @param fClos
*/
ifIgnoreEntryPoint(fClos: any): boolean {
- if (!fClos.fdef?.loc) return true
+ if (!fClos.ast.fdef?.loc) return true
// todo:this.func{call this.f1()},this.f1依赖于this的符号值,但注册this.func时,目前的hash无法反映不同this符号值的区别,如alarm_center/pkg/app/app.go的#173行
- const hash = JSON.stringify(fClos.fdef.loc)
+ const hash = JSON.stringify(fClos.ast.fdef.loc)
if (processedBuiltInRegistry.has(hash)) return true
processedBuiltInRegistry.add(hash)
return false
@@ -52,9 +52,9 @@ class cobraCommandChecker extends CheckerCobra {
*/
triggerAtVariableDeclaration(analyzer: any, scope: any, node: any, state: any, info: any): void {
const { initVal } = info
- if (configCobra.entryPointMode === 'ONLY_CUSTOM') return
- if (initVal?._qid !== cobraCommandQid || _.isEmpty(initVal.field)) return
- const initField = initVal.field
+ if (config.entryPointMode === 'ONLY_CUSTOM') return
+ if (!cobraCommandQid.test(initVal?.qid) || !initVal.members || initVal.members.size === 0) return
+ const initField = initVal.value
const preEntryPoints: EntryPoint[] = []
const postEntryPoints: EntryPoint[] = []
@@ -64,7 +64,7 @@ class cobraCommandChecker extends CheckerCobra {
if (initField.hasOwnProperty(action) && initField[action]?.vtype === 'fclos') {
const ep = initField[action]
if (this.ifIgnoreEntryPoint(ep)) return
- targetEntryPoints.push(completeEntryPoint(ep))
+ targetEntryPoints.push(completeEntryPoint(ep, true))
}
})
}
@@ -83,11 +83,19 @@ class cobraCommandChecker extends CheckerCobra {
*/
triggerAtAssignment(analyzer: any, scope: any, node: any, state: any, info: any): void {
const { lvalue, rvalue } = info
- if (configCobra.entryPointMode === 'ONLY_CUSTOM') return // 不路由自采集
- if (!lvalue?._qid || rvalue?.vtype !== 'fclos') return
- if (!lvalue._qid.startsWith(cobraCommandQid) || ![...preAction, ...postAction].includes(lvalue._sid)) return
- if (this.ifIgnoreEntryPoint(rvalue)) return
- analyzer.entryPoints.push(completeEntryPoint(rvalue))
+ if (config.entryPointMode === 'ONLY_CUSTOM') {
+ return // 不路由自采集
+ }
+ if (!lvalue?.qid || rvalue?.vtype !== 'fclos') {
+ return
+ }
+ if (!cobraCommandQid.test(lvalue.qid) || ![...preAction, ...postAction].includes(lvalue.sid)) {
+ return
+ }
+ if (this.ifIgnoreEntryPoint(rvalue)) {
+ return
+ }
+ analyzer.entryPoints.push(completeEntryPoint(rvalue, true))
}
/**
diff --git a/src/checker/taint/go/echo-entrypoint-collect-checker.ts b/src/checker/taint/go/echo-entrypoint-collect-checker.ts
deleted file mode 100644
index 042bd93c..00000000
--- a/src/checker/taint/go/echo-entrypoint-collect-checker.ts
+++ /dev/null
@@ -1,439 +0,0 @@
-import type Unit from '../../../engine/analyzer/common/value/unit'
-import { flattenUnionValues, processEntryPointAndTaintSource } from './util'
-
-const config = require('../../../config')
-const GoAnalyzer = require('../../../engine/analyzer/golang/common/go-analyzer')
-
-const KnownPackageName = {
- 'github.com/labstack/echo/v4': 'echo',
- 'github.com/labstack/echo-jwt/v4': 'echojwt',
-}
-
-const RouteRegistryObject = ['github.com/labstack/echo/v4.New()']
-
-const MiddlewareHandlerRegistryObject = [
- 'github.com/labstack/echo/v4/middleware',
- 'github.com/labstack/echo-contrib/casbin',
- 'github.com/labstack/echo-jwt/v4',
- 'github.com/labstack/echo-contrib/echoprometheus',
- 'github.com/labstack/echo-contrib/session',
-]
-
-const ConfigObjectCollectionTable = new Map>([
- [
- 'BasicAuthWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'Validator', source: '0, 1, 2' },
- ],
- ],
- [
- 'BodyDumpWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'Handler', source: '0, 1, 2' },
- ],
- ],
- ['BodyLimitWithConfig', [{ name: 'Skipper', source: '0' }]],
- [
- 'MiddlewareWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'EnforceHandler', source: '0, 1' },
- { name: 'UserGetter', source: '0' },
- { name: 'ErrorHandler', source: '0, 1' },
- ],
- ],
- [
- 'ContextTimeoutWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'ErrorHandler', source: '0, 1' },
- ],
- ],
- [
- 'CORSWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'AllowOriginFunc', source: '0' },
- ],
- ],
- [
- 'CSRFWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'ErrorHandler', source: '0, 1' },
- ],
- ],
- ['DecompressWithConfig', [{ name: 'Skipper', source: '0' }]],
- ['GzipWithConfig', [{ name: 'Skipper', source: '0' }]],
- [
- 'WithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'BeforeFunc', source: '0' },
- { name: 'SuccessHandler', source: '0' },
- { name: 'ErrorHandler', source: '0, 1' },
- { name: 'KeyFunc', source: '0' },
- { name: 'ParseTokenFunc', source: '0' },
- { name: 'NewClaimsFunc', source: '0' },
- ],
- ],
- [
- 'KeyAuthWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'Validator', source: '0, 1' },
- { name: 'ErrorHandler', source: '0, 1' },
- ],
- ],
- [
- 'LoggerWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'CustomTagFunc', source: '0' },
- ],
- ],
- [
- 'RequestLoggerWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'BeforeNextFunc', source: '0' },
- { name: 'LogValuesFunc', source: '0, 1' },
- ],
- ],
- [
- 'MethodOverrideWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'Getter', source: '0' },
- ],
- ],
- [
- 'NewMiddlewareWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'BeforeNext', source: '0' },
- { name: 'AfterNext', source: '0, 1' },
- { name: 'StatusCodeResolver', source: '0, 1' },
- ],
- ],
- ['Proxy', [{ name: 'Next', source: '0' }]],
- [
- 'ProxyWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'RetryFilter', source: '0, 1' },
- { name: 'ErrorHandler', source: '0, 1' },
- { name: 'ModifyResponse', source: '0' },
- ],
- ],
- ['RateLimiter', [{ name: 'Allow', source: '0' }]],
- [
- 'RateLimiterWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'BeforeFunc', source: '0' },
- { name: 'IdentifierExtractor', source: '0' },
- { name: 'ErrorHandler', source: '0, 1' },
- { name: 'DenyHandler', source: '0, 1, 2' },
- ],
- ],
- [
- 'RecoverWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'LogErrorFunc', source: '0, 1' },
- ],
- ],
- ['HTTPSRedirectWithConfig', [{ name: 'Skipper', source: '0' }]],
- [
- 'RequestIDWithConfig',
- [
- { name: 'Skipper', source: '0' },
- { name: 'RequestIDHandler', source: '0, 1' },
- ],
- ],
- ['RewriteWithConfig', [{ name: 'Skipper', source: '0' }]],
- ['SecureWithConfig', [{ name: 'Skipper', source: '0' }]],
- [
- 'Middleware',
- [
- { name: 'Get', source: '0' },
- { name: 'New', source: '0' },
- { name: 'Save', source: '0' },
- ],
- ],
- ['StaticWithConfig', [{ name: 'Skipper', source: '0' }]],
- ['AddTrailingSlashWithConfig', [{ name: 'Skipper', source: '0' }]],
- ['RemoveTrailingSlashWithConfig', [{ name: 'Skipper', source: '0' }]],
-])
-
-const Checker = require('../../common/checker')
-
-const processedRouteRegistry = new Set()
-
-/**
- *
- */
-class EchoEntrypointCollectChecker extends Checker {
- /**
- *
- * @param resultManager
- */
- constructor(resultManager: any) {
- super(resultManager, 'echo-entrypoint-collect-checker')
- GoAnalyzer.registerKnownPackageNames(KnownPackageName)
- }
-
- /**
- *
- * @param analyzer
- * @param scope
- * @param node
- * @param state
- * @param info
- */
- triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
- if (config.entryPointMode === 'ONLY_CUSTOM') return
- if (!(fclos && fclos.object && fclos.property)) return
- const { object, property } = fclos
- if (!object._qid || !property.name) return
- if (!RouteRegistryObject.some((obj) => object._qid.includes(obj))) return
- switch (property.name) {
- case 'Use':
- case 'Pre':
- this.handleMiddlewareArgs(analyzer, scope, state, argvalues)
- break
- case 'CONNECT':
- case 'DELETE':
- case 'GET':
- case 'HEAD':
- case 'OPTIONS':
- case 'PATCH':
- case 'POST':
- case 'PUT':
- case 'TRACE':
- case 'RouteNotFound':
- case 'Any':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[1], '0')
- this.handleMiddlewareArgs(analyzer, scope, state, argvalues.slice(2))
- break
- case 'Match':
- case 'Add':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argvalues[2], '0')
- this.handleMiddlewareArgs(analyzer, scope, state, argvalues.slice(3))
- break
- case 'File':
- this.handleMiddlewareArgs(analyzer, scope, state, argvalues.slice(2))
- break
- case 'FileFS':
- flattenUnionValues([argvalues[2]]).forEach((fs) => {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, fs.field.Open, '0')
- })
- this.handleMiddlewareArgs(analyzer, scope, state, argvalues.slice(3))
- break
- case 'Host':
- case 'Group':
- this.handleMiddlewareArgs(analyzer, scope, state, argvalues.slice(1))
- break
- default:
- break
- }
- }
-
- /**
- *
- * @param analyzer
- * @param scope
- * @param node
- * @param state
- * @param info
- */
- triggerAtSymbolInterpretOfEntryPointAfter(analyzer: any, scope: any, node: any, state: any, info: any) {
- if (info?.entryPoint.functionName === 'main') processedRouteRegistry.clear()
- }
-
- /**
- *
- * @param analyzer
- * @param scope
- * @param node
- * @param state
- * @param info
- */
- triggerAtAssignment(analyzer: any, scope: any, node: any, state: any, info: any) {
- if (config.entryPointMode === 'ONLY_CUSTOM') return
- const { lvalue, rvalue } = info
- if (!(lvalue.object && lvalue.property)) return
- const { object, property } = lvalue
- if (!object._qid || !property.name) return
- if (!RouteRegistryObject.some((obj) => object._qid.includes(obj))) return
- const rvalueObjs = flattenUnionValues([rvalue])
- switch (property.name) {
- case 'HTTPErrorHandler':
- rvalueObjs.forEach((obj) =>
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, obj, '0, 1')
- )
- break
- case 'Binder':
- rvalueObjs.forEach((obj) =>
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, obj.field.Bind, '1')
- )
- break
- case 'Renderer':
- rvalueObjs.forEach((obj) =>
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, obj.field.Render, '3')
- )
- break
- case 'Filesystem':
- rvalueObjs.forEach((obj) =>
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, obj.field.Open, '0')
- )
- break
- default:
- break
- }
- }
-
- /**
- *
- * @param analyzer
- * @param state
- * @param symbol
- */
- handleConfigObjectCollection(analyzer: any, state: any, symbol: any) {
- const rules = ConfigObjectCollectionTable.get(symbol.expression?.name)
- if (!rules) return
- flattenUnionValues([symbol.arguments[0]]).forEach((middlewareConfig) => {
- rules.forEach((rule) => {
- const fieldValue = middlewareConfig.field[rule.name]
- if (!fieldValue) return
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, fieldValue, rule.source)
- })
- })
- }
-
- /**
- *
- * @param analyzer
- * @param state
- * @param symbol
- */
- handleKnownEchoMiddlewares(analyzer: any, state: any, symbol: any) {
- if (symbol.type !== 'CallExpression') return
- const objectQid = symbol.expression?._qid
- if (!(objectQid && MiddlewareHandlerRegistryObject.some((obj) => objectQid.startsWith(obj)))) return
-
- switch (symbol.expression.name) {
- case 'BasicAuth':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, symbol.arguments[0], '0, 1, 2')
- break
- case 'BodyDump':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, symbol.arguments[0], '0, 1, 2')
- break
- case 'KeyAuth':
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, symbol.arguments[0], '0, 1')
- break
- case 'WithConfig':
- flattenUnionValues([symbol.arguments[0]]).forEach((middlewareConfig) => {
- const tokenLookupFuncs = middlewareConfig.field.TokenLookupFuncs
- if (!tokenLookupFuncs) return
- Object.values(tokenLookupFuncs.value).forEach((v) => {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, v as Unit, '0')
- })
- })
- this.handleConfigObjectCollection(analyzer, state, symbol)
- break
- case 'NewMiddlewareWithConfig':
- flattenUnionValues([symbol.arguments[0]]).forEach((middlewareConfig) => {
- const labelFuncs = middlewareConfig.field.LabelFuncs
- if (!labelFuncs) return
- Object.values(labelFuncs.value).forEach((v) => {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, v as Unit, '0, 1')
- })
- })
- this.handleConfigObjectCollection(analyzer, state, symbol)
- break
- case 'ProxyWithConfig':
- flattenUnionValues([symbol.arguments[0]]).forEach((middlewareConfig) => {
- const balancerNext = middlewareConfig.field?.Balancer?.field?.Next
- if (balancerNext) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, balancerNext, '0')
- }
- const transportRoundTrip = middlewareConfig.field?.Transport?.field?.RoundTrip
- if (transportRoundTrip) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, transportRoundTrip, '0')
- }
- })
- this.handleConfigObjectCollection(analyzer, state, symbol)
- break
- case 'RateLimiterWithConfig':
- flattenUnionValues([symbol.arguments[0]]).forEach((middlewareConfig) => {
- const allow = middlewareConfig.field?.Store?.field?.Allow
- if (allow) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, allow, '0')
- }
- })
- this.handleConfigObjectCollection(analyzer, state, symbol)
- break
- case 'MiddlewareWithConfig':
- flattenUnionValues([symbol.arguments[0]]).forEach((middlewareConfig) => {
- const store = middlewareConfig.field.Store
- if (!store) return
- ;[store.field.Get, store.field.New, store.field.Save]
- .filter((v) => v)
- .forEach((v) => {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, v, '0')
- })
- })
- this.handleConfigObjectCollection(analyzer, state, symbol)
- break
- case 'StaticWithConfig':
- flattenUnionValues([symbol.arguments[0]]).forEach((middlewareConfig) => {
- const open = middlewareConfig.field?.Filesystem?.field?.Open
- if (open) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, open, '0')
- }
- })
- this.handleConfigObjectCollection(analyzer, state, symbol)
- break
- default:
- this.handleConfigObjectCollection(analyzer, state, symbol)
- break
- }
- }
-
- /**
- *
- * @param analyzer
- * @param scope
- * @param state
- * @param middlewareFunctionValue
- */
- handleCustomMiddleware(analyzer: any, scope: any, state: any, middlewareFunctionValue: any) {
- const retVal = analyzer.processAndCallFuncDef(scope, middlewareFunctionValue.fdef, middlewareFunctionValue, state)
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, retVal, '0')
- }
-
- /**
- *
- * @param analyzer
- * @param scope
- * @param state
- * @param list
- */
- handleMiddlewareArgs(analyzer: any, scope: any, state: any, list: Array) {
- const flattened = flattenUnionValues(list)
- flattened.forEach((unit) => {
- if (unit.vtype === 'symbol') {
- this.handleKnownEchoMiddlewares(analyzer, state, unit)
- } else if (unit.vtype === 'fclos') {
- this.handleCustomMiddleware(analyzer, scope, state, unit)
- }
- })
- }
-}
-
-module.exports = EchoEntrypointCollectChecker
diff --git a/src/checker/taint/go/gRpc-entrypoint-collect-checker.ts b/src/checker/taint/go/gRpc-entrypoint-collect-checker.ts
index d42024a1..a7fbcd0d 100644
--- a/src/checker/taint/go/gRpc-entrypoint-collect-checker.ts
+++ b/src/checker/taint/go/gRpc-entrypoint-collect-checker.ts
@@ -1,4 +1,5 @@
import type { EntryPoint } from '../../../engine/analyzer/common/entrypoint'
+import { getLegacyArgValues } from '../../../engine/analyzer/common/call-args'
const completeEntryPoint = require('../common-kit/entry-points-util')
const AstUtil = require('../../../util/ast-util')
@@ -68,7 +69,7 @@ class GRpcEntrypointCollectChecker extends Checker {
if (match) {
const serverName = match[1]
const fClos = analyzer.processFunctionDefinition(scope, exp, state)
- if (fClos?._qid) registerServerPoints[fClos._qid] = serverName
+ if (fClos?.qid) registerServerPoints[fClos.qid] = serverName
}
break
@@ -111,11 +112,12 @@ class GRpcEntrypointCollectChecker extends Checker {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void {
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
if (config.entryPointMode === 'ONLY_CUSTOM') return // 不路由自采集
- if (!(fclos._qid in registerServerPoints)) return // 处理Register_xxx_Server函数,即实现类注册点
+ if (!(fclos.qid in registerServerPoints)) return // 处理Register_xxx_Server函数,即实现类注册点
+ const argvalues = getLegacyArgValues(callInfo)
if (!Array.isArray(argvalues) || argvalues.length < 1) return
- const serverName = registerServerPoints[fclos._qid]
+ const serverName = registerServerPoints[fclos.qid]
const implServer = argvalues[1]
this.searchServiceEntryPoints(serverName, implServer, fclos, state, analyzer)
}
@@ -135,14 +137,14 @@ class GRpcEntrypointCollectChecker extends Checker {
interfaceEntryPoints.forEach((entryPointName: string) => {
const ep = AstUtil.satisfy(
implServer,
- (n: any) => n.vtype === 'fclos' && n?.ast?.id.name === entryPointName,
- (node: any, prop: any) => prop === 'field',
+ (n: any) => n.vtype === 'fclos' && n?.ast?.node?.id.name === entryPointName,
+ (node: any, prop: any) => prop === '_field',
null,
false
)
if (ep) {
- const hash = JSON.stringify(ep.ast.loc)
+ const hash = JSON.stringify(ep.ast.node.loc)
if (!hash || processedRegisterEntryPoints.has(hash)) return
processedRegisterEntryPoints.add(hash)
this.introduceGrpcTaint(ep, state, analyzer)
diff --git a/src/checker/taint/go/gin-default-taint-checker.ts b/src/checker/taint/go/gin-default-taint-checker.ts
index 330af726..77149d75 100644
--- a/src/checker/taint/go/gin-default-taint-checker.ts
+++ b/src/checker/taint/go/gin-default-taint-checker.ts
@@ -1,3 +1,5 @@
+import { getLegacyArgValues, type CallInfo } from '../../../engine/analyzer/common/call-args'
+
const _ = require('lodash')
const BasicRuleHandler = require('../../common/rules-basic-handler')
const FileUtil = require('../../../util/file-util')
@@ -66,13 +68,13 @@ class GinDefaultTaintChecker extends TaintChecker {
* @param topScope
*/
prepareEntryPoints(analyzer: any, topScope: any) {
- const { entrypoints: ruleConfigEntryPoints, sources: ruleConfigSources } = this.checkerRuleConfigContent
+ const { entrypoints: ruleConfigEntryPoints, sources: ruleConfigSources } = this.checkerRuleConfigContent || {}
const {
TaintSource: TaintSourceRules,
FuncCallArgTaintSource: FuncCallArgTaintSourceRules,
FuncCallReturnValueTaintSource: FuncCallReturnValueTaintSourceRules,
- } = ruleConfigSources
+ } = ruleConfigSources || {}
if (Config.entryPointMode !== 'SELF_COLLECT') {
// 添加rule_config中的route入口
@@ -81,25 +83,25 @@ class GinDefaultTaintChecker extends TaintChecker {
let entryPointSymVal
if (entrypoint.funcReceiverType) {
entryPointSymVal = AstUtil.satisfy(
- topScope.packageManager,
+ topScope.context.packages,
(n: any) =>
n.vtype === 'fclos' &&
- FileUtil.extractAfterSubstring(n?.ast?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
- n?.parent?.ast?.type === 'ClassDefinition' &&
- n?.parent?.ast?.id?.name === entrypoint.funcReceiverType &&
- n?.ast?.id.name === entrypoint.functionName,
- (node: any, prop: any) => prop === 'field',
+ FileUtil.extractAfterSubstring(n?.ast?.node?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
+ n?.parent?.ast?.node?.type === 'ClassDefinition' &&
+ n?.parent?.ast?.node?.id?.name === entrypoint.funcReceiverType &&
+ n?.ast?.node?.id.name === entrypoint.functionName,
+ (node: any, prop: any) => prop === '_field',
null,
false
)
} else {
entryPointSymVal = AstUtil.satisfy(
- topScope.packageManager,
+ topScope.context.packages,
(n: any) =>
n.vtype === 'fclos' &&
- FileUtil.extractAfterSubstring(n?.ast?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
- n?.ast?.id.name === entrypoint.functionName,
- (node: any, prop: any) => prop === 'field',
+ FileUtil.extractAfterSubstring(n?.ast?.node?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
+ n?.ast?.node?.id.name === entrypoint.functionName,
+ (node: any, prop: any) => prop === '_field',
null,
false
)
@@ -108,7 +110,7 @@ class GinDefaultTaintChecker extends TaintChecker {
continue
}
if (Array.isArray(entryPointSymVal)) {
- entryPointSymVal = _.uniqBy(entryPointSymVal, (value: any) => value.fdef)
+ entryPointSymVal = _.uniqBy(entryPointSymVal, (value: any) => value.ast.fdef)
} else {
entryPointSymVal = [entryPointSymVal]
}
@@ -126,12 +128,12 @@ class GinDefaultTaintChecker extends TaintChecker {
}
}
if (Config.entryPointMode !== 'ONLY_CUSTOM') {
- const ginDefaultEntrypoint = GinEntryPoint.getGinDefaultEntrypoint(topScope.packageManager)
+ const ginDefaultEntrypoint = GinEntryPoint.getGinDefaultEntrypoint(topScope.context.packages)
analyzer.ruleEntrypoints.push(...ginDefaultEntrypoint)
// 添加source
const { TaintSource, FuncCallArgTaintSource, FuncCallReturnValueTaintSource } =
- GinEntryPoint.getGinEntryPointAndSource(topScope.packageManager)
+ GinEntryPoint.getGinEntryPointAndSource(topScope.context.packages)
if (
_.isEmpty(TaintSource) &&
@@ -201,13 +203,14 @@ class GinDefaultTaintChecker extends TaintChecker {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
const calleeObject = fclos.object
- this.checkByNameAndClassMatch(node, fclos, argvalues, scope)
+ this.checkByNameAndClassMatch(node, fclos, callInfo, scope, state)
const funcCallArgTaintSource = this.checkerRuleConfigContent.sources?.FuncCallArgTaintSource
- IntroduceTaint.introduceFuncArgTaintByRuleConfig(calleeObject, node, argvalues, funcCallArgTaintSource)
+ IntroduceTaint.introduceFuncArgTaintByRuleConfig(calleeObject, node, callInfo, funcCallArgTaintSource)
if (Config.entryPointMode === 'ONLY_CUSTOM') return
+ const argvalues = getLegacyArgValues(callInfo)
this.collectRouteRegistry(node, calleeObject, argvalues, scope, analyzer)
}
@@ -271,18 +274,19 @@ class GinDefaultTaintChecker extends TaintChecker {
* @param fclos
* @param argvalues
* @param scope
+ * @param state
*/
- checkByNameAndClassMatch(node: any, fclos: any, argvalues: any, scope: any) {
+ checkByNameAndClassMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state?: any) {
if (fclos === undefined) {
return
}
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
- if (!rules || !argvalues) return
- let rule = matchSinkAtFuncCallWithCalleeType(node, fclos, rules, scope)
+ if (!rules || !callInfo) return
+ let rule = matchSinkAtFuncCallWithCalleeType(node, fclos, rules, scope, callInfo)
rule = rule.length > 0 ? rule[0] : null
if (rule) {
- const args = BasicRuleHandler.prepareArgs(argvalues, fclos, rule)
+ const args = BasicRuleHandler.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -309,7 +313,8 @@ class GinDefaultTaintChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME_GIN_DEFAULT,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategyGinDefault.isNewFinding(this.resultManager, taintFlowFinding)) continue
this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategyGinDefault.outputStrategyId)
diff --git a/src/checker/taint/go/gin-taint-checker.ts b/src/checker/taint/go/gin-taint-checker.ts
index 2b7982c5..f0829b31 100644
--- a/src/checker/taint/go/gin-taint-checker.ts
+++ b/src/checker/taint/go/gin-taint-checker.ts
@@ -1,3 +1,5 @@
+import { getLegacyArgValues, type CallInfo } from '../../../engine/analyzer/common/call-args'
+
const _ = require('lodash')
const BasicRuleHandler = require('../../common/rules-basic-handler')
const FileUtil = require('../../../util/file-util')
@@ -66,13 +68,13 @@ class GinTaintChecker extends TaintChecker {
* @param topScope
*/
prepareEntryPoints(analyzer: any, topScope: any) {
- const { entrypoints: ruleConfigEntryPoints, sources: ruleConfigSources } = this.checkerRuleConfigContent
+ const { entrypoints: ruleConfigEntryPoints, sources: ruleConfigSources } = this.checkerRuleConfigContent || {}
const {
TaintSource: TaintSourceRules,
FuncCallArgTaintSource: FuncCallArgTaintSourceRules,
FuncCallReturnValueTaintSource: FuncCallReturnValueTaintSourceRules,
- } = ruleConfigSources
+ } = ruleConfigSources || {}
// 添加rule_config中的route入口
if (!_.isEmpty(ruleConfigEntryPoints) && Config.entryPointMode !== 'SELF_COLLECT') {
@@ -80,25 +82,25 @@ class GinTaintChecker extends TaintChecker {
let entryPointSymVal
if (entrypoint.funcReceiverType) {
entryPointSymVal = AstUtil.satisfy(
- topScope.packageManager,
+ topScope.context.packages,
(n: any) =>
n.vtype === 'fclos' &&
- FileUtil.extractAfterSubstring(n?.ast?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
- n?.parent?.ast?.type === 'ClassDefinition' &&
- n?.parent?.ast?.id?.name === entrypoint.funcReceiverType &&
- n?.ast?.id.name === entrypoint.functionName,
- (node: any, prop: any) => prop === 'field',
+ FileUtil.extractAfterSubstring(n?.ast?.node?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
+ n?.parent?.ast?.node?.type === 'ClassDefinition' &&
+ n?.parent?.ast?.node?.id?.name === entrypoint.funcReceiverType &&
+ n?.ast?.node?.id.name === entrypoint.functionName,
+ (node: any, prop: any) => prop === '_field',
null,
false
)
} else {
entryPointSymVal = AstUtil.satisfy(
- topScope.packageManager,
+ topScope.context.packages,
(n: any) =>
n.vtype === 'fclos' &&
- FileUtil.extractAfterSubstring(n?.ast?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
- n?.ast?.id.name === entrypoint.functionName,
- (node: any, prop: any) => prop === 'field',
+ FileUtil.extractAfterSubstring(n?.ast?.node?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
+ n?.ast?.node?.id.name === entrypoint.functionName,
+ (node: any, prop: any) => prop === '_field',
null,
false
)
@@ -107,7 +109,7 @@ class GinTaintChecker extends TaintChecker {
continue
}
if (Array.isArray(entryPointSymVal)) {
- entryPointSymVal = _.uniqBy(entryPointSymVal, (value: any) => value.fdef)
+ entryPointSymVal = _.uniqBy(entryPointSymVal, (value: any) => value.ast.fdef)
} else {
entryPointSymVal = [entryPointSymVal]
}
@@ -127,7 +129,7 @@ class GinTaintChecker extends TaintChecker {
// 添加source
if (Config.entryPointMode !== 'ONLY_CUSTOM') {
const { TaintSource, FuncCallArgTaintSource, FuncCallReturnValueTaintSource } =
- GinEntryPoint.getGinEntryPointAndSource(topScope.packageManager)
+ GinEntryPoint.getGinEntryPointAndSource(topScope.context.packages)
if (
_.isEmpty(TaintSource) &&
@@ -197,13 +199,14 @@ class GinTaintChecker extends TaintChecker {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
const calleeObject = fclos.object
- this.checkByNameAndClassMatch(node, fclos, argvalues, scope)
+ this.checkByNameAndClassMatch(node, fclos, callInfo, scope, state)
const funcCallArgTaintSource = this.checkerRuleConfigContent.sources?.FuncCallArgTaintSource
- IntroduceTaint.introduceFuncArgTaintByRuleConfig(calleeObject, node, argvalues, funcCallArgTaintSource)
+ IntroduceTaint.introduceFuncArgTaintByRuleConfig(calleeObject, node, callInfo, funcCallArgTaintSource)
if (Config.entryPointMode === 'ONLY_CUSTOM') return
+ const argvalues = getLegacyArgValues(callInfo)
this.collectRouteRegistry(node, calleeObject, argvalues, scope, analyzer)
}
@@ -268,19 +271,20 @@ class GinTaintChecker extends TaintChecker {
* @param fclos
* @param argvalues
* @param scope
+ * @param state
*/
- checkByNameAndClassMatch(node: any, fclos: any, argvalues: any, scope: any) {
+ checkByNameAndClassMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state?: any) {
if (fclos === undefined) {
return
}
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
- if (!rules || !argvalues) return
- let rule = matchSinkAtFuncCallWithCalleeType(node, fclos, rules, scope)
+ if (!rules || !callInfo) return
+ let rule = matchSinkAtFuncCallWithCalleeType(node, fclos, rules, scope, callInfo)
rule = rule.length > 0 ? rule[0] : null
if (rule) {
- const args = BasicRuleHandler.prepareArgs(argvalues, fclos, rule)
+ const args = BasicRuleHandler.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -307,7 +311,8 @@ class GinTaintChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME_GIN,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) continue
this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategy.outputStrategyId)
diff --git a/src/checker/taint/go/go-default-taint-checker.ts b/src/checker/taint/go/go-default-taint-checker.ts
index f13036b8..c31b5cc9 100644
--- a/src/checker/taint/go/go-default-taint-checker.ts
+++ b/src/checker/taint/go/go-default-taint-checker.ts
@@ -1,3 +1,5 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+
const _ = require('lodash')
const GoEntryPoint = require('../../../engine/analyzer/golang/common/entrypoint-collector/go-default-entrypoint')
const completeEntryPoint = require('../common-kit/entry-points-util')
@@ -55,13 +57,13 @@ class GoDefaultTaintChecker extends TaintChecker {
prepareEntryPoints(topScope: any, analyzer: any) {
if (Config.entryPointMode === 'ONLY_CUSTOM') return
// 添加main入口
- let mainEntryPoints = GoEntryPoint.getMainEntryPoints(topScope.packageManager)
+ let mainEntryPoints = GoEntryPoint.getMainEntryPoints(topScope.context.packages)
if (_.isEmpty(mainEntryPoints)) {
logger.info('[go-default-taint-checker]EntryPoints are not found')
return
}
if (Array.isArray(mainEntryPoints)) {
- mainEntryPoints = _.uniqBy(mainEntryPoints, (value: any) => value.fdef)
+ mainEntryPoints = _.uniqBy(mainEntryPoints, (value: any) => value.ast.fdef)
} else {
mainEntryPoints = [mainEntryPoints]
}
@@ -80,7 +82,8 @@ class GoDefaultTaintChecker extends TaintChecker {
FullCallGraphFileEntryPoint.makeFullCallGraph(analyzer)
}
const fullCallGraphEntrypoint = FullCallGraphFileEntryPoint.getAllEntryPointsUsingCallGraph(
- analyzer.ainfo?.callgraph
+ analyzer.ainfo?.callgraph,
+ analyzer
)
this.entryPoints.push(...fullCallGraphEntrypoint)
}
@@ -93,25 +96,25 @@ class GoDefaultTaintChecker extends TaintChecker {
let entryPointSymVal
if (entrypoint.funcReceiverType) {
entryPointSymVal = AstUtil.satisfy(
- topScope.packageManager,
+ topScope.context.packages,
(n: any) =>
n.vtype === 'fclos' &&
- FileUtil.extractAfterSubstring(n?.ast?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
- n?.parent?.ast?.type === 'ClassDefinition' &&
- n?.parent?.ast?.id?.name === entrypoint.funcReceiverType &&
- n?.ast?.id.name === entrypoint.functionName,
- (node: any, prop: any) => prop === 'field',
+ FileUtil.extractAfterSubstring(n?.ast?.node?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
+ n?.parent?.ast?.node?.type === 'ClassDefinition' &&
+ n?.parent?.ast?.node?.id?.name === entrypoint.funcReceiverType &&
+ n?.ast?.node?.id.name === entrypoint.functionName,
+ (node: any, prop: any) => prop === '_field',
null,
false
)
} else {
entryPointSymVal = AstUtil.satisfy(
- topScope.packageManager,
+ topScope.context.packages,
(n: any) =>
n.vtype === 'fclos' &&
- FileUtil.extractAfterSubstring(n?.ast?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
- n?.ast?.id.name === entrypoint.functionName,
- (node: any, prop: any) => prop === 'field',
+ FileUtil.extractAfterSubstring(n?.ast?.node?.loc?.sourcefile, Config.maindirPrefix) === entrypoint.filePath &&
+ n?.ast?.node?.id.name === entrypoint.functionName,
+ (node: any, prop: any) => prop === '_field',
null,
false
)
@@ -120,7 +123,7 @@ class GoDefaultTaintChecker extends TaintChecker {
continue
}
if (Array.isArray(entryPointSymVal)) {
- entryPointSymVal = _.uniqBy(entryPointSymVal, (value: any) => value.fdef)
+ entryPointSymVal = _.uniqBy(entryPointSymVal, (value: any) => value.ast.fdef)
} else {
entryPointSymVal = [entryPointSymVal]
}
@@ -160,11 +163,11 @@ class GoDefaultTaintChecker extends TaintChecker {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
const calleeObject = fclos?.object
- this.checkByNameAndClassMatch(node, fclos, argvalues, scope)
+ this.checkByNameAndClassMatch(node, fclos, callInfo, scope, state)
const funcCallArgTaintSource = this.checkerRuleConfigContent.sources?.FuncCallArgTaintSource
- IntroduceTaint.introduceFuncArgTaintByRuleConfig(calleeObject, node, argvalues, funcCallArgTaintSource)
+ IntroduceTaint.introduceFuncArgTaintByRuleConfig(calleeObject, node, callInfo, funcCallArgTaintSource)
}
/**
@@ -188,19 +191,20 @@ class GoDefaultTaintChecker extends TaintChecker {
* @param fclos
* @param argvalues
* @param scope
+ * @param state
*/
- checkByNameAndClassMatch(node: any, fclos: any, argvalues: any, scope: any) {
+ checkByNameAndClassMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state?: any) {
if (fclos === undefined) {
return
}
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
- if (!rules || !argvalues) return
- let rule = matchSinkAtFuncCallWithCalleeType(node, fclos, rules, scope, argvalues)
+ if (!rules || !callInfo) return
+ let rule = matchSinkAtFuncCallWithCalleeType(node, fclos, rules, scope, callInfo)
rule = rule.length > 0 ? rule[0] : null
if (rule) {
- const args = BasicRuleHandler.prepareArgs(argvalues, fclos, rule)
+ const args = BasicRuleHandler.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds((rule as any).sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -227,7 +231,8 @@ class GoDefaultTaintChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) continue
@@ -247,7 +252,7 @@ class GoDefaultTaintChecker extends TaintChecker {
* @param info
*/
triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any) {
- IntroduceTaint.introduceTaintAtIdentifierDirect(node, info.res, this.sourceScope.value)
+ IntroduceTaint.introduceTaintAtIdentifierDirect(analyzer, scope, node, info.res, this.sourceScope.value)
}
}
diff --git a/src/checker/taint/go/gorilla-mux-entrypoint-collect-checker.ts b/src/checker/taint/go/gorilla-mux-entrypoint-collect-checker.ts
index 50fb4ba7..2771a684 100644
--- a/src/checker/taint/go/gorilla-mux-entrypoint-collect-checker.ts
+++ b/src/checker/taint/go/gorilla-mux-entrypoint-collect-checker.ts
@@ -1,8 +1,10 @@
+import { getLegacyArgValues } from '../../../engine/analyzer/common/call-args'
+
const completeEntryPoint = require('../common-kit/entry-points-util')
const Config = require('../../../config')
const RouteRegistryProperty = ['HandleFunc', 'Handle', 'Handler']
-const RouteRegistryObject = ['github.com/gorilla/mux.NewRouter()']
+const RouteRegistryObject = ['.packageManager.github.com/gorilla/mux.NewRouter()']
const IntroduceTaint = require('../common-kit/source-util')
const Checker = require('../../common/checker')
@@ -30,8 +32,8 @@ class MuxEntryPointCollectChecker extends Checker {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
-
+ const { fclos, callInfo } = info
+ const argvalues = getLegacyArgValues(callInfo)
this.collectRouteRegistry(node, fclos, argvalues, scope, info)
}
@@ -60,16 +62,16 @@ class MuxEntryPointCollectChecker extends Checker {
if (Config.entryPointMode === 'ONLY_CUSTOM') return // 不路由自采集
if (!(calleeFClos && calleeFClos.object && calleeFClos.property)) return
const { object, property } = calleeFClos
- if (!object._qid || !property.name) return
- const objectQid = object._qid
+ if (!object.qid || !property.name) return
+ const objectQid = object.qid
const propertyName = property.name
if (
RouteRegistryObject.some((muxPrefix: any) => objectQid.startsWith(muxPrefix)) &&
RouteRegistryProperty.includes(propertyName)
) {
for (const arg of argValues) {
- if (arg?.vtype === 'fclos' && arg?.ast.loc) {
- const hash = JSON.stringify(arg.ast.loc)
+ if (arg?.vtype === 'fclos' && arg?.ast.node.loc) {
+ const hash = JSON.stringify(arg.ast.node.loc)
if (!processedRouteRegistry.has(hash)) {
processedRouteRegistry.add(hash)
IntroduceTaint.introduceFuncArgTaintBySelfCollection(arg, state, analyzer, '1:', 'GO_INPUT')
diff --git a/src/checker/taint/go/main-entrypoint-collect-checker.ts b/src/checker/taint/go/main-entrypoint-collect-checker.ts
index fc2e8254..d8193671 100644
--- a/src/checker/taint/go/main-entrypoint-collect-checker.ts
+++ b/src/checker/taint/go/main-entrypoint-collect-checker.ts
@@ -42,18 +42,18 @@ class MainEntrypointCollectChecker extends Checker {
prepareEntryPoints(topScope: any): void {
if (Config.entryPointMode === 'ONLY_CUSTOM') return
// 添加main入口
- let mainEntryPoints = GoEntryPoint.getMainEntryPoints(topScope.packageManager)
+ let mainEntryPoints = GoEntryPoint.getMainEntryPoints(topScope.context.packages)
if (_.isEmpty(mainEntryPoints)) {
return
}
if (Array.isArray(mainEntryPoints)) {
- mainEntryPoints = _.uniqBy(mainEntryPoints, (value: EntryPoint) => value.fdef)
+ mainEntryPoints = _.uniqBy(mainEntryPoints, (value: EntryPoint) => value.ast.fdef)
} else {
mainEntryPoints = [mainEntryPoints]
}
mainEntryPoints.forEach((main: EntryPoint) => {
if (main) {
- const entryPoint = completeEntryPoint(main)
+ const entryPoint = completeEntryPoint(main, true)
this.entryPoints.push(entryPoint)
}
})
diff --git a/src/checker/taint/go/restful-entrypoint-collect-checker.ts b/src/checker/taint/go/restful-entrypoint-collect-checker.ts
index 6fca9f0c..867d90ba 100644
--- a/src/checker/taint/go/restful-entrypoint-collect-checker.ts
+++ b/src/checker/taint/go/restful-entrypoint-collect-checker.ts
@@ -1,20 +1,17 @@
-import { processEntryPointAndTaintSource } from './util'
+import { getLegacyArgValues } from '../../../engine/analyzer/common/call-args'
const config = require('../../../config')
-const GoAnalyzer = require('../../../engine/analyzer/golang/common/go-analyzer')
const RouteRegistryProperty = ['Filter', 'To', 'If']
-const KnownPackageName = {
- 'github.com/emicklei/go-restful': 'restful',
- 'github.com/emicklei/go-restful/v3': 'restful',
-}
const RouteRegistryObject = [
- 'github.com/emicklei/go-restful.WebService',
- 'github.com/emicklei/go-restful/v3.WebService',
+ /github\.com\/emicklei\/go-restful\/v3\.WebService/,
+ /github\.com\/emicklei\/go-restful\.WebService/,
]
+const IntroduceTaint = require('../common-kit/source-util')
const Checker = require('../../common/checker')
+const completeEntryPoint = require('../common-kit/entry-points-util')
-const processedRouteRegistry = new Set()
+const processedRouteRegistry = new Set()
/**
*
@@ -26,7 +23,6 @@ class RestfulEntrypointCollectChecker extends Checker {
*/
constructor(resultManager: any) {
super(resultManager, 'go-restful-entryPoints-collect-checker')
- GoAnalyzer.registerKnownPackageNames(KnownPackageName)
}
/**
@@ -38,8 +34,8 @@ class RestfulEntrypointCollectChecker extends Checker {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
-
+ const { fclos, callInfo } = info
+ const argvalues = getLegacyArgValues(callInfo)
this.collectRouteRegistry(node, fclos, argvalues, scope, info)
}
@@ -68,15 +64,22 @@ class RestfulEntrypointCollectChecker extends Checker {
if (config.entryPointMode === 'ONLY_CUSTOM') return
if (!(calleeFClos && calleeFClos.object && calleeFClos.property)) return
const { object, property } = calleeFClos
- if (!object._qid || !property.name) return
- const objectQid = object._qid
+ if (!object.qid || !property.name) return
+ const objectQid = object.qid
const propertyName = property.name
- if (
- RouteRegistryObject.some((prefix) => objectQid.startsWith(prefix)) &&
- RouteRegistryProperty.includes(propertyName) &&
- argValues[0]
- ) {
- processEntryPointAndTaintSource(analyzer, state, processedRouteRegistry, argValues[0], '0')
+ if (RouteRegistryObject.some((prefix) => prefix.test(objectQid)) && RouteRegistryProperty.includes(propertyName)) {
+ if (argValues.length < 1) return
+ const arg0 = argValues[0]
+
+ if (arg0?.vtype === 'fclos' && arg0?.ast.node.loc) {
+ const hash = JSON.stringify(arg0.ast.node.loc)
+ if (!processedRouteRegistry.has(hash)) {
+ processedRouteRegistry.add(hash)
+ IntroduceTaint.introduceFuncArgTaintBySelfCollection(arg0, state, analyzer, '0', 'GO_INPUT')
+ const entryPoint = completeEntryPoint(arg0)
+ analyzer.entryPoints.push(entryPoint)
+ }
+ }
}
}
}
diff --git a/src/checker/taint/go/sync-once-do-checker.ts b/src/checker/taint/go/sync-once-do-checker.ts
index 654dad57..831efa87 100644
--- a/src/checker/taint/go/sync-once-do-checker.ts
+++ b/src/checker/taint/go/sync-once-do-checker.ts
@@ -1,11 +1,13 @@
-const CheckerSyncOnceDo = require('../../common/checker')
+import { getLegacyArgValues, type CallInfo } from '../../../engine/analyzer/common/call-args'
+
+const Checker = require('../../common/checker')
const done: Set = new Set()
-const syncOnceDoQid: string = 'sync.Once.Do'
+const syncOnceDoQidRegex: RegExp = /sync\.Once\.Do/
interface TriggerInfo {
fclos: any
- argvalues: any[]
+ callInfo: CallInfo | undefined
[key: string]: any
}
@@ -13,7 +15,7 @@ interface TriggerInfo {
* sync.Once.Do bulitIn checker
* 为Go内置库方法sync.Once.Do做建模,执行且只执行一次传给Do方法的funcDef
*/
-class syncOnceDoChecker extends CheckerSyncOnceDo {
+class syncOnceDoChecker extends Checker {
/**
* constructor
* @param resultManager
@@ -31,12 +33,13 @@ class syncOnceDoChecker extends CheckerSyncOnceDo {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: TriggerInfo): void {
- const { fclos, argvalues } = info
- if (fclos._qid !== syncOnceDoQid) return
+ const { fclos, callInfo } = info
+ if (!syncOnceDoQidRegex.test(fclos.qid)) return
const hash: string = JSON.stringify(node.loc)
if (done.has(hash)) return
done.add(hash)
- if (argvalues.length !== 1 && argvalues[0].vtype !== 'fclos') return
+ const argvalues = getLegacyArgValues(callInfo)
+ if (argvalues.length !== 1 || argvalues[0].vtype !== 'fclos') return
const fDef = node.arguments[0]
const fClos = argvalues[0]
diff --git a/src/checker/taint/go/urfave-cli-checker.ts b/src/checker/taint/go/urfave-cli-checker.ts
index 4e5801d8..a35aeb56 100644
--- a/src/checker/taint/go/urfave-cli-checker.ts
+++ b/src/checker/taint/go/urfave-cli-checker.ts
@@ -1,16 +1,16 @@
const completeEntryPoint = require('../common-kit/entry-points-util')
-const configUrfaveCli = require('../../../config')
-const CheckerUrfaveCli = require('../../common/checker')
+const config = require('../../../config')
+const Checker = require('../../common/checker')
-const processedBuiltInRegistryUrfaveCli = new Set()
-const builtInOnjectListUrfaveCli = ['github.com/urfave/cli.NewApp()']
-const builtInPropertyListUrfaveCli = ['Action']
+const processedBuiltInRegistry = new Set()
+const builtInObjectList = ['github.com/urfave/cli.NewApp()']
+const builtInPropertyList = ['Action']
/**
* urfave.cli bulitIn checker
* 为第三方库方法urfave.cli做建模,添加entryPoints
*/
-class urfaveCliChecker extends CheckerUrfaveCli {
+class urfaveCliChecker extends Checker {
/**
* constructor
* @param resultManager
@@ -29,16 +29,15 @@ class urfaveCliChecker extends CheckerUrfaveCli {
*/
triggerAtAssignment(analyzer: any, scope: any, node: any, state: any, info: any): void {
const { lvalue, rvalue } = info
- if (configUrfaveCli.entryPointMode === 'ONLY_CUSTOM') return // 不路由自采集
+ if (config.entryPointMode === 'ONLY_CUSTOM') return // 不路由自采集
if (!lvalue || !rvalue || rvalue.vtype !== 'fclos') return
const { object, property } = lvalue
if (!object || !property) return
- if (!builtInOnjectListUrfaveCli.includes(object._qid) || !builtInPropertyListUrfaveCli.includes(property.name))
- return
+ if (!builtInObjectList.includes(object.qid) || !builtInPropertyList.includes(property.name)) return
const hash = JSON.stringify(node.right.loc)
- if (processedBuiltInRegistryUrfaveCli.has(hash)) return
- processedBuiltInRegistryUrfaveCli.add(hash)
- analyzer.entryPoints.push(completeEntryPoint(rvalue))
+ if (processedBuiltInRegistry.has(hash)) return
+ processedBuiltInRegistry.add(hash)
+ analyzer.entryPoints.push(completeEntryPoint(rvalue, true))
}
}
diff --git a/src/checker/taint/java/java-default-taint-checker.ts b/src/checker/taint/java/java-default-taint-checker.ts
index 46d11a54..a617bbcf 100644
--- a/src/checker/taint/java/java-default-taint-checker.ts
+++ b/src/checker/taint/java/java-default-taint-checker.ts
@@ -41,13 +41,13 @@ class JavaDefaultTaintChecker extends JavaTaintAbstractChecker {
const selfCollectEntryPoints: any[] = []
const selfCollectTaintSource: any[] = []
const { selfCollectMainEntryPoints, selfCollectMainTaintSource } = MainEntryPoint.getJavaMainEntryPointAndSource(
- topScope.packageManager
+ topScope.context.packages
)
selfCollectEntryPoints.push(...selfCollectMainEntryPoints)
selfCollectTaintSource.push(...selfCollectMainTaintSource)
const { selfCollectSpringEntryPoints, selfCollectSpringTaintSource } =
- springEntryPoint.getSpringEntryPointAndSource(topScope.packageManager)
+ springEntryPoint.getSpringEntryPointAndSource(topScope.context.packages)
selfCollectEntryPoints.push(...selfCollectSpringEntryPoints)
selfCollectTaintSource.push(...selfCollectSpringTaintSource)
@@ -87,9 +87,10 @@ class JavaDefaultTaintChecker extends JavaTaintAbstractChecker {
FullCallGraphFileEntryPoint.makeFullCallGraph(analyzer)
}
const fullCallGraphEntrypoint = FullCallGraphFileEntryPoint.getAllEntryPointsUsingCallGraph(
- analyzer.ainfo?.callgraph
+ analyzer.ainfo?.callgraph,
+ analyzer
)
- const fullFileEntrypoint = FullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer.fileManager)
+ const fullFileEntrypoint = FullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer)
this.entryPoints.push(...fullCallGraphEntrypoint)
this.entryPoints.push(...fullFileEntrypoint)
}
@@ -102,9 +103,9 @@ class JavaDefaultTaintChecker extends JavaTaintAbstractChecker {
}
targetPackage = targetPackage.startsWith('.') ? targetPackage.slice(1) : targetPackage
const arr = Loader.getPackageNameProperties(targetPackage)
- let packageManagerT = topScope.packageManager
+ let packageManagerT = topScope.context.packages
arr.forEach((path: any) => {
- packageManagerT = packageManagerT?.field[path]
+ packageManagerT = packageManagerT?.members?.get(path)
})
if (!packageManagerT || packageManagerT.vtype === 'undefine') {
continue
@@ -117,23 +118,7 @@ class JavaDefaultTaintChecker extends JavaTaintAbstractChecker {
continue
}
- const scopeVal = Scoped({
- vtype: 'scope',
- _sid: 'mock',
- _id: 'mock',
- field: {},
- parent: null,
- })
-
- const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
- entryPoint.scopeVal = scopeVal
- entryPoint.argValues = []
- entryPoint.functionName = entrypoint.functionName
- entryPoint.filePath = entrypoint.filePath
- entryPoint.attribute = entrypoint.attribute
- entryPoint.packageName = entrypoint.packageName
- entryPoint.entryPointSymVal = entryPointSymVal
- this.entryPoints.push(entryPoint)
+ this.resolveAndPushEntryPoint(entryPointSymVal, entrypoint, func, analyzer, Scoped, EntryPoint, Constant)
}
}
}
diff --git a/src/checker/taint/java/java-taint-abstract-checker.ts b/src/checker/taint/java/java-taint-abstract-checker.ts
index fd4811db..d9e5deae 100644
--- a/src/checker/taint/java/java-taint-abstract-checker.ts
+++ b/src/checker/taint/java/java-taint-abstract-checker.ts
@@ -1,17 +1,223 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+import type { Invocation } from '../../../resolver/common/value/invocation'
+
+const QidUnifyUtil = require('../../../util/qid-unify-util')
const TaintCheckerJava = require('../taint-checker')
const IntroduceTaintJava = require('../common-kit/source-util')
const commonUtilJavaAbstract = require('../../../util/common-util')
-const { matchSinkAtFuncCallWithCalleeType: matchSinkAtFuncCallWithCalleeTypeJava } = require('../common-kit/sink-util')
+const {
+ matchSinkAtFuncCallWithCalleeType: matchSinkAtFuncCallWithCalleeTypeJava,
+ checkInvocationMatchSink,
+} = require('../common-kit/sink-util')
const RulesJava = require('../../common/rules-basic-handler')
const SanitizerCheckerJava = require('../../sanitizer/sanitizer-checker')
const TaintOutputStrategyJava = require('../../common/output/taint-output-strategy')
+const Config = require('../../../config')
+const logger = require('../../../util/logger')(__filename)
+
const TAINT_TAG_NAME_JAVA = 'JAVA_INPUT'
/**
* java taint base checker
*/
class JavaTaintAbstractChecker extends TaintCheckerJava {
+  /**
+   * Resolve an entrypoint declared on an interface or abstract class to the concrete
+   * methods that implement it. The original closure is returned unchanged when its owner
+   * is neither interface nor abstract, when the analyzer has no classMap, or when no
+   * implementing method is found.
+   * @param entryPointSymVal - the resolved entrypoint function closure
+   * @param funcName - the method name to look up on each candidate class
+   * @param analyzer - the analyzer instance; classMap is required, symbolTable is optional
+   * @returns non-empty array of function closures (may contain several implementations)
+   */
+  resolveInterfaceEntryPoint(entryPointSymVal: any, funcName: string, analyzer: any): any[] {
+    const parentScope = entryPointSymVal?.parent
+    const parentMeta = parentScope?.ast?.node?._meta
+    if (!(parentMeta?.isInterface || parentMeta?.isAbstract) || !analyzer?.classMap) {
+      return [entryPointSymVal]
+    }
+
+    const parentQid = parentScope?.qid
+    const parentSid = parentScope?.sid
+
+    const implSymVals: any[] = []
+    for (const [, classRef] of analyzer.classMap) {
+      const classVal = analyzer.symbolTable?.get(classRef) ?? classRef
+      if (!classVal || typeof classVal !== 'object' || classVal === parentScope) {
+        continue
+      }
+
+      // Check all supers (both extends and implements) via the AST supers array,
+      // because classVal.super only holds the last resolved super reference.
+      const declaredSupers = classVal.ast?.fdef?.supers
+      let isImpl =
+        Array.isArray(declaredSupers) &&
+        declaredSupers.some((superId: any) => {
+          if (!superId) return false
+          const superName = superId.name ?? superId.id?.name
+          if (superName && parentSid && superName === parentSid) return true
+          return Boolean(parentQid && (superId.qid === parentQid || superId.logicalQid === parentQid))
+        })
+
+      // Fallback: walk the runtime super chain for cases that were already resolved.
+      // `visited` stops the walk if the chain loops back on itself (e.g. mutually
+      // extending classes in malformed input); without it such a chain never terminates.
+      if (!isImpl) {
+        const visited = new Set<any>()
+        let superRef = classVal.super
+        while (superRef && !visited.has(superRef)) {
+          if (superRef === parentScope || (parentQid && superRef.qid === parentQid)) {
+            isImpl = true
+            break
+          }
+          visited.add(superRef)
+          superRef = superRef.super
+        }
+      }
+      if (!isImpl) continue
+
+      // Only methods not flagged as inherited are accepted as implementations.
+      const implMethod = classVal.members?.get(funcName) ?? classVal.value?.[funcName]
+      if (implMethod?.vtype === 'fclos' && !implMethod.inherited) {
+        implSymVals.push(implMethod)
+        logger.info(
+          'Resolved interface entrypoint [%s.%s] to implementation [%s.%s]',
+          parentScope?.sid,
+          funcName,
+          classVal?.sid,
+          funcName
+        )
+      }
+    }
+
+    return implSymVals.length > 0 ? implSymVals : [entryPointSymVal]
+  }
+
+  /**
+   * After an interface entrypoint has been redirected to implementation methods, copy the
+   * TaintSource rules configured for the interface file/function so they also apply to each
+   * implementation file, and register matching entries on this.sourceScope.
+   * @param interfaceSymVal - the original interface method fclos
+   * @param implSymVals - the resolved implementation method fclos array
+   * @param funcName - the function name
+   */
+  augmentSourcesForInterfaceResolution(interfaceSymVal: any, implSymVals: any[], funcName: string): void {
+    const taintSources = this.checkerRuleConfigContent.sources?.TaintSource
+    if (!Array.isArray(taintSources) || taintSources.length === 0) return
+
+    const interfacePath = this.normalizeAstSourceFilePath(interfaceSymVal?.ast?.node?.loc?.sourcefile)
+    if (!interfacePath) return
+
+    // A source is identified by all of its fields, so resolving the same interface twice
+    // never appends a duplicate, yet distinct sources inside one function are all kept.
+    const keyOf = (source: any): string =>
+      `${source.scopeFile}::${source.scopeFunc}::${source.path}::${source.kind}::${source.attribute || ''}`
+    const seenKeys = new Set(taintSources.map((s: any) => keyOf(s)))
+
+    for (const implSymVal of implSymVals) {
+      const implPath = this.normalizeAstSourceFilePath(implSymVal?.ast?.node?.loc?.sourcefile)
+      if (!implPath || implPath === interfacePath) continue
+
+      // Start the range at the first parameter (when there is one) instead of the method's
+      // own start line; the range ends at the method's last line.
+      const implAstNode = implSymVal?.ast?.node
+      const hasParams = implAstNode?.parameters?.length > 0
+      const locStart = hasParams
+        ? implAstNode.parameters[0].loc?.start?.line
+        : implAstNode?.loc?.start?.line
+      const locEnd = implAstNode?.loc?.end?.line
+
+      // Re-target every interface-level source at the implementation file, skipping known ones.
+      const newSources: any[] = []
+      for (const source of taintSources) {
+        if (source.scopeFile !== interfacePath || source.scopeFunc !== funcName) continue
+        const retargeted = { ...source, scopeFile: implPath }
+        const key = keyOf(retargeted)
+        if (seenKeys.has(key)) continue
+        seenKeys.add(key)
+        newSources.push(retargeted)
+      }
+      if (newSources.length === 0) continue
+
+      // Register the new sources in the rule config and in both source-scope lists.
+      taintSources.push(...newSources)
+      for (const added of newSources) {
+        // The same entry object is pushed to both lists.
+        const scopeEntry = {
+          path: added.path,
+          kind: added.kind,
+          scopeFile: added.scopeFile,
+          scopeFunc: added.scopeFunc,
+          attribute: added.attribute,
+          locStart,
+          locEnd,
+        }
+        this.sourceScope.value.push(scopeEntry)
+        this.sourceScope.fillLineValues.push(scopeEntry)
+      }
+      logger.info(
+        'Augmented TaintSource for implementation [%s] (loc %s-%s) from interface [%s.%s]',
+        implPath,
+        locStart,
+        locEnd,
+        interfacePath,
+        funcName
+      )
+    }
+  }
+
+  /**
+   * Resolve an interface/abstract-class entrypoint to its implementing methods and push one
+   * EntryPoint per resolved closure onto this.entryPoints. Shared by the prepareEntryPoints
+   * of both subclasses (JavaTaintChecker / JavaDefaultTaintChecker) to avoid duplication.
+   */
+  resolveAndPushEntryPoint(entryPointSymVal: any, entrypoint: any, func: string, analyzer: any, Scoped: any, EntryPoint: any, Constant: any): void {
+    const targets = this.resolveInterfaceEntryPoint(entryPointSymVal, func, analyzer)
+    const redirected = targets.length > 0 && targets[0] !== entryPointSymVal
+    if (redirected) {
+      // Sources configured for the interface must follow the entrypoint to its implementations.
+      this.augmentSourcesForInterfaceResolution(entryPointSymVal, targets, func)
+    }
+
+    for (const target of targets) {
+      // Every entrypoint gets its own placeholder scope, created before the EntryPoint itself.
+      const mockScope = new Scoped('', { vtype: 'scope', sid: 'mock', qid: 'mock', field: {}, parent: null })
+      const entry = new EntryPoint(Constant.ENGIN_START_FUNCALL)
+      Object.assign(entry, {
+        scopeVal: mockScope,
+        argValues: [],
+        functionName: entrypoint.functionName,
+        filePath: entrypoint.filePath,
+        attribute: entrypoint.attribute,
+        packageName: entrypoint.packageName,
+        entryPointSymVal: target,
+      })
+      this.entryPoints.push(entry)
+    }
+  }
+
+  /**
+   * Convert an AST sourcefile path into the relative form used by ruleconfig scopeFile.
+   * @param astPath - the full path from the AST node's loc.sourcefile
+   * @returns the path from the first '/' after Config.maindirPrefix (e.g. "/app/biz/.../Foo.java"), or null
+   */
+  normalizeAstSourceFilePath(astPath: string | undefined): string | null {
+    if (!astPath) return null
+    try {
+      const prefixStart = astPath.indexOf(Config.maindirPrefix)
+      if (prefixStart === -1) return null
+      // Drop everything up to and including the configured prefix ...
+      const afterPrefix = astPath.substring(prefixStart + Config.maindirPrefix.length)
+      // ... then keep the remainder from its first '/' onwards.
+      const firstSlash = afterPrefix.indexOf('/')
+      return firstSlash === -1 ? null : afterPrefix.substring(firstSlash)
+    } catch {
+      return null
+    }
+  }
+
/**
* starter trigger
* @param analyzer
@@ -37,7 +243,7 @@ class JavaTaintAbstractChecker extends TaintCheckerJava {
* @param info
*/
triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any) {
- IntroduceTaintJava.introduceTaintAtIdentifier(node, info.res, this.sourceScope.value)
+ IntroduceTaintJava.introduceTaintAtIdentifier(analyzer, scope, node, info.res, this.sourceScope.value)
}
/**
@@ -61,11 +267,11 @@ class JavaTaintAbstractChecker extends TaintCheckerJava {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
const funcCallArgTaintSource = this.checkerRuleConfigContent.sources?.FuncCallArgTaintSource
- IntroduceTaintJava.introduceFuncArgTaintByRuleConfig(fclos?.object, node, argvalues, funcCallArgTaintSource)
- this.checkByNameAndClassMatch(node, fclos, argvalues, scope, state, info)
- this.checkByFieldMatch(node, fclos, argvalues, scope, state, info)
+ IntroduceTaintJava.introduceFuncArgTaintByRuleConfig(fclos?.object, node, callInfo, funcCallArgTaintSource)
+ this.checkByNameAndClassMatch(node, fclos, callInfo, scope, state, info, analyzer)
+ // this.checkByFieldMatch(node, fclos, callInfo, scope, state, info)
}
/**
@@ -91,17 +297,49 @@ class JavaTaintAbstractChecker extends TaintCheckerJava {
* @param scope
* @param state
* @param info
+ * @param analyzer
*/
- checkByNameAndClassMatch(node: any, fclos: any, argvalues: any, scope: any, state: any, info: any) {
- const sinkRules = this.assembleFunctionCallSinkRule()
+ checkByNameAndClassMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state: any, info: any, analyzer: any) {
+ let sinkRules
+ if (RulesJava.getPreprocessReady()) {
+ if (!this.sinkRuleArray) {
+ this.sinkRuleArray = this.assembleFunctionCallSinkRule()
+ this.sinkArray = analyzer?.loadAllSink()
+ }
+ sinkRules = this.sinkRuleArray
+ } else {
+ sinkRules = this.assembleFunctionCallSinkRule()
+ }
+
+ let rules
+ if (RulesJava.getPreprocessReady()) {
+ if (node?._meta?.nodehash) {
+ if (this.matchSinkRuleResultMap.has(node._meta.nodehash)) {
+ rules = this.matchSinkRuleResultMap.get(node._meta.nodehash)
+ } else {
+ rules = matchSinkAtFuncCallWithCalleeTypeJava(node, fclos, sinkRules, scope)
+ this.appendCgRules(rules, node, scope, sinkRules, analyzer)
+ this.matchSinkRuleResultMap.set(node._meta.nodehash, rules)
+ }
+ } else {
+ rules = matchSinkAtFuncCallWithCalleeTypeJava(node, fclos, sinkRules, scope)
+ this.appendCgRules(rules, node, scope, sinkRules, analyzer)
+ }
+ } else {
+ rules = matchSinkAtFuncCallWithCalleeTypeJava(node, fclos, sinkRules, scope)
+ this.appendCgRules(rules, node, scope, sinkRules, analyzer)
+ }
- const rules = matchSinkAtFuncCallWithCalleeTypeJava(node, fclos, sinkRules, scope)
for (const rule of rules) {
let args
if (rule._sinkType === 'FuncCallTaintSink') {
- args = RulesJava.prepareArgs(argvalues, fclos, rule)
+ if (rule.args) {
+ args = RulesJava.prepareArgs(callInfo, fclos, rule)
+ } else if (rule.argTypes) {
+ args = RulesJava.prepareArgsByType(callInfo, fclos, rule)
+ }
} else if (rule._sinkType === 'ObjectTaintFuncCallSink') {
- args = fclos.getThis()
+ args = fclos.getThisObj()
}
if (!args) {
continue
@@ -145,6 +383,55 @@ class JavaTaintAbstractChecker extends TaintCheckerJava {
return true
}
+ /**
+ * Append the sink rules matched via the call graph when direct matching found none
+ * @param rules
+ * @param node
+ * @param scope
+ * @param sinkRules
+ * @param analyzer
+ */
+ appendCgRules(rules: any[], node: any, scope: any, sinkRules: any[], analyzer: any) {
+ if (rules.length > 0) {
+ return
+ }
+ const cgRules = this.findMatchedRuleByCallGraph(node, scope, sinkRules, analyzer)
+ for (const cgRule of cgRules) {
+ rules.push(cgRule)
+ }
+ }
+
+ /**
+ * Find the sink rules that match any call-graph invocation of the given node
+ * @param node
+ * @param scope
+ * @param sinkRules
+ * @param analyzer
+ */
+ findMatchedRuleByCallGraph(node: any, scope: any, sinkRules: any[], analyzer: any) {
+ const resultArray: any[] = []
+
+ if (!node || !scope || !sinkRules || !analyzer || !analyzer.findNodeInvocations) {
+ return resultArray
+ }
+
+ const invocations: Invocation[] = analyzer.findNodeInvocations(scope, node)
+ if (!invocations) {
+ return resultArray
+ }
+
+ for (const invocation of invocations) {
+ for (const sink of sinkRules) {
+ const matchSink: boolean = checkInvocationMatchSink(invocation, sink, analyzer.typeResolver)
+ if (matchSink) {
+ resultArray.push(sink)
+ }
+ }
+ }
+
+ return resultArray
+ }
+
/**
* check if sink or not by obj value
* @param node
@@ -154,8 +441,16 @@ class JavaTaintAbstractChecker extends TaintCheckerJava {
* @param state
* @param info
*/
- checkByFieldMatch(node: any, fclos: any, argvalues: any, scope: any, state: any, info: any) {
- const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
+ checkByFieldMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state: any, info: any) {
+ let rules
+ if (RulesJava.getPreprocessReady()) {
+ if (!this.sinkRuleArray) {
+ this.sinkRuleArray = this.assembleFunctionCallSinkRule()
+ }
+ rules = this.sinkRuleArray
+ } else {
+ rules = this.assembleFunctionCallSinkRule()
+ }
if (!rules) return
let matched = false
@@ -223,7 +518,7 @@ class JavaTaintAbstractChecker extends TaintCheckerJava {
create
)
if (matched) {
- const args = RulesJava.prepareArgs(argvalues, fclos, rule)
+ const args = RulesJava.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerCheckerJava.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerCheckerJava.findTagAndMatchedSanitizer(
node,
@@ -269,27 +564,23 @@ class JavaTaintAbstractChecker extends TaintCheckerJava {
* @param fclos
*/
getObj(fclos: any): any {
- if (
- typeof fclos?._sid !== 'undefined' &&
- typeof fclos?._qid === 'undefined' &&
- typeof fclos?._this === 'undefined'
- ) {
- const index = fclos?._sid.indexOf('>.')
- const result = index !== -1 ? fclos?._sid.substring(index + 2) : fclos?._sid
- return result.replace('', '')
+ if (typeof fclos?.sid !== 'undefined' && typeof fclos?.qid === 'undefined' && typeof fclos?._this === 'undefined') {
+ const index = fclos?.sid.indexOf('>.')
+ const result = index !== -1 ? fclos?.sid.substring(index + 2) : fclos?.sid
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(result)
}
- if (typeof fclos?._qid !== 'undefined') {
- const index = fclos._qid.indexOf('>.')
- const result = index !== -1 ? fclos?._qid.substring(index + 2) : fclos?._qid
- return result.replace('', '')
+ if (typeof fclos?.qid !== 'undefined') {
+ const index = fclos.qid.indexOf('>.')
+ const result = index !== -1 ? fclos?.qid.substring(index + 2) : fclos?.qid
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(result)
}
if (!(fclos === fclos?._this)) {
return this.getObj(fclos._this)
}
- const index = fclos?._sid.indexOf('>.')
- const result = index !== -1 ? fclos?._sid.substring(index + 2) : fclos?._sid
+ const index = fclos?.sid.indexOf('>.')
+ const result = index !== -1 ? fclos?.sid.substring(index + 2) : fclos?.sid
if (result) {
- return result.replace('', '')
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(result)
}
}
diff --git a/src/checker/taint/java/java-taint-checker.ts b/src/checker/taint/java/java-taint-checker.ts
index ff0dc25d..97c71b2e 100644
--- a/src/checker/taint/java/java-taint-checker.ts
+++ b/src/checker/taint/java/java-taint-checker.ts
@@ -36,7 +36,7 @@ class JavaTaintChecker extends JavaTaintAbstractChecker {
if (Config.entryPointMode !== 'ONLY_CUSTOM') {
logger.info('YASA will collect Entrypoint and Source')
const { selfCollectSpringEntryPoints, selfCollectSpringTaintSource } =
- SpringEntryPoint.getSpringEntryPointAndSource(topScope.packageManager)
+ SpringEntryPoint.getSpringEntryPointAndSource(topScope.context.packages)
if (!_.isEmpty(selfCollectSpringTaintSource)) {
this.checkerRuleConfigContent.sources = this.checkerRuleConfigContent.sources || {}
@@ -77,9 +77,9 @@ class JavaTaintChecker extends JavaTaintAbstractChecker {
}
targetPackage = targetPackage.startsWith('.') ? targetPackage.slice(1) : targetPackage
const arr = Loader.getPackageNameProperties(targetPackage)
- let packageManagerT = topScope.packageManager
+ let packageManagerT = topScope.context.packages
arr.forEach((path: any) => {
- packageManagerT = packageManagerT?.field[path]
+ packageManagerT = packageManagerT?.members?.get(path)
})
if (!packageManagerT || packageManagerT.vtype === 'undefine') {
continue
@@ -92,23 +92,7 @@ class JavaTaintChecker extends JavaTaintAbstractChecker {
continue
}
- const scopeVal = Scoped({
- vtype: 'scope',
- _sid: 'mock',
- _id: 'mock',
- field: {},
- parent: null,
- })
-
- const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
- entryPoint.scopeVal = scopeVal
- entryPoint.argValues = []
- entryPoint.functionName = entrypoint.functionName
- entryPoint.filePath = entrypoint.filePath
- entryPoint.attribute = entrypoint.attribute
- entryPoint.packageName = entrypoint.packageName
- entryPoint.entryPointSymVal = entryPointSymVal
- this.entryPoints.push(entryPoint)
+ this.resolveAndPushEntryPoint(entryPointSymVal, entrypoint, func, analyzer, Scoped, EntryPoint, Constant)
}
}
}
diff --git a/src/checker/taint/js/egg-taint-checker.ts b/src/checker/taint/js/egg-taint-checker.ts
index df55287f..63445f27 100644
--- a/src/checker/taint/js/egg-taint-checker.ts
+++ b/src/checker/taint/js/egg-taint-checker.ts
@@ -1,3 +1,5 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+
const _ = require('lodash')
const BasicRuleHandler = require('../../common/rules-basic-handler')
const IntroduceTaint = require('../common-kit/source-util')
@@ -7,11 +9,6 @@ const Constant = require('../../../util/constant')
const CommonUtil = require('../../../util/common-util')
const Loader = require('../../../util/loader')
const { matchSinkAtFuncCall } = require('../common-kit/sink-util')
-const {
- valueUtil: {
- ValueUtil: { Scoped },
- },
-} = require('../../../engine/analyzer/common')
const Config = require('../../../config')
const eggHttpEgg = require('../../../engine/analyzer/javascript/egg/entrypoint-collector/egg-http')
const SanitizerCheckerEgg = require('../../sanitizer/sanitizer-checker')
@@ -19,6 +16,7 @@ const { handleException: handleExceptionEgg } = require('../../../engine/analyze
const logger = require('../../../util/logger')(__filename)
const TaintCheckerEgg = require('../taint-checker')
const TaintOutputStrategyEgg = require('../../common/output/taint-output-strategy')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
const TAINT_TAG_NAME_EGG = 'EGG_INPUT'
@@ -67,7 +65,7 @@ class EggTaintChecker extends TaintCheckerEgg {
return
}
try {
- IntroduceTaint.introduceTaintAtIdentifier(node, info.res, this.sourceScope.value)
+ IntroduceTaint.introduceTaintAtIdentifier(analyzer, scope, node, info.res, this.sourceScope.value)
} catch (e: any) {
handleExceptionEgg(
e,
@@ -105,7 +103,8 @@ class EggTaintChecker extends TaintCheckerEgg {
logger.info('YASA collecting egg source and entrypoint...')
// eslint-disable-next-line prefer-const
let { selfCollectEntryPoints, selfCollectTaintSource } = eggHttpEgg.getEggHttpEntryPointsAndSources(
- topScope.fileManager
+ analyzer.fileManager,
+ analyzer
)
if (_.isEmpty(selfCollectEntryPoints) && _.isEmpty(ruleConfigEntryPoints)) {
@@ -156,12 +155,12 @@ class EggTaintChecker extends TaintCheckerEgg {
// const arr = filepath.split("/").filter(str => str !== "").map(str => str.split(".").shift());
let fieldT = topScope
arr.forEach((path: any) => {
- fieldT = fieldT?.field[path]
+ fieldT = fieldT?.members?.get(path)
})
if (!fieldT || fieldT.vtype === 'undefine') {
- for (const mod in topScope.moduleManager.field) {
- if (mod.includes(entrypoint.filePath) && topScope.moduleManager.field[mod].ast?.type === 'CompileUnit') {
- fieldT = topScope.moduleManager.field[mod]
+ for (const mod of topScope.context.modules.members.keys()) {
+ if (mod.includes(entrypoint.filePath) && topScope.context.modules.members.get(mod)?.ast?.node?.type === 'CompileUnit') {
+ fieldT = topScope.context.modules.members.get(mod)
break
}
}
@@ -187,12 +186,12 @@ class EggTaintChecker extends TaintCheckerEgg {
entryPoint.entryPointSymVal = entryPointSymVal
this.entryPoints.push(entryPoint)
} else {
- if (!fieldT.ast || fieldT.ast.type !== 'CompileUnit') continue
+ if (!fieldT.ast?.node || fieldT.ast.node.type !== 'CompileUnit') continue // tolerate values without an ast
const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN)
entryPoint.scopeVal = fieldT
entryPoint.argValues = undefined
entryPoint.functionName = undefined
- entryPoint.filePath = fieldT?.ast?.loc?.sourcefile
+ entryPoint.filePath = fieldT?.ast?.node?.loc?.sourcefile
entryPoint.attribute = entrypoint.attribute
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = fieldT
@@ -221,11 +220,11 @@ class EggTaintChecker extends TaintCheckerEgg {
if (Config.analyzer !== 'EggAnalyzer') {
return
}
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
const funcCallArgTaintSource = this.checkerRuleConfigContent.sources?.FuncCallArgTaintSource
- IntroduceTaint.introduceFuncArgTaintByRuleConfig(fclos?.object, node, argvalues, funcCallArgTaintSource)
- this.checkSinkAtFunctionCall(node, fclos, argvalues)
- this.checkByFieldMatch(node, fclos, argvalues, scope)
+ IntroduceTaint.introduceFuncArgTaintByRuleConfig(fclos?.object, node, callInfo, funcCallArgTaintSource)
+ this.checkSinkAtFunctionCall(node, fclos, callInfo, state)
+ this.checkByFieldMatch(node, fclos, callInfo, scope, state)
}
/**
@@ -265,19 +264,20 @@ class EggTaintChecker extends TaintCheckerEgg {
*
* @param node
* @param fclos
- * @param argvalues
+ * @param callInfo
+ * @param state
*/
- checkSinkAtFunctionCall(node: any, fclos: any, argvalues: any) {
+ checkSinkAtFunctionCall(node: any, fclos: any, callInfo: CallInfo | undefined, state?: any) {
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
if (_.isEmpty(rules)) {
return
}
- let rule = matchSinkAtFuncCall(node, fclos, rules)
+ let rule = matchSinkAtFuncCall(node, fclos, rules, callInfo)
rule = rule.length > 0 ? rule[0] : null
if (rule) {
- const args = BasicRuleHandler.prepareArgs(argvalues, fclos, rule)
+ const args = BasicRuleHandler.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerCheckerEgg.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerCheckerEgg.findTagAndMatchedSanitizer(
node,
@@ -304,7 +304,8 @@ class EggTaintChecker extends TaintCheckerEgg {
fclos,
TAINT_TAG_NAME_EGG,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategyEgg.isNewFinding(this.resultManager, taintFlowFinding)) continue
this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategyEgg.outputStrategyId)
@@ -319,8 +320,9 @@ class EggTaintChecker extends TaintCheckerEgg {
* @param fclos
* @param argvalues
* @param scope
+ * @param state
*/
- checkByFieldMatch(node: any, fclos: any, argvalues: any, scope: any) {
+ checkByFieldMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state?: any) {
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
if (_.isEmpty(rules)) {
return
@@ -388,7 +390,7 @@ class EggTaintChecker extends TaintCheckerEgg {
create
)
if (matched) {
- const args = BasicRuleHandler.prepareArgs(argvalues, fclos, rule)
+ const args = BasicRuleHandler.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerCheckerEgg.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerCheckerEgg.findTagAndMatchedSanitizer(
node,
@@ -415,7 +417,8 @@ class EggTaintChecker extends TaintCheckerEgg {
fclos,
TAINT_TAG_NAME_EGG,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategyEgg.isNewFinding(this.resultManager, taintFlowFinding)) continue
@@ -432,26 +435,28 @@ class EggTaintChecker extends TaintCheckerEgg {
* @param fclos
*/
getObj(fclos: any): any {
- if (typeof fclos?._qid === 'undefined' && typeof fclos?._this === 'undefined') {
- return fclos._sid?.replace('', '')
+ if (typeof fclos?.qid === 'undefined' && typeof fclos?._this === 'undefined') {
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(fclos.sid)
}
- if (typeof fclos?._qid !== 'undefined') {
- let qid = fclos?._qid?.replace('Egg.Context', 'this.ctx')
+ if (typeof fclos?.qid !== 'undefined') {
+ let qid = fclos?.qid?.replace('Egg.Context', 'this.ctx')
qid = qid?.replace('Egg.Application', 'this.app')
qid = qid?.replace('this.app.service', 'this.ctx.service')
qid = qid?.replace('Egg.Request', 'this.ctx.request')
- if (fclos.ast?.loc?.sourcefile && fclos.ast?.loc?.sourcefile.startsWith(Config.maindirPrefix)) {
- const prefix = fclos.ast.loc.sourcefile.substring(Config.maindirPrefix.length).split('.')[0]
- if (prefix) {
+ if (fclos.ast?.node?.loc?.sourcefile && fclos.ast?.node?.loc?.sourcefile.startsWith(Config.maindirPrefix)) {
+ const prefix = fclos.ast.node.loc.sourcefile.substring(Config.maindirPrefix.length)
+ const lastDotIndex = prefix.lastIndexOf('.')
+ const result = lastDotIndex >= 0 ? prefix.substring(0, lastDotIndex) : prefix
+ if (result) {
qid = qid?.substring(prefix.length + 1)
}
}
- return qid?.replace('', '')
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(qid)
}
if (!(fclos === fclos?._this)) {
return this.getObj(fclos._this)
}
- return fclos._sid?.replace('', '')
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(fclos.sid)
}
}
diff --git a/src/checker/taint/js/express/express-taint-checker.ts b/src/checker/taint/js/express/express-taint-checker.ts
index f24fb717..b80f9b93 100644
--- a/src/checker/taint/js/express/express-taint-checker.ts
+++ b/src/checker/taint/js/express/express-taint-checker.ts
@@ -1,3 +1,5 @@
+import type { CallInfo } from '../../../../engine/analyzer/common/call-args'
+
// checker加载逻辑重构后可打开,让stc不要用
const _ = require('lodash')
const Rules = require('../../../common/rules-basic-handler')
@@ -14,6 +16,7 @@ const SanitizerChecker = require('../../../sanitizer/sanitizer-checker')
const TaintOutputStrategy = require('../../../common/output/taint-output-strategy')
const { handleException } = require('../../../../engine/analyzer/common/exception-handler')
const logger = require('../../../../util/logger')(__filename)
+const QidUnifyUtil = require('../../../../util/qid-unify-util')
const TAINT_TAG_NAME = 'EXPRESS_INPUT'
@@ -63,7 +66,7 @@ class ExpressTaintChecker extends TaintChecker {
if (config.analyzer !== 'JavaScriptAnalyzer') {
return
}
- IntroduceTaint.introduceTaintAtIdentifier(node, info.res, this.sourceScope.value)
+ IntroduceTaint.introduceTaintAtIdentifier(analyzer, scope, node, info.res, this.sourceScope.value)
}
/**
@@ -108,12 +111,12 @@ class ExpressTaintChecker extends TaintChecker {
const arr = loader.getFilePathProperties(filepath, { caseStyle: 'lower' })
let fieldT = topScope
arr.forEach((path: any) => {
- fieldT = fieldT?.field[path]
+ fieldT = fieldT?.members?.get(path)
})
if (!fieldT || fieldT.vtype === 'undefine') {
- for (const mod in topScope.moduleManager.field) {
- if (mod.includes(entrypoint.filePath) && topScope.moduleManager.field[mod].ast?.type === 'CompileUnit') {
- fieldT = topScope.moduleManager.field[mod]
+ for (const mod of topScope.context.modules.members.keys()) {
+ if (mod.includes(entrypoint.filePath) && topScope.context.modules.members.get(mod)?.ast?.node?.type === 'CompileUnit') {
+ fieldT = topScope.context.modules.members.get(mod)
break
}
}
@@ -137,12 +140,12 @@ class ExpressTaintChecker extends TaintChecker {
entryPoint.entryPointSymVal = entryPointSymVal
this.entryPoints.push(entryPoint)
} else {
- if (!fieldT.ast || fieldT.ast.type !== 'CompileUnit') continue
+ if (!fieldT.ast?.node || fieldT.ast.node.type !== 'CompileUnit') continue // tolerate values without an ast
const entryPoint = new EntryPoint(constValue.ENGIN_START_FILE_BEGIN)
entryPoint.scopeVal = fieldT
entryPoint.argValues = undefined
entryPoint.functionName = undefined
- entryPoint.filePath = fieldT?.ast?.loc?.sourcefile
+ entryPoint.filePath = fieldT?.ast?.node?.loc?.sourcefile
entryPoint.attribute = entrypoint.attribute
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = fieldT
@@ -173,12 +176,12 @@ class ExpressTaintChecker extends TaintChecker {
return
}
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
const funcCallArgTaintSource = this.checkerRuleConfigContent.sources?.FuncCallArgTaintSource
- IntroduceTaint.introduceFuncArgTaintByRuleConfig(fclos?.object, node, argvalues, funcCallArgTaintSource)
- this.checkSinkAtFunctionCall(node, fclos, argvalues)
- this.checkByFieldMatch(node, fclos, argvalues, scope)
+ IntroduceTaint.introduceFuncArgTaintByRuleConfig(fclos?.object, node, callInfo, funcCallArgTaintSource)
+ this.checkSinkAtFunctionCall(node, fclos, callInfo, state)
+ this.checkByFieldMatch(node, fclos, callInfo, scope, state)
}
/**
@@ -219,18 +222,19 @@ class ExpressTaintChecker extends TaintChecker {
* @param node
* @param fclos
* @param argvalues
+ * @param state
*/
- checkSinkAtFunctionCall(node: any, fclos: any, argvalues: any) {
+ checkSinkAtFunctionCall(node: any, fclos: any, callInfo: CallInfo | undefined, state?: any) {
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
if (_.isEmpty(rules)) {
return
}
- let rule = matchSinkAtFuncCall(node, fclos, rules)
+ let rule = matchSinkAtFuncCall(node, fclos, rules, callInfo)
rule = rule.length > 0 ? rule[0] : null
if (rule) {
- const args = Rules.prepareArgs(argvalues, fclos, rule)
+ const args = Rules.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -257,7 +261,8 @@ class ExpressTaintChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) continue
this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategy.outputStrategyId)
@@ -272,8 +277,9 @@ class ExpressTaintChecker extends TaintChecker {
* @param fclos
* @param argvalues
* @param scope
+ * @param state
*/
- checkByFieldMatch(node: any, fclos: any, argvalues: any, scope: any) {
+ checkByFieldMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state?: any) {
let rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
if (_.isEmpty(rules)) {
return
@@ -342,7 +348,7 @@ class ExpressTaintChecker extends TaintChecker {
create
)
if (matched) {
- const args = Rules.prepareArgs(argvalues, fclos, rule)
+ const args = Rules.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -369,7 +375,8 @@ class ExpressTaintChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) continue
@@ -386,26 +393,25 @@ class ExpressTaintChecker extends TaintChecker {
* @param fclos
*/
getObj(fclos: any): any {
- if (typeof fclos?._qid === 'undefined' && typeof fclos?._this === 'undefined') {
- return fclos._sid?.replace('', '')
+ if (typeof fclos?.qid === 'undefined' && typeof fclos?._this === 'undefined') {
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(fclos.sid)
}
- if (typeof fclos?._qid !== 'undefined') {
- let qid = fclos._qid?.replace('Egg.Context', 'this.ctx')
- qid = qid?.replace('Egg.Application', 'this.app')
- qid = qid?.replace('this.app.service', 'this.ctx.service')
- qid = qid?.replace('Egg.Request', 'this.ctx.request')
- if (fclos.ast?.loc?.sourcefile && fclos.ast?.loc?.sourcefile.startsWith(config.maindirPrefix)) {
- const prefix = fclos.ast.loc.sourcefile.substring(config.maindirPrefix.length).split('.')[0]
- if (prefix) {
+ if (typeof fclos?.qid !== 'undefined') {
+ let { qid } = fclos
+ if (fclos.ast?.node?.loc?.sourcefile && fclos.ast?.node?.loc?.sourcefile.startsWith(config.maindirPrefix)) {
+ const prefix = fclos.ast.node.loc.sourcefile.substring(config.maindirPrefix.length)
+ const lastDotIndex = prefix.lastIndexOf('.')
+ const result = lastDotIndex >= 0 ? prefix.substring(0, lastDotIndex) : prefix
+ if (result) {
qid = qid?.substring(prefix.length + 1)
}
}
- return qid?.replace('', '')
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(qid)
}
if (!(fclos === fclos?._this)) {
return this.getObj(fclos._this)
}
- return fclos._sid?.replace('', '')
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(fclos.sid)
}
}
diff --git a/src/checker/taint/js/js-taint-checker.ts b/src/checker/taint/js/js-taint-checker.ts
index 230fea7a..face6f97 100644
--- a/src/checker/taint/js/js-taint-checker.ts
+++ b/src/checker/taint/js/js-taint-checker.ts
@@ -1,3 +1,5 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+
// checker加载逻辑重构后可打开,让stc不要用
const _ = require('lodash')
const Rules = require('../../common/rules-basic-handler')
@@ -8,12 +10,8 @@ const constValue = require('../../../util/constant')
const commonUtil = require('../../../util/common-util')
const loader = require('../../../util/loader')
const { matchSinkAtFuncCall } = require('../common-kit/sink-util')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
const TaintChecker = require('../taint-checker')
-const {
- valueUtil: {
- ValueUtil: { Scoped },
- },
-} = require('../../../engine/analyzer/common')
const config = require('../../../config')
const SanitizerChecker = require('../../sanitizer/sanitizer-checker')
const TaintOutputStrategy = require('../../common/output/taint-output-strategy')
@@ -65,7 +63,7 @@ class JsTaintChecker extends TaintChecker {
if (config.analyzer !== 'JavaScriptAnalyzer') {
return
}
- IntroduceTaint.introduceTaintAtIdentifier(node, info.res, this.sourceScope.value)
+ IntroduceTaint.introduceTaintAtIdentifier(analyzer, scope, node, info.res, this.sourceScope.value)
}
/**
@@ -106,15 +104,15 @@ class JsTaintChecker extends TaintChecker {
// const arr = filepath.split("/").filter(str => str !== "").map(str => str.split(".").shift());
let fieldT = topScope
arr.forEach((path: any) => {
- fieldT = fieldT?.field[path]
+ fieldT = fieldT?.members?.get(path)
})
if (!fieldT || fieldT.vtype === 'undefine') {
- for (const mod in topScope.moduleManager.field) {
+ for (const mod of topScope.context.modules.members.keys()) {
if (
mod.includes(entrypoint.filePath) &&
- topScope.moduleManager.field[mod].ast?.type === 'CompileUnit'
+ topScope.context.modules.members.get(mod)?.ast?.node?.type === 'CompileUnit'
) {
- fieldT = topScope.moduleManager.field[mod]
+ fieldT = topScope.context.modules.members.get(mod)
break
}
}
@@ -138,12 +136,12 @@ class JsTaintChecker extends TaintChecker {
entryPoint.entryPointSymVal = entryPointSymVal
this.entryPoints.push(entryPoint)
} else {
- if (!fieldT.ast || fieldT.ast.type !== 'CompileUnit') continue
+ if (!fieldT.ast?.node || fieldT.ast.node.type !== 'CompileUnit') continue // tolerate values without an ast
const entryPoint = new EntryPoint(constValue.ENGIN_START_FILE_BEGIN)
entryPoint.scopeVal = fieldT
entryPoint.argValues = undefined
entryPoint.functionName = undefined
- entryPoint.filePath = fieldT?.ast?.loc?.sourcefile
+ entryPoint.filePath = fieldT?.ast?.node?.loc?.sourcefile
entryPoint.attribute = entrypoint.attribute
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = fieldT
@@ -165,9 +163,10 @@ class JsTaintChecker extends TaintChecker {
const fullCallGraphFileEntryPoint = require('../../common/full-callgraph-file-entrypoint')
fullCallGraphFileEntryPoint.makeFullCallGraph(analyzer)
const fullCallGraphEntrypoint = fullCallGraphFileEntryPoint.getAllEntryPointsUsingCallGraph(
- analyzer.ainfo?.callgraph
+ analyzer.ainfo?.callgraph,
+ analyzer
)
- const fullFileEntrypoint = fullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer.fileManager)
+ const fullFileEntrypoint = fullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer)
this.entryPoints.push(...fullCallGraphEntrypoint)
this.entryPoints.push(...fullFileEntrypoint)
}
@@ -186,11 +185,11 @@ class JsTaintChecker extends TaintChecker {
if (config.analyzer !== 'JavaScriptAnalyzer') {
return
}
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
const funcCallArgTaintSource = this.checkerRuleConfigContent.sources?.FuncCallArgTaintSource
- IntroduceTaint.introduceFuncArgTaintByRuleConfig(fclos?.object, node, argvalues, funcCallArgTaintSource)
- this.checkSinkAtFunctionCall(node, fclos, argvalues)
- this.checkByFieldMatch(node, fclos, argvalues, scope)
+ IntroduceTaint.introduceFuncArgTaintByRuleConfig(fclos?.object, node, callInfo, funcCallArgTaintSource)
+ this.checkSinkAtFunctionCall(node, fclos, callInfo, state)
+ this.checkByFieldMatch(node, fclos, callInfo, scope, state)
}
/**
@@ -230,19 +229,20 @@ class JsTaintChecker extends TaintChecker {
*
* @param node
* @param fclos
- * @param argvalues
+ * @param callInfo
+ * @param state
*/
- checkSinkAtFunctionCall(node: any, fclos: any, argvalues: any) {
+ checkSinkAtFunctionCall(node: any, fclos: any, callInfo: CallInfo | undefined, state?: any) {
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
if (_.isEmpty(rules)) {
return
}
- let rule = matchSinkAtFuncCall(node, fclos, rules)
+ let rule = matchSinkAtFuncCall(node, fclos, rules, callInfo)
rule = rule.length > 0 ? rule[0] : null
if (rule) {
- const args = Rules.prepareArgs(argvalues, fclos, rule)
+ const args = Rules.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -269,7 +269,8 @@ class JsTaintChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME_JS_TAINT,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) continue
this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategy.outputStrategyId)
@@ -282,10 +283,11 @@ class JsTaintChecker extends TaintChecker {
*
* @param node
* @param fclos
- * @param argvalues
+ * @param callInfo
* @param scope
+ * @param state
*/
- checkByFieldMatch(node: any, fclos: any, argvalues: any, scope: any) {
+ checkByFieldMatch(node: any, fclos: any, callInfo: CallInfo | undefined, scope: any, state?: any) {
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
if (_.isEmpty(rules)) {
return
@@ -353,7 +355,7 @@ class JsTaintChecker extends TaintChecker {
create
)
if (matched) {
- const args = Rules.prepareArgs(argvalues, fclos, rule)
+ const args = Rules.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -380,7 +382,8 @@ class JsTaintChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME_JS_TAINT,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) continue
@@ -397,23 +400,25 @@ class JsTaintChecker extends TaintChecker {
* @param fclos
*/
getObj(fclos: any): any {
- if (typeof fclos?._qid === 'undefined' && typeof fclos?._this === 'undefined') {
- return fclos._sid?.replace('', '')
+ if (typeof fclos?.qid === 'undefined' && typeof fclos?._this === 'undefined') {
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(fclos.sid)
}
- if (typeof fclos?._qid !== 'undefined') {
- let qid = fclos?._qid
- if (fclos.ast?.loc?.sourcefile && fclos.ast?.loc?.sourcefile.startsWith(config.maindirPrefix)) {
- const prefix = fclos.ast.loc.sourcefile.substring(config.maindirPrefix.length).split('.')[0]
- if (prefix) {
+ if (typeof fclos?.qid !== 'undefined') {
+ let qid = fclos?.qid
+ if (fclos.ast?.node?.loc?.sourcefile && fclos.ast?.node?.loc?.sourcefile.startsWith(config.maindirPrefix)) {
+ const prefix = fclos.ast.node.loc.sourcefile.substring(config.maindirPrefix.length)
+ const lastDotIndex = prefix.lastIndexOf('.')
+ const result = lastDotIndex >= 0 ? prefix.substring(0, lastDotIndex) : prefix
+ if (result) {
qid = qid?.substring(prefix.length + 1)
}
}
- return qid?.replace('', '')
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(qid)
}
if (!(fclos === fclos?._this)) {
return this.getObj(fclos._this)
}
- return fclos._sid?.replace('', '')
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(fclos.sid)
}
}
diff --git a/src/checker/taint/js/source-util-for-egg.ts b/src/checker/taint/js/source-util-for-egg.ts
index d61e829e..05f1c16d 100644
--- a/src/checker/taint/js/source-util-for-egg.ts
+++ b/src/checker/taint/js/source-util-for-egg.ts
@@ -1,3 +1,4 @@
+const QidUnifyUtil = require('../../../util/qid-unify-util')
const { markTaintSource } = require('../common-kit/source-util')
const BasicRuleHandler = require('../../common/rules-basic-handler')
@@ -25,7 +26,7 @@ function _introduceTaintAtMemberAccess(res: any, sourceScopeVal: any, node: any)
if (!BasicRuleHandler.getPreprocessReady()) {
return
}
- if (typeof res._qid === 'undefined' || typeof res._qid !== 'string') {
+ if (typeof res.qid === 'undefined' || typeof res.qid !== 'string') {
return res
}
if (markTaintAtMemberAccess(res, sourceScopeVal, node)) {
@@ -41,15 +42,15 @@ function _introduceTaintAtMemberAccess(res: any, sourceScopeVal: any, node: any)
* @param node
*/
function markTaintAtMemberAccess(res: any, sourceScopeVal: any, node: any): boolean {
- if (typeof res._qid !== 'undefined') {
- let qid = res._qid
+ if (typeof res.qid !== 'undefined') {
+ let { qid } = res
if (typeof qid !== 'string') {
return false
}
+ qid = QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(qid)
qid = qid?.replace('Egg.Context', 'this.ctx')
qid = qid?.replace('Egg.Application', 'this.app')
qid = qid?.replace('Egg.Request', 'this.ctx.request')
- qid = qid?.replace('\', '')
// 适配ctx=this
const sourceFile = node.loc?.sourcefile
@@ -63,9 +64,10 @@ function markTaintAtMemberAccess(res: any, sourceScopeVal: any, node: any): bool
qid = qid.charAt(0).toLowerCase() + qid.slice(1)
className = className.charAt(0).toLowerCase() + className.slice(1)
qid = qid.replace(className, 'this.ctx')
- } else if (qid.startsWith('module.exports')) {
- // module.exports场景
- qid = qid.replace('module.exports', 'this.ctx')
+ }
+ if (qid.includes('module.exports')) {
+ // module.exports case: replace everything up to and including module.exports with this.ctx
+ qid = qid.replace(/.*module\.exports/, 'this.ctx')
}
}
}
diff --git a/src/checker/taint/python/django-taint-checker.ts b/src/checker/taint/python/django-taint-checker.ts
index 3853d7d8..cebf58c2 100644
--- a/src/checker/taint/python/django-taint-checker.ts
+++ b/src/checker/taint/python/django-taint-checker.ts
@@ -1,4 +1,4 @@
-const PythonTaintAbstractChecker = require('./python-taint-abstract-checker')
+const { PythonTaintAbstractChecker } = require('./python-taint-abstract-checker')
const completeEntryPoint = require('../common-kit/entry-points-util')
const { extractRelativePath } = require('../../../util/file-util')
@@ -64,6 +64,7 @@ class DjangoTaintChecker extends PythonTaintAbstractChecker {
if (registerFile.size === 0 || !registerFile.has(fileName)) {
return
}
+
if (node.left.name === 'urlpatterns') {
const { right } = node
this.collectDjangoEntrypointAndSource(analyzer, scope, state, right)
@@ -131,19 +132,31 @@ class DjangoTaintChecker extends PythonTaintAbstractChecker {
analyzer.entryPoints.push(completeEntryPoint(ep))
if (targetSrcName.length > 0) {
const targetName = targetSrcName[0]
- for (const param of ep.fdef.parameters) {
+ for (const param of ep.ast.fdef.parameters) {
if (param.id.name === targetName) {
this.sourceScope.value.push({
path: param.id.name,
kind: 'PYTHON_INPUT',
scopeFile: extractRelativePath(param?.loc?.sourcefile, Config.maindir),
- scopeFunc: ep.fdef?.id?.name,
- locStart: param.loc.start.line,
- locEnd: param.loc.end.line,
+ scopeFunc: ep.ast.fdef?.id?.name,
+ locStart: param.loc.start?.line,
+ locEnd: param.loc.end?.line,
})
}
}
}
+ for (const param of ep.ast.fdef.parameters) {
+ if (param.id.name === 'request') {
+ this.sourceScope.value.push({
+ path: param.id.name,
+ kind: 'PYTHON_INPUT',
+ scopeFile: extractRelativePath(param?.loc?.sourcefile, Config.maindir),
+ scopeFunc: ep.ast.fdef?.id?.name,
+ locStart: param.loc.start.line,
+ locEnd: param.loc.end.line,
+ })
+ }
+ }
}
}
@@ -172,15 +185,15 @@ class DjangoTaintChecker extends PythonTaintAbstractChecker {
if (targetSrcName.length > 0) {
const targetName = targetSrcName[0]
for (const ep of entrypoints as any[]) {
- for (const param of ep.fdef.parameters) {
+ for (const param of ep.ast.fdef.parameters) {
if (param.id.name === targetName) {
this.sourceScope.value.push({
path: param.id.name,
kind: 'PYTHON_INPUT',
scopeFile: extractRelativePath(param?.loc?.sourcefile, Config.maindir),
- scopeFunc: ep.fdef?.id?.name,
- locStart: param.loc.start.line,
- locEnd: param.loc.end.line,
+ scopeFunc: ep.ast.fdef?.id?.name,
+ locStart: param.loc.start?.line,
+ locEnd: param.loc.end?.line,
})
}
}
@@ -188,6 +201,18 @@ class DjangoTaintChecker extends PythonTaintAbstractChecker {
}
} else {
for (const ep of entrypoints as any[]) {
+ for (const param of ep.ast.fdef.parameters) {
+ if (param.id.name === 'request') {
+ this.sourceScope.value.push({
+ path: param.id.name,
+ kind: 'PYTHON_INPUT',
+ scopeFile: extractRelativePath(param?.loc?.sourcefile, Config.maindir),
+ scopeFunc: ep.ast.fdef?.id?.name,
+ locStart: param.loc.start.line,
+ locEnd: param.loc.end.line,
+ })
+ }
+ }
analyzer.entryPoints.push(completeEntryPoint(ep))
}
}
diff --git a/src/checker/taint/python/python-default-taint-checker.ts b/src/checker/taint/python/python-default-taint-checker.ts
index 4e0225bc..6ff8ca84 100644
--- a/src/checker/taint/python/python-default-taint-checker.ts
+++ b/src/checker/taint/python/python-default-taint-checker.ts
@@ -1,20 +1,19 @@
-import { handleException } from '../../../engine/analyzer/common/exception-handler'
-
const _ = require('lodash')
-const PythonTaintAbstractChecker = require('./python-taint-abstract-checker')
+const { PythonTaintAbstractChecker } = require('./python-taint-abstract-checker')
const CommonUtil = require('../../../util/common-util')
const {
findPythonFcEntryPointAndSource,
+ buildFclosIndex,
+ lookupFclos,
} = require('../../../engine/analyzer/python/common/entrypoint-collector/python-entrypoint')
const Constant = require('../../../util/constant')
const EntryPoint = require('../../../engine/analyzer/common/entrypoint')
-const AstUtil = require('../../../util/ast-util')
const Config = require('../../../config')
-const FileUtil = require('../../../util/file-util')
const { extractRelativePath } = require('../../../util/file-util')
const logger = require('../../../util/logger')(__filename)
+const { loadPythonDefaultRule } = require('./python-taint-abstract-checker')
const TAINT_TAG_NAME_PYTHON_DEFAULT = 'PYTHON_INPUT'
@@ -40,7 +39,8 @@ class PythonDefaultTaintChecker extends PythonTaintAbstractChecker {
* @param info
*/
triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { moduleManager, fileManager } = analyzer
+ const moduleManager = analyzer.topScope.context.modules
+ const fileManager = analyzer.topScope.context.files
this.prepareEntryPoints(analyzer, Config.maindir, moduleManager, fileManager)
analyzer.entryPoints.push(...this.entryPoints)
this.addSourceTagForSourceScope(TAINT_TAG_NAME_PYTHON_DEFAULT, this.sourceScope.value)
@@ -60,7 +60,7 @@ class PythonDefaultTaintChecker extends PythonTaintAbstractChecker {
const { entrypoints: ruleConfigEntryPoints } = this.checkerRuleConfigContent
if (Config.entryPointMode !== 'ONLY_CUSTOM') {
- const pythonDefaultRule = this.loadPythonDefaultRule()
+ const pythonDefaultRule = loadPythonDefaultRule()
if (pythonDefaultRule[0].checkerIds.includes(this.getCheckerId())) {
this.checkerRuleConfigContent.sources = this.checkerRuleConfigContent.sources || {}
this.checkerRuleConfigContent.sources.TaintSource = this.checkerRuleConfigContent.sources.TaintSource || []
@@ -71,7 +71,11 @@ class PythonDefaultTaintChecker extends PythonTaintAbstractChecker {
: [this.checkerRuleConfigContent.sources.TaintSource]
this.checkerRuleConfigContent.sources.TaintSource.push(...pythonDefaultRule[0].sources.TaintSource)
}
- const { pyFcEntryPointArray, pyFcEntryPointSourceArray } = findPythonFcEntryPointAndSource(dir, fileManager)
+ const { pyFcEntryPointArray, pyFcEntryPointSourceArray } = findPythonFcEntryPointAndSource(
+ dir,
+ fileManager,
+ analyzer
+ )
if (pyFcEntryPointArray) {
funCallEntryPoints.push(...pyFcEntryPointArray)
}
@@ -104,26 +108,20 @@ class PythonDefaultTaintChecker extends PythonTaintAbstractChecker {
}
}
+ // 构建 fclos 索引,一次遍历替代多次查找
+ const fclosIndex = buildFclosIndex(moduleManager, dir, extractRelativePath)
+
for (const funCallEntryPoint of funCallEntryPoints) {
- let valFuncs = AstUtil.satisfy(
- moduleManager,
- (n: any) =>
- n.vtype === 'fclos' &&
- extractRelativePath(n?.ast?.loc?.sourcefile, dir) === funCallEntryPoint.filePath &&
- n?.ast?.id?.name === funCallEntryPoint.functionName,
- (node: any, prop: any) => prop === 'field',
- null,
- true
- )
+ // 使用索引查找,O(1) 操作
+ let valFuncs = lookupFclos(fclosIndex, funCallEntryPoint.filePath, funCallEntryPoint.functionName)
+
if (_.isEmpty(valFuncs)) {
logger.info('match entryPoint fail')
continue
}
- if (Array.isArray(valFuncs)) {
- valFuncs = _.uniqBy(valFuncs, (value: any) => value.fdef)
- } else {
- valFuncs = [valFuncs]
- }
+
+ // 去重
+ valFuncs = _.uniqBy(valFuncs, (value: any) => value.ast.fdef)
for (const valFunc of valFuncs) {
const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
@@ -138,13 +136,14 @@ class PythonDefaultTaintChecker extends PythonTaintAbstractChecker {
for (const fileEntryPoint of fileEntryPoints) {
const fullFilePath = `${Config.maindir}${fileEntryPoint.filePath}`.replace('//', '/')
- const file = fileManager[fullFilePath]
- if (file?.ast?.type === 'CompileUnit') {
+ const fileUuid = fileManager[fullFilePath]
+ const file = analyzer.symbolTable.get(fileUuid)
+ if (file?.ast?.node?.type === 'CompileUnit') {
const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN)
entryPoint.scopeVal = file
entryPoint.argValues = undefined
entryPoint.functionName = undefined
- entryPoint.filePath = file?.ast?.sourcefile || file?.ast?.loc?.sourcefile
+ entryPoint.filePath = file?.ast?.node?.sourcefile || file?.ast?.node?.loc?.sourcefile
entryPoint.attribute = fileEntryPoint.attribute
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = file
@@ -157,29 +156,16 @@ class PythonDefaultTaintChecker extends PythonTaintAbstractChecker {
if (Config.entryPointMode !== 'ONLY_CUSTOM') {
fullCallGraphFileEntryPoint.makeFullCallGraph(analyzer)
const fullCallGraphEntrypoint = fullCallGraphFileEntryPoint.getAllEntryPointsUsingCallGraph(
- analyzer.ainfo?.callgraph
+ analyzer.ainfo?.callgraph,
+ analyzer
)
- const fullFileEntrypoint = fullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer.fileManager)
+ const fullFileEntrypoint = fullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer)
this.entryPoints.push(...fullCallGraphEntrypoint)
this.entryPoints.push(...fullFileEntrypoint)
}
CommonUtil.initSourceScopeByTaintSourceWithLoc(this.sourceScope, this.checkerRuleConfigContent.sources?.TaintSource)
}
-
- /**
- * load python default rule
- */
- loadPythonDefaultRule() {
- let pythonDefaultRule
- try {
- const rulePath = FileUtil.getAbsolutePath('./resource/python/python-default-rule.json')
- pythonDefaultRule = FileUtil.loadJSONfile(rulePath)
- } catch (e) {
- handleException(e, 'Error occurred in load python default rule', 'Error occurred in load python default rule')
- }
- return pythonDefaultRule
- }
}
module.exports = PythonDefaultTaintChecker
diff --git a/src/checker/taint/python/python-taint-abstract-checker.ts b/src/checker/taint/python/python-taint-abstract-checker.ts
index f4ac5fa1..9ea5da23 100644
--- a/src/checker/taint/python/python-taint-abstract-checker.ts
+++ b/src/checker/taint/python/python-taint-abstract-checker.ts
@@ -1,10 +1,18 @@
+import type { CallInfo } from '../../../engine/analyzer/common/call-args'
+
const _ = require('lodash')
+const commonUtil = require('../../../util/common-util')
+const config = require('../../../config')
+const { handleException } = require('../../../engine/analyzer/common/exception-handler')
+
const IntroduceTaint = require('../common-kit/source-util')
const BasicRuleHandler = require('../../common/rules-basic-handler')
const SanitizerChecker = require('../../sanitizer/sanitizer-checker')
const { matchSinkAtFuncCall, matchRegex } = require('../common-kit/sink-util')
const TaintChecker = require('../taint-checker')
const TaintOutputStrategy = require('../../common/output/taint-output-strategy')
+const QidUnifyUtil = require('../../../util/qid-unify-util')
+const FileUtil = require('../../../util/file-util')
const TAINT_TAG_NAME_PYTHON = 'PYTHON_INPUT'
@@ -21,7 +29,25 @@ class PythonTaintAbstractChecker extends TaintChecker {
* @param info
*/
triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any) {
- IntroduceTaint.introduceTaintAtIdentifier(node, info.res, this.sourceScope.value)
+ const result = IntroduceTaint.introduceTaintAtIdentifier(analyzer, scope, node, info.res, this.sourceScope.value)
+ if (result !== undefined) {
+ info.res = result
+ }
+ }
+
+  /**
+   * Function-definition hook. When the configured analyzer is PythonAnalyzer, hands the
+   * function closure and this checker's source scope to commonUtil.fillSourceScope;
+   * for any other analyzer it does nothing.
+   * @param analyzer
+   * @param scope
+   * @param node
+   * @param state
+   * @param info hook payload; only info.fclos is read here
+   */
+  triggerAtFunctionDefinition(analyzer: any, scope: any, node: any, state: any, info: any) {
+    if (config.analyzer === 'PythonAnalyzer') {
+      commonUtil.fillSourceScope(info.fclos, this.sourceScope)
+    }
   }
/**
@@ -33,11 +59,11 @@ class PythonTaintAbstractChecker extends TaintChecker {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
const funcCallArgTaintSource = this.checkerRuleConfigContent.sources?.FuncCallArgTaintSource
- IntroduceTaint.introduceFuncArgTaintByRuleConfig(fclos?.object, node, argvalues, funcCallArgTaintSource)
- this.checkByNameMatch(node, fclos, argvalues)
- this.checkByFieldMatch(node, fclos, argvalues)
+ IntroduceTaint.introduceFuncArgTaintByRuleConfig(fclos?.object, node, callInfo, funcCallArgTaintSource)
+ this.checkByNameMatch(node, fclos, callInfo, state)
+ this.checkByFieldMatch(node, fclos, callInfo, state)
}
/**
@@ -60,18 +86,20 @@ class PythonTaintAbstractChecker extends TaintChecker {
* @param node
* @param fclos
* @param argvalues
+ * @param callInfo
+ * @param state
* @returns {boolean}
*/
- checkByNameMatch(node: any, fclos: any, argvalues: any) {
+ checkByNameMatch(node: any, fclos: any, callInfo: CallInfo | undefined, state?: any) {
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
if (_.isEmpty(rules)) {
return
}
- let rule = matchSinkAtFuncCall(node, fclos, rules)
+ let rule = matchSinkAtFuncCall(node, fclos, rules, callInfo)
rule = rule.length > 0 ? rule[0] : null
if (rule) {
- this.findArgsAndAddNewFinding(node, argvalues, fclos, rule)
+ this.findArgsAndAddNewFinding(node, callInfo, fclos, rule, state)
}
}
@@ -80,9 +108,28 @@ class PythonTaintAbstractChecker extends TaintChecker {
* @param node
* @param fclos
* @param argvalues
- * @param scope
+ * @param state
+ * @param qid
*/
- checkByFieldMatch(node: any, fclos: any, argvalues: any) {
+  // Strips call-argument metadata from every top-level `.`-separated segment of a qid,
+  // e.g. connect(with(config)) → connect.
+  // Dots nested inside parentheses (e.g. connect(with(a.b)).execute) are part of the argument
+  // text, not segment separators, so parenthesis depth is tracked instead of splitting the raw
+  // string on `.`. Within a segment, everything from its first `(` up to the next top-level
+  // `.` is dropped.
+  stripCallMetadata(qid: string): string {
+    let stripped = ''
+    let depth = 0 // current parenthesis nesting level
+    let skipping = false // true once the current segment has reached its first `(`
+    for (let i = 0; i < qid.length; i += 1) {
+      const ch = qid[i]
+      if (depth > 0) {
+        // inside call arguments: only track nesting, emit nothing
+        if (ch === '(') {
+          depth += 1
+        } else if (ch === ')') {
+          depth -= 1
+        }
+      } else if (ch === '(') {
+        depth = 1
+        skipping = true
+      } else if (ch === '.') {
+        // a top-level dot starts a new segment
+        skipping = false
+        stripped += ch
+      } else if (!skipping) {
+        stripped += ch
+      }
+    }
+    return stripped
+  }
+
+ /**
+ *
+ * @param node
+ * @param fclos
+ * @param callInfo
+ * @param state
+ */
+ checkByFieldMatch(node: any, fclos: any, callInfo: CallInfo | undefined, state?: any) {
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
if (_.isEmpty(rules)) {
return
@@ -97,15 +144,21 @@ class PythonTaintAbstractChecker extends TaintChecker {
}
if (rule.fsig) {
if (rule.fsig === callFull) {
- this.findArgsAndAddNewFinding(node, argvalues, fclos, rule)
+ this.findArgsAndAddNewFinding(node, callInfo, fclos, rule, state)
+ return true
+ }
+ // 去除参数元数据后做后缀匹配
+ const stripped = this.stripCallMetadata(callFull)
+ if (stripped.endsWith(rule.fsig)) {
+ this.findArgsAndAddNewFinding(node, callInfo, fclos, rule, state)
return true
}
} else {
if (!rule.fregex) {
return false
}
- if (callFull.type === 'MemberAccess' && matchRegex(rule.fregex, fclos._qid)) {
- this.findArgsAndAddNewFinding(node, argvalues, fclos, rule)
+ if (callFull.type === 'MemberAccess' && matchRegex(rule.fregex, fclos.qid)) {
+ this.findArgsAndAddNewFinding(node, callInfo, fclos, rule, state)
return true
}
}
@@ -118,27 +171,24 @@ class PythonTaintAbstractChecker extends TaintChecker {
* @param fclos
*/
getObj(fclos: any): any {
- if (
- typeof fclos?._sid !== 'undefined' &&
- typeof fclos?._qid === 'undefined' &&
- typeof fclos?._this === 'undefined'
- ) {
- const index = fclos?._sid.indexOf('>.')
- const result = index !== -1 ? fclos?._sid.substring(index + 2) : fclos?._sid
- return result.replace('', '').replace('()', '')
+ if (typeof fclos?.sid !== 'undefined' && typeof fclos?.qid === 'undefined' && typeof fclos?._this === 'undefined') {
+ const index = fclos?.sid.indexOf('>.')
+ return index !== -1 ? fclos?.sid.substring(index + 2) : fclos?.sid
}
- if (typeof fclos?._qid !== 'undefined') {
- const index = fclos._qid.indexOf('>.')
- const result = index !== -1 ? fclos?._qid.substring(index + 2) : fclos?._qid
- return result.replace('', '').replace('()', '')
+ if (typeof fclos?.qid !== 'undefined' && typeof fclos.qid === 'string') {
+ const index = fclos.qid.indexOf('>.')
+ const result = index !== -1 ? fclos?.qid.substring(index + 2) : fclos?.qid
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(result)
}
if (!(fclos === fclos?._this)) {
return this.getObj(fclos._this)
}
- const index = fclos?._sid.indexOf('>.')
- const result = index !== -1 ? fclos?._sid.substring(index + 2) : fclos?._sid
- if (result) {
- return result.replace('', '').replace('()', '')
+ if (typeof fclos?.sid === 'string') {
+ const index = fclos?.sid.indexOf('>.')
+ const result = index !== -1 ? fclos?.sid.substring(index + 2) : fclos?.sid
+ if (result) {
+ return QidUnifyUtil.qidUnifyByRemoveAngleAndPrefix(result)
+ }
}
}
@@ -146,11 +196,13 @@ class PythonTaintAbstractChecker extends TaintChecker {
*
* @param node
* @param argvalues
+ * @param callInfo
* @param fclos
* @param rule
+ * @param state
*/
- findArgsAndAddNewFinding(node: any, argvalues: any, fclos: any, rule: any) {
- const args = BasicRuleHandler.prepareArgs(argvalues, fclos, rule)
+ findArgsAndAddNewFinding(node: any, callInfo: CallInfo | undefined, fclos: any, rule: any, state?: any) {
+ const args = BasicRuleHandler.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -177,7 +229,8 @@ class PythonTaintAbstractChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME_PYTHON,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) continue
this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategy.outputStrategyId)
@@ -187,4 +240,18 @@ class PythonTaintAbstractChecker extends TaintChecker {
}
}
-module.exports = PythonTaintAbstractChecker
+/**
+ * Loads the default Python rule file at ./resource/python/python-default-rule.json.
+ * @returns whatever FileUtil.loadJSONfile yields for that file, or undefined when resolving or
+ *   loading throws (the error is passed to handleException)
+ */
+function loadPythonDefaultRule() {
+  const failureMessage = 'Error occurred in load python default rule'
+  try {
+    return FileUtil.loadJSONfile(FileUtil.getAbsolutePath('./resource/python/python-default-rule.json'))
+  } catch (e) {
+    handleException(e, failureMessage, failureMessage)
+    return undefined
+  }
+}
+
+module.exports = { PythonTaintAbstractChecker, loadPythonDefaultRule }
diff --git a/src/checker/taint/python/python-taint-checker.ts b/src/checker/taint/python/python-taint-checker.ts
index 966d09fd..299cd480 100644
--- a/src/checker/taint/python/python-taint-checker.ts
+++ b/src/checker/taint/python/python-taint-checker.ts
@@ -1,18 +1,17 @@
-import { handleException } from '../../../engine/analyzer/common/exception-handler'
-
const _ = require('lodash')
-const PythonTaintAbstractChecker = require('./python-taint-abstract-checker')
+const { PythonTaintAbstractChecker } = require('./python-taint-abstract-checker')
const CommonUtil = require('../../../util/common-util')
const {
findPythonFcEntryPointAndSource,
+ buildFclosIndex,
+ lookupFclos,
} = require('../../../engine/analyzer/python/common/entrypoint-collector/python-entrypoint')
const Constant = require('../../../util/constant')
const EntryPoint = require('../../../engine/analyzer/common/entrypoint')
-const AstUtil = require('../../../util/ast-util')
const Config = require('../../../config')
-const FileUtil = require('../../../util/file-util')
const { extractRelativePath } = require('../../../util/file-util')
const logger = require('../../../util/logger')(__filename)
+const { loadPythonDefaultRule } = require('./python-taint-abstract-checker')
const TAINT_TAG_NAME_PYTHON = 'PYTHON_INPUT'
@@ -38,7 +37,8 @@ class PythonTaintChecker extends PythonTaintAbstractChecker {
* @param info
*/
triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { moduleManager, fileManager } = analyzer
+ const moduleManager = analyzer.topScope.context.modules
+ const fileManager = analyzer.topScope.context.files
this.prepareEntryPoints(analyzer, Config.maindir, moduleManager, fileManager)
analyzer.entryPoints.push(...this.entryPoints)
this.addSourceTagForSourceScope(TAINT_TAG_NAME_PYTHON, this.sourceScope.value)
@@ -58,7 +58,7 @@ class PythonTaintChecker extends PythonTaintAbstractChecker {
const { entrypoints: ruleConfigEntryPoints } = this.checkerRuleConfigContent
if (Config.entryPointMode !== 'ONLY_CUSTOM') {
- const pythonDefaultRule = this.loadPythonDefaultRule()
+ const pythonDefaultRule = loadPythonDefaultRule()
if (pythonDefaultRule[0].checkerIds.includes(this.getCheckerId())) {
this.checkerRuleConfigContent.sources = this.checkerRuleConfigContent.sources || {}
this.checkerRuleConfigContent.sources.TaintSource = this.checkerRuleConfigContent.sources.TaintSource || []
@@ -69,7 +69,11 @@ class PythonTaintChecker extends PythonTaintAbstractChecker {
: [this.checkerRuleConfigContent.sources.TaintSource]
this.checkerRuleConfigContent.sources.TaintSource.push(...pythonDefaultRule[0].sources.TaintSource)
}
- const { pyFcEntryPointArray, pyFcEntryPointSourceArray } = findPythonFcEntryPointAndSource(dir, fileManager)
+ const { pyFcEntryPointArray, pyFcEntryPointSourceArray } = findPythonFcEntryPointAndSource(
+ dir,
+ fileManager,
+ analyzer
+ )
if (pyFcEntryPointArray) {
funCallEntryPoints.push(...pyFcEntryPointArray)
}
@@ -101,26 +105,20 @@ class PythonTaintChecker extends PythonTaintAbstractChecker {
}
}
+ // 构建 fclos 索引,一次遍历替代多次查找
+ const fclosIndex = buildFclosIndex(moduleManager, dir, extractRelativePath)
+
for (const funCallEntryPoint of funCallEntryPoints) {
- let valFuncs = AstUtil.satisfy(
- moduleManager,
- (n: any) =>
- n.vtype === 'fclos' &&
- extractRelativePath(n?.ast?.loc?.sourcefile, dir) === funCallEntryPoint.filePath &&
- n?.ast?.id?.name === funCallEntryPoint.functionName,
- (node: any, prop: any) => prop === 'field',
- null,
- true
- )
+ // 使用索引查找,O(1) 操作
+ let valFuncs = lookupFclos(fclosIndex, funCallEntryPoint.filePath, funCallEntryPoint.functionName)
+
if (_.isEmpty(valFuncs)) {
logger.info('match entryPoint fail')
continue
}
- if (Array.isArray(valFuncs)) {
- valFuncs = _.uniqBy(valFuncs, (value: any) => value.fdef)
- } else {
- valFuncs = [valFuncs]
- }
+
+ // 去重
+ valFuncs = _.uniqBy(valFuncs, (value: any) => value.ast.fdef)
for (const valFunc of valFuncs) {
const entryPoint = new EntryPoint(Constant.ENGIN_START_FUNCALL)
@@ -134,13 +132,14 @@ class PythonTaintChecker extends PythonTaintAbstractChecker {
for (const fileEntryPoint of fileEntryPoints) {
const fullFilePath = `${Config.maindir}${fileEntryPoint.filePath}`.replace('//', '/')
- const file = fileManager[fullFilePath]
- if (file?.ast?.type === 'CompileUnit') {
+ const fileUuid = fileManager[fullFilePath]
+ const file = analyzer.symbolTable.get(fileUuid)
+ if (file?.ast?.node?.type === 'CompileUnit') {
const entryPoint = new EntryPoint(Constant.ENGIN_START_FILE_BEGIN)
entryPoint.scopeVal = file
entryPoint.argValues = undefined
entryPoint.functionName = undefined
- entryPoint.filePath = file?.ast?.loc?.sourcefile
+ entryPoint.filePath = file?.ast?.node?.loc?.sourcefile
entryPoint.attribute = fileEntryPoint.attribute
entryPoint.packageName = undefined
entryPoint.entryPointSymVal = file
@@ -150,20 +149,6 @@ class PythonTaintChecker extends PythonTaintAbstractChecker {
CommonUtil.initSourceScopeByTaintSourceWithLoc(this.sourceScope, this.checkerRuleConfigContent.sources?.TaintSource)
}
-
- /**
- *
- */
- loadPythonDefaultRule() {
- let pythonDefaultRule
- try {
- const rulePath = FileUtil.getAbsolutePath('./resource/python/python-default-rule.json')
- pythonDefaultRule = FileUtil.loadJSONfile(rulePath)
- } catch (e) {
- handleException(e, 'Error occurred in load python default rule', 'Error occurred in load python default rule')
- }
- return pythonDefaultRule
- }
}
module.exports = PythonTaintChecker
diff --git a/src/checker/taint/python/script-taint-checker.ts b/src/checker/taint/python/script-taint-checker.ts
new file mode 100644
index 00000000..e0e3c2a3
--- /dev/null
+++ b/src/checker/taint/python/script-taint-checker.ts
@@ -0,0 +1,81 @@
+const { PythonTaintAbstractChecker } = require('./python-taint-abstract-checker')
+const Config = require('../../../config')
+const { markTaintSource } = require('../common-kit/source-util')
+const AstUtil = require('../../../util/ast-util')
+
+// Patterns tested against fclos.qid to recognise script-level source calls.
+const SCRIPT_SOURCE_QID_PATTERNS = [
+  // argparse: ArgumentParser(...).parse_args / parse_known_args
+  /\.argparse\.ArgumentParser\(.*\)\.(parse_args|parse_known_args)$/,
+  // optparse: OptionParser(...).parse_args / parse_known_args
+  /\.optparse\.OptionParser\(.*\)\.(parse_args|parse_known_args)$/,
+  // input / raw_input
+  /\.(input|raw_input)$/,
+  // getopt.getopt / getopt.gnu_getopt
+  /\.getopt\.(getopt|gnu_getopt)$/,
+  // os.getenv
+  /\.os\.getenv$/,
+  // os.environ.get
+  /\.os\.environ\.get$/,
+  // sys.stdin.read / readline / readlines
+  /\.sys\.stdin\.(read|readline|readlines)$/,
+  // File I/O sources: open() / io.open() / codecs.open() return a file handle carrying local file content
+  /\.(open|io\.open|codecs\.open)$/,
+  // pathlib file reads
+  /\.Path.*\.(read_text|read_bytes|read)$/,
+]
+
+
+// Member-access expressions, as rendered by AstUtil.prettyPrintAST, that are marked as taint sources.
+const SCRIPT_SOURCE_MEMBER_ACCESSES = ['sys.argv', 'os.environ']
+
+/**
+ * Taint-tracking checker for Python scripts.
+ * Sources: argparse.parse_args(), sys.argv, input(), os.environ, getopt, open(), etc.
+ * Entry points: file-level entries (a script executes from the top of the file).
+ */
+class ScriptTaintChecker extends PythonTaintAbstractChecker {
+  /**
+   * @param resultManager forwarded to the base checker together with this checker's id
+   */
+  constructor(resultManager: any) {
+    super(resultManager, 'taint_flow_python_script_input')
+  }
+
+  /**
+   * Tags the rule-config content with PYTHON_INPUT and, unless the entry point mode is
+   * ONLY_CUSTOM, appends the entry points returned by getAllFileEntryPointsUsingFileManager
+   * to analyzer.entryPoints.
+   * @param analyzer
+   * @param scope
+   * @param node
+   * @param state
+   * @param info
+   */
+  triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent)
+    if (Config.entryPointMode === 'ONLY_CUSTOM') return
+    const fullCallGraphFileEntryPoint = require('../../common/full-callgraph-file-entrypoint')
+    const fullFileEntrypoint = fullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer)
+    analyzer.entryPoints.push(...fullFileEntrypoint)
+  }
+
+  /**
+   * After the base handling, marks the call's return value as a PYTHON_INPUT source when
+   * fclos.qid matches any of SCRIPT_SOURCE_QID_PATTERNS.
+   * @param analyzer
+   * @param scope
+   * @param node
+   * @param state
+   * @param info hook payload; fclos and ret are read here
+   */
+  triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info)
+    const { fclos, ret } = info
+    if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return
+
+    const { qid } = fclos
+    if (typeof qid !== 'string') return
+
+    if (SCRIPT_SOURCE_QID_PATTERNS.some((pattern: RegExp) => pattern.test(qid))) {
+      markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' })
+    }
+  }
+
+  /**
+   * Marks the result of a `sys.argv` or `os.environ` member access as a PYTHON_INPUT source.
+   * @param analyzer
+   * @param scope
+   * @param node
+   * @param state
+   * @param info hook payload; info.res is the value that gets marked
+   */
+  triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    if (Config.entryPointMode === 'ONLY_CUSTOM') return
+
+    // This hook runs for every member access, so the node is rendered once and compared
+    // against all candidate sources instead of being pretty-printed per candidate.
+    const accessText = AstUtil.prettyPrintAST(node)
+    if (SCRIPT_SOURCE_MEMBER_ACCESSES.includes(accessText)) {
+      markTaintSource(info.res, { path: node, kind: 'PYTHON_INPUT' })
+    }
+  }
+}
+
+module.exports = ScriptTaintChecker
diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts
index 24cf5dac..f3e96c6b 100644
--- a/src/checker/taint/python/tornado-taint-checker.ts
+++ b/src/checker/taint/python/tornado-taint-checker.ts
@@ -1,4 +1,6 @@
-const PythonTaintAbstractChecker = require('./python-taint-abstract-checker')
+import { getLegacyArgValues } from '../../../engine/analyzer/common/call-args'
+
+const { PythonTaintAbstractChecker } = require('./python-taint-abstract-checker')
const Config = require('../../../config')
const completeEntryPoint = require('../common-kit/entry-points-util')
const { markTaintSource } = require('../common-kit/source-util')
@@ -43,7 +45,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker {
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void {
super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info)
- const { fclos, argvalues } = info
+ const { fclos, callInfo } = info
+ const argvalues = getLegacyArgValues(callInfo)
if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return
const isApp = isTornadoCall(node, 'Application')
const isRouter = isTornadoCall(node, 'RuleRouter')
@@ -93,7 +96,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker {
const handler = val.value['1']
if (handler) {
const pathArg = val.value['0']
- const path = pathArg?.value || pathArg?.ast?.value
+ const path = pathArg?.value || pathArg?.ast?.node?.value
if (typeof path === 'string') {
this.finishRoute(analyzer, scope, state, handler)
return
@@ -134,15 +137,15 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker {
}
// 2. Handle Class Definition (Handler classes)
let cls = h
- if (cls.vtype !== 'class' && cls.ast?.type === 'ClassDefinition') {
+ if (cls.vtype !== 'class' && cls.ast?.node?.type === 'ClassDefinition') {
try {
- cls = analyzer.processInstruction(scope, cls.ast, state) || this.buildClassSymbol(cls.ast)
+ cls = analyzer.processInstruction(scope, cls.ast.node, state) || this.buildClassSymbol(cls.ast.node)
} catch (e) {
- cls = this.buildClassSymbol(cls.ast)
+ cls = this.buildClassSymbol(cls.ast.node)
}
- } else if (cls.vtype === 'symbol' && cls.cdef) {
+ } else if (cls.vtype === 'symbol' && cls.ast?.cdef) {
// If it's an instance symbol, get its class definition
- cls = cls.cdef
+ cls = cls.ast.cdef
}
if (cls && (cls.vtype === 'class' || cls.vtype === 'symbol')) {
this.registerEntryPoints(analyzer, cls)
@@ -156,14 +159,23 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker {
*/
private registerEntryPoints(analyzer: any, cls: any) {
const methods = ['get', 'post', 'put', 'delete', 'patch']
- // Look for methods in cls.value, cls.field, or cls.value.field (Python specificity)
- const classValue = cls.value?.field || cls.field || cls.value || {}
+ // 在 cls.value 或 cls.value.value 中查找方法(Python 类结构)
+ const classValue = cls.value?.value || cls.value || {}
Object.entries(classValue).forEach(([name, fclos]: [string, any]) => {
if (methods.includes(name)) {
const ep = completeEntryPoint(fclos)
if (ep) {
- analyzer.entryPoints.push(ep)
- const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[]
+ ep.funcReceiverType = cls.ast?.node?.id?.name || cls.sid || 'Unknown'
+ const isDuplicate = analyzer.entryPoints.some(
+ (existing: any) =>
+ existing.functionName === ep.functionName &&
+ existing.filePath === ep.filePath &&
+ existing.funcReceiverType === ep.funcReceiverType
+ )
+ if (!isDuplicate) {
+ analyzer.entryPoints.push(ep)
+ }
+ const actualParams = (fclos.ast?.fdef?.parameters || fclos.ast?.node?.parameters || []) as any[]
actualParams.forEach((p: any) => {
const pName = p.id?.name || p.name
if (pName === 'self') return
@@ -171,7 +183,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker {
this.sourceScope.value.push({
path: pName,
kind: 'PYTHON_INPUT',
- scopeFile: extractRelativePath(fclos?.ast?.loc?.sourcefile || ep.filePath, Config.maindir),
+ scopeFile: extractRelativePath(fclos?.ast?.node?.loc?.sourcefile || ep.filePath, Config.maindir),
scopeFunc: ep.functionName,
locStart: p.loc?.start?.line,
locEnd: p.loc?.end?.line,
@@ -213,7 +225,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker {
*/
triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void {
super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info)
- const { fclos, ret, argvalues } = info
+ const { fclos, ret, callInfo } = info
+ const argvalues = getLegacyArgValues(callInfo)
if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return
const name = node.callee?.property?.name || node.callee?.name
// 1. Record route info for Rule, URLSpec, url (Recording phase)
@@ -259,4 +272,4 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker {
}
}
-export = TornadoTaintChecker
+module.exports = TornadoTaintChecker
diff --git a/src/checker/taint/taint-checker.ts b/src/checker/taint/taint-checker.ts
index 2a7a7886..ab7800dd 100644
--- a/src/checker/taint/taint-checker.ts
+++ b/src/checker/taint/taint-checker.ts
@@ -23,8 +23,11 @@ class TaintChecker extends Checker {
this.sourceScope = {
complete: false,
value: [],
+ fillLineValues: [],
}
taintCheckerCommonUtil.initSourceScope(this.sourceScope, this.checkerRuleConfigContent.sources?.TaintSource)
+ this.sinkRuleArray = undefined
+ this.matchSinkRuleResultMap = new Map()
}
/**
@@ -37,7 +40,7 @@ class TaintChecker extends Checker {
const callNode = finding.node
const sinkRule = finding.ruleName
const { fclos, matchedSanitizerTags, callstack } = finding
- if (finding && argNode && argNode.hasTagRec) {
+ if (finding && argNode && argNode.taint?.isTaintedRec) {
let traceStack = TaintCheckerFindingUtil.getTrace(argNode, tagName)
const trace = TaintCheckerSourceLine.getNodeTrace(fclos, callNode)
// 暂时统一去掉Field,不然展示出来的链路会重复
@@ -61,8 +64,10 @@ class TaintChecker extends Checker {
sinkRule: finding.sinkRule,
sinkAttribute: finding.sinkAttribute,
}
+ const currentEntryPoint = entryPointConfig.getCurrentEntryPoint()
+ finding.entrypointLoc = currentEntryPoint?.entryPointSymVal?.ast?.node?.loc
finding.entrypoint = _.pickBy(
- _.clone(entryPointConfig.getCurrentEntryPoint()),
+ _.clone(currentEntryPoint),
(value: any) => !_.isObject(value)
)
finding.trace.push(trace)
diff --git a/src/checker/taint/test-taint-checker.ts b/src/checker/taint/test-taint-checker.ts
index 7edd372b..d544e72b 100644
--- a/src/checker/taint/test-taint-checker.ts
+++ b/src/checker/taint/test-taint-checker.ts
@@ -1,3 +1,5 @@
+import type { CallInfo } from '../../engine/analyzer/common/call-args'
+
const BasicRuleHandler = require('../common/rules-basic-handler')
const IntroduceTaint = require('./common-kit/source-util')
const SanitizerChecker = require('../sanitizer/sanitizer-checker')
@@ -53,8 +55,8 @@ class TestTaintChecker extends TaintChecker {
* @param info
*/
triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any) {
- const { fclos, argvalues } = info
- this.checkSinkAtFunctionCall(node, fclos, argvalues)
+ const { fclos, callInfo } = info
+ this.checkSinkAtFunctionCall(node, fclos, callInfo, state)
}
/**
@@ -76,7 +78,7 @@ class TestTaintChecker extends TaintChecker {
* @param info
*/
triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any) {
- IntroduceTaint.introduceTaintAtIdentifierDirect(node, info.res, this.sourceScope.value)
+ IntroduceTaint.introduceTaintAtIdentifierDirect(analyzer, scope, node, info.res, this.sourceScope.value)
}
/**
@@ -123,7 +125,7 @@ class TestTaintChecker extends TaintChecker {
/**
*
-
+
* @param analyzer
* @param scope
* @param node
@@ -144,7 +146,7 @@ class TestTaintChecker extends TaintChecker {
/**
*
-
+
* @param analyzer
* @param scope
* @param node
@@ -183,10 +185,11 @@ class TestTaintChecker extends TaintChecker {
// 使用callgraph边界作为entrypoint
fullCallGraphFileEntryPoint.makeFullCallGraph(analyzer)
const fullCallGraphEntrypoint = fullCallGraphFileEntryPoint.getAllEntryPointsUsingCallGraph(
- analyzer.ainfo?.callgraph
+ analyzer.ainfo?.callgraph,
+ analyzer
)
// 使用file作为entrypoint
- const fullFileEntrypoint = fullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer.fileManager)
+ const fullFileEntrypoint = fullCallGraphFileEntryPoint.getAllFileEntryPointsUsingFileManager(analyzer)
this.entryPoints.push(...fullFileEntrypoint)
this.entryPoints.push(...fullCallGraphEntrypoint)
}
@@ -197,17 +200,18 @@ class TestTaintChecker extends TaintChecker {
* @param node
* @param fclos
- * @param argValues
+ * @param callInfo
+ * @param state
*/
- checkSinkAtFunctionCall(node: any, fclos: any, argValues: any) {
+ checkSinkAtFunctionCall(node: any, fclos: any, callInfo: CallInfo | undefined, state?: any) {
if (!fclos) {
return
}
const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink
- let rule = matchSinkAtFuncCall(node, fclos, rules)
+ let rule = matchSinkAtFuncCall(node, fclos, rules, callInfo)
rule = rule.length > 0 ? rule[0] : null
if (rule) {
- const args = BasicRuleHandler.prepareArgs(argValues, fclos, rule)
+ const args = BasicRuleHandler.prepareArgs(callInfo, fclos, rule)
const sanitizers = SanitizerChecker.findSanitizerByIds(rule.sanitizerIds)
const ndResultWithMatchedSanitizerTagsArray = SanitizerChecker.findTagAndMatchedSanitizer(
node,
@@ -234,7 +238,8 @@ class TestTaintChecker extends TaintChecker {
fclos,
TAINT_TAG_NAME_TEST_TAINT,
ruleName,
- matchedSanitizerTags
+ matchedSanitizerTags,
+ state?.callstack
)
if (!TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) continue
this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategy.outputStrategyId)
diff --git a/src/client.ts b/src/client.ts
index eb1a5889..981646d4 100644
--- a/src/client.ts
+++ b/src/client.ts
@@ -131,7 +131,7 @@ async function main(): Promise {
console.log(`arguments: ${args.join(' ')}`)
if (!args.includes('--singleCommand')) {
const analyzer = await initAnalyzer(null, args)
- analyzer.preProcess(Config.maindir)
+ await analyzer.preProcess(Config.maindir)
const fullCallGraphFileEntryPoint = require('./checker/common/full-callgraph-file-entrypoint')
fullCallGraphFileEntryPoint.makeFullCallGraph(analyzer)
BasicRuleHandler.setPreprocessReady(true)
diff --git a/src/config.ts b/src/config.ts
index f85c39ab..87e3f23c 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -33,6 +33,7 @@ export interface IConfig {
invokeCallbackOnUnknownFunction?: number
maxIterationTime?: number
shareSourceLineSet?: boolean
+ workerCount?: number // Worker数量:0表示自动计算,>0表示使用设置的worker数量
// Analysis
stateUnionLevel?: number
@@ -53,6 +54,7 @@ export interface IConfig {
entryPointAndSourceAtSameTime?: boolean
entryPointMode?: string
cgAlgo: string
+ taintTraceOutputStrategy?: string
// Allow additional properties
[key: string]: any
@@ -69,8 +71,12 @@ const configObject: IConfig = {
dumpAST: false, // dump ast to json format
dumpAllAST: false, // dump all ast to json format
- intermediateDir: '', // 中间文件缓存目录路径(默认使用 reportDir/ast-output)
+ intermediateDir: '', // 增量扫描缓存目录路径(默认使用 reportDir/ast-output)
incremental: false, // 增量分析模式(默认禁用,需要显式配置)
+ saveContextEnvironment: false, // 保存上下文缓存模式
+ miniSaveContextEnvironment: false, // 极简保存上下文缓存模式
+ loadContextEnvironment: false, // 加载上下文缓存模式
+ contextEnvironmentDir: '', // 上下文缓存文件目录
//* ***************************** path and so on ***************************
@@ -104,6 +110,9 @@ const configObject: IConfig = {
// multiple objects with the same source may share the same source line trace
shareSourceLineSet: false,
+ // Worker数量:0表示自动计算,>0表示使用设置的worker数量
+ workerCount: 0,
+
//* ***************************** analysis ***************************
stateUnionLevel: 2,
@@ -122,8 +131,21 @@ const configObject: IConfig = {
entryPointAndSourceAtSameTime: true,
entryPointMode: 'BOTH', // BOTH or ONLY_CUSTOM or SELF_COLLECT
+ // Taint trace output strategy: 'full' | 'callstack-only' (legacy alias: 'folded')
+ taintTraceOutputStrategy: 'callstack-only',
+
// CallGraph
cgAlgo: 'DEFAULT',
+
+ // Pruning
+ minEntryPointToEnablePrune: 200,
+
+ // Timeout
+ entryPointTimeoutMs: 300000,
+ entryPointTimeoutQuickMs: 120000,
+
+ // Prune parameters for aggressive prune mode
+ maxCallstackDepth: 12, // max callstack depth in aggressive prune mode
}
module.exports = configObject
diff --git a/src/engine/analyzer/common/analysis-context.ts b/src/engine/analyzer/common/analysis-context.ts
new file mode 100644
index 00000000..d25fda63
--- /dev/null
+++ b/src/engine/analyzer/common/analysis-context.ts
@@ -0,0 +1,14 @@
+/**
+ * AnalysisContext - project-level analysis context
+ *
+ * Held only by topScope; the Analyzer shares the same reference.
+ * Groups project-level analysis data in one place, replacing the manager properties scattered across topScope.
+ */
+export class AnalysisContext {
+ ast: any = null
+ symbols: any = null
+ modules: any = null
+ packages: any = null
+ files: Record | null = null
+ funcs: Record | null = null
+}
diff --git a/src/engine/analyzer/common/analyzer-cache.ts b/src/engine/analyzer/common/analyzer-cache.ts
new file mode 100644
index 00000000..5ec1d97a
--- /dev/null
+++ b/src/engine/analyzer/common/analyzer-cache.ts
@@ -0,0 +1,1832 @@
+import { RAW_TARGET, IS_UNION_ARRAY } from './value/symbols'
+import { yasaLog, yasaWarning, yasaError } from '../../../util/format-util'
+import { AnalysisContext } from './analysis-context'
+import { Scoped } from './value/scoped'
+import { ObjectValue } from './value/object'
+import { PrimitiveValue } from './value/primitive'
+import { SymbolValue } from './value/symbolic'
+import { PackageValue } from './value/package'
+import { UnionValue } from './value/union'
+import { UnknownValue } from './value/unkown'
+import { UndefinedValue } from './value/undefine'
+import { UninitializedValue } from './value/uninit'
+import { FunctionValue } from './value/function'
+import { BVTValue } from './value/bvt'
+import type { RType } from './value/data-value'
+
+const fs = require('fs')
+const path = require('path')
+const jsonfile = require('jsonfile')
+const util = require('util')
+const Config = require('../../../config')
+const { writeJSONfile } = require('../../../util/file-util')
+const { shallowCopyValue } = require('../../../util/clone-util')
+const { Graph } = require('../../../util/graph')
+
+const names: string[] = []
+
+/**
+ * Resolve the directory used for the analyzer context cache.
+ *
+ * Reads `Config.contextEnvironmentDir`; a relative value is resolved against
+ * the current working directory, an absolute value is returned unchanged.
+ *
+ * @returns the absolute cache directory path
+ * @throws Error when `Config.contextEnvironmentDir` is empty or unset
+ */
+function getCacheDir(): string {
+  const outputDir = Config.contextEnvironmentDir
+  if (!outputDir) {
+    // The message names the option that is actually read above, not intermediateDir.
+    const message =
+      'Config.contextEnvironmentDir is not set. Please set Config.contextEnvironmentDir before using cache functionality.'
+    yasaError(`[CACHE]${message}`)
+    throw new Error(message)
+  }
+
+  // Relative paths are anchored at the current working directory.
+  return path.isAbsolute(outputDir) ? outputDir : path.resolve(process.cwd(), outputDir)
+}
+
+/**
+ * Recursively convert a value into a plain, JSON-friendly structure; `parent` properties can be skipped (used for astManager)
+ * @param obj value to serialize
+ * @param skipParent whether to skip `parent` properties (used for astManager)
+ * @param maxDepth maximum recursion depth, guards against unbounded recursion
+ * @param currentDepth current recursion depth
+ * @param visited objects currently being serialized, used to detect circular references
+ * @param parentRelations optional map that receives nodehash -> parent nodehash pairs for AST nodes
+ * @returns the serialized value
+ */
+function serializeObject(
+ obj: any,
+ skipParent: boolean = false,
+ maxDepth: number = 100,
+ currentDepth: number = 0,
+ visited: WeakSet = new WeakSet(),
+ parentRelations?: Map // records parent links: nodehash -> parent nodehash
+): any {
+ if (currentDepth > maxDepth) {
+ return '[Max Depth Reached]'
+ }
+
+ if (obj == null) {
+ return obj
+ }
+
+ // Primitive values are returned as-is
+ if (typeof obj !== 'object') {
+ return obj
+ }
+
+ // Circular reference: obj is already being serialized further up the call chain
+ if (visited.has(obj)) {
+ return '[Circular Reference]'
+ }
+
+ // Map: serialized as a plain object keyed by the Map's keys
+ if (obj instanceof Map) {
+ visited.add(obj)
+ const result: any = {}
+ const constructorNames: any = {} // constructor name of each unit, by key
+ for (const [key, value] of obj.entries()) {
+ // Check whether this is the topScope object (identified by empty sid and qid)
+ if (value && typeof value === 'object' && value.sid === '' && value.qid === '') {
+ // Replace topScope with a special marker
+ result[key] = { __yasaTopScopeMarker: true }
+ } else if (value != null && typeof value === 'object') {
+ // For Unit objects (those with a vtype property), record the constructor name
+ if (value.vtype && value.constructor?.name) {
+ constructorNames[key] = value.constructor.name
+ }
+ result[key] = serializeObject(value, skipParent, maxDepth, currentDepth + 1, visited, parentRelations)
+ } else {
+ result[key] = value
+ }
+ }
+ // If any constructor names were recorded, attach them to the result
+ if (Object.keys(constructorNames).length > 0) {
+ result.__yasaConstructorNames = constructorNames
+ }
+ visited.delete(obj)
+ return result
+ }
+
+ // Set: serialized as an array
+ if (obj instanceof Set) {
+ visited.add(obj)
+ const result: any[] = []
+ for (const item of obj.values()) {
+ if (item != null && typeof item === 'object') {
+ result.push(serializeObject(item, skipParent, maxDepth, currentDepth + 1, visited, parentRelations))
+ } else {
+ result.push(item)
+ }
+ }
+ visited.delete(obj)
+ return result
+ }
+
+ // Array
+ if (Array.isArray(obj)) {
+ visited.add(obj)
+ const result = obj.map((item, index) => {
+ if (item != null && typeof item === 'object') {
+ return serializeObject(item, skipParent, maxDepth, currentDepth + 1, visited, parentRelations)
+ }
+ return item
+ })
+ visited.delete(obj)
+ return result
+ }
+
+ // Plain object (possibly a Proxy)
+ visited.add(obj)
+
+ // If obj is a Proxy, try to work on the underlying object instead
+ let targetObj = obj
+ if (util.types.isProxy && util.types.isProxy(obj)) {
+ // Try to obtain the Proxy's raw target
+ if ((obj as any)[RAW_TARGET]) {
+ targetObj = (obj as any)[RAW_TARGET]
+ } else if ((obj as any)[IS_UNION_ARRAY]) {
+ targetObj = (obj as any)[IS_UNION_ARRAY] // NOTE(review): the name suggests a flag rather than a target object - confirm
+ }
+ }
+
+ const result: any = {}
+
+ // Use Reflect.ownKeys to get every own property (including non-enumerable ones)
+ // For Unit objects both targetObj and obj are inspected, because some properties (e.g. astNodehash) may live on the original object
+ // Keys are read from targetObj rather than obj to avoid triggering the Proxy's get trap
+ const allKeys = Reflect.ownKeys(targetObj)
+ // For Unit objects, also look at the keys on the original object (when targetObj and obj differ)
+ const allKeysFromObj = targetObj !== obj ? Reflect.ownKeys(obj) : []
+ // Merge both key sets so that every property is serialized
+ const allKeysSet = new Set([...allKeys, ...allKeysFromObj])
+
+ for (const key of allKeysSet) {
+ if (typeof key === 'symbol') {
+ continue
+ }
+
+ const keyStr = key as string
+
+ // Skip internal properties and the non-serializable ValueRefMap/ValueRefList shadow properties
+ if (keyStr.startsWith('__yasa') || keyStr === 'elements' || keyStr === '_children' || keyStr === 'set') {
+ if (!names.includes(keyStr)) {
+ names.push(keyStr)
+ }
+ continue
+ }
+
+ try {
+ // Read the property from targetObj directly to avoid triggering the Proxy's get trap
+ // If targetObj does not have it, fall back to the original object (for properties such as astNodehash)
+ let value = Reflect.get(targetObj, keyStr)
+ if (value === undefined && targetObj !== obj) {
+ // Not present on targetObj: try the original object's own data property
+ const descriptor = Object.getOwnPropertyDescriptor(obj, keyStr)
+ if (descriptor && 'value' in descriptor) {
+ value = descriptor.value
+ }
+ }
+
+ // parent property: record the nodehash relation to the parent
+ if (keyStr === 'parent' && value && typeof value === 'object' && value.type) {
+ // This is the parent of an AST node
+ const currentNodehash = targetObj._meta?.nodehash
+ const parentNodehash = value._meta?.nodehash
+ if (currentNodehash && parentNodehash && parentRelations) {
+ // Record the parent relation
+ parentRelations.set(currentNodehash, parentNodehash)
+ }
+ // When skipParent is true, do not serialize the parent object itself
+ if (skipParent) {
+ continue
+ }
+ // Otherwise keep going and serialize parent (the relation has still been recorded)
+ }
+
+ // When skipParent is true, skip any remaining parent property (AST parents were handled above)
+ if (skipParent && keyStr === 'parent') {
+ continue
+ }
+
+ // decls and overloaded may be Proxies, so their internal storage has to be read
+ if (keyStr === 'decls' && util.types.isProxy && util.types.isProxy(value)) {
+ // Prefer _ast._declsMap (AstBinding), fall back to _declsNodehashMap
+ const astBinding = Reflect.get(targetObj, '_ast')
+ const declsMap = astBinding?._declsMap ?? Reflect.get(targetObj, '_declsNodehashMap')
+ if (declsMap instanceof Map) {
+ const declsData: any = {}
+ for (const [name, entry] of declsMap.entries()) {
+ // AstRef objects contribute their .hash, bare strings are used as-is
+ declsData[name] = entry?.hash ?? entry
+ }
+ result[keyStr] = declsData
+ } else {
+ result[keyStr] = serializeObject(value, skipParent, maxDepth, currentDepth + 1, visited, parentRelations)
+ }
+ continue
+ }
+
+ if (keyStr === 'overloaded') {
+ const overloadedList = Reflect.get(targetObj, 'overloaded')
+ if (overloadedList && overloadedList._refs) {
+ result[keyStr] = overloadedList._refs.map((ref: any) => ref.hash)
+ }
+ continue
+ }
+
+ // Check whether this is the topScope object (identified by empty sid and qid)
+ if (value && typeof value === 'object' && value.sid === '' && value.qid === '') {
+ // Replace topScope with a special marker
+ result[keyStr] = { __yasaTopScopeMarker: true }
+ } else if (value != null && typeof value === 'object') {
+ result[keyStr] = serializeObject(value, skipParent, maxDepth, currentDepth + 1, visited, parentRelations)
+ } else {
+ // Primitive values (string, number, boolean, null, undefined) are assigned directly
+ // Note: JSON.stringify drops undefined but keeps null
+ // To guarantee that properties such as astNodehash are serialized, they would have to be included even when undefined
+ // JSON has no undefined, so that would mean writing null in its place
+ // For consistency, however, the value is assigned as-is and JSON.stringify is left to handle it
+ result[keyStr] = value
+ }
+ } catch (e) {
+ // Accessor errors are not fatal: log a warning and continue with the next key
+ yasaWarning(`Failed to serialize property ${keyStr}: ${e}`)
+ }
+ }
+
+ visited.delete(obj)
+ return result
+}
+
+/**
+ * Split a large value into several JSON files and write them to disk
+ * @param data value to split
+ * @param basePath base path (without extension) from which the output file names are derived
+ * @param chunkSize maximum number of entries per file for arrays and non-Map objects (default 1000)
+ * @param isMapData whether data is Map data (an object produced by serialization)
+ * @returns list of the file paths that were written
+ */
+function splitAndSave(data: any, basePath: string, chunkSize: number = 1000, isMapData: boolean = false): string[] {
+ const savedFiles: string[] = []
+
+ if (Array.isArray(data)) {
+ // Arrays are split into slices of chunkSize elements
+ for (let i = 0; i < data.length; i += chunkSize) {
+ const chunk = data.slice(i, i + chunkSize)
+ const chunkPath = `${basePath}.part${Math.floor(i / chunkSize)}.json`
+ writeJSONfile(chunkPath, chunk)
+ savedFiles.push(chunkPath)
+ }
+ } else if (typeof data === 'object' && data !== null) {
+ const keys = Object.keys(data)
+ const mapChunkSize = 1000 // Map data always uses 1000 per chunk; the chunkSize argument is not used on that path
+
+ if (isMapData) {
+ // Map data: always use the sub-directory layout, regardless of the number of entries
+ const baseDir = path.dirname(basePath)
+ const baseName = path.basename(basePath)
+ const mapDir = path.join(baseDir, baseName)
+
+ // Make sure the directory exists
+ if (!fs.existsSync(mapDir)) {
+ fs.mkdirSync(mapDir, { recursive: true })
+ }
+
+ const mapInfo = {
+ totalEntries: keys.length,
+ chunkSize: mapChunkSize,
+ numChunks: Math.ceil(keys.length / mapChunkSize),
+ }
+ writeJSONfile(path.join(mapDir, 'info.json'), mapInfo)
+ savedFiles.push(path.join(mapDir, 'info.json'))
+
+ // Split into chunks, using the directory name as the file name prefix
+ const prefix = baseName // baseName is the prefix (e.g. astMap, symbolMap)
+ for (let i = 0; i < keys.length; i += mapChunkSize) {
+ const chunkKeys = keys.slice(i, i + mapChunkSize)
+ const chunk: any = {}
+ for (const key of chunkKeys) {
+ chunk[key] = data[key]
+ }
+ const chunkIndex = Math.floor(i / mapChunkSize)
+ const chunkPath = path.join(mapDir, `${prefix}-chunk${chunkIndex}.json`)
+ writeJSONfile(chunkPath, chunk)
+ savedFiles.push(chunkPath)
+ }
+ } else {
+ // Non-Map object: split by keys, chunkSize keys per part file
+ for (let i = 0; i < keys.length; i += chunkSize) {
+ const chunkKeys = keys.slice(i, i + chunkSize)
+ const chunk: any = {}
+ for (const key of chunkKeys) {
+ chunk[key] = data[key]
+ }
+ const chunkPath = `${basePath}.part${Math.floor(i / chunkSize)}.json`
+ writeJSONfile(chunkPath, chunk)
+ savedFiles.push(chunkPath)
+ }
+ }
+ } else {
+ // Any other type is written to a single file
+ writeJSONfile(`${basePath}.json`, data)
+ savedFiles.push(`${basePath}.json`)
+ }
+
+ return savedFiles
+}
+
+/**
+ * Deserialize an object, turning its `symbolMap` and `astMap` properties back into Map instances
+ * @param obj object to deserialize
+ * @param topScopeRef reference to the topScope object (used to restore the special marker)
+ * @param skipParentForAST when true, `parent` properties are left out of the result
+ * @returns the deserialized object
+ */
+function deserializeObject(obj: any, topScopeRef?: any, skipParentForAST?: boolean): any {
+ if (obj == null || typeof obj !== 'object') {
+ return obj
+ }
+
+ // Check for the special topScope marker
+ if (obj && typeof obj === 'object' && obj.__yasaTopScopeMarker === true) {
+ return topScopeRef || obj
+ }
+
+ // Arrays are deserialized element by element
+ if (Array.isArray(obj)) {
+ return obj.map((item) => deserializeObject(item, topScopeRef, skipParentForAST))
+ }
+
+ // A serialized Map looks like a plain object, so it needs special handling
+ // For symbolTable, specific property names decide whether a value is converted back to a Map
+ const result: any = {}
+
+ for (const key in obj) {
+ if (!Object.prototype.hasOwnProperty.call(obj, key)) {
+ continue
+ }
+
+ const value = obj[key]
+
+ // Special symbolTable property that has to be converted back to a Map
+ if (key === 'symbolMap') {
+ if (value && typeof value === 'object' && !Array.isArray(value)) {
+ // Convert to a Map
+ const map = new Map()
+ // Look for recorded constructor names (symbolMap only)
+ const constructorNames = key === 'symbolMap' ? value.__yasaConstructorNames : null
+ for (const mapKey in value) {
+ if (Object.prototype.hasOwnProperty.call(value, mapKey)) {
+ // Skip the constructor-name record itself
+ if (mapKey === '__yasaConstructorNames') {
+ continue
+ }
+ const mapValue = value[mapKey]
+ // Check for the special topScope marker
+ if (mapValue && typeof mapValue === 'object' && mapValue.__yasaTopScopeMarker === true) {
+ map.set(mapKey, topScopeRef || mapValue)
+ } else if (key === 'symbolMap' && constructorNames && constructorNames[mapKey]) {
+ // For Unit objects in symbolMap: create an instance from the constructor name, then copy all properties onto it
+ // First deserialize the object (to obtain all of its properties)
+ const deserializedUnit = deserializeObject(mapValue, topScopeRef, skipParentForAST)
+ const constructorName = constructorNames[mapKey]
+
+ // Create a new instance from the constructor name (minimal options, keeps the correct prototype chain)
+ let recreatedUnit: any
+ const minimalOpts = {
+ sid: deserializedUnit.sid || deserializedUnit._sid || '',
+ qid: deserializedUnit.qid || deserializedUnit._qid || '',
+ parent: deserializedUnit.parent || null,
+ }
+
+ switch (constructorName) {
+ case 'Scoped':
+ recreatedUnit = Scoped.fromOpts('', minimalOpts)
+ break
+ case 'ObjectValue':
+ recreatedUnit = ObjectValue.fromOpts('', minimalOpts)
+ break
+ case 'PrimitiveValue':
+ recreatedUnit = PrimitiveValue.fromOpts('', minimalOpts)
+ break
+ case 'SymbolValue':
+ recreatedUnit = SymbolValue.fromOpts('', minimalOpts)
+ break
+ case 'PackageValue':
+ recreatedUnit = PackageValue.fromOpts('', minimalOpts)
+ break
+ case 'UnionValue':
+ recreatedUnit = UnionValue.fromOpts('', minimalOpts)
+ break
+ case 'UnknownValue':
+ recreatedUnit = UnknownValue.fromOpts('', minimalOpts)
+ break
+ case 'UndefinedValue':
+ recreatedUnit = UndefinedValue.fromOpts('', minimalOpts)
+ break
+ case 'UninitializedValue':
+ recreatedUnit = UninitializedValue.fromOpts('', minimalOpts)
+ break
+ case 'FunctionValue':
+ recreatedUnit = FunctionValue.fromOpts('', minimalOpts)
+ break
+ case 'BVTValue':
+ recreatedUnit = BVTValue.fromOpts('', minimalOpts)
+ break
+ default:
+ // Unknown constructor: use the deserialized plain object directly
+ recreatedUnit = deserializedUnit
+ map.set(mapKey, recreatedUnit)
+ continue
+ }
+
+ // Copy every property from the deserialized object onto the new instance (Reflect.ownKeys covers all own keys)
+ const allKeys = Reflect.ownKeys(deserializedUnit)
+ for (const propKey of allKeys) {
+ if (typeof propKey === 'symbol') {
+ continue
+ }
+ const propKeyStr = propKey as string
+ // Skip properties that should not be copied directly (they are set up by the constructor)
+ if (propKeyStr === 'constructor' || propKeyStr === '__proto__') {
+ continue
+ }
+ try {
+ const descriptor = Object.getOwnPropertyDescriptor(deserializedUnit, propKeyStr)
+ if (descriptor) {
+ if ('value' in descriptor) {
+ // Data property: assign the value directly
+ ;(recreatedUnit as any)[propKeyStr] = descriptor.value
+ } else if ('get' in descriptor || 'set' in descriptor) {
+ // Getter/setter: try to copy the descriptor
+ try {
+ Object.defineProperty(recreatedUnit, propKeyStr, descriptor)
+ } catch (e) {
+ // If the descriptor cannot be copied, try to read and assign the value directly
+ try {
+ ;(recreatedUnit as any)[propKeyStr] = (deserializedUnit as any)[propKeyStr]
+ } catch (e2) {
+ // Ignore the error
+ }
+ }
+ }
+ } else {
+ // No descriptor: try a direct copy
+ try {
+ ;(recreatedUnit as any)[propKeyStr] = (deserializedUnit as any)[propKeyStr]
+ } catch (e) {
+ // Ignore the error
+ }
+ }
+ } catch (e) {
+ // A failed copy is not fatal: log a warning and continue
+ yasaWarning(`Failed to copy property ${propKeyStr} to recreated unit: ${e}`)
+ }
+ }
+
+ map.set(mapKey, recreatedUnit)
+ } else {
+ // Recursively deserialize the Unit object so that all properties (including astNodehash) are restored
+ map.set(mapKey, deserializeObject(mapValue, topScopeRef, skipParentForAST))
+ }
+ }
+ }
+ result[key] = map
+ } else {
+ result[key] = deserializeObject(value, topScopeRef, skipParentForAST)
+ }
+ } else if (key === 'astMap') {
+ // astManager's astMap has to be converted back to a Map
+ // Note: while deserializing AST nodes the parent property is skipped first and set in one pass later
+ if (value && typeof value === 'object' && !Array.isArray(value)) {
+ const map = new Map()
+ for (const mapKey in value) {
+ if (Object.prototype.hasOwnProperty.call(value, mapKey)) {
+ const mapValue = value[mapKey]
+ // Recursively deserialize the AST node (parent is skipped and set later)
+ const astNode = deserializeObject(mapValue, topScopeRef, true) // third argument: skip parent
+ // Remove any parent reference that may still exist (avoids pointing at the wrong object)
+ if (astNode && typeof astNode === 'object') {
+ delete astNode.parent
+ }
+ map.set(mapKey, astNode)
+ }
+ }
+ result[key] = map
+ } else {
+ result[key] = deserializeObject(value, topScopeRef)
+ }
+ } else {
+ // Every other property is deserialized recursively
+ // This covers all Unit object properties (e.g. astNodehash, declsNodehash, uuid, parent_uuid)
+ // A parent property is skipped when skipParentForAST is true
+ if (skipParentForAST && key === 'parent') {
+ // Skip parent; it is set in one pass later
+ continue
+ }
+ result[key] = deserializeObject(value, topScopeRef, skipParentForAST)
+ }
+ }
+
+ return result
+}
+
+/**
+ * Load data back from split cache files
+ * @param basePath base path (without extension) that the files were written under
+ * @returns the loaded data, or null when nothing could be loaded
+ */
+function loadFromSplit(basePath: string): any {
+ const baseDir = path.dirname(basePath)
+ const baseName = path.basename(basePath)
+ const mapDir = path.join(baseDir, baseName)
+
+ // Check for the sub-directory layout (Map data)
+ if (fs.existsSync(mapDir) && fs.statSync(mapDir).isDirectory()) {
+ // Check for info.json (marks a Map that was split into chunks)
+ const mapInfoPath = path.join(mapDir, 'info.json')
+ if (fs.existsSync(mapInfoPath)) {
+ // Chunked Map: load from multiple chunk files
+ const mapInfo = jsonfile.readFileSync(mapInfoPath)
+ const numChunks = mapInfo.numChunks || 0
+
+ // Load all chunks and merge them; files are looked up by the directory-name prefix
+ const result: any = {}
+ const prefix = baseName // baseName is the prefix (e.g. astMap, symbolMap)
+ for (let i = 0; i < numChunks; i++) {
+ const chunkPath = path.join(mapDir, `${prefix}-chunk${i}.json`)
+ if (fs.existsSync(chunkPath)) {
+ const chunkData = jsonfile.readFileSync(chunkPath)
+ Object.assign(result, chunkData)
+ }
+ }
+
+ return result
+ }
+ // No info.json: load from a single <baseName>.json file inside the directory
+ const singleFilePath = path.join(mapDir, `${baseName}.json`)
+ if (fs.existsSync(singleFilePath)) {
+ return jsonfile.readFileSync(singleFilePath)
+ }
+ // The directory exists but the file does not: return null
+ return null
+ }
+
+ // Collect all split files (part files)
+ const files: string[] = []
+ let partIndex = 0
+
+ // Probe part0, part1, ... until the first missing index
+ while (true) {
+ const partPath = `${basePath}.part${partIndex}.json`
+ if (fs.existsSync(partPath)) {
+ files.push(partPath)
+ partIndex++
+ } else {
+ break
+ }
+ }
+
+ // No part files: try to load a single file instead
+ if (files.length === 0) {
+ const singlePath = `${basePath}.json`
+ if (fs.existsSync(singlePath)) {
+ return jsonfile.readFileSync(singlePath)
+ }
+ return null
+ }
+
+ // Load every part file and merge the contents
+ const allData: any[] = []
+ for (const file of files) {
+ try {
+ const data = jsonfile.readFileSync(file)
+ if (Array.isArray(data)) {
+ allData.push(...data)
+ } else if (typeof data === 'object' && data !== null) {
+ allData.push(data)
+ }
+ } catch (err: any) {
+ yasaWarning(`Failed to load cache file ${file}: ${err.message}`)
+ }
+ }
+
+ if (allData.length === 0) {
+ return null
+ }
+
+ // Decide the original data type from the first part file
+ const firstFile = jsonfile.readFileSync(files[0])
+ if (Array.isArray(firstFile)) {
+ return allData
+ }
+ if (typeof firstFile === 'object' && firstFile !== null) {
+ // Merge the objects into one
+ const result: any = {}
+ for (const obj of allData) {
+ Object.assign(result, obj)
+ }
+ return result
+ }
+
+ return allData
+}
+
+/**
+ * 保存分析器缓存
+ * @param analyzer 分析器实例
+ * @param cacheId 缓存 ID(用于区分不同的缓存,如基于源路径的哈希)
+ */
+export function saveAnalyzerCache(analyzer: any, cacheId?: string): void {
+ try {
+ const cacheDir = getCacheDir()
+ if (!fs.existsSync(cacheDir)) {
+ fs.mkdirSync(cacheDir, { recursive: true })
+ }
+
+ // 如果没有提供 cacheId,使用默认值
+ const id = cacheId || 'default'
+ // 创建以 cacheId 命名的文件夹
+ const cacheFolder = path.join(cacheDir, id)
+ if (!fs.existsSync(cacheFolder)) {
+ fs.mkdirSync(cacheFolder, { recursive: true })
+ }
+ const cacheBasePath = cacheFolder
+
+ yasaLog(`[SAVE CACHE]Saving analyzer cache to ${cacheBasePath}...`)
+
+ // 获取 topScope 的原始对象(绕过 Proxy)
+ const topScopeTarget = (analyzer.topScope as any)[RAW_TARGET] || analyzer.topScope
+
+ // a. fileManager
+ if (!Config.miniSaveContextEnvironment) {
+ if (analyzer.fileManager) {
+ const fileManagerData = serializeObject(analyzer.fileManager)
+ splitAndSave(fileManagerData, path.join(cacheBasePath, 'fileManager'), 1000, true)
+ yasaLog('[SAVE CACHE]Saved fileManager')
+ }
+ }
+
+ // b. symbolTable
+ if (analyzer.symbolTable) {
+ const symbolTable = analyzer.symbolTable // 特殊处理 symbolMap:根据 Config.miniSaveContextEnvironment 决定是否简化
+ const symbolMap = symbolTable.getMap()
+ if (symbolMap instanceof Map) {
+ if (Config.miniSaveContextEnvironment) {
+ // 简化模式:根据节点类型过滤属性
+ const astManager = analyzer.astManager
+ const astMap = astManager.getMap()
+ if (astMap instanceof Map) {
+ const keysToDelete: string[] = []
+ for (const [nodehash, node] of astMap.entries()) {
+ if (node && typeof node === 'object' && node.type) {
+ if (node.type === 'VariableDeclaration') {
+ // 只保留 type、id、varType 属性
+ const allowedProps = ['type', 'id', 'varType', '_meta']
+ const allKeys = Reflect.ownKeys(node)
+ for (const prop of allKeys) {
+ if (typeof prop === 'string' && !allowedProps.includes(prop)) {
+ try {
+ delete (node as any)[prop]
+ } catch (e) {
+ // 忽略删除错误
+ }
+ }
+ }
+ } else if (node.type === 'FunctionDefinition') {
+ // 只保留 type、id、parameters、returnType 属性
+ const allowedProps = ['type', 'id', 'parameters', 'returnType', '_meta']
+ const allKeys = Reflect.ownKeys(node)
+ for (const prop of allKeys) {
+ if (typeof prop === 'string' && !allowedProps.includes(prop)) {
+ try {
+ delete (node as any)[prop]
+ } catch (e) {
+ // 忽略删除错误
+ }
+ }
+ }
+ } else {
+ // 其他类型,直接从 astMap 删除
+ keysToDelete.push(nodehash)
+ }
+ }
+ }
+ // 删除其他类型的节点
+ for (const nodehash of keysToDelete) {
+ astManager.astMap.delete(nodehash)
+ }
+ }
+
+ // 简化模式:只保留指定的属性
+ const allowedProps = [
+ 'vtype',
+ '_field',
+ '_sid',
+ '_qid',
+ 'uuid',
+ '_ast',
+ '_parentRef',
+ '_thisRef',
+ '_superRef',
+ '_packageScopeRef',
+ 'overloaded',
+ '_scopeCtx',
+ 'rtype',
+ ]
+
+ // 直接遍历 symbolMap,在原对象上删除不需要的属性
+ for (const [key, value] of symbolTable.symbolMap.entries()) {
+ if (value && typeof value === 'object' && value._ast?._nodeRef) {
+ const hash = value._ast._nodeRef.hash
+ if (astManager && astManager.astMap instanceof Map) {
+ if (!astManager.astMap.has(hash)) {
+ value.ast = null
+ }
+ } else {
+ value.ast = null
+ }
+ }
+ // 检查是否是 topScope 对象(通过 sid 和 qid 判断)
+ if (value != null && typeof value === 'object' && value.sid !== '' && value.qid !== '') {
+ // 获取所有属性键
+ const allKeys = Reflect.ownKeys(value)
+ // 删除不在允许列表中的属性
+ for (const prop of allKeys) {
+ if (typeof prop === 'string' && !allowedProps.includes(prop)) {
+ try {
+ delete (value as any)[prop]
+ } catch (e) {
+ // 忽略删除错误(可能是不可配置的属性)
+ }
+ }
+ if (prop === 'rtype') {
+ // 只保留 rtype 下的 type 和 name 字段,其他字段去掉
+ const rtype = (value as { rtype?: RType }).rtype
+ if (rtype && typeof rtype === 'object') {
+ const filteredRtype: Partial = {}
+ if ('type' in rtype) {
+ filteredRtype.type = rtype.type
+ }
+ if ('definiteType' in rtype) {
+ filteredRtype.definiteType = { type: rtype.definiteType?.type, name: rtype.definiteType?.name }
+ }
+ if ('vagueType' in rtype) {
+ filteredRtype.vagueType = rtype.vagueType
+ }
+ ;(value as { rtype: RType }).rtype = filteredRtype as RType
+ }
+ }
+ }
+ }
+ }
+ } else {
+ for (const [key, value] of symbolTable.symbolMap.entries()) {
+ // 检查是否是 topScope 对象(通过 sid 和 qid 判断)
+ if (value != null && typeof value === 'object' && value.sid !== '' && value.qid !== '') {
+ // 获取所有属性键
+ const allKeys = Reflect.ownKeys(value)
+ // 删除不在允许列表中的属性
+ for (const prop of allKeys) {
+ if (prop === 'rtype') {
+ // 只保留 rtype 下的 type 和 name 字段,其他字段去掉
+ const rtype = (value as { rtype?: RType }).rtype
+ if (rtype && typeof rtype === 'object') {
+ const filteredRtype: Partial = {}
+ if ('type' in rtype) {
+ filteredRtype.type = rtype.type
+ }
+ if ('definiteType' in rtype) {
+ filteredRtype.definiteType = { type: rtype.definiteType?.type, name: rtype.definiteType?.name }
+ }
+ if ('vagueType' in rtype) {
+ filteredRtype.vagueType = rtype.vagueType
+ }
+ ;(value as { rtype: RType }).rtype = filteredRtype as RType
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ const symbolTableData = serializeObject(analyzer.symbolTable)
+
+ // 创建 symbolTable 文件夹
+ const symbolTableDir = path.join(cacheBasePath, 'symbolTable')
+ if (!fs.existsSync(symbolTableDir)) {
+ fs.mkdirSync(symbolTableDir, { recursive: true })
+ }
+ // 特殊处理各个 Map 属性:按每 1000 个条目分割,使用子文件夹结构
+ for (const key in symbolTableData) {
+ if (Config.miniSaveContextEnvironment && key.includes('funcSymbolTable')) {
+ continue
+ }
+ if (key === '__yasaConstructorNames') {
+ continue
+ }
+ const value = symbolTableData[key]
+ if (value && typeof value === 'object') {
+ splitAndSave(value, path.join(symbolTableDir, key), 1000, true)
+ } else if (typeof value !== 'function') {
+ // 基本类型:直接保存
+ writeJSONfile(path.join(symbolTableDir, `${key}.json`), value)
+ }
+ }
+
+ yasaLog('[SAVE CACHE]Saved symbolTable')
+ }
+
+ // c. astManager (记录 parent 关系,但不序列化 parent 对象本身)
+ if (analyzer.astManager) {
+ // 创建 parent 关系映射
+ const parentRelations = new Map()
+ const astManagerData = serializeObject(analyzer.astManager, true, 100, 0, new WeakSet(), parentRelations) // skipParent = true, 但会记录关系
+
+ // 创建 astManager 文件夹
+ const astManagerDir = path.join(cacheBasePath, 'astManager')
+ if (!fs.existsSync(astManagerDir)) {
+ fs.mkdirSync(astManagerDir, { recursive: true })
+ }
+
+ // 特殊处理 astMap:按每 1000 个条目分割,使用子文件夹结构
+ if (astManagerData.astMap && typeof astManagerData.astMap === 'object') {
+ splitAndSave(astManagerData.astMap, path.join(astManagerDir, 'astMap'), 1000, true)
+ yasaLog(`[SAVE CACHE]Saved astManager.astMap (${Object.keys(astManagerData.astMap).length} entries)`)
+ }
+
+ // 保存 astManager 的其他属性(除了 astMap)
+ for (const key in astManagerData) {
+ if (key !== 'astMap' && key !== '__yasaConstructorNames') {
+ const value = astManagerData[key]
+ if (value != null) {
+ splitAndSave(value, path.join(astManagerDir, key))
+ }
+ }
+ }
+
+ // 保存 parent 关系映射
+ if (!Config.miniSaveContextEnvironment) {
+ if (parentRelations.size > 0) {
+ const parentRelationsObj: any = {}
+ for (const [nodehash, parentNodehash] of parentRelations.entries()) {
+ parentRelationsObj[nodehash] = parentNodehash
+ }
+ splitAndSave(parentRelationsObj, path.join(astManagerDir, 'parentRelations'))
+ yasaLog(`[SAVE CACHE]Saved astManager parent relations (${parentRelations.size} relations)`)
+ }
+ }
+ yasaLog('[SAVE CACHE]Saved astManager (parent relations recorded)')
+ }
+
+ // d. funcSymbolTable
+ if (!Config.miniSaveContextEnvironment) {
+ if (analyzer.funcSymbolTable) {
+ // 重要:funcSymbolTable 是一个 Proxy,需要绕过 Proxy 直接访问原始对象(target)
+ // 从 symbolTable 获取 funcSymbolTableTarget,这样可以直接获取 UUID 而不是符号值对象
+ const { funcSymbolTableTarget } = analyzer.symbolTable as any
+ if (funcSymbolTableTarget) {
+ // 直接序列化原始对象,其中存储的是 UUID
+ const funcSymbolTableData = serializeObject(funcSymbolTableTarget)
+ splitAndSave(funcSymbolTableData, path.join(cacheBasePath, 'funcSymbolTable'), 1000, true)
+ yasaLog('[SAVE CACHE]Saved funcSymbolTable')
+ } else {
+ // 如果没有 funcSymbolTableTarget,尝试直接序列化(可能不是 Proxy)
+ const funcSymbolTableData = serializeObject(analyzer.funcSymbolTable)
+ splitAndSave(funcSymbolTableData, path.join(cacheBasePath, 'funcSymbolTable'), 1000, true)
+ yasaLog('[SAVE CACHE]Saved funcSymbolTable')
+ }
+ }
+ }
+
+ // e. statistics
+ if (!Config.miniSaveContextEnvironment) {
+ if (analyzer.statistics) {
+ writeJSONfile(path.join(cacheBasePath, 'statistics.json'), analyzer.statistics)
+ yasaLog('[SAVE CACHE]Saved statistics')
+ }
+ }
+
+ // f. ainfo
+ if (!Config.miniSaveContextEnvironment) {
+ if (analyzer.ainfo) {
+ // 在序列化前,记录 callgraph 是否为 Graph 实例
+ const ainfoData = serializeObject(analyzer.ainfo)
+ // 如果 callgraph 是 Graph 实例,记录其类型
+ if (analyzer.ainfo.callgraph && analyzer.ainfo.callgraph.constructor?.name === 'GraphClass') {
+ ainfoData.__yasaCallgraphIsGraph = true
+ }
+ splitAndSave(ainfoData, path.join(cacheBasePath, 'ainfo'), 1000, true)
+ yasaLog('[SAVE CACHE]Saved ainfo')
+ }
+ }
+
+ // g. sourceCodeCache
+ if (!Config.miniSaveContextEnvironment) {
+ if (analyzer.sourceCodeCache) {
+ const sourceCodeCacheData = serializeObject(analyzer.sourceCodeCache)
+ splitAndSave(sourceCodeCacheData, path.join(cacheBasePath, 'sourceCodeCache'), 1000, true)
+ yasaLog('[SAVE CACHE]Saved sourceCodeCache')
+ }
+ }
+
+ // h. classMap
+ if (analyzer.classMap) {
+ const classMapData = serializeObject(analyzer.classMap)
+ splitAndSave(classMapData, path.join(cacheBasePath, 'classMap'), 1000, true)
+ yasaLog('[SAVE CACHE]Saved classMap')
+ }
+
+ // 创建 topScope 文件夹
+ const topScopeDir = path.join(cacheBasePath, 'topScope')
+ if (!fs.existsSync(topScopeDir)) {
+ fs.mkdirSync(topScopeDir, { recursive: true })
+ }
+
+ // i. topScope.context.modules (UUID for backward compatibility)
+ const modulesUuid = analyzer.topScope.context?.modules?.uuid
+ if (modulesUuid !== undefined) {
+ writeJSONfile(path.join(topScopeDir, 'moduleManagerUuid.json'), {
+ __moduleManagerUuid: modulesUuid,
+ })
+ yasaLog('[SAVE CACHE]Saved topScope.context.modules (moduleManagerUuid)')
+ }
+
+ // j. topScope.context.packages (UUID for backward compatibility)
+ const packagesUuid = analyzer.topScope.context?.packages?.uuid
+ if (packagesUuid !== undefined) {
+ writeJSONfile(path.join(topScopeDir, 'packageManagerUuid.json'), {
+ __packageManagerUuid: packagesUuid,
+ })
+ yasaLog('[SAVE CACHE]Saved topScope.context.packages (packageManagerUuid)')
+ }
+
+ // k. topScope.value(通过 getter 访问,EntityValue 返回 _members.getProxy())
+ const topScopeField = topScopeTarget.value
+ if (topScopeField !== undefined) {
+ const fieldData = serializeObject(topScopeField)
+ writeJSONfile(path.join(topScopeDir, 'field.json'), { _field: fieldData })
+ yasaLog('[SAVE CACHE]Saved topScope.value')
+ }
+
+ // l. topScope.uuid
+ if (topScopeTarget.uuid !== undefined) {
+ writeJSONfile(path.join(topScopeDir, 'uuid.json'), { uuid: topScopeTarget.uuid })
+ yasaLog('[SAVE CACHE]Saved topScope.uuid')
+ }
+
+ // m. topScope 的其他所有属性
+ // 获取 topScope 的所有属性,排除已经单独保存的属性
+ const excludedProps = new Set([
+ 'context',
+ '_field',
+ 'uuid',
+ 'funcSymbolTable',
+ 'symbolTable',
+ 'parent',
+ ])
+ const topScopeOtherProps: any = {}
+ const topScopePropTypes: any = {} // 记录每个属性的类型
+ const allTopScopeKeys = Reflect.ownKeys(topScopeTarget)
+ for (const key of allTopScopeKeys) {
+ if (typeof key === 'symbol') {
+ continue
+ }
+ const keyStr = key as string
+ // 跳过内部属性和已单独保存的属性
+ if (keyStr.startsWith('__yasa')) {
+ if (!names.includes(keyStr)) {
+ names.push(keyStr)
+ }
+ continue
+ }
+ if (excludedProps.has(keyStr)) {
+ continue
+ }
+ try {
+ const value = Reflect.get(topScopeTarget, keyStr)
+ // 记录属性类型
+ if (value instanceof Map) {
+ topScopePropTypes[keyStr] = 'Map'
+ } else if (value instanceof Set) {
+ topScopePropTypes[keyStr] = 'Set'
+ } else if (Array.isArray(value)) {
+ topScopePropTypes[keyStr] = 'Array'
+ } else if (value && typeof value === 'object' && value.constructor?.name) {
+ // 记录其他对象类型的构造函数名称
+ topScopePropTypes[keyStr] = value.constructor.name
+ }
+ // 序列化属性值
+ topScopeOtherProps[keyStr] = serializeObject(value)
+ } catch (e) {
+ yasaWarning(`Failed to serialize topScope property ${keyStr}: ${e}`)
+ }
+ }
+ // 如果有其他属性,保存它们
+ if (Object.keys(topScopeOtherProps).length > 0) {
+ // 将类型信息添加到数据中
+ topScopeOtherProps.__yasaPropTypes = topScopePropTypes
+ splitAndSave(topScopeOtherProps, path.join(topScopeDir, 'otherProps'))
+ yasaLog(`[SAVE CACHE]Saved topScope other properties (${Object.keys(topScopeOtherProps).length - 1} properties)`)
+ }
+
+ // 保存 checkerManager.registered_checkers 中每个 checker 的 sourceScope
+ if (analyzer.checkerManager && analyzer.checkerManager.registered_checkers) {
+ const checkerSourceScopes: any = {}
+ for (const checkerName in analyzer.checkerManager.registered_checkers) {
+ if (Object.prototype.hasOwnProperty.call(analyzer.checkerManager.registered_checkers, checkerName)) {
+ const checker = analyzer.checkerManager.registered_checkers[checkerName]
+ if (checker && checker.sourceScope) {
+ // 序列化 sourceScope
+ checkerSourceScopes[checkerName] = serializeObject(checker.sourceScope)
+ }
+ }
+ }
+ if (Object.keys(checkerSourceScopes).length > 0) {
+ writeJSONfile(path.join(cacheBasePath, 'checkerSourceScopes.json'), checkerSourceScopes)
+ yasaLog(`[SAVE CACHE]Saved checker sourceScopes (${Object.keys(checkerSourceScopes).length} checkers)`)
+ }
+ }
+
+ // 保存缓存元数据
+ const metadata = {
+ cacheId: id,
+ timestamp: new Date().toISOString(),
+ version: '1.0',
+ }
+ writeJSONfile(path.join(cacheBasePath, 'metadata.json'), metadata)
+ yasaLog(`[SAVE CACHE]Analyzer cache saved successfully to ${cacheBasePath}`)
+ } catch (err: any) {
+ yasaError(`[SAVE CACHE]Failed to save analyzer cache: ${err.message}`)
+ throw err
+ }
+}
+
+/**
+ * Restores a previously saved analyzer state from the on-disk cache.
+ *
+ * The cache folder is resolved in this order: the explicit `cacheId`, then a folder matching
+ * `sourcePath` (via `findCacheFolder`), then the folder named `default`. The folder must contain
+ * `metadata.json`, otherwise nothing is loaded.
+ *
+ * Every piece is optional and is skipped when its file or folder is absent: fileManager, symbolTable,
+ * astManager (including parent links), funcSymbolTable, statistics, ainfo, sourceCodeCache, classMap,
+ * the topScope fields and the `sourceScope` of every registered checker.
+ *
+ * @param analyzer analyzer instance; it is mutated in place
+ * @param cacheId optional cache folder name below the cache directory
+ * @param sourcePath optional source path used to look the folder up when `cacheId` is missing or not found
+ * @returns `true` when the cache was loaded; `false` when no usable cache exists or loading failed
+ *          (the failure is logged, never rethrown)
+ */
+export function loadAnalyzerCache(analyzer: any, cacheId?: string, sourcePath?: string): boolean {
+  const hasOwn = (obj: any, key: string): boolean => Object.prototype.hasOwnProperty.call(obj, key)
+
+  // Copies the own enumerable keys of a plain object into a new Map.
+  const toMap = (obj: any): Map<string, any> => {
+    const map = new Map<string, any>()
+    for (const key of Object.keys(obj)) {
+      map.set(key, obj[key])
+    }
+    return map
+  }
+
+  // Same truthiness test the loader applies everywhere: a non-null object with a truthy `type`.
+  const isAstLike = (candidate: any): boolean =>
+    Boolean(candidate) && typeof candidate === 'object' && Boolean(candidate.type)
+
+  // Reads a "one entry per property" cache directory: sub-folders are loaded through loadFromSplit,
+  // plain *.json files are read directly, info.json is ignored.
+  const loadPropertyDir = (dir: string, label: string): any => {
+    const data: any = {}
+    for (const file of fs.readdirSync(dir)) {
+      if (file === 'info.json') {
+        continue
+      }
+      const filePath = path.join(dir, file)
+      if (fs.statSync(filePath).isDirectory()) {
+        const propData = loadFromSplit(filePath)
+        if (propData !== null) {
+          data[file] = propData
+        }
+      } else if (file.endsWith('.json')) {
+        // NOTE(review): several `<name>.partN.json` files map to the same property name, so the last
+        // one read wins — confirm the writer never emits part files at this level.
+        const propName = file.replace(/\.json$/, '').replace(/\.part\d+$/, '')
+        try {
+          data[propName] = jsonfile.readFileSync(filePath)
+        } catch (err: any) {
+          yasaWarning(`Failed to load ${label} property ${propName}: ${err.message}`)
+        }
+      }
+    }
+    return data
+  }
+
+  try {
+    const cacheDir = getCacheDir()
+    let cacheFolder: string | null = null
+
+    // An explicit cacheId wins as long as its folder exists
+    if (cacheId) {
+      cacheFolder = path.join(cacheDir, cacheId)
+      if (!fs.existsSync(cacheFolder) || !fs.statSync(cacheFolder).isDirectory()) {
+        yasaLog(`[LOAD CACHE]Cache folder not found at ${cacheFolder}`)
+        cacheFolder = null
+      }
+    }
+
+    // Otherwise look the folder up by repository name and hash prefix
+    if (!cacheFolder && sourcePath) {
+      cacheFolder = findCacheFolder(sourcePath)
+      if (cacheFolder) {
+        yasaLog(`[LOAD CACHE]Found cache folder by sourcePath: ${cacheFolder}`)
+      }
+    }
+
+    // Last resort: the default folder
+    if (!cacheFolder) {
+      cacheFolder = path.join(cacheDir, 'default')
+    }
+
+    if (!fs.existsSync(cacheFolder) || !fs.statSync(cacheFolder).isDirectory()) {
+      yasaLog(`[LOAD CACHE]Cache folder not found at ${cacheFolder}`)
+      return false
+    }
+
+    const cacheBasePath = cacheFolder
+
+    // A cache folder without metadata.json is not treated as a cache
+    const metadataPath = path.join(cacheBasePath, 'metadata.json')
+    if (!fs.existsSync(metadataPath)) {
+      yasaLog(`[LOAD CACHE]Cache metadata not found at ${metadataPath}`)
+      return false
+    }
+
+    yasaLog(`[LOAD CACHE]Loading analyzer cache from ${cacheBasePath}...`)
+
+    // Raw topScope object (bypassing the Proxy when one is present)
+    const topScopeTarget = (analyzer.topScope as any)[RAW_TARGET] || analyzer.topScope
+
+    // a. fileManager
+    const fileManagerData = loadFromSplit(path.join(cacheBasePath, 'fileManager'))
+    if (fileManagerData) {
+      analyzer.fileManager = fileManagerData
+      yasaLog('[LOAD CACHE]Loaded fileManager')
+    }
+
+    // b. symbolTable: per-property directory when it exists, otherwise the single split layout
+    const symbolTableDir = path.join(cacheBasePath, 'symbolTable')
+    const symbolTableData: any =
+      fs.existsSync(symbolTableDir) && fs.statSync(symbolTableDir).isDirectory()
+        ? loadPropertyDir(symbolTableDir, 'symbolTable')
+        : loadFromSplit(symbolTableDir)
+
+    if (symbolTableData && Object.keys(symbolTableData).length > 0) {
+      // topScope is passed along so that deserializeObject can resolve its special markers
+      const deserializedData = deserializeObject(symbolTableData, analyzer.topScope)
+      if (deserializedData.symbolMap instanceof Map) {
+        const { symbolMap } = deserializedData
+        // Entries that still carry the topScope marker are replaced by the live topScope
+        for (const [key, value] of symbolMap.entries()) {
+          if (value && typeof value === 'object' && (value as any).__yasaTopScopeMarker === true) {
+            symbolMap.set(key, analyzer.topScope)
+          }
+        }
+        ;(analyzer.symbolTable as any).symbolMap = symbolMap
+      }
+
+      // Every other deserialized property is copied onto the existing symbolTable instance
+      for (const key of Object.keys(deserializedData)) {
+        if (key !== 'symbolMap') {
+          ;(analyzer.symbolTable as any)[key] = deserializedData[key]
+        }
+      }
+      yasaLog('[LOAD CACHE]Loaded symbolTable')
+    }
+
+    // c. astManager: same two layouts as symbolTable
+    const astManagerDir = path.join(cacheBasePath, 'astManager')
+    const astManagerData: any =
+      fs.existsSync(astManagerDir) && fs.statSync(astManagerDir).isDirectory()
+        ? loadPropertyDir(astManagerDir, 'astManager')
+        : loadFromSplit(astManagerDir)
+
+    if (astManagerData) {
+      // Third argument = skipParentForAST: parent links are rebuilt below from parentRelations
+      const deserializedAstManager = deserializeObject(astManagerData, analyzer.topScope, true)
+      Object.assign(analyzer.astManager, deserializedAstManager)
+
+      const { astMap } = analyzer.astManager as any
+      if (astMap instanceof Map) {
+        // The astMap instance registered under the child's nodehash, or the child itself when there is none
+        const canonicalNode = (child: any): any => {
+          const nodehash = child._meta?.nodehash
+          return (nodehash && astMap.get(nodehash)) || child
+        }
+
+        // Step 1: make every child reference point at the astMap instance with the same nodehash.
+        // One visited set is shared by all roots so that each node is walked once, instead of once
+        // per ancestor that also lives in astMap.
+        const relinked = new WeakSet<object>()
+        const relinkChildren = (node: any): void => {
+          if (!node || typeof node !== 'object' || relinked.has(node)) {
+            return
+          }
+          relinked.add(node)
+
+          for (const key in node) {
+            // parent is handled separately in step 3
+            if (!hasOwn(node, key) || key === 'parent') {
+              continue
+            }
+            const value = node[key]
+            if (Array.isArray(value)) {
+              for (let i = 0; i < value.length; i++) {
+                const item = value[i]
+                if (isAstLike(item)) {
+                  const resolved = canonicalNode(item)
+                  if (resolved !== item) {
+                    value[i] = resolved
+                  }
+                  relinkChildren(resolved)
+                }
+              }
+            } else if (isAstLike(value)) {
+              const resolved = canonicalNode(value)
+              if (resolved !== value) {
+                node[key] = resolved
+              }
+              relinkChildren(resolved)
+            }
+          }
+        }
+
+        for (const astNode of astMap.values()) {
+          if (isAstLike(astNode)) {
+            relinkChildren(astNode)
+          }
+        }
+
+        // Step 2: drop whatever parent references survived deserialization (again one shared visited set)
+        const stripped = new WeakSet<object>()
+        const stripParent = (node: any): void => {
+          if (!node || typeof node !== 'object' || stripped.has(node)) {
+            return
+          }
+          stripped.add(node)
+
+          if (node.parent) {
+            delete node.parent
+          }
+
+          for (const key in node) {
+            if (!hasOwn(node, key)) {
+              continue
+            }
+            const value = node[key]
+            if (Array.isArray(value)) {
+              for (const item of value) {
+                if (isAstLike(item)) {
+                  stripParent(item)
+                }
+              }
+            } else if (isAstLike(value)) {
+              stripParent(value)
+            }
+          }
+        }
+
+        for (const astNode of astMap.values()) {
+          if (isAstLike(astNode)) {
+            stripParent(astNode)
+          }
+        }
+
+        // Step 3: rebuild parent links from the saved nodehash -> parent nodehash table
+        const parentRelationsData = loadFromSplit(path.join(astManagerDir, 'parentRelations'))
+        if (parentRelationsData) {
+          let restoredCount = 0
+          let missingNodeCount = 0
+          let missingParentCount = 0
+          for (const nodehash of Object.keys(parentRelationsData)) {
+            const astNode = astMap.get(nodehash)
+            const parentNode = astMap.get(parentRelationsData[nodehash])
+            if (!astNode) {
+              missingNodeCount++
+              continue
+            }
+            if (!parentNode) {
+              missingParentCount++
+              continue
+            }
+            // Both ends are astMap instances, so parent always points into astMap
+            astNode.parent = parentNode
+            restoredCount++
+          }
+          yasaLog(
+            `[LOAD CACHE]Restored ${restoredCount} parent relations in astManager (missing nodes: ${missingNodeCount}, missing parents: ${missingParentCount})`
+          )
+        } else {
+          yasaWarning('[LOAD CACHE]parentRelationsData not found')
+        }
+      } else {
+        yasaWarning('[LOAD CACHE]astManager.astMap is not a Map instance')
+      }
+
+      yasaLog('[LOAD CACHE]Loaded astManager')
+    }
+
+    // d. funcSymbolTable: the loaded data becomes the Proxy target; the Proxy translates between
+    //    stored UUID strings and symbol-value objects
+    const funcSymbolTableData = loadFromSplit(path.join(cacheBasePath, 'funcSymbolTable'))
+    if (funcSymbolTableData) {
+      const funcSymbolTableTarget = funcSymbolTableData
+      analyzer.funcSymbolTable = new Proxy(funcSymbolTableTarget, {
+        get: (target, prop: string | symbol) => {
+          // Symbol keys (e.g. Symbol.iterator) are passed through untouched
+          if (typeof prop === 'symbol') {
+            return (target as any)[prop]
+          }
+          // So are the object's own basics
+          if (prop === 'toString' || prop === 'valueOf' || prop === 'constructor') {
+            return (target as any)[prop]
+          }
+          const value = (target as any)[prop]
+          // A stored "symuuid_..." string is resolved through the symbol table; null when unknown
+          if (value && typeof value === 'string' && value.startsWith('symuuid_')) {
+            const unit = analyzer.symbolTable.get(value)
+            return unit || null
+          }
+          // Anything else (undefined, null, other values) is returned as stored
+          return value
+        },
+        set: (target, prop: string, value: any) => {
+          // Symbol-value objects are registered and only their UUID is stored
+          if (value && typeof value === 'object' && value.vtype && value.qid) {
+            const uuid = analyzer.symbolTable.register(value)
+            ;(target as any)[prop] = uuid
+            // Record the back reference when the symbol table supports it
+            ;(analyzer.symbolTable as any).addFuncSymbolTableRef?.(uuid, prop)
+          } else {
+            ;(target as any)[prop] = value
+          }
+          return true
+        },
+        deleteProperty: (target, prop: string) => {
+          delete (target as any)[prop]
+          return true
+        },
+        ownKeys: (target) => {
+          return Reflect.ownKeys(target)
+        },
+        has: (target, prop) => {
+          return prop in target
+        },
+      })
+      yasaLog('[LOAD CACHE]Loaded funcSymbolTable (Proxy restored)')
+    }
+
+    // e. statistics
+    const statisticsPath = path.join(cacheBasePath, 'statistics.json')
+    if (fs.existsSync(statisticsPath)) {
+      analyzer.statistics = jsonfile.readFileSync(statisticsPath)
+      yasaLog('[LOAD CACHE]Loaded statistics')
+    }
+
+    // f. ainfo
+    const ainfoData = loadFromSplit(path.join(cacheBasePath, 'ainfo'))
+    if (ainfoData) {
+      // The saver sets __yasaCallgraphIsGraph when callgraph was a Graph instance; rebuild it then
+      const rebuildCallgraph = Boolean(ainfoData.__yasaCallgraphIsGraph && ainfoData.callgraph)
+      if (rebuildCallgraph) {
+        const callgraph = new Graph()
+        if (ainfoData.callgraph.nodes && typeof ainfoData.callgraph.nodes === 'object') {
+          callgraph.nodes = toMap(ainfoData.callgraph.nodes)
+        }
+        if (ainfoData.callgraph.edges && typeof ainfoData.callgraph.edges === 'object') {
+          callgraph.edges = toMap(ainfoData.callgraph.edges)
+        }
+        ainfoData.callgraph = callgraph
+        // The marker only exists for the cache format
+        delete ainfoData.__yasaCallgraphIsGraph
+      }
+      analyzer.ainfo = ainfoData
+      // Only claim the Graph restoration when it actually happened
+      yasaLog(
+        rebuildCallgraph
+          ? '[LOAD CACHE]Loaded ainfo (callgraph restored as Graph instance)'
+          : '[LOAD CACHE]Loaded ainfo'
+      )
+    }
+
+    // g. sourceCodeCache: plain object -> Map (anything else becomes an empty Map)
+    const sourceCodeCacheData = loadFromSplit(path.join(cacheBasePath, 'sourceCodeCache'))
+    if (sourceCodeCacheData) {
+      analyzer.sourceCodeCache =
+        typeof sourceCodeCacheData === 'object' && !Array.isArray(sourceCodeCacheData)
+          ? toMap(sourceCodeCacheData)
+          : new Map()
+      // Point the source-line module at this analyzer
+      const SourceLine = require('./source-line')
+      SourceLine.setGlobalAnalyzer(analyzer)
+      yasaLog('[LOAD CACHE]Loaded sourceCodeCache')
+    }
+
+    // h. classMap: plain object -> Map (anything else becomes an empty Map)
+    const classMapData = loadFromSplit(path.join(cacheBasePath, 'classMap'))
+    if (classMapData) {
+      analyzer.classMap =
+        typeof classMapData === 'object' && !Array.isArray(classMapData) ? toMap(classMapData) : new Map()
+      yasaLog('[LOAD CACHE]Loaded classMap')
+    }
+
+    // i. topScope.context.modules (stored as a UUID, resolved through the symbol table)
+    const moduleManagerUuidPath = path.join(cacheBasePath, 'topScope', 'moduleManagerUuid.json')
+    if (fs.existsSync(moduleManagerUuidPath)) {
+      const moduleManagerUuidData = jsonfile.readFileSync(moduleManagerUuidPath)
+      if (moduleManagerUuidData.__moduleManagerUuid !== undefined) {
+        if (!analyzer.topScope.context) {
+          analyzer.topScope.context = new AnalysisContext()
+        }
+        analyzer.topScope.context.modules = analyzer.symbolTable.get(moduleManagerUuidData.__moduleManagerUuid)
+        yasaLog('[LOAD CACHE]Loaded topScope.context.modules')
+      }
+    }
+
+    // j. topScope.context.packages (stored as a UUID, resolved through the symbol table)
+    const packageManagerUuidPath = path.join(cacheBasePath, 'topScope', 'packageManagerUuid.json')
+    if (fs.existsSync(packageManagerUuidPath)) {
+      const packageManagerUuidData = jsonfile.readFileSync(packageManagerUuidPath)
+      if (packageManagerUuidData.__packageManagerUuid !== undefined) {
+        if (!analyzer.topScope.context) {
+          analyzer.topScope.context = new AnalysisContext()
+        }
+        analyzer.topScope.context.packages = analyzer.symbolTable.get(packageManagerUuidData.__packageManagerUuid)
+        yasaLog('[LOAD CACHE]Loaded topScope.context.packages')
+      }
+    }
+
+    // k. topScope.value (the JSON key is still `_field`)
+    // NOTE(review): the value is assigned exactly as read from JSON, without deserializeObject —
+    // confirm this is intended, since the saver writes it through serializeObject.
+    const fieldPath = path.join(cacheBasePath, 'topScope', 'field.json')
+    if (fs.existsSync(fieldPath)) {
+      const fieldData = jsonfile.readFileSync(fieldPath)
+      if (fieldData._field !== undefined) {
+        topScopeTarget.value = fieldData._field
+        yasaLog('[LOAD CACHE]Loaded topScope.value')
+      }
+    }
+
+    // l. topScope.uuid
+    const uuidPath = path.join(cacheBasePath, 'topScope', 'uuid.json')
+    if (fs.existsSync(uuidPath)) {
+      const uuidData = jsonfile.readFileSync(uuidPath)
+      if (uuidData.uuid !== undefined) {
+        topScopeTarget.uuid = uuidData.uuid
+        yasaLog('[LOAD CACHE]Loaded topScope.uuid')
+      }
+    }
+
+    // m. every other topScope property
+    const topScopeOtherPropsData = loadFromSplit(path.join(cacheBasePath, 'topScope', 'otherProps'))
+    if (topScopeOtherPropsData) {
+      // __yasaPropTypes maps a property name to 'Map', 'Set', 'Array' or a constructor name
+      const propTypes = topScopeOtherPropsData.__yasaPropTypes || {}
+
+      // First pass: rebuild Map/Set containers and deserialize the values
+      const processedProps: any = {}
+      for (const key of Object.keys(topScopeOtherPropsData)) {
+        if (key === '__yasaPropTypes') {
+          continue
+        }
+        const propType = propTypes[key]
+        const value = topScopeOtherPropsData[key]
+
+        if (propType === 'Map') {
+          // An empty Map is produced even when the stored value is null/undefined or empty
+          const map = new Map()
+          if (value && typeof value === 'object' && !Array.isArray(value)) {
+            for (const mapKey of Object.keys(value)) {
+              map.set(mapKey, deserializeObject(value[mapKey], analyzer.topScope))
+            }
+          }
+          processedProps[key] = map
+        } else if (propType === 'Set') {
+          // An empty Set is produced even when the stored value is not an array
+          const set = new Set()
+          if (Array.isArray(value)) {
+            for (const item of value) {
+              set.add(deserializeObject(item, analyzer.topScope))
+            }
+          }
+          processedProps[key] = set
+        } else {
+          processedProps[key] = deserializeObject(value, analyzer.topScope)
+        }
+      }
+
+      // Second pass: resolve topScope markers, restore typed objects, assign onto the raw topScope
+      for (const key of Object.keys(processedProps)) {
+        try {
+          const propType = propTypes[key]
+          let value = processedProps[key]
+
+          if (value && typeof value === 'object' && value.__yasaTopScopeMarker === true) {
+            value = analyzer.topScope
+          }
+
+          const isTypedObject =
+            propType &&
+            propType !== 'Map' &&
+            propType !== 'Set' &&
+            propType !== 'Array' &&
+            propType !== 'Object' &&
+            value &&
+            typeof value === 'object'
+          if (isTypedObject) {
+            // shallowCopyValue receives a copy whose non-enumerable `constructor.name` is the recorded type
+            const objWithConstructor = { ...value }
+            Object.defineProperty(objWithConstructor, 'constructor', {
+              value: { name: propType },
+              writable: true,
+              enumerable: false,
+              configurable: true,
+            })
+            value = shallowCopyValue(objWithConstructor)
+          }
+
+          topScopeTarget[key] = value
+        } catch (e) {
+          yasaWarning(`Failed to restore topScope property ${key}: ${e}`)
+        }
+      }
+      const propCount = Object.keys(processedProps).length
+      yasaLog(`[LOAD CACHE]Loaded topScope other properties (${propCount} properties)`)
+    }
+
+    // Re-point topScope.context (and analyzer.context) at the managers that were just restored
+    if (analyzer.topScope) {
+      if (!analyzer.topScope.context) {
+        analyzer.topScope.context = new AnalysisContext()
+      }
+      analyzer.topScope.context.files = analyzer.fileManager
+      analyzer.topScope.context.ast = analyzer.astManager
+      analyzer.topScope.context.symbols = analyzer.symbolTable
+      analyzer.topScope.context.funcs = analyzer.funcSymbolTable
+      analyzer.context = analyzer.topScope.context
+    }
+
+    // n. sourceScope of every checker that is registered both in the cache and in this analyzer
+    if (analyzer.checkerManager && analyzer.checkerManager.registered_checkers) {
+      const checkerSourceScopesPath = path.join(cacheBasePath, 'checkerSourceScopes.json')
+      if (fs.existsSync(checkerSourceScopesPath)) {
+        try {
+          const checkerSourceScopes = jsonfile.readFileSync(checkerSourceScopesPath)
+          const registered = analyzer.checkerManager.registered_checkers
+          let restoredCheckerCount = 0
+          for (const checkerName of Object.keys(checkerSourceScopes)) {
+            if (!hasOwn(registered, checkerName)) {
+              continue
+            }
+            const checker = registered[checkerName]
+            const sourceScopeData = checkerSourceScopes[checkerName]
+            if (checker && sourceScopeData) {
+              // The cached scope replaces whatever the checker currently holds
+              checker.sourceScope = deserializeObject(sourceScopeData, analyzer.topScope)
+              restoredCheckerCount++
+            }
+          }
+          if (restoredCheckerCount > 0) {
+            yasaLog(`[LOAD CACHE]Restored sourceScope for ${restoredCheckerCount} checkers`)
+          }
+        } catch (err: any) {
+          yasaWarning(`[LOAD CACHE]Failed to load checker sourceScopes: ${err.message}`)
+        }
+      }
+    }
+
+    // o. Re-assign `value` on every Unit (entries with a truthy vtype) in the symbol map
+    if (analyzer.symbolTable && (analyzer.symbolTable as any).symbolMap instanceof Map) {
+      const { symbolMap } = analyzer.symbolTable as any
+      for (const [uuid, unit] of symbolMap.entries()) {
+        if (!unit || typeof unit !== 'object' || !unit.vtype) {
+          continue
+        }
+        try {
+          // Unwrap a Proxy to its raw target; anything that is not an object becomes {}
+          let fieldTarget = unit.value
+          if (fieldTarget && util.types.isProxy(fieldTarget)) {
+            fieldTarget = (fieldTarget as any)[RAW_TARGET] || fieldTarget
+          }
+          if (fieldTarget === undefined || fieldTarget === null || typeof fieldTarget !== 'object') {
+            fieldTarget = {}
+          }
+
+          if (unit.vtype === 'union' && typeof unit._syncElements === 'function') {
+            // Unions take their elements as an array
+            const arr = Array.isArray(fieldTarget) ? fieldTarget : Object.values(fieldTarget)
+            unit._syncElements(arr)
+          } else {
+            unit.value = fieldTarget
+          }
+        } catch (e) {
+          yasaWarning(`Failed to restore Proxy for Unit ${uuid}: ${e}`)
+        }
+      }
+    }
+
+    yasaLog(`[LOAD CACHE]Analyzer cache loaded successfully from ${cacheBasePath}`)
+    return true
+  } catch (err: any) {
+    yasaError(`[LOAD CACHE]Failed to load analyzer cache: ${err.message}`)
+    return false
+  }
+}
+
+/**
+ * Builds the cache ID for a source path.
+ *
+ * The repository name is the last non-empty segment of the normalized path, with every character
+ * outside `[a-zA-Z0-9_-]` replaced by `_` (`default` when the path has no segment). The hash is the
+ * first 8 hex characters of the MD5 of `sourcePath` exactly as passed in, and the date is today's
+ * local date.
+ *
+ * @param sourcePath source path of the analyzed code base
+ * @returns cache ID in the form `<repoName>_<md5Prefix>_<YYYYMMDD>`
+ */
+export function generateCacheId(sourcePath: string): string {
+  const crypto = require('crypto')
+  const twoDigits = (n: number): string => String(n).padStart(2, '0')
+
+  // Repository name: last path segment, sanitized
+  const segments = path
+    .normalize(sourcePath)
+    .split(path.sep)
+    .filter((segment: string) => segment.length > 0)
+  const lastSegment = segments[segments.length - 1] ?? 'default'
+  const safeName = lastSegment.replace(/[^a-zA-Z0-9_-]/g, '_')
+
+  // Local date as YYYYMMDD
+  const today = new Date()
+  const stamp = `${today.getFullYear()}${twoDigits(today.getMonth() + 1)}${twoDigits(today.getDate())}`
+
+  // MD5 prefix of the raw source path: identical input strings always give the same prefix
+  const digest = crypto.createHash('md5').update(sourcePath).digest('hex')
+
+  return [safeName, digest.slice(0, 8), stamp].join('_')
+}
+
+/**
+ * Finds the cache folder that belongs to a source path.
+ *
+ * Candidate folders live directly below the cache directory and are named
+ * `<repoName>_<md5Prefix>_<suffix>`, where the repository name and the MD5 prefix are derived from
+ * `sourcePath` the same way `generateCacheId` derives them. A candidate only counts when it is a
+ * directory containing `metadata.json`.
+ *
+ * When several folders match, the one whose name sorts last is returned. For the `YYYYMMDD` suffix
+ * produced by `generateCacheId` that is the most recent cache; directory listing order is not relied on.
+ *
+ * @param sourcePath source path of the analyzed code base
+ * @returns absolute or relative path of the cache folder (cache directory joined with the folder name),
+ *          or `null` when the cache directory or a matching folder does not exist
+ */
+export function findCacheFolder(sourcePath: string): string | null {
+  const crypto = require('crypto')
+
+  // Repository name: last non-empty segment of the normalized path, sanitized to [a-zA-Z0-9_-]
+  const normalizedPath = path.normalize(sourcePath)
+  const pathParts = normalizedPath.split(path.sep).filter((part: string) => part.length > 0)
+  const repoName = pathParts.length > 0 ? pathParts[pathParts.length - 1] : 'default'
+  const cleanRepoName = repoName.replace(/[^a-zA-Z0-9_-]/g, '_')
+
+  // First 8 hex characters of the MD5 of the source path exactly as passed in
+  const hash = crypto.createHash('md5').update(sourcePath).digest('hex')
+  const hashPrefix = hash.substring(0, 8)
+
+  const cacheDir = getCacheDir()
+  if (!fs.existsSync(cacheDir)) {
+    return null
+  }
+
+  const prefix = `${cleanRepoName}_${hashPrefix}_`
+
+  // readdirSync gives no ordering guarantee, so sort explicitly and walk from the last name to the
+  // first: with zero-padded date suffixes that means newest cache first.
+  const candidates: string[] = fs
+    .readdirSync(cacheDir)
+    .filter((entry: string) => entry.startsWith(prefix))
+    .sort()
+    .reverse()
+
+  for (const entry of candidates) {
+    const folderPath = path.join(cacheDir, entry)
+    // Only directories that were completely written (metadata.json is saved last) qualify
+    if (fs.statSync(folderPath).isDirectory() && fs.existsSync(path.join(folderPath, 'metadata.json'))) {
+      return folderPath
+    }
+  }
+
+  return null
+}
diff --git a/src/engine/analyzer/common/analyzer.ts b/src/engine/analyzer/common/analyzer.ts
index 924607dd..f92ed184 100644
--- a/src/engine/analyzer/common/analyzer.ts
+++ b/src/engine/analyzer/common/analyzer.ts
@@ -1,38 +1,294 @@
+import { primitiveToString } from '../../../util/variable-util'
+import type { ISymbolTableManager } from './symbol-table-interface'
+import type { Invocation } from '../../../resolver/common/value/invocation'
+import type {
+ BaseNode,
+ Node,
+ Identifier,
+ Literal,
+ CompileUnit,
+ IfStatement,
+ SwitchStatement,
+ ForStatement,
+ WhileStatement,
+ RangeStatement,
+ ReturnStatement,
+ BreakStatement,
+ ContinueStatement,
+ ThrowStatement,
+ TryStatement,
+ ExpressionStatement,
+ ScopedStatement,
+ BinaryExpression,
+ UnaryExpression,
+ AssignmentExpression,
+ ConditionalExpression,
+ SuperExpression,
+ ThisExpression,
+ MemberAccess,
+ SliceExpression,
+ TupleExpression,
+ ObjectExpression,
+ CallExpression,
+ CastExpression,
+ NewExpression,
+ FunctionDefinition,
+ ClassDefinition,
+ VariableDeclaration,
+ ImportExpression,
+ SpreadElement,
+ YieldExpression,
+ ExportStatement,
+} from '../../../types/uast'
+import type {
+ Scope as ScopeType,
+ State,
+ Value,
+ SymbolValue as SymbolValueType,
+ VoidValue as VoidValueType,
+ SpreadValue as SpreadValueType,
+} from '../../../types/analyzer'
+import { BaseAnalyzer } from './base-analyzer'
+import { BinaryExprValue } from './value/binary-expr'
+import { UnaryExprValue } from './value/unary-expr'
+import { CallExprValue } from './value/call-expr'
+import { AnalysisContext } from './analysis-context'
+
const _ = require('lodash')
const Uuid = require('node-uuid')
-const chalk = require('chalk')
const logger = require('../../../util/logger')(__filename)
const Config = require('../../../config')
const Initializer = require('./initializer')
-const MemSpace = require('./memSpace')
const NativeResolver = require('./native-resolver')
+import type { CallArg, CallArgs, CallInfo, BoundParam, BoundCall } from './call-args'
+import { getLegacyArgValues, INTERNAL_CALL } from './call-args'
const MemState = require('./memState')
const Scope = require('./scope')
const SourceLine = require('./source-line')
const AstUtil = require('../../../util/ast-util')
-const ValueFormatter = require('../../../util/value-formatter')
const StateUtil = require('../../util/state-util')
const SymAddress = require('./sym-address')
const { unionAllValues } = require('./memStateBVT')
-const { cloneWithDepth } = require('../../../util/clone-util')
+const { shallowCopyValue, buildNewValueInstance, lodashCloneWithTag } = require('../../../util/clone-util')
const { handleException } = require('./exception-handler')
const {
- ValueUtil: { ObjectValue, Scoped, PrimitiveValue, UndefinedValue, UnionValue, SymbolValue, PackageValue },
+ ValueUtil: {
+ ObjectValue,
+ Scoped,
+ PrimitiveValue,
+ UndefinedValue,
+ UnionValue,
+ SymbolValue,
+ PackageValue,
+ VoidValue,
+ SpreadValue,
+ },
} = require('../../util/value-util')
const { filterDataFromScope, shallowEqual } = require('../../../util/common-util')
const Rules = require('../../../checker/common/rules-basic-handler')
const { getAbsolutePath, loadJSONfile } = require('../../../util/file-util')
+const { saveAnalyzerCache, loadAnalyzerCache, generateCacheId } = require('./analyzer-cache')
const { matchSinkAtFuncCallWithCalleeType } = require('../../../checker/taint/common-kit/sink-util')
-const { moveExistElementsToBuffer } = require('../java/common/builtins/buffer')
-const { PerformanceTracker } = require('../../../util/performance-tracker')
+const { moveExistElementsToBuffer, addElementToBuffer } = require('../java/common/builtins/buffer')
+const { performanceTracker } = require('../../../util/performance-tracker')
+const { checkInvocationMatchSink } = require('../../../checker/taint/common-kit/sink-util')
+const OutputStrategyAutoRegister = require('./output-strategy-auto-register')
+
+const ASTManager = require('./ast-manager')
+const SymbolTableManager = require('./symbol-table-manager')
+const { setGlobalASTManager, setGlobalSymbolTable, getGlobalSymbolTable } = require('../../../util/global-registry')
+const { prettyPrint } = require('../../../util/ast-util')
+
+/**
+ * Temporary symbol table manager: wraps the original symbol table and copies symbol values on first access (it is swapped in while symbolInterpret runs).
+ * Mirrors the SymbolTableManager method surface (ISymbolTableManager per the original note); no `implements` clause is declared on the class.
+ */
+class TemporarySymbolTableManager {
+ private originalSymbolTable: InstanceType // SymbolTableManager instance that lookups fall back to
+
+ private tmpSymbolTableManager: InstanceType // SymbolTableManager instance whose symbolMap stores the temporary entries; also used for UUID calculation
+
+ private copiedUnits: Map // copies already made, keyed by uuid, so a Unit is not copied twice
+
+ /**
+ * Create a wrapper around an existing symbol table, backed by a freshly constructed SymbolTableManager.
+ * @param originalSymbolTable SymbolTableManager instance to wrap
+ */
+ constructor(originalSymbolTable: InstanceType) {
+ this.originalSymbolTable = originalSymbolTable
+ // The symbolMap of tmpSymbolTableManager is the temporary store; its calculateUUID is reused as well
+ this.tmpSymbolTableManager = new SymbolTableManager()
+ this.copiedUnits = new Map()
+ }
+
+ /**
+ * Return the symbolMap of the temporary SymbolTableManager (reads its private property directly).
+ * @private
+ */
+ private getTmpSymbolMap(): Map {
+ // symbolMap is private on SymbolTableManager, so it is reached through an `any` cast
+ return (this.tmpSymbolTableManager as any).symbolMap
+ }
+
+ /**
+ * Copy one Unit on demand via shallowCopyValue, store the copy in the temporary map, and return it; a uuid that was already copied returns the cached copy.
+ * Original note: parent and field references are not copied recursively; the uuids they hold stay as-is and are copied when actually accessed.
+ * Original note: stored property values are copied without triggering getters/setters — this depends on shallowCopyValue, TODO confirm in clone-util.
+ * @param unit value to copy; falsy or non-object values are returned unchanged
+ */
+ private tmpTableCopyUnit(unit: any): any {
+ if (!unit || typeof unit !== 'object') {
+ return unit
+ }
+
+ // Already copied: return the cached copy
+ if (unit.uuid && this.copiedUnits.has(unit.uuid)) {
+ return this.copiedUnits.get(unit.uuid)
+ }
+
+ // Create the new object (original note: prototype chain is preserved — TODO confirm in shallowCopyValue)
+ const copiedUnit = shallowCopyValue(unit)
+
+ // Make sure _parentRef is carried over (original note: ValueRef is immutable, so sharing the reference is considered safe)
+ const originalParentRef = unit._parentRef
+ if (originalParentRef && !copiedUnit._parentRef) {
+ copiedUnit._parentRef = originalParentRef
+ }
+
+ // Record the copy in the temporary table (written straight into tmpSymbolTableManager's symbolMap)
+ if (copiedUnit.uuid) {
+ this.getTmpSymbolMap().set(copiedUnit.uuid, copiedUnit)
+ this.copiedUnits.set(copiedUnit.uuid, copiedUnit)
+ }
+
+ return copiedUnit
+ }
+
+ /**
+ * Look up a Unit: return the temporary entry when present; otherwise fetch it from the original table, copy it, and return the copy (null when the uuid is falsy or unknown).
+ * If the temporary entry has no _parentRef but the original one does, the temporary entry is dropped and copied again from the original.
+ * @param uuid uuid of the Unit to fetch
+ */
+ get(uuid: string | null | undefined): any {
+ if (!uuid) {
+ return null
+ }
+
+ // Check the temporary table first (tmpSymbolTableManager's symbolMap)
+ const tmpUnit = this.getTmpSymbolMap().get(uuid) || null
+ if (tmpUnit) {
+ // Does the temporary entry have a parent? (decided by _parentRef)
+ if (!tmpUnit._parentRef) {
+ // No parent on the temporary entry: check whether the original table has one
+ const originalUnit = this.originalSymbolTable.get(uuid)
+ if (originalUnit?._parentRef) {
+ // Drop the stale copy from the temporary table. NOTE(review): anything that only lived on that copy is not carried over
+ this.getTmpSymbolMap().delete(uuid)
+ this.copiedUnits.delete(uuid)
+ // Copy again from the original so _parentRef is included
+ return this.tmpTableCopyUnit(originalUnit)
+ }
+ }
+ return tmpUnit
+ }
+
+ // Fall back to the original table
+ const originalUnit = this.originalSymbolTable.get(uuid)
+ if (!originalUnit) {
+ return null
+ }
+
+ // Copy (shallow, via tmpTableCopyUnit) and record the copy in the temporary table
+ return this.tmpTableCopyUnit(originalUnit)
+ }
+
+ /**
+ * Register a Unit in the temporary table: compute its UUID, assign it to unit.uuid, store the unit, and return the UUID (null for non-objects or when no UUID is produced).
+ * NOTE(review): the original comment promised that references are updated when the UUID changes; no such update happens in this method.
+ * @param unit Unit to register; it is mutated (unit.uuid is overwritten)
+ */
+ register(unit: any): string | null {
+ if (!unit || typeof unit !== 'object') {
+ return null
+ }
+
+ // Compute the UUID with the temporary symbol table manager
+ const uuid = this.tmpSymbolTableManager.calculateUUID(unit)
+ if (!uuid) {
+ return null
+ }
+
+ // Assign the UUID
+ unit.uuid = uuid
+
+ // Store directly in tmpSymbolTableManager's symbolMap (original note: its register() is avoided because it would recompute the UUID)
+ if (uuid) {
+ this.getTmpSymbolMap().set(uuid, unit)
+ }
+
+ return uuid
+ }
+
+ /**
+ * Report whether a UUID exists in either the temporary table or the original table.
+ * @param uuid uuid to test; falsy values yield false
+ */
+ has(uuid: string | null | undefined): boolean {
+ if (!uuid) {
+ return false
+ }
+ return this.getTmpSymbolMap().has(uuid) || this.originalSymbolTable.has(uuid)
+ }
+
+ /**
+ * Compute a UUID by delegating to the temporary SymbolTableManager.
+ * @param unit Unit to compute the UUID for
+ * @param qidSuffix optional value forwarded unchanged to calculateUUID
+ */
+ calculateUUID(unit: any, qidSuffix?: any): string | null {
+ return this.tmpSymbolTableManager.calculateUUID(unit, qidSuffix)
+ }
+
+ /**
+ * Remove a Unit from the temporary table only; the original table and copiedUnits are left untouched.
+ * @param uuid uuid to remove; falsy values are ignored
+ */
+ delete(uuid: string | null | undefined): void {
+ if (uuid) {
+ this.getTmpSymbolMap().delete(uuid)
+ }
+ }
+
+ /**
+ * Empty the temporary table and the copy cache; the original table is not modified.
+ */
+ clear(): void {
+ this.getTmpSymbolMap().clear()
+ this.copiedUnits.clear()
+ }
+
+ /**
+ * Number of entries currently held in the temporary table.
+ */
+ size(): number {
+ return this.getTmpSymbolMap().size
+ }
+
+ /**
+ * Return the map exposed by the temporary SymbolTableManager's getMap().
+ */
+ getMap(): Map {
+ return this.tmpSymbolTableManager.getMap()
+ }
+}
/**
* The main AST analyzer with checker invoking
* @param checker
* @constructor
*/
-class Analyzer extends MemSpace {
+class Analyzer extends BaseAnalyzer {
options: any
checkerManager: any
@@ -41,17 +297,17 @@ class Analyzer extends MemSpace {
lastReturnValue: any
- thisFClos: any
+ _thisFClos: any // 内部存储,通过 getter/setter 访问
- entry_fclos: any
+ _entry_fclos: any // 内部存储,通过 getter/setter 访问
inRange: boolean
ainfo: Record
- sourceCodeCache: Record
+ sourceCodeCache: Map
- lastProcessedNode: any
+ _lastProcessedNode: any // 内部存储,通过 getter/setter 访问
thisIterationTime: number
@@ -63,9 +319,9 @@ class Analyzer extends MemSpace {
libFuncTagPropagationRuleArray: any[]
- moduleManager: any
+ context!: AnalysisContext
- packageManager: any
+ libArgToThisSidBlacklistKeywords: string[]
fileManager!: Record
@@ -73,10 +329,21 @@ class Analyzer extends MemSpace {
topScope: any
+ astManager: any
+
+ // 操作符号表:基于analyzer中使用this.symbolTable,基于符号值使用getSymbolTable()
+ symbolTable!: ISymbolTableManager
+
preprocessState: boolean | undefined
performanceTracker: import('../../../util/performance-tracker').IPerformanceTracker
+ backUpSymbolTable: any
+
+ tmpSymbolTable: any
+
+ isTmpSymbolTableOpen: boolean
+
/**
*
* @param checkerManager
@@ -85,18 +352,21 @@ class Analyzer extends MemSpace {
constructor(checkerManager: any, options?: any) {
super()
this.options = options || {}
+ this.isTmpSymbolTableOpen = false
this.checkerManager = checkerManager // 关联的检查器管理器
- this.performanceTracker = new PerformanceTracker()
+ this.performanceTracker = performanceTracker // 使用单例
this.enablePerformanceLogging = this.options.enablePerformanceLogging || false // 默认关闭
// 启用详细指令统计(如果启用了性能日志,输出 top 信息)
this.performanceTracker.setEnableDetailedInstructionStats(this.enablePerformanceLogging)
this.lastReturnValue = null // 记录最后一次函数调用的返回值
- this.thisFClos = null // 当前分析函数的闭包
- this.entry_fclos = null // 最外层函数的闭包
+ this._thisFClos = null // 当前分析函数的闭包(存储 UUID)
+ this._entry_fclos = null // 最外层函数的闭包(存储 UUID)
this.inRange = false // 范围语句标志
this.ainfo = {} // 整个分析过程中的信息
- this.sourceCodeCache = {} // 缓存的源代码
- this.lastProcessedNode = null
+ this.sourceCodeCache = new Map() // 缓存的源代码(文件路径 -> 代码行数组)
+ // 设置全局 analyzer 引用,使 source-line.ts 可以访问 sourceCodeCache
+ SourceLine.setGlobalAnalyzer(this)
+ this._lastProcessedNode = null // 最后处理的节点(存储 UUID 或 AST 节点)
// 超时控制
this.thisIterationTime = 0
this.prevIterationTime = 0
@@ -107,6 +377,113 @@ class Analyzer extends MemSpace {
this.initValTreeStruct()
this.entryPoints = []
this.libFuncTagPropagationRuleArray = this.loadLibFuncTagPropagationRule()
+ this.libArgToThisSidBlacklistKeywords = this.loadLibArgToThisSidBlacklistKeywords()
+ }
+
+ /**
+ * thisFClos getter: a stored 'symuuid_'-prefixed string is resolved through this.symbolTable (null when not found); any other stored value is returned as-is.
+ */
+ get thisFClos() {
+ if (this._thisFClos === null || this._thisFClos === undefined) {
+ return null
+ }
+ // Stored value is a UUID: fetch the object from the symbol table
+ if (typeof this._thisFClos === 'string' && this._thisFClos.startsWith('symuuid_')) {
+ const unit = this.symbolTable.get(this._thisFClos)
+ return unit || null
+ }
+ // Not a UUID: return the stored value directly (backward compatibility)
+ return this._thisFClos
+ }
+
+ /**
+ * thisFClos setter: an object with truthy vtype and qid is registered in this.symbolTable and the returned UUID is stored; null/undefined stores null; anything else is stored unchanged.
+ */
+ set thisFClos(val) {
+ if (val === null || val === undefined) {
+ this._thisFClos = null
+ return
+ }
+ // Symbol value object: register it and keep only its UUID
+ if (val && typeof val === 'object' && val.vtype && val.qid) {
+ const uuid = this.symbolTable.register(val)
+ this._thisFClos = uuid
+ } else {
+ // Not a symbol value object: store it directly (backward compatibility)
+ this._thisFClos = val
+ }
+ }
+
+ /**
+ * entry_fclos getter: a stored 'symuuid_'-prefixed string is resolved through this.symbolTable (null when not found); any other stored value is returned as-is.
+ */
+ get entry_fclos() {
+ if (this._entry_fclos === null || this._entry_fclos === undefined) {
+ return null
+ }
+ // Stored value is a UUID: fetch the object from the symbol table
+ if (typeof this._entry_fclos === 'string' && this._entry_fclos.startsWith('symuuid_')) {
+ const unit = this.symbolTable.get(this._entry_fclos)
+ return unit || null
+ }
+ // Not a UUID: return the stored value directly (backward compatibility)
+ return this._entry_fclos
+ }
+
+ /**
+ * entry_fclos setter: an object with truthy vtype and qid is registered in this.symbolTable and the returned UUID is stored; null/undefined stores null; anything else is stored unchanged.
+ */
+ set entry_fclos(val) {
+ if (val === null || val === undefined) {
+ this._entry_fclos = null
+ return
+ }
+ // Symbol value object: register it and keep only its UUID
+ if (val && typeof val === 'object' && val.vtype && val.qid) {
+ const uuid = this.symbolTable.register(val)
+ this._entry_fclos = uuid
+ } else {
+ // Not a symbol value object: store it directly (backward compatibility)
+ this._entry_fclos = val
+ }
+ }
+
+ /**
+ * lastProcessedNode getter: a stored string is looked up in this.astManager (when one exists) and the AST node is returned on a hit; otherwise the stored value itself is returned.
+ */
+ get lastProcessedNode() {
+ if (this._lastProcessedNode === null || this._lastProcessedNode === undefined) {
+ return null
+ }
+ // A string may be a nodehash: try to resolve it to an AST node through the AST manager
+ if (typeof this._lastProcessedNode === 'string') {
+ const astNode = this.astManager?.get(this._lastProcessedNode)
+ if (astNode) {
+ return astNode
+ }
+ // Lookup failed: it may be some other string, return it as-is (backward compatibility)
+ return this._lastProcessedNode
+ }
+ // Not a string: return the stored value directly (backward compatibility)
+ return this._lastProcessedNode
+ }
+
+ /**
+ * lastProcessedNode setter: an object with a truthy `type` is registered in this.astManager (when one exists) and the returned nodehash is stored; null/undefined stores null; anything else is stored unchanged.
+ */
+ set lastProcessedNode(val) {
+ if (val === null || val === undefined) {
+ this._lastProcessedNode = null
+ return
+ }
+ // AST node (has a `type` property): register it and keep only the nodehash
+ if (val && typeof val === 'object' && val.type && this.astManager) {
+ const nodehash = this.astManager.register(val)
+ this._lastProcessedNode = nodehash
+ } else {
+ // Not an AST node (or no AST manager yet): store the value directly (backward compatibility)
+ this._lastProcessedNode = val
+ }
+ }
/**
@@ -154,91 +531,140 @@ class Analyzer extends MemSpace {
* 初始化符号值树
*/
initValTreeStruct() {
- this.moduleManager = Scoped({
- parent: null, // will set to topScope right away
+ this.astManager = new ASTManager()
+ this.symbolTable = new SymbolTableManager()
+ setGlobalASTManager(this.astManager) // publish both new managers through the global registry
+ setGlobalSymbolTable(this.symbolTable)
+
+ const moduleManager = new Scoped('', {
sid: 'moduleManager',
- }) // cache of imported module
+ })
- this.packageManager = PackageValue({
- parent: null, // will set to topScope right away
- sid: '',
- id: '',
+ const packageManager = new PackageValue('', {
+ parent: null,
+ sid: 'packageManager',
name: 'packageManager',
- }) // cache of imported module
+ })
this.fileManager = {}
- this.funcSymbolTable = {} // 函数符号值集合,可快速搜索全局函数,向QL/断点粘连提供快速检索能力
- this.topScope = Scoped({
- id: '',
+
+ const funcSymbolTableTarget: Record = {} // backing object of the proxy: holds UUID strings for symbol values, raw values otherwise
+ const { symbolTable } = this // NOTE(review): captures the table created above; later reassignment of this.symbolTable does not change what the proxy uses
+ this.funcSymbolTable = new Proxy(funcSymbolTableTarget, {
+ get: (target, prop: string | symbol) => { // read trap: resolve stored UUIDs back to symbol values
+ if (typeof prop === 'symbol') { // symbol keys are read straight from the target
+ return (target as any)[prop]
+ }
+ if (prop === 'toString' || prop === 'valueOf' || prop === 'constructor') { // these three names are also read straight from the target
+ return (target as any)[prop]
+ }
+ const value = target[prop]
+ if (value && typeof value === 'string' && value.startsWith('symuuid_')) { // stored UUID: look the unit up, null when missing
+ const unit = symbolTable.get(value)
+ return unit || null
+ }
+ return value
+ },
+ set: (target, prop: string, value: any) => { // write trap: symbol values are registered and only their UUID is kept
+ if (value && typeof value === 'object' && value.vtype && value.qid) {
+ const uuid = symbolTable.register(value)
+ target[prop] = uuid
+ ;(symbolTable as any).addFuncSymbolTableRef?.(uuid, prop) // optional hook: called only when the symbol table defines it
+ } else {
+ target[prop] = value
+ }
+ return true
+ },
+ deleteProperty: (target, prop: string) => {
+ delete target[prop]
+ return true
+ },
+ ownKeys: (target) => {
+ return Reflect.ownKeys(target)
+ },
+ has: (target, prop) => {
+ return prop in target
+ },
+ }) as Record
+
+ this.topScope = new Scoped('', {
sid: '',
- moduleManager: this.moduleManager,
- packageManager: this.packageManager,
- fileManager: this.fileManager,
- funcSymbolTable: this.funcSymbolTable,
+ qid: '',
parent: null,
})
- this.funcSymbolTable.parent = this.topScope
- this.fileManager.parent = this.topScope
- this.moduleManager.parent = this.topScope
- this.packageManager.parent = this.topScope
- this.fileManager.parent = this.topScope
+
+ this.context = new AnalysisContext() // the managers are grouped on one context object, which is then attached to topScope
+ this.context.ast = this.astManager
+ this.context.symbols = this.symbolTable
+ this.context.modules = moduleManager
+ this.context.packages = packageManager
+ this.context.files = this.fileManager
+ this.context.funcs = this.funcSymbolTable
+ this.topScope.context = this.context
+
+ moduleManager.parent = this.topScope
+ packageManager.parent = this.topScope
this.thisFClos = this.topScope
}
/**
- * 执行分析流程的通用方法,统一处理性能追踪(同步版本)
- *
- * **重要说明:**
- * - 此方法仅用于同步 preProcess 场景,preProcessFn 必须返回 void(不能返回 Promise)
- * - 如果 preProcessFn 可能返回 Promise,请使用 executeAnalysisPipelineAsync 方法
- *
- * @param preProcessFn - 执行同步 preProcess 的函数(必须返回 void,不能返回 Promise)
- * @param symbolInterpretFn - 执行 symbolInterpret 的函数
+ * 切换到临时符号表,在执行 symbolInterpretFn 期间自动拷贝符号值
*/
- private executeAnalysisPipeline(preProcessFn: () => void, symbolInterpretFn: () => void): void {
- // 开始整体性能追踪
- this.performanceTracker.start()
- this.performanceTracker.start('preProcess')
-
- Rules.setPreprocessReady(false)
- // 启用指令级别的性能监控(如果已启用性能日志)
- this.performanceTracker.startInstructionMonitor()
-
- // 执行同步 preProcess
- preProcessFn()
-
- this.performanceTracker.end('preProcess')
- this.performanceTracker.start('startAnalyze')
-
- this.startAnalyze()
-
- this.performanceTracker.end('startAnalyze')
- Rules.setPreprocessReady(true)
-
- this.performanceTracker.start('symbolInterpret')
+ protected switchToTemporarySymbolTable(): void {
+ // The current symbolTable must be a plain SymbolTableManager, not a TemporarySymbolTableManager
+ // If it already is a TemporarySymbolTableManager this is a nested call, which is not supported
+ if (this.symbolTable instanceof TemporarySymbolTableManager) {
+ throw new Error(
+ 'Nested TemporarySymbolTableManager is not supported. symbolInterpretFn should not be called recursively.'
+ )
+ }
- symbolInterpretFn()
+ // Create the temporary table and remember the three references it is about to replace (global registry, analyzer, topScope context)
+ const tmpSymbolTable = new TemporarySymbolTableManager(this.symbolTable as InstanceType)
+ const originalGlobalSymbolTable = getGlobalSymbolTable()
+ const originalAnalyzerSymbolTable = this.symbolTable
+ const originalTopScopeSymbolTable = (this.topScope?.context?.symbols as ISymbolTableManager | null) || null
- this.performanceTracker.end('symbolInterpret')
- this.endAnalyze()
+ setGlobalSymbolTable(tmpSymbolTable)
+ this.symbolTable = tmpSymbolTable
+ if (this.topScope?.context) {
+ this.topScope.context.symbols = tmpSymbolTable
+ }
+ this.isTmpSymbolTableOpen = true
+ this.tmpSymbolTable = tmpSymbolTable
+ this.backUpSymbolTable = {
+ originalGlobalSymbolTable,
+ originalAnalyzerSymbolTable,
+ originalTopScopeSymbolTable,
+ }
+ }
- // 记录性能数据并输出摘要(会自动输出指令统计)
- this.performanceTracker.logPerformance(this)
+ /**
+ * Restore the symbol table references saved in this.backUpSymbolTable and clear the temporary table. NOTE(review): reads backUpSymbolTable/tmpSymbolTable unguarded, so it presumably must follow a successful switchToTemporarySymbolTable().
+ */
+ protected restoreSymbolTable(): void {
+ // Put back every saved reference: global registry, analyzer, and topScope context
+ setGlobalSymbolTable(this.backUpSymbolTable.originalGlobalSymbolTable)
+ this.symbolTable = this.backUpSymbolTable.originalAnalyzerSymbolTable
+ if (this.topScope?.context) {
+ this.topScope.context.symbols = this.backUpSymbolTable.originalTopScopeSymbolTable
+ }
+ this.isTmpSymbolTableOpen = false
+ // Discard everything held by the temporary table
+ this.tmpSymbolTable.clear()
+ }
/**
- * 执行分析流程的通用方法(异步版本),统一处理性能追踪
- *
- * 用于处理异步 preProcess 场景,避免 analyzeProjectAsync 中的代码重复。
- *
- * @param preProcessFn - 执行异步 preProcess 的函数
- * @param symbolInterpretFn - 执行 symbolInterpret 的函数
+ * 执行分析流程的通用方法,统一处理性能追踪
+ * @param initAfterUsingCache
+ * @param preProcessFn - 执行同步 preProcess 的函数(必须返回 void,不能返回 Promise)
+ * @returns {Promise} 分析结果
*/
- private async executeAnalysisPipelineAsync(
- preProcessFn: () => Promise,
- symbolInterpretFn: () => void
- ): Promise {
+ private async executeAnalysisPipeline(
+ initAfterUsingCache: () => void,
+ preProcessFn: () => void | Promise
+ ): Promise {
// 开始整体性能追踪
this.performanceTracker.start()
this.performanceTracker.start('preProcess')
@@ -247,10 +673,59 @@ class Analyzer extends MemSpace {
// 启用指令级别的性能监控(如果已启用性能日志)
this.performanceTracker.startInstructionMonitor()
- // 执行异步 preProcess
- await preProcessFn()
+ // 尝试加载缓存
+ let cacheLoaded = false
+ let shouldPreProcess = true
+ if (Config.loadContextEnvironment) {
+ shouldPreProcess = false
+ this.performanceTracker.start('loadContextEnvironment')
+ try {
+ // 根据源路径查找缓存文件夹(基于 repoName 和 hashPrefix)
+ const sourcePath = this.options?.maindir || Config.prefixPath || process.cwd()
+ cacheLoaded = loadAnalyzerCache(this, Config.loadContextEnvironmentId, sourcePath)
+ if (cacheLoaded) {
+ logger.info('Analyzer cache loaded successfully')
+ }
+ if (cacheLoaded && Config.maindirPrefix) {
+ const name = Config.maindirPrefix.split('/').pop() || Config.maindirPrefix
+ if (!Config.loadContextEnvironmentId || !Config.loadContextEnvironmentId.startsWith(`${name}_`)) {
+ shouldPreProcess = true
+ }
+ }
+ if (!shouldPreProcess && typeof initAfterUsingCache === 'function') {
+ initAfterUsingCache()
+ }
+ } catch (err: any) {
+ logger.warn(`Failed to load analyzer cache: ${err.message}`)
+ }
+ this.performanceTracker.end('loadContextEnvironment')
+ }
+
+ if (shouldPreProcess) {
+ const result = preProcessFn()
+ if (result instanceof Promise) {
+ await result
+ }
+ }
this.performanceTracker.end('preProcess')
+
+ // 保存缓存(在 startAnalyze 之前)
+ if (Config.saveContextEnvironment || Config.miniSaveContextEnvironment) {
+ try {
+ this.performanceTracker.start('saveContextEnvironment')
+ const sourcePath = this.options?.maindir
+ const cacheId = generateCacheId(sourcePath)
+ saveAnalyzerCache(this, cacheId)
+ logger.info('Analyzer cache saved successfully')
+ // 保存完成后结束分析
+ this.performanceTracker.end('saveContextEnvironment')
+ return
+ } catch (err: any) {
+ logger.warn(`Failed to save analyzer cache: ${err.message}`)
+ }
+ }
+
this.performanceTracker.start('startAnalyze')
this.startAnalyze()
@@ -260,86 +735,76 @@ class Analyzer extends MemSpace {
this.performanceTracker.start('symbolInterpret')
- symbolInterpretFn()
+ // 切换到临时符号表
+ this.switchToTemporarySymbolTable()
+ try {
+ this.symbolInterpret()
+ } finally {
+ // 恢复原始符号表
+ this.restoreSymbolTable()
+ }
this.performanceTracker.end('symbolInterpret')
this.endAnalyze()
// 记录性能数据并输出摘要(会自动输出指令统计)
- this.performanceTracker.logPerformance(this)
+ performanceTracker.collectAnalysisData(this)
+
+ return this.recordCheckerFindings()
}
/**
* 分析单个文件
- *
- * 性能追踪逻辑已统一到 executeAnalysisPipeline 方法,避免代码重复。
- *
* @param source - 源代码内容
* @param fileName - 文件名
* @returns 分析结果
*/
- analyzeSingleFile(source: any, fileName: any) {
+ async analyzeSingleFile(source: any, fileName: any) {
try {
+ // Single-file analysis does not use the context cache: turn off loading and both save modes. NOTE(review): this mutates the shared Config object
+ Config.loadContextEnvironment = false
+ Config.saveContextEnvironment = false
+ Config.miniSaveContextEnvironment = false
if (typeof this.preProcess4SingleFile === 'function' && typeof this.symbolInterpret === 'function') {
- this.executeAnalysisPipeline(
- () => this.preProcess4SingleFile(source, fileName),
- () => this.symbolInterpret()
+ return await this.executeAnalysisPipeline(
+ () => {}, // initAfterUsingCache argument: nothing to do because cache loading is disabled above
+ () => this.preProcess4SingleFile(source, fileName)
)
- } else {
- logger.info(`this analyzer has not support analyzeSingleFile yet`)
}
+ logger.info(`this analyzer has not support analyzeSingleFile yet`)
return this.recordCheckerFindings()
} catch (e) {
handleException(e, 'Error occurred in analyzer analyzeSingleFile', 'Error occurred in analyzer analyzeSingleFile')
+ return false // failure is reported to the caller as false after the exception has been handed to handleException
}
}
/**
- * 异步分析项目
- *
- * 用于处理支持异步 preProcess 的分析器(如 Go Analyzer、Python Analyzer)。
- *
+ * 分析项目
* @param processingDir - 要分析的项目目录
* @returns 分析结果
*/
- async analyzeProjectAsync(processingDir: any) {
+ async analyzeProject(processingDir: any) {
try {
if (typeof this.preProcess === 'function' && typeof this.symbolInterpret === 'function') {
- await this.executeAnalysisPipelineAsync(
- () => this.preProcess(processingDir),
- () => this.symbolInterpret()
+ if (typeof this.initAfterUsingCache !== 'function') {
+ this.initAfterUsingCache = () => {}
+ }
+ return await this.executeAnalysisPipeline(
+ () => this.initAfterUsingCache(),
+ () => this.preProcess(processingDir)
)
}
return this.recordCheckerFindings()
- } catch (e) {
+ } catch (e: any) {
+ const errorMsg = e?.message || String(e)
+ const errorStack = e?.stack || ''
handleException(
e,
- 'Error occurred in analyzer analyzeProjectAsync',
- 'Error occurred in analyzer analyzeProjectAsync'
+ `Error occurred in analyzer analyzeProject: ${errorMsg}\n${errorStack}`,
+ `Error occurred in analyzer analyzeProject: ${errorMsg}`
)
- }
- }
-
- /**
- * 同步分析项目
- *
- * 用于处理同步 preProcess 的分析器(如 Java Analyzer、JavaScript Analyzer)。
- * 性能追踪逻辑已统一到 executeAnalysisPipeline 方法,避免代码重复。
- *
- * @param processingDir - 要分析的项目目录
- * @returns 分析结果
- */
- analyzeProject(processingDir: any) {
- try {
- if (typeof this.preProcess === 'function' && typeof this.symbolInterpret === 'function') {
- this.executeAnalysisPipeline(
- () => this.preProcess(processingDir),
- () => this.symbolInterpret()
- )
- }
- return this.recordCheckerFindings()
- } catch (e) {
- handleException(e, 'Error occurred in analyzer analyzeProject', 'Error occurred in analyzer analyzeProject')
+ return false
}
}
@@ -359,13 +824,6 @@ class Analyzer extends MemSpace {
*/
initTopScope() {}
- /**
- *
- * @param source
- * @param filename
- */
- parseUast(source: any, filename: any) {}
-
/**
*
* @param uast
@@ -391,68 +849,6 @@ class Analyzer extends MemSpace {
}
}
- /**
- *
- * @param target
- * @param topScopeTemp
- */
- findValInTree(target: any, topScopeTemp: any): any {
- const passVals: any[] = []
- let current = target
- while (current) {
- if (current.sid === '') {
- break
- }
- passVals.push(current)
- current = current.parent
- }
- passVals.reverse()
- let scope = topScopeTemp
- for (const val of passVals) {
- let hasFind = false
- for (const s of Object.values(scope) as any[]) {
- if (
- s &&
- val.vtype === s.vtype &&
- val.id === s.id &&
- val.sid === s.sid &&
- val.qid === s.qid &&
- val.sort === s.sort &&
- val.name === s.name &&
- val.ast === s.ast &&
- val.parent?.vtype === s.parent?.vtype
- ) {
- scope = s
- hasFind = true
- break // 提前退出循环
- }
- }
- if (!hasFind && scope.field) {
- for (const s of Object.values(scope.field) as any[]) {
- if (
- s &&
- val.vtype === s.vtype &&
- val.id === s.id &&
- val.sid === s.sid &&
- val.qid === s.qid &&
- val.sort === s.sort &&
- val.name === s.name &&
- val.ast === s.ast &&
- val.parent?.vtype === s.parent?.vtype
- ) {
- scope = s
- hasFind = true
- break // 提前退出循环
- }
- }
- }
- if (!hasFind) {
- return null
- }
- }
- return scope
- }
-
/**
*
* @param instructionType
@@ -476,47 +872,6 @@ class Analyzer extends MemSpace {
return load(this)
}
- /**
- *
- * @param node
- */
- debugInstruction(node: any) {
- if (!Array.isArray(node)) {
- const code = this.sourceCodeCache[node?.loc?.sourcefile]
-
- if (code) {
- const { start, end } = node.loc
- const showLine = getLine(code, node.loc.start.line)
- const startColumn = start.column
- let endColumn = end.column
- if (start.line !== end.line) {
- endColumn = start.column
- }
- const msg = `${start.line} ${showLine.substring(0, startColumn)}${chalk.blue(
- showLine.substring(startColumn, endColumn)
- )}${showLine.substring(endColumn, showLine.length)}`
- logger.debug(msg)
- }
- }
-
- /**
- *
- * @param code
- * @param n
- */
- function getLine(code: any, n: any) {
- // 将代码分割成行数组
- const lines = code.split('\n')
-
- // 检查行数是否在有效范围内
- if (n > 0 && n <= lines.length) {
- // 获取第N行的内容
- return lines[n - 1]
- }
- return null // 行数无效,返回null或其他适当的值
- }
- }
-
// prePostFlag
/**
*
@@ -527,7 +882,7 @@ class Analyzer extends MemSpace {
*/
processInstruction(scope: any, node: any, state: any, prePostFlag?: any): any {
if (!node || !scope) {
- return UndefinedValue()
+ return new UndefinedValue()
}
if (node.vtype) {
return node
@@ -535,7 +890,12 @@ class Analyzer extends MemSpace {
this.lastProcessedNode = node
if (scope.vtype === 'union') {
- const res = UnionValue()
+ const res = new UnionValue(
+ undefined,
+ undefined,
+ `${scope.qid}.`,
+ node
+ )
for (const scp of scope.value) {
const val = this.processInstruction(scp, node, state, prePostFlag)
res.appendValue(val)
@@ -553,7 +913,10 @@ class Analyzer extends MemSpace {
const action = prePostFlag ? `${prePostFlag}Process` : 'process'
const inst = this.loadInstruction(action + node.type)
if (!inst) {
- return SymbolValue(node)
+ if (Config.saveContextEnvironment || Config.miniSaveContextEnvironment) {
+ return new SymbolValue(scope.qid, { sid: '' })
+ }
+ return new SymbolValue(scope.qid, { ...node, sid: '' })
}
// TODO 添加判断,后续指令是否是跟在return或throw后且在同一个scope内无法执行的指令 4+
this.statistics.numProcessedInstructions++
@@ -565,12 +928,11 @@ class Analyzer extends MemSpace {
try {
val = inst.call(this, scope, node, state)
} catch (e) {
- handleException(
- e,
- '',
- `process${node.type} error! loc is${node.loc.sourcefile}::${node.loc.start.line}_${node.loc.end.line}`
- )
- val = UndefinedValue()
+ const locInfo = node.loc
+ ? `${node.loc.sourcefile}::${node.loc.start?.line}_${node.loc.end?.line}`
+ : ''
+ handleException(e, '', `process${node.type} error! loc is${locInfo}`)
+ val = new UndefinedValue()
}
// 性能追踪:结束指令执行并更新统计(内部会检查是否启用)
@@ -594,10 +956,10 @@ class Analyzer extends MemSpace {
processPre(val: any, state: any) {
switch (val?.vtype) {
case 'class':
- this.processClassDefinition(val.parent, val.cdef, state)
+ this.processClassDefinition(val.parent, val.ast.cdef, state) // class definition node is read from val.ast.cdef; NOTE(review): val.ast is not null-checked here
break
case 'fclos':
- this.processFunctionDefinition(val.parent, val.fdef, state)
+ this.processFunctionDefinition(val.parent, val.ast.fdef, state) // function definition node is read from val.ast.fdef; NOTE(review): val.ast is not null-checked here
break
}
}
@@ -609,7 +971,7 @@ class Analyzer extends MemSpace {
* @param state
*/
processNoop(scope: any, node: any, state: any) {
- return UndefinedValue()
+ return new UndefinedValue() // no-op handler: ignores its arguments and returns a new UndefinedValue
}
/**
@@ -618,8 +980,16 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processLiteral(scope: any, node: any, state: any) {
- return PrimitiveValue({ ...node, ast: node, qid: node.value, sid: node.value, id: node.value })
+ processLiteral(scope: ScopeType, node: Literal, state: State): SymbolValueType {
+ return new PrimitiveValue( // wraps the literal node in a PrimitiveValue; argument roles below are inferred from the call site — TODO confirm against the PrimitiveValue constructor
+ scope.qid, // qid of the scope the literal is evaluated in
+ primitiveToString(node.value), // string form of the literal value
+ node.value,
+ node.literalType,
+ node.type,
+ node.loc,
+ node
+ )
}
/**
@@ -628,19 +998,25 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processIdentifier(scope: any, node: any, state: any) {
- if (node.name === 'undefined') return PrimitiveValue({ type: 'Literal', value: undefined })
- const res = this.getMemberValue(scope, node, state)
+ processIdentifier(scope: ScopeType, node: Identifier, state: State): SymbolValueType {
+ if (node.name === 'undefined') { // the identifier named 'undefined' is returned as a primitive value without any scope lookup
+ return new PrimitiveValue(scope.qid, 'undefined', undefined, null, 'Literal')
+ }
+ let res
+ if (state?.findIdInCurScope) { // flag on state selects getMemberValueInCurrentScope (presumably a current-scope-only lookup — confirm) over getMemberValue
+ res = this.getMemberValueInCurrentScope(scope, node, state)
+ } else {
+ res = this.getMemberValue(scope, node, state)
+ }
if (res.vtype === 'fclos') {
res._this = this.topScope
}
if (res.vtype === 'undefine' || res.vtype === 'uninitialized' || res.vtype === 'symbol') {
- res.vtype = 'symbol'
- res._id = node.name
- res._sid = node.name
+ res.sid = node.name // these three vtypes get the identifier name as sid; vtype itself is left unchanged
}
- this.checkerManager.checkAtIdentifier(this, scope, node, state, { res })
- return res
+ const info = { res }
+ this.checkerManager.checkAtIdentifier(this, scope, node, state, info)
+ return info.res // returns whatever info.res holds after the checker call, so a value assigned there by a checker is what callers receive
}
/**
@@ -649,7 +1025,7 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processCompileUnit(scope: any, node: any, state: any) {
+ processCompileUnit(scope: ScopeType, node: CompileUnit, state: State): Value {
if (this.checkerManager && this.checkerManager.checkAtCompileUnit) {
this.checkerManager.checkAtCompileUnit(this, scope, node, state, {
pcond: state.pcond,
@@ -667,6 +1043,7 @@ class Analyzer extends MemSpace {
// node.body.filter(n => needCompileFirst(n.type)).forEach(n => this.processInstruction(scope, n, state));
// process Compile First twice in order to handle elements which can't be correctly compiled once first
node.body.forEach((n: any) => this.processInstruction(scope, n, state))
+ return new VoidValue()
}
/**
@@ -675,7 +1052,7 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processExportStatement(scope: any, node: any, state: any) {
+ processExportStatement(scope: ScopeType, node: ExportStatement, state: State): VoidValueType {
// locate exports
const exports = this.getExportsScope(scope)
const val = this.processInstruction(scope, node.argument, state)
@@ -684,6 +1061,7 @@ class Analyzer extends MemSpace {
} else if (exports) {
this.saveVarInCurrentScope(exports, node.alias, val, state)
}
+ return new VoidValue()
}
/**
@@ -711,7 +1089,7 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processIfStatement(scope: any, node: any, state: any) {
+ processIfStatement(scope: ScopeType, node: IfStatement, state: State): VoidValueType {
/*
{ test,
consequent,
@@ -727,7 +1105,13 @@ class Analyzer extends MemSpace {
})
}
- const b: string = 'U' // abstraction.evaluate(test, state.pcond);
+ let b: string = 'U' // abstraction.evaluate(test, state.pcond);
+ if (test?.type === 'Literal' && test.value === true) {
+ b = 'T'
+ } else if (test?.type === 'Literal' && test.value === false) {
+ b = 'F'
+ }
+
switch (b) {
case 'T':
this.processInstruction(scope, node.consequent, state)
@@ -770,6 +1154,7 @@ class Analyzer extends MemSpace {
}
}
}
+ return new VoidValue()
}
/**
@@ -778,19 +1163,20 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processSwitchStatement(scope: any, node: any, state: any) {
+ processSwitchStatement(scope: ScopeType, node: SwitchStatement, state: State): VoidValueType {
// cases: [ SwitchCase ]
const test = this.processInstruction(scope, node.discriminant, state)
if (test && test.type === 'Literal') {
+ const testValue = (test as any as Literal).value
for (const caseClause of node.cases) {
if (
!caseClause.test || // FIXME
- caseClause.test.value === test.value
+ (caseClause.test.type === 'Literal' && (caseClause.test as any as Literal).value === testValue)
) {
return this.processInstruction(scope, caseClause.body, state)
}
}
- return UndefinedValue()
+ return new UndefinedValue()
}
const scopes = []
@@ -804,7 +1190,7 @@ class Analyzer extends MemSpace {
this.processInstruction(scope1, caseClause.body, st)
}
MemState.unionValues(scopes, substates, state.brs)
- return UndefinedValue()
+ return new UndefinedValue()
}
/**
@@ -813,7 +1199,7 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processForStatement(scope: any, node: any, state: any) {
+ processForStatement(scope: ScopeType, node: ForStatement, state: State): VoidValueType {
StateUtil.pushLoopInfo(state, node)
if (node.init) {
this.processInstruction(scope, node.init, state)
@@ -836,7 +1222,7 @@ class Analyzer extends MemSpace {
} else this.processInstruction(scope, node.body, state)
StateUtil.popLoopInfo(state)
- return UndefinedValue()
+ return new UndefinedValue()
}
/**
@@ -845,7 +1231,7 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processWhileStatement(scope: any, node: any, state: any) {
+ processWhileStatement(scope: ScopeType, node: WhileStatement, state: State): VoidValueType {
/*
{ test,
body,
@@ -869,7 +1255,7 @@ class Analyzer extends MemSpace {
// // fixed-point on values (with scopes) for data-flow calculation
// scope.value = MemState.computeValueFixedPoint(scope).value;
- return UndefinedValue()
+ return new UndefinedValue()
}
/**
@@ -878,9 +1264,9 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processRangeStatement(scope: any, node: any, state: any) {
+ processRangeStatement(scope: ScopeType, node: RangeStatement, state: State): any {
const { key, value, right, body } = node
- scope = Analyzer.createSubScope(
+ scope = Scope.createSubScope(
``,
scope
)
@@ -889,7 +1275,7 @@ class Analyzer extends MemSpace {
!Array.isArray(rightVal) &&
(this.inRange ||
rightVal?.vtype === 'primitive' ||
- Object.keys(rightVal.getRawValue()).length === 0 ||
+ Object.keys(rightVal.getRawValue()).filter((key) => !key.startsWith('__yasa')).length === 0 ||
rightVal?.vtype === 'union')
) {
if (value) {
@@ -897,6 +1283,7 @@ class Analyzer extends MemSpace {
this.saveVarInCurrentScope(scope, value.id, rightVal, state)
} else if (value.type === 'TupleExpression') {
for (const ele of value.elements) {
+ // Runtime may have 'name' property even if not in type definition
this.saveVarInCurrentScope(scope, ele.name, rightVal, state)
}
} else {
@@ -912,7 +1299,7 @@ class Analyzer extends MemSpace {
this.inRange = true
if (this.isNullLiteral(rightVal)) {
this.inRange = false
- return
+ return undefined as any // 保持历史行为(25282dbd)
}
const itr = this.getValueIterator(rightVal, filterDataFromScope)
let countLimit = 30
@@ -928,13 +1315,18 @@ class Analyzer extends MemSpace {
} else {
// 如果是string,将其构造出符号值再存储
// TODO 250731 将符号的字面量(而非符号值)作为key存储是否合适,有待商榷。
- if (_.isString(k)) k = PrimitiveValue({ ...key, value: k, ast: key, qid: k, sid: k, id: k })
+ if (_.isString(k)) k = new PrimitiveValue(scope.qid, k, k, null, key.type, key.loc, key)
this.saveVarInScope(scope, key, k, state)
}
}
if (value) {
if (value.type === 'VariableDeclaration') {
this.saveVarInCurrentScope(scope, value.id, v, state)
+ } else if (value.type === 'TupleExpression') {
+ for (let i = 0; i < value.elements.length; i++) {
+ const eleVal = v?.members?.get(String(i)) ?? v
+ this.saveVarInCurrentScope(scope, value.elements[i].name, eleVal, state)
+ }
} else {
this.saveVarInScope(scope, value, v, state)
}
@@ -943,6 +1335,7 @@ class Analyzer extends MemSpace {
}
this.inRange = false
}
+ return new VoidValue()
}
/**
@@ -951,25 +1344,36 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processReturnStatement(scope: any, node: any, state: any) {
+ processReturnStatement(scope: ScopeType, node: ReturnStatement, state: State): VoidValueType {
// { expression }
// lastReturnValue should be treated as union since there are multi return points in one func
if (node.argument) {
- const return_value = this.processInstruction(scope, node.argument, state)
+ const returnValue = this.processInstruction(scope, node.argument, state)
if (!node.isYield) {
if (!this.lastReturnValue) {
- this.lastReturnValue = return_value
+ this.lastReturnValue = returnValue
} else if (this.lastReturnValue.vtype === 'union') {
- if (return_value === this.lastReturnValue || return_value.value === this.lastReturnValue.value) {
- const new_return_value = cloneWithDepth(return_value, 2)
- this.lastReturnValue.appendValue(new_return_value, false)
+ if (returnValue === this.lastReturnValue || returnValue.value === this.lastReturnValue.value) {
+ const newReturnValue = buildNewValueInstance(
+ this,
+ returnValue,
+ node,
+ scope,
+ () => {
+ return false
+ },
+ (v: any) => {
+ return !v
+ }
+ )
+ this.lastReturnValue.appendValue(newReturnValue, false)
} else {
- this.lastReturnValue.appendValue(return_value, false)
+ this.lastReturnValue.appendValue(returnValue, false)
}
} else {
- const tmp = UnionValue()
+ const tmp = new UnionValue(undefined, undefined, `${scope.qid}.`, node)
tmp.appendValue(this.lastReturnValue)
- tmp.appendValue(return_value)
+ tmp.appendValue(returnValue)
this.lastReturnValue = tmp
}
if (node.loc && this.lastReturnValue)
@@ -981,9 +1385,9 @@ class Analyzer extends MemSpace {
'[return value]'
)
}
- return return_value
+ return returnValue
}
- return PrimitiveValue({ type: 'Literal', value: null, loc: node.loc })
+ return new PrimitiveValue(scope.qid, 'undefined', null, null, 'Literal', node.loc)
}
// TODO break statement
@@ -993,8 +1397,8 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processBreakStatement(scope: any, node: any, state: any) {
- return UndefinedValue()
+ processBreakStatement(scope: ScopeType, node: BreakStatement, state: State): VoidValueType {
+ return new UndefinedValue()
}
// TODO continue statement
@@ -1004,8 +1408,8 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processContinueStatement(scope: any, node: any, state: any) {
- return UndefinedValue()
+ processContinueStatement(scope: ScopeType, node: ContinueStatement, state: State): VoidValueType {
+ return new UndefinedValue()
}
// TODO throw
@@ -1015,7 +1419,7 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processThrowStatement(scope: any, node: any, state: any) {
+ processThrowStatement(scope: ScopeType, node: ThrowStatement, state: State): VoidValueType {
// 原本是注释的,打开了,throw和return 还是有很大区别的
// throw会沿着调用栈传递,return 只会传到调用层 没处理就结束了
// const ret = this.processReturnStatement(scope, node, state);
@@ -1030,15 +1434,25 @@ class Analyzer extends MemSpace {
node,
node.loc && node.loc.sourcefile,
'Throw Pass: ',
- node.argument.name
+ (node.argument.type === 'Identifier' ? node.argument.name : null) ||
+ AstUtil.prettyPrintAST(node.argument).slice(0, 50)
)
// 没有被try处理的异常
state.throwstack = state.throwstack ?? []
state.throwstack.push(throw_value)
return throw_value
}
+ state.throwstackScopeAndState = state.throwstackScopeAndState ?? []
+ state.throwstackScopeAndState.push({ scope, state })
}
- return PrimitiveValue({ type: 'Literal', value: node.argument, loc: node.loc })
+ return new PrimitiveValue(
+ scope.qid,
+ ``,
+ node.argument,
+ null,
+ 'Literal',
+ node.loc
+ )
}
/**
@@ -1047,13 +1461,14 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processTryStatement(scope: any, node: any, state: any) {
+ processTryStatement(scope: ScopeType, node: TryStatement, state: State): VoidValueType {
// 此处processInstruction的返回值是undefine 因此无法拿到try里面是否抛出异常的信息
this.processInstruction(scope, node.body, state)
const { handlers } = node
if (handlers) {
for (const clause of handlers) {
- scope = Analyzer.createSubScope(
+ if (!clause) continue
+ scope = Scope.createSubScope(
``,
scope
)
@@ -1062,7 +1477,7 @@ class Analyzer extends MemSpace {
}
}
if (node.finalizer) this.processInstruction(scope, node.finalizer, state)
- return UndefinedValue()
+ return new UndefinedValue()
}
/**
@@ -1071,7 +1486,7 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processExpressionStatement(scope: any, node: any, state: any) {
+ processExpressionStatement(scope: ScopeType, node: ExpressionStatement, state: State): VoidValueType {
// { expression }
return this.processInstruction(scope, node.expression, state)
}
@@ -1082,17 +1497,17 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processScopedStatement(scope: any, node: any, state: any) {
+ processScopedStatement(scope: ScopeType, node: ScopedStatement, state: State): any {
/*
{ statements }
*/
const { loc } = node
let scopeName
if (loc) {
- if (!scope._qid) {
- const relateFileName = loc.sourcefile.startsWith(Config.maindirPrefix)
- ? loc.sourcefile?.substring(Config.maindirPrefix.length).split('.')[0]
- : loc.sourcefile.split('.')[0]
+ if (!scope.qid) {
+ const prefix = loc.sourcefile?.substring(Config.maindirPrefix.length)
+ const lastDotIndex = prefix?.lastIndexOf('.') ?? -1
+ const relateFileName = lastDotIndex >= 0 ? prefix?.substring(0, lastDotIndex) : prefix
scopeName = `${relateFileName}`
} else {
scopeName = ``
@@ -1112,6 +1527,7 @@ class Analyzer extends MemSpace {
if (this.checkerManager && this.checkerManager.checkAtEndOfBlock) {
this.checkerManager.checkAtEndOfBlock(this, scope, node, state, {})
}
+ return new VoidValue()
}
/**
@@ -1120,27 +1536,22 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processBinaryExpression(scope: any, node: any, state: any) {
- /*
- { operator,
- left,
- right
- }
- */
- const new_node = _.clone(node)
- new_node.ast = node
- const new_left = (new_node.left = this.processInstruction(scope, node.left, state))
- const new_right = (new_node.right = this.processInstruction(scope, node.right, state))
+ processBinaryExpression(scope: ScopeType, node: BinaryExpression, state: State): BinaryExprValue {
+ const new_left = this.processInstruction(scope, node.left, state)
+ const new_right = this.processInstruction(scope, node.right, state)
- const has_tag = (new_left && new_left.hasTagRec) || (new_right && new_right.hasTagRec)
- if (has_tag) {
- new_node.hasTagRec = has_tag
- }
+ const has_tag = (new_left && new_left.taint?.isTaintedRec) || (new_right && new_right.taint?.isTaintedRec)
+ // checkerManager 需要 newNode 兼容对象
+ const newNode: any = { ...node, ast: node, left: new_left, right: new_right, isTainted: has_tag || null }
if (this.checkerManager && this.checkerManager.checkAtBinaryOperation)
- this.checkerManager.checkAtBinaryOperation(this, scope, node, state, { newNode: new_node })
+ this.checkerManager.checkAtBinaryOperation(this, scope, node, state, { newNode })
- return SymbolValue(new_node)
+ const result = new BinaryExprValue(scope.qid, node.operator, new_left, new_right, node, node.loc)
+ if (has_tag) {
+ result.taint?.mergeFrom([new_left, new_right])
+ }
+ return result
}
/**
@@ -1149,14 +1560,12 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processUnaryExpression(scope: any, node: any, state: any) {
- const new_node = SymbolValue(_.clone(node))
- new_node.ast = node
- new_node.argument = this.processInstruction(scope, node.argument, state)
- // return nativeResolver.simplifyUnaryExpression(new_node);
- const hasTags = new_node.argument && new_node.argument.hasTagRec
- if (hasTags) new_node.hasTagRec = hasTags
- return new_node
+ processUnaryExpression(scope: ScopeType, node: UnaryExpression, state: State): UnaryExprValue {
+ const unaryArg = this.processInstruction(scope, node.argument, state)
+ const result = new UnaryExprValue(scope.qid, node.operator, unaryArg, node, node.loc, node.isSuffix)
+ const hasTags = unaryArg && unaryArg.taint?.isTaintedRec
+ if (hasTags) result.taint?.mergeFrom([unaryArg])
+ return result
}
/**
@@ -1165,7 +1574,7 @@ class Analyzer extends MemSpace {
* @param node
* @param state
*/
- processAssignmentExpression(scope: any, node: any, state: any) {
+ processAssignmentExpression(scope: ScopeType, node: AssignmentExpression, state: State): any {
/*
{ operator,
left,
@@ -1178,17 +1587,15 @@ class Analyzer extends MemSpace {
const { left } = node
const { right } = node
let tmpVal = this.processInstruction(scope, right, state)
- if (node.cloned && !tmpVal?.refCount) {
- tmpVal = _.clone(tmpVal)
- tmpVal.value = _.clone(tmpVal.value)
- }
const oldVal = this.processInstruction(scope, left, state)
// TODO: clean the following up
if (left.type === 'TupleExpression') {
for (let k = 0; k < left.elements.length; k++) {
const x = left.elements[k]
- if (!x || x.name === '_') continue
+ if (!x) continue
+ const xName = x.type === 'Identifier' ? x.name : undefined
+ if (xName === '_') continue
let val = tmpVal && tmpVal.type === 'TupleExpression' ? tmpVal.elements[k] : tmpVal
const oldV = oldVal && oldVal.type === 'TupleExpression' ? oldVal.elements[k] : oldVal
@@ -1212,11 +1619,20 @@ class Analyzer extends MemSpace {
}
}
} else {
- if (!tmpVal)
- // explicit null value
- tmpVal = PrimitiveValue({ type: 'Literal', value: null, loc: right.loc })
+ if (!tmpVal) {
+ tmpVal = new PrimitiveValue(scope.qid, 'undefined', null, null, 'Literal', right.loc)
+ }
+ if (typeof tmpVal !== 'object') {
+ tmpVal = new PrimitiveValue(scope.qid, ``, tmpVal, null, 'Literal', right.loc)
+ }
const sid = SymAddress.toStringID(node.left)
- tmpVal.sid = !tmpVal.id || tmpVal.id === '' ? sid : tmpVal.id
+ if (
+ tmpVal.sid === undefined ||
+ tmpVal.sid === null ||
+ (typeof tmpVal.sid === 'string' && tmpVal.sid.includes('