diff --git a/.azure/templates/build-deb.yml b/.azure/templates/build-deb.yml
new file mode 100644
index 000000000..0fca6c58b
--- /dev/null
+++ b/.azure/templates/build-deb.yml
@@ -0,0 +1,76 @@
+# Azure DevOps YAML Template: Build SONiC gNMI deb package
+#
+# Shared template for building sonic-gnmi .deb packages on both amd64 and arm64.
+# Handles: checkout → install deps → build mgmt-common → build gnmi → publish artifacts.
+#
+# Usage:
+#   - template: .azure/templates/build-deb.yml
+#     parameters:
+#       arch: amd64
+#       buildBranch: $(BUILD_BRANCH)
+
+parameters:
+- name: buildBranch
+  type: string
+  default: $(BUILD_BRANCH)
+- name: arch
+  type: string
+  default: amd64
+  values:
+  - amd64
+  - arm64
+- name: commonLibArtifact
+  type: string
+  default: common-lib
+- name: swssCommonArtifact
+  type: string
+  default: sonic-swss-common-bookworm
+- name: publishArtifact
+  type: string
+  default: sonic-gnmi
+
+steps:
+# Checkout all required repositories
+- checkout: self
+  clean: true
+  submodules: recursive
+  displayName: 'Checkout code'
+
+- checkout: sonic-mgmt-common
+  clean: true
+  submodules: recursive
+  displayName: 'Checkout sonic-mgmt-common'
+
+- checkout: sonic-swss-common
+  clean: true
+  submodules: recursive
+  displayName: 'Checkout sonic-swss-common'
+
+# Install dependencies (architecture-aware)
+- template: install-dependencies.yml
+  parameters:
+    buildBranch: ${{ parameters.buildBranch }}
+    arch: ${{ parameters.arch }}
+    installTestDeps: false
+    commonLibArtifact: ${{ parameters.commonLibArtifact }}
+    swssCommonArtifact: ${{ parameters.swssCommonArtifact }}
+
+# Build sonic-mgmt-common and sonic-gnmi
+- script: |
+    set -ex
+    pushd sonic-mgmt-common
+    NO_TEST_BINS=1 dpkg-buildpackage -rfakeroot -b -us -uc
+    popd
+
+    pushd sonic-gnmi
+    ENABLE_TRANSLIB_WRITE=y ENABLE_NATIVE_WRITE=y dpkg-buildpackage -rfakeroot -us -uc -b -j$(nproc) && cp ../*.deb $(Build.ArtifactStagingDirectory)/
+  displayName: "Build ${{ parameters.arch }} deb"
+
+# Clean up downloaded artifacts from staging
+- script: rm -rf $(Build.ArtifactStagingDirectory)/download
+  displayName: "Remove downloaded dependencies from artifacts"
+
+# Publish deb artifacts
+- publish: $(Build.ArtifactStagingDirectory)/
+  artifact: ${{ parameters.publishArtifact }}
+  displayName: "Archive ${{ parameters.arch }} artifacts"
diff --git a/.azure/templates/install-dependencies.yml b/.azure/templates/install-dependencies.yml
index dc88ec587..d56689ff4 100644
--- a/.azure/templates/install-dependencies.yml
+++ b/.azure/templates/install-dependencies.yml
@@ -1,116 +1,152 @@
 # Azure DevOps YAML Template: SONiC Dependencies Installation
 #
-# This template contains all the common dependency installation steps shared between
-# jobs that require CGO/SONiC dependencies (MemoryLeakJob and IntegrationCIJob).
+# Unified template for both amd64 and arm64 architectures.
+# Controls architecture-specific behavior via the `arch` parameter.
 #
-# Usage in pipeline jobs:
+# Usage:
 #   - template: .azure/templates/install-dependencies.yml
 #     parameters:
 #       buildBranch: $(BUILD_BRANCH)
+#       arch: amd64  # or arm64
+#       installTestDeps: true  # install pytest, redis, .NET (for test jobs)
 #
-# Dependencies installed:
-#   - libyang (from sonic-buildimage artifacts)
-#   - libnl packages (from sonic-buildimage artifacts)
+# Dependencies installed (all architectures):
+#   - libyang and libnl packages (from sonic-buildimage.common_libs)
 #   - sonic-swss-common libraries
+#   - sonic yang models (from sonic-buildimage.vs, arch-independent)
 #   - protobuf compiler
 #
-# Note: Building sonic-mgmt-common and sonic-gnmi is NOT included here.
-# Only the IntegrationCIJob needs to build and publish artifacts, so that
-# step is defined directly in azure-pipelines.yml to avoid duplicate builds.
-#
-# This eliminates code duplication and ensures consistent dependency setup
-# across all jobs that need SONiC/CGO dependencies.
+# Additional dependencies when installTestDeps=true (amd64 only):
+#   - pytest, jsonpatch
+#   - redis-server
+#   - .NET SDK 8.0
 
 parameters:
 - name: buildBranch
   type: string
   default: $(BUILD_BRANCH)
+- name: arch
+  type: string
+  default: amd64
+  values:
+  - amd64
+  - arm64
+- name: installTestDeps
+  type: boolean
+  default: false
+- name: commonLibArtifact
+  type: string
+  default: common-lib
+- name: swssCommonArtifact
+  type: string
+  default: sonic-swss-common-bookworm
 
 steps:
-# Download basic dependencies from sonic-buildimage
+# === Download libyang + libnl debs from common_libs ===
 - task: DownloadPipelineArtifact@2
   inputs:
     source: specific
     project: build
-    pipeline: 142
-    artifact: sonic-buildimage.vs
+    pipeline: Azure.sonic-buildimage.common_libs
     runVersion: 'latestFromBranch'
     runBranch: 'refs/heads/${{ parameters.buildBranch }}'
+    path: $(Build.ArtifactStagingDirectory)/download
+    artifact: ${{ parameters.commonLibArtifact }}
     patterns: |
-      target/debs/bookworm/libyang*.deb
-      target/debs/bookworm/libnl*.deb
-      target/python-wheels/bookworm/sonic_yang_models*.whl
-  displayName: "Download bookworm debs"
-
-# Install pytest, jsonpatch, redis and libyang
-- script: |
-    # PYTEST
-    sudo pip3 install -U pytest
-    sudo pip3 install -U jsonpatch
+      target/debs/bookworm/libyang_1.0*.deb
+      target/debs/bookworm/libyang-*_1.0*.deb
+      target/debs/bookworm/libnl-3-200_*.deb
+      target/debs/bookworm/libnl-genl-3-200_*.deb
+      target/debs/bookworm/libnl-route-3-200_*.deb
+      target/debs/bookworm/libnl-nf-3-200_*.deb
+  displayName: "Download libyang and libnl from common_libs (${{ parameters.arch }})"
 
-    # REDIS
-    sudo apt-get update
-    sudo apt-get install -y redis-server
-    sudo sed -ri 's/^# unixsocket/unixsocket/' /etc/redis/redis.conf
-    sudo sed -ri 's/^unixsocketperm .../unixsocketperm 777/' /etc/redis/redis.conf
-    sudo sed -ri 's/redis-server.sock/redis.sock/' /etc/redis/redis.conf
-    sudo service redis-server start
+# === Install test dependencies (amd64 test jobs only) ===
+- ${{ if and(eq(parameters.arch, 'amd64'), eq(parameters.installTestDeps, true)) }}:
+  - script: |
+      # PYTEST
+      sudo pip3 install -U pytest
+      sudo pip3 install -U jsonpatch
 
-    # LIBYANG
-    # Note: Must use version-specific pattern to avoid conflicts with libyang3 packages
-    sudo dpkg -i ../target/debs/bookworm/libyang*1.0.73*.deb
-  displayName: "Install dependency"
+      # REDIS
+      sudo apt-get update
+      sudo apt-get install -y redis-server
+      sudo sed -ri 's/^# unixsocket/unixsocket/' /etc/redis/redis.conf
+      sudo sed -ri 's/^unixsocketperm .../unixsocketperm 777/' /etc/redis/redis.conf
+      sudo sed -ri 's/redis-server.sock/redis.sock/' /etc/redis/redis.conf
+      sudo service redis-server start
+    displayName: "Install test dependencies (pytest, redis)"
 
-# Install sonic yangs
+# === Install libyang + libnl debs ===
 - script: |
-    # SONIC YANGS
     set -ex
-    sudo pip3 install ../target/python-wheels/bookworm/sonic_yang_models*.whl
-  displayName: "Install sonic yangs"
+    sudo apt-get -y purge libnl-3-dev libnl-route-3-dev || true
+    sudo dpkg -i $(find $(Build.ArtifactStagingDirectory)/download -name '*.deb')
+  displayName: "Install libyang and libnl debs"
 
-# Install libswsscommon dependencies
-- script: |
-    # LIBSWSSCOMMON dependencies
-    sudo apt-get -y purge libnl-3-dev libnl-route-3-dev
-    sudo dpkg -i ../target/debs/bookworm/libnl-3-200_*.deb
-    sudo dpkg -i ../target/debs/bookworm/libnl-genl-3-200_*.deb
-    sudo dpkg -i ../target/debs/bookworm/libnl-route-3-200_*.deb
-    sudo dpkg -i ../target/debs/bookworm/libnl-nf-3-200_*.deb
-  displayName: "Install libswsscommon dependencies"
+# === Download and install sonic yang models (arch-independent, from pipeline 142) ===
+- task: DownloadPipelineArtifact@2
+  inputs:
+    source: specific
+    project: build
+    pipeline: 142
+    artifact: sonic-buildimage.vs
+    runVersion: 'latestFromBranch'
+    runBranch: 'refs/heads/${{ parameters.buildBranch }}'
+    patterns: |
+      target/python-wheels/bookworm/sonic_yang_models*.whl
+  displayName: "Download sonic yang models"
 
-# Install .NET Core
 - script: |
     set -ex
-    # Install .NET CORE
-    curl -sSL https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add -
-    sudo apt-add-repository https://packages.microsoft.com/debian/12/prod
-    sudo apt-get update
-    sudo apt-get install -y dotnet-sdk-8.0
-  displayName: "Install .NET CORE"
+    sudo pip3 install ../target/python-wheels/bookworm/sonic_yang_models*.whl
+  displayName: "Install sonic yangs"
+
+# === Install .NET Core (amd64 test jobs only) ===
+- ${{ if and(eq(parameters.arch, 'amd64'), eq(parameters.installTestDeps, true)) }}:
+  - script: |
+      set -ex
+      curl -sSL https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add -
+      sudo apt-add-repository https://packages.microsoft.com/debian/12/prod
+      sudo apt-get update
+      sudo apt-get install -y dotnet-sdk-8.0
+    displayName: "Install .NET CORE"
 
-# Download sonic-swss-common
+# === Download and install sonic-swss-common ===
 - task: DownloadPipelineArtifact@2
   inputs:
     source: specific
     project: build
     pipeline: Azure.sonic-swss-common
-    artifact: sonic-swss-common-bookworm
+    artifact: ${{ parameters.swssCommonArtifact }}
     runVersion: 'latestFromBranch'
     runBranch: 'refs/heads/${{ parameters.buildBranch }}'
-  displayName: "Download sonic-swss-common"
+  displayName: "Download sonic-swss-common (${{ parameters.arch }})"
 
-# Install libswsscommon packages
-- script: |
-    set -ex
-    # LIBSWSSCOMMON
-    sudo dpkg -i libswsscommon_1.0.0_amd64.deb
-    sudo dpkg -i libswsscommon-dev_1.0.0_amd64.deb
-    sudo dpkg -i python3-swsscommon_1.0.0_amd64.deb
-  workingDirectory: $(Pipeline.Workspace)/
-  displayName: 'Install libswsscommon package'
+# amd64: install libswsscommon + python3-swsscommon
+# arm64: install libswsscommon only (no python3 package)
+- ${{ if eq(parameters.arch, 'amd64') }}:
+  - script: |
+      set -ex
+      sudo dpkg -i libswsscommon_1.0.0_amd64.deb
+      sudo dpkg -i libswsscommon-dev_1.0.0_amd64.deb
+      sudo dpkg -i python3-swsscommon_1.0.0_amd64.deb
+    workingDirectory: $(Pipeline.Workspace)/
+    displayName: 'Install libswsscommon (amd64)'
 
-# Install protoc
-- script: |
-    sudo apt-get install -y protobuf-compiler
-    protoc --version
-  displayName: 'Install protoc'
+- ${{ if eq(parameters.arch, 'arm64') }}:
+  - script: |
+      set -ex
+      sudo dpkg -i libswsscommon_1.0.0_arm64.deb
+      sudo dpkg -i libswsscommon-dev_1.0.0_arm64.deb
+    workingDirectory: $(Pipeline.Workspace)/
+    displayName: 'Install libswsscommon (arm64)'
+
+# === Install protoc ===
+# Runs `apt-get update` unconditionally: when installTestDeps is false no
+# earlier step in this template refreshes the apt package index.
+- script: |
+    sudo apt-get update
+    sudo apt-get install -y protobuf-compiler
+    protoc --version
+  displayName: 'Install protoc'
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index ec18ad566..29e292762 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -132,6 +132,8 @@ stages:
     - template: .azure/templates/install-dependencies.yml
       parameters:
         buildBranch: $(BUILD_BRANCH)
+        arch: amd64
+        installTestDeps: true
 
     # Memory leak tests with JUnit XML generation
 
@@ -185,14 +187,12 @@ stages:
       submodules: recursive
       displayName: 'Checkout sonic-swss-common'
 
-    # Integration tests have been separated from pure package tests
-    # Pure package CI now runs in the separate PureCIJob above
-    # The steps below are for integration testing with SONiC dependencies
-
     # Install SONiC dependencies using shared template
     - template: .azure/templates/install-dependencies.yml
       parameters:
         buildBranch: $(BUILD_BRANCH)
+        arch: amd64
+        installTestDeps: true
 
     # Build sonic-mgmt-common and sonic-gnmi
     - script: |
@@ -245,3 +245,25 @@ stages:
         codeCoverageTool: Cobertura
         summaryFileLocation: '$(System.DefaultWorkingDirectory)/sonic-gnmi/coverage.xml'
       displayName: 'Publish coverage'
+
+- stage: BuildArm64
+  dependsOn: []
+  jobs:
+  - job: arm64
+    displayName: "arm64 deb build"
+    timeoutInMinutes: 60
+
+    pool:
+      name: sonicso1ES-arm64
+
+    container:
+      image: sonicdev-microsoft.azurecr.io:443/sonic-slave-bookworm:$(BUILD_BRANCH)-arm64
+
+    steps:
+    - template: .azure/templates/build-deb.yml
+      parameters:
+        buildBranch: $(BUILD_BRANCH)
+        arch: arm64
+        commonLibArtifact: common-lib.arm64
+        swssCommonArtifact: sonic-swss-common-bookworm.arm64
+        publishArtifact: sonic-gnmi.arm64
diff --git a/pkg/interceptors/dpuproxy/proxy.go b/pkg/interceptors/dpuproxy/proxy.go
index 0645dd1b4..12c348482 100644
--- a/pkg/interceptors/dpuproxy/proxy.go
+++ b/pkg/interceptors/dpuproxy/proxy.go
@@ -177,13 +177,18 @@ func (p *DPUProxy) getConnection(ctx context.Context, dpuIndex, ipAddress string
 		target := fmt.Sprintf("%s:%s", ipAddress, port)
 		glog.Infof("[DPUProxy] Trying to connect to DPU%s at %s (attempt %d/%d)", dpuIndex, target, i+1, len(portsToTry))
 
-		// Create connection with keepalive settings for long-lived connections
+		// Create connection with keepalive settings for long-lived connections.
+		// The 30s ping interval stays above the 20s MinTime that the gNMI server
+		// (telemetry/telemetry.go) permits. Pinging faster than the server's
+		// EnforcementPolicy allows (gRPC default MinTime=5m) makes it send GOAWAY
+		// "too_many_pings" during minutes-long operations such as SetPackage.
+		// See: https://github.com/sonic-net/sonic-gnmi/issues/619
 		conn, err := grpc.NewClient(
 			target,
 			grpc.WithTransportCredentials(insecure.NewCredentials()),
 			grpc.WithKeepaliveParams(keepalive.ClientParameters{
-				Time:                10 * time.Second, // Send keepalive ping every 10s
-				Timeout:             3 * time.Second,  // Wait 3s for ping ack before considering connection dead
+				Time:                30 * time.Second, // Send keepalive ping every 30s
+				Timeout:             10 * time.Second, // Wait 10s for ping ack before considering connection dead
 				PermitWithoutStream: true,             // Send pings even when no active RPCs
 			}),
 		)
diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go
index 187b0a5cb..366da0ca0 100644
--- a/telemetry/telemetry.go
+++ b/telemetry/telemetry.go
@@ -527,6 +527,17 @@ func startGNMIServer(telemetryCfg *TelemetryConfig, cfg *gnmi.Config, serverCont
 		MaxConnectionIdle: time.Duration(*telemetryCfg.IdleConnDuration) * time.Second, // duration in which idle connection will be closed, default is inf
 	}
 
+	// Allow clients (e.g. DPU proxy) to send keepalive pings at a
+	// reasonable rate. Without this the default MinTime is 5 minutes,
+	// causing "too_many_pings" GOAWAY for clients that ping more
+	// frequently during long-running operations like SetPackage.
+	// See: https://github.com/sonic-net/sonic-gnmi/issues/619
+	keep_alive_policy := keepalive.EnforcementPolicy{
+		MinTime:             20 * time.Second, // Allow pings as frequent as every 20s
+		PermitWithoutStream: true,             // Allow pings when there are no active streams
+	}
+	commonOpts = append(commonOpts, grpc.KeepaliveEnforcementPolicy(keep_alive_policy))
+
 	tlsOpts = []grpc.ServerOption{grpc.Creds(credentials.NewTLS(tlsCfg))}
 
 	if *telemetryCfg.IdleConnDuration > 0 { // non inf case