66 changes: 59 additions & 7 deletions internal/app/learning/handlers/confidence.go
@@ -21,9 +21,9 @@ import (
"strings"
"time"

"openappsec.io/smartsync-service/models"
"openappsec.io/errors"
"openappsec.io/log"
"openappsec.io/smartsync-service/models"
)

const (
@@ -38,7 +38,7 @@ type Repository interface {
PostFile(ctx context.Context, tenantID string, path string, compress bool, data interface{}) error
}

//NewConfidenceCalculator creates a new struct of confidence calculator
// NewConfidenceCalculator creates a new struct of confidence calculator
func NewConfidenceCalculator(id models.SyncID, params models.ConfidenceParams, tuningDecisions models.TuningEvents, repo Repository) *ConfidenceCalculator {
cc := &ConfidenceCalculator{
params: params,
@@ -60,7 +60,7 @@ func NewConfidenceCalculator(id models.SyncID, params models.ConfidenceParams, t
return cc
}

//ConfidenceCalculator represents a confidence calculator handler
// ConfidenceCalculator represents a confidence calculator handler
type ConfidenceCalculator struct {
dependencies map[models.SyncID]models.SyncHandler
filterSources *ScannersDetectorHandler
@@ -133,7 +133,7 @@ func (cd *confidenceData) GetOriginalPath(ids models.SyncID) string {
return fmt.Sprintf("/%v/%v/%v/processed/confidence.data", ids.TenantID, ids.AssetID, ids.Type)
}

//GetFilePath returns the path to the state file saved by the service
// GetFilePath returns the path to the state file saved by the service
func (cd *confidenceData) GetFilePath(id models.SyncID) string {
return fmt.Sprintf("/%v/%v/%v/remote/confidence.data", id.TenantID, id.AssetID, id.Type)
}
@@ -171,7 +171,7 @@ func (c *ConfidenceCalculator) MergeData(data interface{}) {
}
}

//NewState returns a struct representing the state
// NewState returns a struct representing the state
func (c *ConfidenceCalculator) NewState() models.State {
return &confidenceData{}
}
@@ -230,7 +230,7 @@ func convertToDataStruct(key string, valuesAndSources map[string]map[string]bool
return ret
}

//ProcessData gets the last state and update the state according to the collected data
// ProcessData gets the last state and update the state according to the collected data
func (c *ConfidenceCalculator) ProcessData(ctx context.Context, state models.State) models.State {
stateStruct := state.(*confidenceData)
stateModel := convertStateToModel(stateStruct)
@@ -436,11 +436,63 @@ func (c *ConfidenceCalculator) calculateConfidenceDelta(
return delta
}

//GetDependencies return the handlers that this handler is dependent on
// GetDependencies return the handlers that this handler is dependent on
func (c *ConfidenceCalculator) GetDependencies() map[models.SyncID]models.SyncHandler {
return c.dependencies
}

// ClearMergedData releases references to large in-memory merged data maps to allow GC to reclaim memory
// after a processing cycle finishes. Should be called once the state has been derived.
func (c *ConfidenceCalculator) ClearMergedData() {
// Replace with fresh empty map instead of nil to avoid nil map assignment panics in future merges
c.md.log = mms{}
}

// ProcessDataFromCentralData processes data from the central data source
func (c *ConfidenceCalculator) ProcessDataFromCentralData(ctx context.Context, state models.State, mergedData *models.CentralData) models.State {
log.WithContext(ctx).Debugf("ConfidenceCalculator.ProcessDataFromCentralData id: %+v", c.id)
c.md.log = mms{} // reset
// Use pointer-based deduplication (already done at unmarshal time)
for key, entry := range mergedData.Logger {
if _, ok := c.md.log[key]; !ok {
c.md.log[key] = map[string]map[string]bool{}
}
if c.id.Type == models.TypesConfidence {
for typeKey, sourcePtrs := range entry.Types {
if typeKey == "long_random_text" {
continue
}
if _, ok := c.md.log[key][typeKey]; !ok {
c.md.log[key][typeKey] = map[string]bool{}
}
// Dereference pointers - strings already deduplicated at unmarshal
for _, srcPtr := range sourcePtrs {
c.md.log[key][typeKey][*srcPtr] = true
}
}
} else {
for indKey, sourcePtrs := range entry.Indicators {
if _, ok := c.md.log[key][indKey]; !ok {
c.md.log[key][indKey] = map[string]bool{}
}
// Dereference pointers - strings already deduplicated at unmarshal
for _, srcPtr := range sourcePtrs {
c.md.log[key][indKey][*srcPtr] = true
}
}
}
if len(entry.TotalSources) > 0 {
allSources := map[string]bool{}
// Dereference pointers - strings already deduplicated at unmarshal
for _, srcPtr := range entry.TotalSources {
allSources[*srcPtr] = true
}
c.md.log[key][c.params.NullObject] = allSources
}
}
return c.ProcessData(ctx, state)
}

func (c *ConfidenceCalculator) isParamBenign(key string) bool {
keySplit := strings.SplitN(key, "#", 2)
param := key
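
Note (not part of the diff): the new ProcessDataFromCentralData above consumes source lists that were deduplicated at unmarshal time as []*string and dereferences them into per-key boolean sets on c.md.log. The standalone sketch below illustrates that pattern; the loggerEntry type and its field names are simplified stand-ins for models.CentralData, not the actual model.

```go
package main

import "fmt"

// loggerEntry is a simplified stand-in for one mergedData.Logger entry:
// sources arrive as []*string so identical strings share one allocation.
type loggerEntry struct {
	Indicators   map[string][]*string
	TotalSources []*string
}

func main() {
	src := "203.0.113.7" // hypothetical source identifier
	entry := loggerEntry{
		Indicators:   map[string][]*string{"sql_injection": {&src}},
		TotalSources: []*string{&src},
	}

	// Dereference the pointers into a set, as the handler does with
	// c.md.log[key][indKey][*srcPtr] = true.
	set := map[string]bool{}
	for _, p := range entry.Indicators["sql_injection"] {
		set[*p] = true
	}
	fmt.Println(set) // map[203.0.113.7:true]
}
```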
15 changes: 14 additions & 1 deletion internal/app/learning/handlers/scanners_detector.go
@@ -78,7 +78,20 @@ func (s *ScannersDetectorHandler) SetCompressionEnabled() {
// MergeData merges the data from agent into struct member
func (s *ScannersDetectorHandler) MergeData(data interface{}) {
monitor := data.(*scannersMonitor)
mergeAndConvertCerealToGo(monitor.Monitor, s.mergedMonitors.Monitor)
mergeAndConvertCerealToGo(monitor.Monitor, s.mergedMonitors.Monitor, false)
}

// ClearMergedData releases references to merged data to free memory between runs.
func (s *ScannersDetectorHandler) ClearMergedData() {
s.mergedMonitors = scannersDetectMergedData{Monitor: map[string]map[string]map[string]bool{}}
}

// ProcessDataFromCentralData processes data from central data
// ScannersDetector is handled as a dependency in indicators confidence, not directly from central data
func (s *ScannersDetectorHandler) ProcessDataFromCentralData(ctx context.Context, state models.State, mergedData *models.CentralData) models.State {
// ScannersDetector uses its own monitor data structure, not CentralData
// This is a pass-through to ProcessData for interface compliance
return s.ProcessData(ctx, state)
}

//NewState returns a struct representing the state
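
Note (not part of the diff): both ClearMergedData implementations reset their state to a fresh empty map (or a struct containing one) rather than nil, because assigning into a nil map panics while an empty map accepts writes. A standalone illustration of that design choice:

```go
package main

import "fmt"

func main() {
	var nilSet map[string]bool    // nil map: reads are safe, writes panic
	emptySet := map[string]bool{} // empty map: writes are fine

	fmt.Println(len(nilSet), nilSet["x"]) // 0 false — reading a nil map is safe

	emptySet["x"] = true // OK: future merges can write immediately
	fmt.Println(emptySet)

	// nilSet["x"] = true // would panic: assignment to entry in nil map
}
```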
118 changes: 104 additions & 14 deletions internal/app/learning/handlers/trusted_sources.go
@@ -17,24 +17,26 @@ import (
"context"
"fmt"

"openappsec.io/log"
"openappsec.io/smartsync-service/models"
)

//TrustedSourcesHandler represents a trusted sources handler
// TrustedSourcesHandler represents a trusted sources handler
type TrustedSourcesHandler struct {
id models.SyncID
trustedLog mms
}

type trustedSourcesState struct {
Logger []mapToMapToSet `json:"logger"`
}

//GetFilePath returns the path to the state file saved by the service
// GetFilePath returns the path to the state file saved by the service
func (ts *trustedSourcesState) GetFilePath(id models.SyncID) string {
return fmt.Sprintf("/%v/%v/%v/remote/data.data", id.TenantID, id.AssetID, id.Type)
}

//GetOriginalPath returns the path to the state file saved by the agent
// GetOriginalPath returns the path to the state file saved by the agent
func (ts *trustedSourcesState) GetOriginalPath(id models.SyncID) string {
return fmt.Sprintf("/%v/%v/%v/processed/data.data", id.TenantID, id.AssetID, id.Type)
}
@@ -43,9 +45,9 @@ func (ts *trustedSourcesState) ShouldRebase() bool {
return false
}

//NewTrustedSources creates new trusted sources handler
func NewTrustedSources() *TrustedSourcesHandler {
return &TrustedSourcesHandler{trustedLog: mms{}}
// NewTrustedSources creates new trusted sources handler
func NewTrustedSources(id models.SyncID) *TrustedSourcesHandler {
return &TrustedSourcesHandler{id: id, trustedLog: mms{}}
}

// NewDataStruct returns a struct representation of the data put by agent
@@ -56,10 +58,10 @@ func (ts *TrustedSourcesHandler) NewDataStruct() interface{} {
// MergeData merges the data from agent into struct member
func (ts *TrustedSourcesHandler) MergeData(data interface{}) {
state := data.(*trustedSourcesState)
mergeAndConvertCerealToGo(state.Logger, ts.trustedLog)
mergeAndConvertCerealToGo(state.Logger, ts.trustedLog, false)
}

//NewState returns a struct representing the state
// NewState returns a struct representing the state
func (ts *TrustedSourcesHandler) NewState() models.State {
return &trustedSourcesState{[]mapToMapToSet{}}
}
@@ -69,9 +71,97 @@ func (ts *TrustedSourcesHandler) SetCompressionEnabled() {
// do nothing
}

//ProcessData gets the last state and update the state according to the collected data
func (ts *TrustedSourcesHandler) ProcessData(ctx context.Context, _ models.State) models.State {
state := &trustedSourcesState{[]mapToMapToSet{}}
// ClearMergedData releases references to merged trusted sources data to free memory between runs.
func (ts *TrustedSourcesHandler) ClearMergedData() {
ts.trustedLog = mms{}
}

func containsTrustedSourcePtrs(sources []*string, trustedMap map[string]bool) bool {
// Check if any source is trusted
for _, srcPtr := range sources {
if _, ok := trustedMap[*srcPtr]; ok {
return true
}
}
return false
}

// ProcessDataFromCentralData processes data from central data
func (ts *TrustedSourcesHandler) ProcessDataFromCentralData(ctx context.Context, state models.State, mergedData *models.CentralData) models.State {
log.WithContext(ctx).Debugf("TrustedSourcesHandler.ProcessDataFromCentralData ids: %+v", ts.id)
ts.trustedLog = mms{} // reset
// Use pointer-based deduplication (already done at unmarshal time)
if len(mergedData.TrustedSources) == 0 {
// No trusted sources reported, return empty state
log.WithContext(ctx).Debugf("TrustedSourcesHandler.ProcessDataFromCentralData no trusted sources reported, returning empty state")
return ts.ProcessData(ctx, state)
}
// Create trusted sources lookup map for O(1) checks - dereference pointers
trustedMap := make(map[string]bool, len(mergedData.TrustedSources))
for _, srcPtr := range mergedData.TrustedSources {
trustedMap[*srcPtr] = true
}

for key, entry := range mergedData.Logger {
if !containsTrustedSourcePtrs(entry.TotalSources, trustedMap) {
// skip if no trusted source reported this key
continue
}
if ts.id.Type == models.TypesTrusted {
for typeKey, sourcePtrs := range entry.Types {
// check if any source is trusted else skip
for _, srcPtr := range sourcePtrs {
src := *srcPtr
// Only add src if it was reported in trusted sources
if trustedMap[src] {
if _, ok := ts.trustedLog[key]; !ok {
ts.trustedLog[key] = map[string]map[string]bool{}
}
if _, ok := ts.trustedLog[key][typeKey]; !ok {
ts.trustedLog[key][typeKey] = map[string]bool{}
}
log.WithContext(ctx).Debugf("TrustedSourcesHandler.ProcessDataFromCentralData adding src: %v to key: %v typeKey: %v", src, key, typeKey)
ts.trustedLog[key][typeKey][src] = true
}
}
}
} else {
for indKey, sourcePtrs := range entry.Indicators {
for _, srcPtr := range sourcePtrs {
src := *srcPtr
// Only add src if it was reported in trusted sources
if trustedMap[src] {
if _, ok := ts.trustedLog[key]; !ok {
ts.trustedLog[key] = map[string]map[string]bool{}
}
if _, ok := ts.trustedLog[key][indKey]; !ok {
ts.trustedLog[key][indKey] = map[string]bool{}
}
log.WithContext(ctx).Debugf("TrustedSourcesHandler.ProcessDataFromCentralData adding src: %v to key: %v indKey: %v", src, key, indKey)
ts.trustedLog[key][indKey][src] = true
}
}
}
}
}
// Return processed state
return ts.ProcessData(ctx, state)
}

// ProcessData gets the last state and update the state according to the collected data
func (ts *TrustedSourcesHandler) ProcessData(ctx context.Context, stateIfs models.State) models.State {
log.WithContext(ctx).Debugf("TrustedSourcesHandler.ProcessData ids: %+v", ts.id)
// Handle nil state - treat it as empty state
var existingState *trustedSourcesState
if stateIfs == nil {
log.WithContext(ctx).Debugf("TrustedSourcesHandler.ProcessData received nil state, using empty state")
existingState = &trustedSourcesState{[]mapToMapToSet{}}
} else {
existingState = stateIfs.(*trustedSourcesState)
}
// merge the state with the trusted log
mergeAndConvertCerealToGo(existingState.Logger, ts.trustedLog, false)
newState := &trustedSourcesState{[]mapToMapToSet{}}
for key, keyData := range ts.trustedLog {
keyObj := mapToMapToSet{Key: key}
for value, sources := range keyData {
@@ -81,12 +171,12 @@ func (ts *TrustedSourcesHandler) ProcessData(ctx context.Context, _ models.State
}
keyObj.Value = append(keyObj.Value, valueObj)
}
state.Logger = append(state.Logger, keyObj)
newState.Logger = append(newState.Logger, keyObj)
}
return state
return newState
}

//GetDependencies return the handlers that this handler is dependent on
// GetDependencies return the handlers that this handler is dependent on
func (ts *TrustedSourcesHandler) GetDependencies() map[models.SyncID]models.SyncHandler {
return nil
}
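
Note (not part of the diff): the trusted-source filtering above builds an O(1) lookup map from mergedData.TrustedSources once, then keeps a key only if at least one of its reporting sources appears in that map. A standalone sketch of containsTrustedSourcePtrs and the lookup-map construction, with hypothetical source names:

```go
package main

import "fmt"

// containsTrustedSourcePtrs mirrors the helper in the diff: true if any
// pointed-to source is present in the trusted lookup map.
func containsTrustedSourcePtrs(sources []*string, trustedMap map[string]bool) bool {
	for _, srcPtr := range sources {
		if trustedMap[*srcPtr] {
			return true
		}
	}
	return false
}

func main() {
	trusted, untrusted := "lb-proxy-1", "scanner-7" // hypothetical sources

	// Lookup map built once, as in ProcessDataFromCentralData.
	trustedMap := map[string]bool{trusted: true}

	fmt.Println(containsTrustedSourcePtrs([]*string{&untrusted}, trustedMap))           // false -> key skipped
	fmt.Println(containsTrustedSourcePtrs([]*string{&trusted, &untrusted}, trustedMap)) // true  -> key kept, sources filtered per entry
}
```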
33 changes: 29 additions & 4 deletions internal/app/learning/handlers/utils.go
@@ -13,21 +13,46 @@

package handlers

import (
"strings"
)

// Configuration defines the get configuration interface
type Configuration interface {
GetString(key string) (string, error)
}

// mms type defines a map to map to set.
type mms map[string]map[string]map[string]bool

func mergeAndConvertCerealToGo(src []mapToMapToSet, dst mms) {
func mergeAndConvertCerealToGo(src []mapToMapToSet, dst mms, shouldStrip bool) {
for _, srcData := range src {
if _, ok := dst[srcData.Key]; !ok {
dst[srcData.Key] = map[string]map[string]bool{}
}
for _, keyData := range srcData.Value {
if _, ok := dst[srcData.Key][keyData.Key]; !ok {
dst[srcData.Key][keyData.Key] = map[string]bool{}
key := keyData.Key
if shouldStrip {
key = stripSuffix(key)
}
if _, ok := dst[srcData.Key][key]; !ok {
dst[srcData.Key][key] = map[string]bool{}
}
for _, indicator := range keyData.Value {
dst[srcData.Key][keyData.Key][indicator] = true
dst[srcData.Key][key][indicator] = true
}
}
}
}

func stripSuffix(key string) string {
suffices := []string{".pipe", ".sem", ".amp", ".comma", ".asterisk"}
strippedKey := ""
for _, suffix := range suffices {
strippedKey = strings.TrimSuffix(key, suffix)
if strippedKey != key {
return stripSuffix(strippedKey)
}
}
return strippedKey
}
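
Note (not part of the diff): stripSuffix above removes the listed suffixes recursively, so a key carrying several stacked markers collapses to the bare parameter name, while a key with no matching suffix comes back unchanged. A short usage sketch with hypothetical keys, restating the function in a slightly condensed but behaviorally equivalent form:

```go
package main

import (
	"fmt"
	"strings"
)

func stripSuffix(key string) string {
	suffices := []string{".pipe", ".sem", ".amp", ".comma", ".asterisk"}
	for _, suffix := range suffices {
		if stripped := strings.TrimSuffix(key, suffix); stripped != key {
			return stripSuffix(stripped) // recurse to drop stacked suffixes
		}
	}
	return key // no suffix matched
}

func main() {
	fmt.Println(stripSuffix("user_agent.comma.pipe")) // "user_agent"
	fmt.Println(stripSuffix("referer"))               // unchanged
}
```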