Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions django-backend/soroscan/ingest/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import json
from datetime import datetime
import requests as http_requests
import hashlib

from .models import (
AlertExecution,
Expand All @@ -38,6 +39,7 @@
EventSchema,
IndexerState,
IngestError,
EventDeduplicationConfig,
Organization,
OrganizationBudget,
OrganizationCostSnapshot,
Expand Down Expand Up @@ -1319,3 +1321,52 @@ class ContractABIVersionAdmin(admin.ModelAdmin):
list_filter = ["has_breaking_changes", "created_at"]
search_fields = ["contract__contract_id", "contract__name"]
readonly_fields = ["created_at"]


@admin.register(EventDeduplicationConfig)
class EventDeduplicationConfigAdmin(AdminAuditMixin, admin.ModelAdmin):
    """Admin for per-contract dedup configs, with a dry-run fingerprint endpoint.

    Besides the stock change list, this admin exposes
    ``test/<contract_id>/`` (URL name ``soroscan_dedup_test``), which computes
    the dedup hash a posted sample event would get under the contract's
    current configuration.
    """

    list_display = ["contract", "enabled", "updated_at"]
    search_fields = ["contract__name", "contract__contract_id"]
    readonly_fields = ["created_at", "updated_at"]

    # Field names read from the top level of the posted event. Any other
    # configured name is looked up inside the event's nested "payload" object.
    _TOP_LEVEL_FIELDS = ("event_type", "ledger", "event_index", "tx_hash")

    def get_urls(self):
        """Return the admin URLs with the dedup test route prepended."""
        urls = super().get_urls()
        custom = [
            path(
                "test/<int:contract_id>/",
                self.admin_site.admin_view(self.test_dedup_view),
                name="soroscan_dedup_test",
            ),
        ]
        # Custom routes are listed before the stock ModelAdmin patterns.
        return custom + urls

    @staticmethod
    def _json_response(data, status=200):
        """Serialize ``data`` as a JSON ``HttpResponse`` with the given status."""
        return HttpResponse(
            json.dumps(data),
            content_type="application/json",
            status=status,
        )

    @staticmethod
    def _load_event(request):
        """Decode the request body into an event dict.

        Returns ``{}`` when the body is empty, is not valid UTF-8 JSON, or is
        valid JSON that is not an object (a list, string, number, ...), so the
        caller can always use ``.get`` on the result.
        """
        raw = request.body
        if not raw:
            return {}
        try:
            event = json.loads(raw.decode("utf-8"))
        except (ValueError, RecursionError):
            # UnicodeDecodeError and json.JSONDecodeError are both ValueError
            # subclasses; RecursionError covers pathologically nested input.
            return {}
        return event if isinstance(event, dict) else {}

    def test_dedup_view(self, request, contract_id):
        """Compute the dedup fingerprint for a sample event posted as JSON.

        Args:
            request: The admin request; its body is the sample event.
            contract_id: Primary key of the ``TrackedContract`` to test.

        Returns:
            A JSON ``HttpResponse``:
            * 404 ``{"error": "contract not found"}`` for an unknown contract;
            * ``{"dedup_enabled": false}`` when the contract has no config or
              the config is disabled;
            * otherwise ``{"dedup_hash": ..., "material": ...}``, where
              ``material`` maps each configured field to the value found in
              the event (``null`` when absent) and ``dedup_hash`` is the
              SHA-256 hex digest of that mapping serialized with sorted keys.
        """
        try:
            contract = TrackedContract.objects.get(pk=contract_id)
        except TrackedContract.DoesNotExist:
            return self._json_response({"error": "contract not found"}, status=404)

        event = self._load_event(request)

        # getattr with a default: a contract with no config row is reported
        # as disabled instead of raising.
        config = getattr(contract, "dedup_config", None)
        if not config or not config.enabled:
            return self._json_response({"dedup_enabled": False})

        # A missing, null or non-object "payload" is treated as empty so that
        # payload-level fields resolve to None rather than crashing the view.
        nested = event.get("payload")
        if not isinstance(nested, dict):
            nested = {}

        material = {}
        # `or []` tolerates a config whose JSON value was saved as null.
        for field_name in config.fields or []:
            if field_name in self._TOP_LEVEL_FIELDS:
                material[field_name] = event.get(field_name)
            else:
                material[field_name] = nested.get(field_name)

        # sort_keys makes the serialization (and so the hash) independent of
        # the order in which fields were configured.
        dedup_material = json.dumps(material, sort_keys=True, default=str)
        dedup_hash = hashlib.sha256(dedup_material.encode("utf-8")).hexdigest()

        return self._json_response({"dedup_hash": dedup_hash, "material": material})
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create the ``EventDeduplicationConfig`` model, linked one-to-one to a tracked contract."""

    dependencies = [("ingest", "0040_alter_trackedcontract_contract_id")]

    operations = [
        migrations.CreateModel(
            name="EventDeduplicationConfig",
            fields=[
                # Surrogate primary key.
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                # Per-contract on/off switch.
                (
                    "enabled",
                    models.BooleanField(
                        default=True, help_text="Enable deduplication for this contract"
                    ),
                ),
                # JSON list of field names that feed the dedup key.
                (
                    "fields",
                    models.JSONField(
                        blank=True,
                        default=list,
                        help_text="List of event fields (or special tokens) to include in dedup key",
                    ),
                ),
                # Bookkeeping timestamps.
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                # Owning contract; the config row is deleted together with it.
                (
                    "contract",
                    models.OneToOneField(
                        help_text="Contract this dedup config applies to",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="dedup_config",
                        to="ingest.trackedcontract",
                    ),
                ),
            ],
            options={
                "verbose_name": "Event Deduplication Config",
                "verbose_name_plural": "Event Deduplication Configs",
            },
        ),
    ]
30 changes: 30 additions & 0 deletions django-backend/soroscan/ingest/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,36 @@ def __str__(self):
return f"{self.key}: {self.value}"


class EventDeduplicationConfig(models.Model):
    """
    Deduplication settings attached one-to-one to a tracked contract.

    `fields` is a JSON list of strings. Each entry names either a top-level
    key of the event payload or one of the special tokens 'event_type',
    'tx_hash', 'ledger', 'event_index'; together they select the values used
    to build the dedup material for the contract's events.
    """

    # Owning contract; reachable from the contract side as `dedup_config`.
    contract = models.OneToOneField(
        TrackedContract,
        on_delete=models.CASCADE,
        related_name="dedup_config",
        help_text="Contract this dedup config applies to",
    )
    # Switch that turns deduplication on or off for the contract.
    enabled = models.BooleanField(
        default=True,
        help_text="Enable deduplication for this contract",
    )
    # Names of the fields that go into the dedup fingerprint.
    fields = models.JSONField(
        default=list,
        blank=True,
        help_text="List of event fields (or special tokens) to include in dedup key",
    )
    # Set once on insert / refreshed on every save.
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        verbose_name = "Event Deduplication Config"
        verbose_name_plural = "Event Deduplication Configs"

    def __str__(self):
        """Return a short label with the contract name and the enabled flag."""
        return f"Dedup config for {self.contract.name} (enabled={self.enabled})"


# ---------------------------------------------------------------------------
# Issue #X: Tiered rate limiting with per-API-key and per-contract quotas
# ---------------------------------------------------------------------------
Expand Down
73 changes: 73 additions & 0 deletions django-backend/soroscan/ingest/tests/test_dedup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json
import hashlib

import pytest
from django.urls import reverse

from soroscan.ingest.models import EventDeduplicationConfig
from soroscan.ingest.tests.factories import TrackedContractFactory, UserFactory


@pytest.mark.django_db
def test_event_dedup_config_save_and_update():
    """A config row keeps the fields it was created with and persists a later edit."""
    initial_fields = ["event_type", "amount", "tx_hash"]
    replacement_fields = ["event_type", "payload_field"]

    cfg = EventDeduplicationConfig.objects.create(
        contract=TrackedContractFactory(),
        enabled=True,
        fields=list(initial_fields),
    )

    # Freshly created row.
    assert cfg.pk is not None
    assert cfg.enabled is True
    assert cfg.fields == initial_fields

    # Overwrite the field list and confirm the stored value round-trips.
    cfg.fields = list(replacement_fields)
    cfg.save()
    cfg.refresh_from_db()
    assert cfg.fields == replacement_fields


@pytest.mark.django_db
def test_admin_test_endpoint_computes_hash_and_handles_missing_payload(client):
    """Exercise the admin dedup test endpoint: disabled, hashed, and malformed-body cases."""
    contract = TrackedContractFactory()
    # The endpoint lives under the admin site, so log in as a superuser.
    client.force_login(UserFactory(is_staff=True, is_superuser=True))
    endpoint = reverse("admin:soroscan_dedup_test", args=[contract.pk])

    def post_and_decode(raw_body):
        # Every scenario below expects a 200 carrying a JSON document.
        response = client.post(endpoint, data=raw_body, content_type="application/json")
        assert response.status_code == 200
        return json.loads(response.content)

    # Without a config row the endpoint reports dedup as disabled.
    result = post_and_decode("{}")
    assert result.get("dedup_enabled") is False

    # With a config in place, a well-formed event yields a hash and its material.
    EventDeduplicationConfig.objects.create(
        contract=contract,
        enabled=True,
        fields=["event_type", "ledger", "payload_field"],
    )
    event = {
        "event_type": "transfer",
        "ledger": 12345,
        "event_index": 0,
        "tx_hash": "deadbeef",
        "payload": {"payload_field": "hello", "other": 1},
    }
    result = post_and_decode(json.dumps(event))
    assert "dedup_hash" in result
    assert "material" in result

    # Rebuild the fingerprint the way the admin view does and compare.
    expected_material = {
        "event_type": event.get("event_type"),
        "ledger": event.get("ledger"),
        "payload_field": event.get("payload", {}).get("payload_field"),
    }
    serialized = json.dumps(expected_material, sort_keys=True, default=str)
    assert result["dedup_hash"] == hashlib.sha256(serialized.encode("utf-8")).hexdigest()

    # A body that is not JSON is tolerated: the view falls back to an empty event.
    result = post_and_decode("not-json")
    assert "dedup_hash" in result
5 changes: 3 additions & 2 deletions django-backend/soroscan/ingest/tests/test_migration_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ def test_single_leaf_node():
assert len(leaf_nodes) == 1, (
f"Expected 1 leaf node for 'ingest', found {len(leaf_nodes)}: {leaf_nodes}"
)
assert leaf_nodes[0][1] == "0040_alter_trackedcontract_contract_id", (
"Expected leaf node '0040_alter_trackedcontract_contract_id', "
# After adding EventDeduplicationConfig the expected single leaf is 0041
assert leaf_nodes[0][1] == "0041_eventdeduplicationconfig", (
"Expected leaf node '0041_eventdeduplicationconfig', "
f"got '{leaf_nodes[0][1]}'"
)

Expand Down
Loading