diff --git a/django-backend/soroscan/ingest/admin.py b/django-backend/soroscan/ingest/admin.py
index 4741838e..cb2164c9 100644
--- a/django-backend/soroscan/ingest/admin.py
+++ b/django-backend/soroscan/ingest/admin.py
@@ -12,6 +12,7 @@
 import json
 from datetime import datetime
 import requests as http_requests
+import hashlib
 
 from .models import (
     AlertExecution,
@@ -38,6 +39,7 @@
     EventSchema,
     IndexerState,
     IngestError,
+    EventDeduplicationConfig,
     Organization,
     OrganizationBudget,
     OrganizationCostSnapshot,
@@ -1319,3 +1321,90 @@ class ContractABIVersionAdmin(admin.ModelAdmin):
     list_filter = ["has_breaking_changes", "created_at"]
     search_fields = ["contract__contract_id", "contract__name"]
     readonly_fields = ["created_at"]
+
+
+@admin.register(EventDeduplicationConfig)
+class EventDeduplicationConfigAdmin(AdminAuditMixin, admin.ModelAdmin):
+    """Admin for per-contract dedup settings, with a hash preview endpoint."""
+
+    list_display = ["contract", "enabled", "updated_at"]
+    search_fields = ["contract__name", "contract__contract_id"]
+    readonly_fields = ["created_at", "updated_at"]
+
+    # Tokens read from the top level of the submitted event; any other
+    # configured field is looked up inside its nested "payload" object.
+    _TOP_LEVEL_FIELDS = ("event_type", "ledger", "event_index", "tx_hash")
+
+    def get_urls(self):
+        """Return the admin URLs with the dedup test route placed first."""
+        urls = super().get_urls()
+        custom = [
+            path(
+                # The converter captures the ``contract_id`` argument that
+                # ``test_dedup_view`` requires; without it the view cannot
+                # be called and the route cannot be reversed with a pk.
+                "test/<int:contract_id>/",
+                self.admin_site.admin_view(self.test_dedup_view),
+                name="soroscan_dedup_test",
+            ),
+        ]
+        return custom + urls
+
+    def test_dedup_view(self, request, contract_id):
+        """Preview the dedup hash that a sample event would produce.
+
+        The request body is read as a JSON object describing one event.
+        Responds with 404 when the contract does not exist, with
+        ``{"dedup_enabled": false}`` when the contract has no enabled dedup
+        config, and otherwise with the SHA-256 ``dedup_hash`` together with
+        the ``material`` it was computed from.
+        """
+        try:
+            contract = TrackedContract.objects.get(pk=contract_id)
+        except TrackedContract.DoesNotExist:
+            return HttpResponse(
+                json.dumps({"error": "contract not found"}),
+                content_type="application/json",
+                status=404,
+            )
+
+        # Best effort: an empty, undecodable or malformed body is treated as
+        # an empty event instead of being rejected.
+        try:
+            payload = json.loads(request.body.decode("utf-8")) if request.body else {}
+        except ValueError:
+            # UnicodeDecodeError and json.JSONDecodeError both derive from it.
+            payload = {}
+        # Valid JSON need not be an object; lists, strings and numbers have
+        # no ``.get`` and would otherwise raise further down.
+        if not isinstance(payload, dict):
+            payload = {}
+        nested = payload.get("payload")
+        if not isinstance(nested, dict):
+            nested = {}
+
+        config = getattr(contract, "dedup_config", None)
+        if not config or not config.enabled:
+            return HttpResponse(
+                json.dumps({"dedup_enabled": False}),
+                content_type="application/json",
+            )
+
+        # ``fields`` is free-form JSON, so ignore anything that is not a list
+        # of field-name strings.
+        names = config.fields if isinstance(config.fields, list) else []
+        material = {
+            name: (payload if name in self._TOP_LEVEL_FIELDS else nested).get(name)
+            for name in names
+            if isinstance(name, str)
+        }
+
+        # Sorted keys make the serialised material, and so the hash,
+        # independent of the order in which the fields were configured.
+        dedup_material = json.dumps(material, sort_keys=True, default=str)
+        dedup_hash = hashlib.sha256(dedup_material.encode("utf-8")).hexdigest()
+
+        return HttpResponse(
+            json.dumps({"dedup_hash": dedup_hash, "material": material}),
+            content_type="application/json",
+        )
diff --git a/django-backend/soroscan/ingest/migrations/0041_eventdeduplicationconfig.py b/django-backend/soroscan/ingest/migrations/0041_eventdeduplicationconfig.py
new file mode 100644
index 00000000..789211bd
--- /dev/null
+++ b/django-backend/soroscan/ingest/migrations/0041_eventdeduplicationconfig.py
@@ -0,0 +1,61 @@
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("ingest", "0040_alter_trackedcontract_contract_id"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="EventDeduplicationConfig",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "enabled",
+                    models.BooleanField(
+                        default=True,
+                        help_text="Enable deduplication for this contract",
+                    ),
+                ),
+                (
+                    "fields",
+                    models.JSONField(
+                        blank=True,
+                        default=list,
+                        help_text="List of event fields (or special tokens) to include in dedup key",
+                    ),
+                ),
+                (
+                    "created_at",
+                    models.DateTimeField(auto_now_add=True),
+                ),
+                (
+                    "updated_at",
+                    models.DateTimeField(auto_now=True),
+                ),
+                (
+                    "contract",
+                    models.OneToOneField(
+                        help_text="Contract this dedup config applies to",
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="dedup_config",
+                        to="ingest.trackedcontract",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Event Deduplication Config",
+                "verbose_name_plural": "Event Deduplication Configs",
+            },
+        ),
+    ]
diff --git a/django-backend/soroscan/ingest/models.py b/django-backend/soroscan/ingest/models.py
index 85352e05..58ee400c 100644
--- a/django-backend/soroscan/ingest/models.py
+++ b/django-backend/soroscan/ingest/models.py
@@
-1021,6 +1021,69 @@ def __str__(self):
         return f"{self.key}: {self.value}"
 
 
+class EventDeduplicationConfig(models.Model):
+    """
+    Per-contract choice of the event fields that feed the deduplication
+    fingerprint.
+
+    `fields` holds a list of strings. Each one names either a top-level key
+    of the event payload or one of the special tokens 'event_type',
+    'tx_hash', 'ledger' and 'event_index'; together they select the values
+    used to build the dedup material.
+    """
+
+    contract = models.OneToOneField(
+        TrackedContract,
+        on_delete=models.CASCADE,
+        related_name="dedup_config",
+        help_text="Contract this dedup config applies to",
+    )
+    enabled = models.BooleanField(
+        default=True,
+        help_text="Enable deduplication for this contract",
+    )
+    # Names of the fields included when computing the dedup fingerprint.
+    fields = models.JSONField(
+        default=list,
+        blank=True,
+        help_text="List of event fields (or special tokens) to include in dedup key",
+    )
+    created_at = models.DateTimeField(auto_now_add=True)
+    updated_at = models.DateTimeField(auto_now=True)
+
+    class Meta:
+        verbose_name = "Event Deduplication Config"
+        verbose_name_plural = "Event Deduplication Configs"
+
+    def __str__(self):
+        return f"Dedup config for {self.contract.name} (enabled={self.enabled})"
+
+    def clean(self):
+        """
+        Validate that `fields` is a list of non-empty strings.
+
+        A JSONField stores any JSON value, so without this check a string,
+        number or object could be saved where a list of names is expected.
+        A missing value is normalised to an empty list, matching the field
+        default. Raises ValidationError keyed on `fields` otherwise.
+        """
+        # Imported locally so this method does not depend on the module's
+        # existing import block.
+        from django.core.exceptions import ValidationError
+
+        super().clean()
+        if self.fields is None:
+            self.fields = []
+            return
+        is_name_list = isinstance(self.fields, list) and all(
+            isinstance(name, str) and name for name in self.fields
+        )
+        if not is_name_list:
+            raise ValidationError(
+                {"fields": "Must be a list of non-empty field-name strings."}
+            )
+
+
 # ---------------------------------------------------------------------------
 # Issue #X: Tiered rate limiting with per-API-key and per-contract quotas
 # ---------------------------------------------------------------------------
diff --git a/django-backend/soroscan/ingest/tests/test_dedup.py b/django-backend/soroscan/ingest/tests/test_dedup.py
new file mode 100644
index 00000000..33d4d939
--- /dev/null
+++ b/django-backend/soroscan/ingest/tests/test_dedup.py
@@ -0,0 +1,73 @@
+import hashlib
+import json
+
+import pytest
+from django.urls import reverse
+
+from soroscan.ingest.models import EventDeduplicationConfig
+from soroscan.ingest.tests.factories import TrackedContractFactory, UserFactory
+
+
+@pytest.mark.django_db
+def test_event_dedup_config_save_and_update():
+    """A stored config keeps its field list and reflects later edits."""
+    config = EventDeduplicationConfig.objects.create(
+        contract=TrackedContractFactory(),
+        enabled=True,
+        fields=["event_type", "amount", "tx_hash"],
+    )
+    assert config.pk is not None
+    assert config.enabled is True
+    assert config.fields == ["event_type", "amount", "tx_hash"]
+    # Replace the field list and re-read the row from the database.
+    config.fields = ["event_type", "payload_field"]
+    config.save()
+    config.refresh_from_db()
+    assert config.fields == ["event_type", "payload_field"]
+
+
+@pytest.mark.django_db
+def test_admin_test_endpoint_computes_hash_and_handles_missing_payload(client):
+    """The admin test endpoint reports disabled, hashes, and tolerates bad JSON."""
+    tracked = TrackedContractFactory()
+    # The endpoint lives in the admin site, so sign in as a superuser.
+    client.force_login(UserFactory(is_staff=True, is_superuser=True))
+    url = reverse("admin:soroscan_dedup_test", args=[tracked.pk])
+
+    # Without a dedup config the endpoint reports deduplication as disabled.
+    response = client.post(url, data="{}", content_type="application/json")
+    assert response.status_code == 200
+    assert json.loads(response.content).get("dedup_enabled") is False
+
+    # With a config in place the endpoint returns a hash and its material.
+    EventDeduplicationConfig.objects.create(
+        contract=tracked,
+        enabled=True,
+        fields=["event_type", "ledger", "payload_field"],
+    )
+    event = {
+        "event_type": "transfer",
+        "ledger": 12345,
+        "event_index": 0,
+        "tx_hash": "deadbeef",
+        "payload": {"payload_field": "hello", "other": 1},
+    }
+    response = client.post(url, data=json.dumps(event), content_type="application/json")
+    assert response.status_code == 200
+    body = json.loads(response.content)
+    assert "dedup_hash" in body
+    assert "material" in body
+
+    # Rebuild the expected hash with the same steps the admin view uses.
+    expected_material = {
+        "event_type": event.get("event_type"),
+        "ledger": event.get("ledger"),
+        "payload_field": event.get("payload", {}).get("payload_field"),
+    }
+    serialised = json.dumps(expected_material, sort_keys=True, default=str)
+    assert body["dedup_hash"] == hashlib.sha256(serialised.encode("utf-8")).hexdigest()
+
+    # Malformed JSON is tolerated: the view falls back to an empty payload.
+    response = client.post(url, data="not-json", content_type="application/json")
+    assert response.status_code == 200
+    assert "dedup_hash" in json.loads(response.content)
diff --git a/django-backend/soroscan/ingest/tests/test_migration_graph.py b/django-backend/soroscan/ingest/tests/test_migration_graph.py
index b038f488..e097af7d 100644
--- a/django-backend/soroscan/ingest/tests/test_migration_graph.py
+++ b/django-backend/soroscan/ingest/tests/test_migration_graph.py
@@ -31,8 +31,9 @@ def test_single_leaf_node():
     assert len(leaf_nodes) == 1, (
         f"Expected 1 leaf node for 'ingest', found {len(leaf_nodes)}: {leaf_nodes}"
     )
-    assert leaf_nodes[0][1] == "0040_alter_trackedcontract_contract_id", (
-        "Expected leaf node '0040_alter_trackedcontract_contract_id', "
+    # Migration 0041 adds EventDeduplicationConfig and is now the only leaf.
+    assert leaf_nodes[0][1] == "0041_eventdeduplicationconfig", (
+        "Expected leaf node '0041_eventdeduplicationconfig', "
         f"got '{leaf_nodes[0][1]}'"
     )
 