From c2a9efd243d040ebf941cf9d5177c0aa7d21eb9d Mon Sep 17 00:00:00 2001
From: xaxxoo
Date: Mon, 1 Jun 2026 08:34:05 +0100
Subject: [PATCH] feat: implement blacklist to block indexing of malicious
 contracts

Closes #470

- Add BlacklistedContract model with contract_id (unique, validated Base32
  Soroban address), reason, added_by (FK to User), and created_at fields,
  along with migration 0042.
- Update ingest_latest_events in tasks.py to fetch all blacklisted
  contract_ids before the RPC query, exclude them from the list of active
  contracts sent to get_events, and emit an INFO log entry for each skipped
  contract so operators can audit the decision.
- Add test_blacklisted_contract.py covering: model creation, duplicate
  rejection, invalid address validation, optional fields, ingestion
  exclusion from RPC query, skip logging, and multi-contract scenarios.
---
Review notes for this revision (text in this section is ignored by git am):
* models.py: the BlacklistedContract docstring no longer calls the skip
  "silent", since every skip is logged at INFO.
* tests: the duplicate test now expects django.db.IntegrityError instead of
  a bare Exception.
* tests: the RPC-filter tests assert that get_events was called instead of
  wrapping their assertions in "if call_kwargs:", which let them pass
  without checking anything; the exclusion test adds a non-blacklisted
  contract so there is always a filter to inspect.
* The blob ids on the "index" lines were not regenerated for this revision.

 .../migrations/0042_blacklistedcontract.py    |  70 +++++++++
 django-backend/soroscan/ingest/models.py      |  45 ++++++
 django-backend/soroscan/ingest/tasks.py       |  14 +-
 .../ingest/tests/test_blacklisted_contract.py | 153 ++++++++++++++++++
 4 files changed, 281 insertions(+), 1 deletion(-)
 create mode 100644 django-backend/soroscan/ingest/migrations/0042_blacklistedcontract.py
 create mode 100644 django-backend/soroscan/ingest/tests/test_blacklisted_contract.py

diff --git a/django-backend/soroscan/ingest/migrations/0042_blacklistedcontract.py b/django-backend/soroscan/ingest/migrations/0042_blacklistedcontract.py
new file mode 100644
index 00000000..f805840d
--- /dev/null
+++ b/django-backend/soroscan/ingest/migrations/0042_blacklistedcontract.py
@@ -0,0 +1,70 @@
+from django.conf import settings
+from django.db import migrations, models
+import django.core.validators
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+        ("ingest", "0041_eventdeduplicationconfig"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="BlacklistedContract",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "contract_id",
+                    models.CharField(
+                        db_index=True,
+                        help_text="Stellar contract address to block from indexing (C...)",
+                        max_length=56,
+                        unique=True,
+                        validators=[
+                            django.core.validators.RegexValidator(
+                                message="Contract address must start with 'C' and be exactly 56 characters using valid Base32 characters (A-Z, 2-7).",
+                                regex="^C[A-Z2-7]{55}$",
+                            )
+                        ],
+                    ),
+                ),
+                (
+                    "reason",
+                    models.TextField(
+                        blank=True,
+                        help_text="Human-readable explanation of why this contract is blacklisted",
+                    ),
+                ),
+                (
+                    "created_at",
+                    models.DateTimeField(auto_now_add=True, db_index=True),
+                ),
+                (
+                    "added_by",
+                    models.ForeignKey(
+                        blank=True,
+                        help_text="User who added this entry",
+                        null=True,
+                        on_delete=django.db.models.deletion.SET_NULL,
+                        related_name="blacklisted_contracts",
+                        to=settings.AUTH_USER_MODEL,
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Blacklisted Contract",
+                "verbose_name_plural": "Blacklisted Contracts",
+                "ordering": ["-created_at"],
+            },
+        ),
+    ]
diff --git a/django-backend/soroscan/ingest/models.py b/django-backend/soroscan/ingest/models.py
index 58ee400c..c123223e 100644
--- a/django-backend/soroscan/ingest/models.py
+++ b/django-backend/soroscan/ingest/models.py
@@ -2141,3 +2141,48 @@ class Meta:
 
     def __str__(self):
         return f"ABI v{self.version_number} for {self.contract.contract_id[:8]}... (ledger {self.valid_from_ledger}–{self.valid_to_ledger or '∞'})"
+
+
+class BlacklistedContract(models.Model):
+    """
+    Contracts whose events must not be indexed.
+
+    Any contract_id present in this table is skipped by the ingestion
+    loop, regardless of whether it also exists in TrackedContract.
+    An INFO log entry is written for each skipped contract on every
+    ingestion run so operators can audit the decision.
+    """
+
+    contract_id = models.CharField(
+        max_length=56,
+        unique=True,
+        db_index=True,
+        validators=[
+            RegexValidator(
+                regex=r"^C[A-Z2-7]{55}$",
+                message="Contract address must start with 'C' and be exactly 56 characters using valid Base32 characters (A-Z, 2-7).",
+            )
+        ],
+        help_text="Stellar contract address to block from indexing (C...)",
+    )
+    reason = models.TextField(
+        blank=True,
+        help_text="Human-readable explanation of why this contract is blacklisted",
+    )
+    added_by = models.ForeignKey(
+        User,
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+        related_name="blacklisted_contracts",
+        help_text="User who added this entry",
+    )
+    created_at = models.DateTimeField(auto_now_add=True, db_index=True)
+
+    class Meta:
+        verbose_name = "Blacklisted Contract"
+        verbose_name_plural = "Blacklisted Contracts"
+        ordering = ["-created_at"]
+
+    def __str__(self):
+        return f"Blacklisted({self.contract_id[:8]}...)"
diff --git a/django-backend/soroscan/ingest/tasks.py b/django-backend/soroscan/ingest/tasks.py
index a8f7bbf0..08dae1b2 100644
--- a/django-backend/soroscan/ingest/tasks.py
+++ b/django-backend/soroscan/ingest/tasks.py
@@ -39,6 +39,7 @@
     _SENTINEL,
 )
 from .models import (
+    BlacklistedContract,
     ContractABI,
     ContractEvent,
     ContractSigningKey,
@@ -1977,11 +1978,22 @@ def ingest_latest_events() -> int:
     new_events = 0
 
     try:
-        contract_ids = list(
+        blacklisted_ids = set(
+            BlacklistedContract.objects.values_list("contract_id", flat=True)
+        )
+        all_active_ids = list(
             TrackedContract.objects.filter(is_active=True).values_list(
                 "contract_id", flat=True
             )
         )
+        for cid in all_active_ids:
+            if cid in blacklisted_ids:
+                logger.info(
+                    "Skipping blacklisted contract %s — not indexing events",
+                    cid,
+                    extra={"contract_id": cid, "reason": "blacklisted"},
+                )
+        contract_ids = [cid for cid in all_active_ids if cid not in blacklisted_ids]
 
         # Always update the gauge, even when there are no active contracts.
         m.active_contracts_gauge.set(len(contract_ids))
diff --git a/django-backend/soroscan/ingest/tests/test_blacklisted_contract.py b/django-backend/soroscan/ingest/tests/test_blacklisted_contract.py
new file mode 100644
index 00000000..5445c091
--- /dev/null
+++ b/django-backend/soroscan/ingest/tests/test_blacklisted_contract.py
@@ -0,0 +1,153 @@
+"""
+Tests for BlacklistedContract model and ingestion-loop skip logic (issue #470).
+"""
+import logging
+from unittest.mock import MagicMock, patch
+
+import pytest
+from django.core.exceptions import ValidationError
+from django.db import IntegrityError
+
+from soroscan.ingest.models import BlacklistedContract, TrackedContract
+from soroscan.ingest.tests.factories import TrackedContractFactory, UserFactory
+
+VALID_ID = "C" + "A" * 55
+
+
+# ── Model tests ───────────────────────────────────────────────────────────────
+
+@pytest.mark.django_db
+class TestBlacklistedContractModel:
+    def test_create_with_valid_address(self):
+        entry = BlacklistedContract.objects.create(
+            contract_id=VALID_ID,
+            reason="emits spam events",
+        )
+        assert entry.pk is not None
+        assert str(entry).startswith("Blacklisted(")
+
+    def test_duplicate_contract_id_raises(self):
+        BlacklistedContract.objects.create(contract_id=VALID_ID)
+        with pytest.raises(IntegrityError):
+            BlacklistedContract.objects.create(contract_id=VALID_ID)
+
+    def test_invalid_address_rejected(self):
+        entry = BlacklistedContract(contract_id="GABCDEFGHIJKLMNOPQRSTUVWXYZ234567ABCDEFGHIJKLMNOPQRSTU")
+        with pytest.raises(ValidationError):
+            entry.full_clean()
+
+    def test_reason_is_optional(self):
+        entry = BlacklistedContract.objects.create(contract_id=VALID_ID)
+        assert entry.reason == ""
+
+    def test_added_by_nullable(self):
+        entry = BlacklistedContract.objects.create(contract_id=VALID_ID)
+        assert entry.added_by is None
+
+    def test_added_by_links_user(self):
+        user = UserFactory()
+        entry = BlacklistedContract.objects.create(contract_id=VALID_ID, added_by=user)
+        assert entry.added_by == user
+
+
+# ── Ingestion skip logic tests ────────────────────────────────────────────────
+
+@pytest.mark.django_db
+class TestIngestionBlacklistSkip:
+    """
+    Verify that ingest_latest_events excludes blacklisted contract_ids from
+    the RPC query and emits a log entry for each skipped contract.
+    """
+
+    def _make_contract(self, **kwargs) -> TrackedContract:
+        return TrackedContractFactory(**kwargs)
+
+    def test_blacklisted_contract_excluded_from_rpc_query(self):
+        blocked = self._make_contract()
+        # A second, non-blacklisted contract guarantees at least one id is
+        # left to query, so the filter sent to get_events can be inspected.
+        allowed = self._make_contract()
+        BlacklistedContract.objects.create(
+            contract_id=blocked.contract_id,
+            reason="test spam",
+        )
+
+        mock_server = MagicMock()
+        mock_server.get_events.return_value = MagicMock(events=[])
+
+        with patch("soroscan.ingest.tasks.SorobanServer", return_value=mock_server), \
+             patch("soroscan.ingest.tasks.IndexerState.objects.get_or_create",
+                   return_value=(MagicMock(value="100"), True)):
+            from soroscan.ingest.tasks import ingest_latest_events
+            ingest_latest_events()
+
+        mock_server.get_events.assert_called()
+        call_kwargs = mock_server.get_events.call_args
+        filters = call_kwargs[1].get("filters") or call_kwargs[0][1]
+        queried_ids = filters[0]["contractIds"] if filters else []
+        assert blocked.contract_id not in queried_ids
+        assert allowed.contract_id in queried_ids
+
+    def test_blacklisted_contract_logged(self, caplog):
+        contract = self._make_contract()
+        BlacklistedContract.objects.create(
+            contract_id=contract.contract_id,
+            reason="known malicious",
+        )
+
+        mock_server = MagicMock()
+        mock_server.get_events.return_value = MagicMock(events=[])
+
+        with caplog.at_level(logging.INFO, logger="soroscan.ingest.tasks"), \
+             patch("soroscan.ingest.tasks.SorobanServer", return_value=mock_server), \
+             patch("soroscan.ingest.tasks.IndexerState.objects.get_or_create",
+                   return_value=(MagicMock(value="100"), True)):
+            from soroscan.ingest.tasks import ingest_latest_events
+            ingest_latest_events()
+
+        assert any(
+            "blacklisted" in r.message.lower() and contract.contract_id in r.message
+            for r in caplog.records
+        )
+
+    def test_non_blacklisted_contract_included(self):
+        contract = self._make_contract()
+
+        mock_server = MagicMock()
+        mock_server.get_events.return_value = MagicMock(events=[])
+
+        with patch("soroscan.ingest.tasks.SorobanServer", return_value=mock_server), \
+             patch("soroscan.ingest.tasks.IndexerState.objects.get_or_create",
+                   return_value=(MagicMock(value="100"), True)):
+            from soroscan.ingest.tasks import ingest_latest_events
+            ingest_latest_events()
+
+        # Fail loudly if the RPC was never queried; guarding the assertion
+        # behind a truthiness check would let this test pass vacuously.
+        mock_server.get_events.assert_called()
+        call_kwargs = mock_server.get_events.call_args
+        filters = call_kwargs[1].get("filters") or call_kwargs[0][1]
+        queried_ids = filters[0]["contractIds"] if filters else []
+        assert contract.contract_id in queried_ids
+
+    def test_multiple_blacklisted_all_excluded(self, caplog):
+        contracts = [self._make_contract() for _ in range(3)]
+        for c in contracts:
+            BlacklistedContract.objects.create(contract_id=c.contract_id)
+
+        mock_server = MagicMock()
+        mock_server.get_events.return_value = MagicMock(events=[])
+
+        with caplog.at_level(logging.INFO, logger="soroscan.ingest.tasks"), \
+             patch("soroscan.ingest.tasks.SorobanServer", return_value=mock_server), \
+             patch("soroscan.ingest.tasks.IndexerState.objects.get_or_create",
+                   return_value=(MagicMock(value="100"), True)):
+            from soroscan.ingest.tasks import ingest_latest_events
+            ingest_latest_events()
+
+        # All three should be logged as blacklisted
+        blacklisted_messages = [
+            r.message for r in caplog.records if "blacklisted" in r.message.lower()
+        ]
+        for c in contracts:
+            assert any(c.contract_id in msg for msg in blacklisted_messages)