From f0f5b68fd914ae50f1342797592090447b3c93a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 09:33:37 +0200
Subject: [PATCH 01/25] =?UTF-8?q?refactor(deduplikator):=20zmie=C5=84=20na?=
 =?UTF-8?q?zw=C4=99=20IgnoredAuthor=20=E2=86=92=20IgnoredScientist?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pierwszy krok przygotowania pod tryb general — istniejący IgnoredAuthor
był specyficzny dla PBN (FK→Scientist) i zwalniamy nazwę pod nowy model
ignorujący autorów BPP w trybie ogólnym.
---
 src/bpp/system.py                             |  4 ++--
 src/deduplikator_autorow/admin.py             |  6 ++---
 ...9_rename_ignoredauthor_ignoredscientist.py | 23 +++++++++++++++++++
 src/deduplikator_autorow/models.py            |  8 +++----
 src/deduplikator_autorow/tasks.py             |  4 ++--
 .../duplicate_authors.html                    |  2 +-
 src/deduplikator_autorow/utils/finders.py     |  4 ++--
 src/deduplikator_autorow/views.py             | 12 +++++-----
 8 files changed, 43 insertions(+), 20 deletions(-)
 create mode 100644 src/deduplikator_autorow/migrations/0009_rename_ignoredauthor_ignoredscientist.py

diff --git a/src/bpp/system.py b/src/bpp/system.py
index 17be57ff6..210421b15 100644
--- a/src/bpp/system.py
+++ b/src/bpp/system.py
@@ -78,7 +78,7 @@
 from bpp.models.struktura import Jednostka_Wydzial
 from bpp.models.system import Charakter_PBN
 from bpp.models.wydawca import Poziom_Wydawcy, Wydawca
-from deduplikator_autorow.models import IgnoredAuthor, LogScalania, NotADuplicate
+from deduplikator_autorow.models import IgnoredScientist, LogScalania, NotADuplicate
 from dynamic_columns.models import ModelAdmin, ModelAdminColumn
 from ewaluacja_common.models import Rodzaj_Autora
 from ewaluacja_liczba_n.models import IloscUdzialowDlaAutoraZaRok, LiczbaNDlaUczelni
@@ -189,7 +189,7 @@
         RozbieznosciZrodelView,
         NotADuplicate,
         LogScalania,
-        IgnoredAuthor,
+        IgnoredScientist,
     ],
     "indeks autorów": [Autor, Autor_Jednostka],
     "administracja": [
diff --git a/src/deduplikator_autorow/admin.py b/src/deduplikator_autorow/admin.py
index 8f4fe5e8d..0e200327e 100644
--- a/src/deduplikator_autorow/admin.py
+++ b/src/deduplikator_autorow/admin.py
@@ -9,7 +9,7 @@
 from .models import (
     DuplicateCandidate,
     DuplicateScanRun,
-    IgnoredAuthor,
+    IgnoredScientist,
     LogScalania,
     NotADuplicate,
 )
@@ -76,8 +76,8 @@ def get_author_last_name(self, obj):
     get_author_last_name.admin_order_field = "scientist_pk"
 
 
-@admin.register(IgnoredAuthor)
-class IgnoredAuthorAdmin(DynamicAdminFilterMixin, admin.ModelAdmin):
+@admin.register(IgnoredScientist)
+class IgnoredScientistAdmin(DynamicAdminFilterMixin, admin.ModelAdmin):
     list_display = [
         "get_scientist_display",
         "get_autor_display",
diff --git a/src/deduplikator_autorow/migrations/0009_rename_ignoredauthor_ignoredscientist.py b/src/deduplikator_autorow/migrations/0009_rename_ignoredauthor_ignoredscientist.py
new file mode 100644
index 000000000..cff55c56e
--- /dev/null
+++ b/src/deduplikator_autorow/migrations/0009_rename_ignoredauthor_ignoredscientist.py
@@ -0,0 +1,23 @@
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    # Renames IgnoredAuthor to IgnoredScientist and relabels it as the PBN variant.
+    dependencies = [
+        ("deduplikator_autorow", "0008_add_priority_field"),
+    ]
+
+    operations = [
+        migrations.RenameModel(  # frees the IgnoredAuthor name for later reuse
+            old_name="IgnoredAuthor",
+            new_name="IgnoredScientist",
+        ),
+        migrations.AlterModelOptions(  # only the human-readable names change
+            name="ignoredscientist",
+            options={
+                "ordering": ["-created_on"],
+                "verbose_name": "Ignorowany Scientist (PBN)",
+                "verbose_name_plural": "Ignorowani Scientist (PBN)",
+            },
+        ),
+    ]
diff --git a/src/deduplikator_autorow/models.py b/src/deduplikator_autorow/models.py
index 9def5b9ea..c0ccffc37 100644
--- a/src/deduplikator_autorow/models.py
+++ b/src/deduplikator_autorow/models.py
@@ -30,8 +30,8 @@ def __str__(self):
         return f"Autor {self.autor} (not duplicate) - {self.created_by}"
 
 
-class IgnoredAuthor(models.Model):
-    """Authors that should be completely ignored in the deduplication process"""
+class IgnoredScientist(models.Model):
+    """Scientists from PBN that should be completely ignored in deduplication"""
 
     scientist = models.OneToOneField(
         "pbn_api.Scientist",
@@ -66,8 +66,8 @@ class IgnoredAuthor(models.Model):
     )
 
     class Meta:
-        verbose_name = "Ignorowany autor"
-        verbose_name_plural = "Ignorowani autorzy"
+        verbose_name = "Ignorowany Scientist (PBN)"
+        verbose_name_plural = "Ignorowani Scientist (PBN)"
         ordering = ["-created_on"]
 
     def __str__(self):
diff --git a/src/deduplikator_autorow/tasks.py b/src/deduplikator_autorow/tasks.py
index 101027480..0fa4a5dd3 100644
--- a/src/deduplikator_autorow/tasks.py
+++ b/src/deduplikator_autorow/tasks.py
@@ -240,7 +240,7 @@ def scan_for_duplicates(self, user_id=None, min_confidence=MIN_CONFIDENCE_TO_STO
     """
     from pbn_api.models import OsobaZInstytucji
 
-    from .models import DuplicateCandidate, DuplicateScanRun, IgnoredAuthor
+    from .models import DuplicateCandidate, DuplicateScanRun, IgnoredScientist
 
     logger.info("Starting duplicate scan task...")
 
@@ -257,7 +257,7 @@ def scan_for_duplicates(self, user_id=None, min_confidence=MIN_CONFIDENCE_TO_STO
         logger.info(f"Deleted {deleted_count} existing candidates")
 
         ignored_scientist_ids = set(
-            IgnoredAuthor.objects.values_list("scientist_id", flat=True)
+            IgnoredScientist.objects.values_list("scientist_id", flat=True)
         )
 
         osoby_query = OsobaZInstytucji.objects.select_related("personId").all()
diff --git a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
index 0f9e904e5..371c3544a 100644
--- a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
+++ b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
@@ -313,7 +313,7 @@
                                 <p class="deduplikator-autorow__status-warning deduplikator-autorow__status-text--spaced">
                                     <span class="fi-alert"></span> Obecnie: {{ ignored_authors_count }} ignorowanych
                                 </p>
-                                <a href="/admin/deduplikator_autorow/ignoredauthor/"
+                                <a href="/admin/deduplikator_autorow/ignoredscientist/"
                                    class="button secondary expanded small deduplikator-autorow__button-margin-bottom"
                                    target="_blank">
                                     <span class="fi-list"></span> Zobacz listę
diff --git a/src/deduplikator_autorow/utils/finders.py b/src/deduplikator_autorow/utils/finders.py
index 4fe01f48c..4270d70aa 100644
--- a/src/deduplikator_autorow/utils/finders.py
+++ b/src/deduplikator_autorow/utils/finders.py
@@ -3,7 +3,7 @@
 """
 
 from bpp.models import Autor
-from deduplikator_autorow.models import IgnoredAuthor
+from deduplikator_autorow.models import IgnoredScientist
 from pbn_api.models import OsobaZInstytucji, Scientist
 
 from .analysis import autor_ma_publikacje_z_lat
@@ -36,7 +36,7 @@ def znajdz_pierwszego_autora_z_duplikatami(  # noqa: C901
 
     # Pobierz IDs ignorowanych autorów
     ignored_scientist_ids = list(
-        IgnoredAuthor.objects.values_list("scientist_id", flat=True)
+        IgnoredScientist.objects.values_list("scientist_id", flat=True)
     )
 
     # Przeszukaj wszystkie rekordy OsobaZInstytucji, wykluczając określonych autorów
diff --git a/src/deduplikator_autorow/views.py b/src/deduplikator_autorow/views.py
index 8d6be892f..d3819abc5 100644
--- a/src/deduplikator_autorow/views.py
+++ b/src/deduplikator_autorow/views.py
@@ -22,7 +22,7 @@
 from .models import (
     DuplicateCandidate,
     DuplicateScanRun,
-    IgnoredAuthor,
+    IgnoredScientist,
     LogScalania,
     NotADuplicate,
 )
@@ -200,7 +200,7 @@ def duplicate_authors_view(request):  # noqa: C901
 
     # Common context
     not_duplicate_count = NotADuplicate.objects.count()
-    ignored_authors_count = IgnoredAuthor.objects.count()
+    ignored_authors_count = IgnoredScientist.objects.count()
     latest_pbn_download = PbnDownloadTask.get_latest_task()
 
     # Check PBN people data freshness
@@ -510,7 +510,7 @@ def ignore_author(request):
         scientist = Scientist.objects.get(pk=scientist_id)
 
         # Check if already ignored
-        if IgnoredAuthor.objects.filter(scientist=scientist).exists():
+        if IgnoredScientist.objects.filter(scientist=scientist).exists():
             messages.warning(
                 request, f"Autor {scientist} jest już oznaczony jako ignorowany."
             )
@@ -520,7 +520,7 @@ def ignore_author(request):
             if hasattr(scientist, "rekord_w_bpp"):
                 autor = scientist.rekord_w_bpp
 
-            IgnoredAuthor.objects.create(
+            IgnoredScientist.objects.create(
                 scientist=scientist, autor=autor, reason=reason, created_by=request.user
             )
             messages.success(
@@ -543,8 +543,8 @@ def reset_ignored_authors(request):
     """
     Remove all ignored author markings.
     """
-    count = IgnoredAuthor.objects.count()
-    IgnoredAuthor.objects.all().delete()
+    count = IgnoredScientist.objects.count()
+    IgnoredScientist.objects.all().delete()
     messages.success(request, f"Zresetowano {count} ignorowanych autorów.")
     return redirect("deduplikator_autorow:duplicate_authors")
 

From 49d2c80d22583920a97a76744209beff010bd667 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 09:44:33 +0200
Subject: [PATCH 02/25] =?UTF-8?q?feat(deduplikator):=20nowy=20model=20Igno?=
 =?UTF-8?q?redAuthor=20(FK=E2=86=92Autor)=20dla=20trybu=20general?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/deduplikator_autorow/admin.py             |  37 +++++
 .../migrations/0010_add_ignored_author.py     | 142 ++++++++++++++++++
 src/deduplikator_autorow/models.py            |  33 ++++
 .../tests/test_models_ignored.py              |  37 +++++
 4 files changed, 249 insertions(+)
 create mode 100644 src/deduplikator_autorow/migrations/0010_add_ignored_author.py
 create mode 100644 src/deduplikator_autorow/tests/test_models_ignored.py

diff --git a/src/deduplikator_autorow/admin.py b/src/deduplikator_autorow/admin.py
index 0e200327e..407f2e8a2 100644
--- a/src/deduplikator_autorow/admin.py
+++ b/src/deduplikator_autorow/admin.py
@@ -9,6 +9,7 @@
 from .models import (
     DuplicateCandidate,
     DuplicateScanRun,
+    IgnoredAuthor,
     IgnoredScientist,
     LogScalania,
     NotADuplicate,
@@ -133,6 +134,42 @@ def save_model(self, request, obj, form, change):
         super().save_model(request, obj, form, change)
 
 
+@admin.register(IgnoredAuthor)
+class IgnoredAuthorAdmin(DynamicAdminFilterMixin, admin.ModelAdmin):
+    """Admin for IgnoredAuthor: BPP authors excluded from deduplication."""
+
+    list_display = ["get_autor_display", "reason", "created_by", "created_on"]
+    list_filter = ["created_on", "created_by"]
+    search_fields = [
+        "autor__nazwisko",
+        "autor__imiona",
+        "reason",
+        "created_by__username",
+    ]
+
+    readonly_fields = ["created_on"]
+    date_hierarchy = "created_on"
+    ordering = ["-created_on"]
+
+    def get_autor_display(self, obj):
+        """Return a link to the author's admin change page, or "-" if unset."""
+        from django.utils.html import format_html
+
+        if not obj.autor:
+            return "-"
+        url = reverse("admin:bpp_autor_change", args=[obj.autor.pk])
+        # format_html escapes the author's name; mark_safe on an f-string did not.
+        return format_html('<a href="{}">{}</a>', url, obj.autor)
+
+    get_autor_display.short_description = "Autor (BPP)"
+
+    def save_model(self, request, obj, form, change):
+        """Stamp the requesting user as creator when a new row is added."""
+        if not change:
+            obj.created_by = request.user
+        super().save_model(request, obj, form, change)
+
+
 @admin.register(LogScalania)
 class LogScalaniaAdmin(DynamicAdminFilterMixin, admin.ModelAdmin):
     list_display = [
diff --git a/src/deduplikator_autorow/migrations/0010_add_ignored_author.py b/src/deduplikator_autorow/migrations/0010_add_ignored_author.py
new file mode 100644
index 000000000..b9f55e6dc
--- /dev/null
+++ b/src/deduplikator_autorow/migrations/0010_add_ignored_author.py
@@ -0,0 +1,127 @@
+import django.db.models.deletion
+import django.utils.timezone
+from django.conf import settings
+from django.db import migrations, models
+
+
+# (name PostgreSQL kept after 0009, name matching the renamed table) pairs.
+_LEFTOVER_INDEX_RENAMES = (
+    (
+        "deduplikator_autorow_ignoredauthor_autor_id_5e237500",
+        "deduplikator_autorow_ignoredsci_autor_id_5e237500",
+    ),
+    (
+        "deduplikator_autorow_ignoredauthor_created_by_id_3d0a197e",
+        "deduplikator_autorow_ignoredsci_created_by_id_3d0a197e",
+    ),
+    (
+        "deduplikator_autorow_ignoredauthor_scientist_id_ae6083d3_like",
+        "deduplikator_autorow_ignoredsci_scientist_id_ae6083d3_like",
+    ),
+    (
+        "deduplikator_autorow_ignoredauthor_pkey",
+        "deduplikator_autorow_ignoredscientist_pkey",
+    ),
+    (
+        "deduplikator_autorow_ignoredauthor_scientist_id_key",
+        "deduplikator_autorow_ignoredscientist_scientist_id_key",
+    ),
+)
+
+
+def _rename_indexes(schema_editor, name_pairs):
+    """Rename each (current, target) index pair, skipping indexes that are absent.
+
+    Relies on ``ALTER INDEX IF EXISTS``, so a pair whose current name does not
+    exist in the database is silently skipped.
+    """
+    with schema_editor.connection.cursor() as cursor:
+        for current, target in name_pairs:
+            cursor.execute(f'ALTER INDEX IF EXISTS "{current}" RENAME TO "{target}"')
+
+
+def rename_leftover_ignoredauthor_indexes(apps, schema_editor):
+    """Move the old IgnoredAuthor index names out of the way before CreateModel.
+
+    Migration 0009 renamed the IgnoredAuthor model to IgnoredScientist. On
+    PostgreSQL that renames the table but leaves index names untouched, so the
+    `deduplikator_autorow_ignoredauthor_*` names would clash with the names
+    auto-generated for the new IgnoredAuthor model created by this migration.
+
+    The leftovers are renamed to follow the IgnoredScientist table. The SQL is
+    idempotent (IF EXISTS), so it also works against fresh databases.
+    """
+    _rename_indexes(schema_editor, _LEFTOVER_INDEX_RENAMES)
+
+
+def reverse_rename_leftover_ignoredauthor_indexes(apps, schema_editor):
+    """Reverse step: give the indexes back their pre-migration names."""
+    restored = [(new, old) for old, new in _LEFTOVER_INDEX_RENAMES]
+    _rename_indexes(schema_editor, restored)
+
+
+class Migration(migrations.Migration):
+    # Clears leftover index names from 0009, then creates the new IgnoredAuthor.
+    dependencies = [
+        ("bpp", "0413_bppuser_autor_onetoone"),
+        ("deduplikator_autorow", "0009_rename_ignoredauthor_ignoredscientist"),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.RunPython(  # runs first: frees the names CreateModel will use
+            rename_leftover_ignoredauthor_indexes,
+            reverse_rename_leftover_ignoredauthor_indexes,
+        ),
+        migrations.CreateModel(  # new model keyed on bpp.Autor, reusing the old name
+            name="IgnoredAuthor",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "reason",
+                    models.CharField(
+                        blank=True,
+                        max_length=500,
+                        verbose_name="Powód ignorowania",
+                    ),
+                ),
+                (
+                    "created_on",
+                    models.DateTimeField(
+                        default=django.utils.timezone.now,
+                        verbose_name="Data utworzenia",
+                    ),
+                ),
+                (
+                    "autor",
+                    models.OneToOneField(
+                        help_text="Autor BPP do ignorowania w deduplikacji ogólnej",
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="bpp.autor",
+                        verbose_name="Autor (BPP)",
+                    ),
+                ),
+                (
+                    "created_by",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to=settings.AUTH_USER_MODEL,
+                        verbose_name="Utworzył",
+                    ),
+                ),
+            ],
+            options={
+                "verbose_name": "Ignorowany autor (BPP)",
+                "verbose_name_plural": "Ignorowani autorzy (BPP)",
+                "ordering": ["-created_on"],
+            },
+        ),
+    ]
diff --git a/src/deduplikator_autorow/models.py b/src/deduplikator_autorow/models.py
index c0ccffc37..7b1bfa6dd 100644
--- a/src/deduplikator_autorow/models.py
+++ b/src/deduplikator_autorow/models.py
@@ -76,6 +76,39 @@ def __str__(self):
         return f"Ignorowany: Scientist #{self.scientist.pk}"
 
 
+class IgnoredAuthor(models.Model):
+    """BPP authors (without PBN-Scientist link) that should be ignored in deduplication."""
+
+    autor = models.OneToOneField(  # one-to-one: an author can be ignored only once
+        "bpp.Autor",
+        on_delete=models.CASCADE,  # ignore row is removed together with its author
+        db_index=True,  # redundant: a OneToOneField is unique, so already indexed
+        verbose_name="Autor (BPP)",
+        help_text="Autor BPP do ignorowania w deduplikacji ogólnej",
+    )
+
+    reason = models.CharField(
+        max_length=500,
+        blank=True,  # the justification is optional
+        verbose_name="Powód ignorowania",
+    )
+
+    created_on = models.DateTimeField("Data utworzenia", default=timezone.now)
+    created_by = models.ForeignKey(
+        BppUser,
+        on_delete=models.CASCADE,  # deleting the user deletes their ignore rows
+        verbose_name="Utworzył",
+    )
+
+    class Meta:
+        verbose_name = "Ignorowany autor (BPP)"
+        verbose_name_plural = "Ignorowani autorzy (BPP)"
+        ordering = ["-created_on"]  # newest entries first
+
+    def __str__(self):
+        return f"Ignorowany autor: {self.autor}"
+
+
 class LogScalania(models.Model):
     """Log of author merge operations with detailed tracking"""
 
diff --git a/src/deduplikator_autorow/tests/test_models_ignored.py b/src/deduplikator_autorow/tests/test_models_ignored.py
new file mode 100644
index 000000000..90f86df6f
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_models_ignored.py
@@ -0,0 +1,37 @@
+"""Tests for the IgnoredAuthor (general) and IgnoredScientist (PBN) models."""
+
+import pytest
+from model_bakery import baker
+
+from deduplikator_autorow.models import IgnoredAuthor, IgnoredScientist
+
+
+@pytest.mark.django_db
+def test_ignored_scientist_can_be_created():  # row saves and keeps its Scientist
+    scientist = baker.make("pbn_api.Scientist")
+    user = baker.make("bpp.BppUser")
+    obj = IgnoredScientist.objects.create(scientist=scientist, created_by=user)
+    assert obj.pk is not None
+    assert obj.scientist == scientist
+
+
+@pytest.mark.django_db
+def test_ignored_author_can_be_created():  # row saves with autor and reason intact
+    autor = baker.make("bpp.Autor")
+    user = baker.make("bpp.BppUser")
+    obj = IgnoredAuthor.objects.create(autor=autor, created_by=user, reason="test")
+    assert obj.pk is not None
+    assert obj.autor == autor
+    assert obj.reason == "test"
+
+
+@pytest.mark.django_db
+def test_ignored_author_one_to_one_constraint():
+    """Adding the same author a second time raises IntegrityError."""
+    from django.db import IntegrityError
+
+    autor = baker.make("bpp.Autor")
+    user = baker.make("bpp.BppUser")
+    IgnoredAuthor.objects.create(autor=autor, created_by=user)
+    with pytest.raises(IntegrityError):
+        IgnoredAuthor.objects.create(autor=autor, created_by=user)

From 698984d4a1048abc53020fb6107ad8bcdf2f6ecb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 09:56:05 +0200
Subject: [PATCH 03/25] feat(deduplikator): pola phase, scan_mode, status
 PARTIAL_COMPLETED, constraint

---
 .../0011_scan_mode_phase_partial.py           |  71 +++++++++++
 src/deduplikator_autorow/models.py            |  24 +++-
 .../tests/test_models_scan_fields.py          | 113 ++++++++++++++++++
 3 files changed, 206 insertions(+), 2 deletions(-)
 create mode 100644 src/deduplikator_autorow/migrations/0011_scan_mode_phase_partial.py
 create mode 100644 src/deduplikator_autorow/tests/test_models_scan_fields.py

diff --git a/src/deduplikator_autorow/migrations/0011_scan_mode_phase_partial.py b/src/deduplikator_autorow/migrations/0011_scan_mode_phase_partial.py
new file mode 100644
index 000000000..9e56f826e
--- /dev/null
+++ b/src/deduplikator_autorow/migrations/0011_scan_mode_phase_partial.py
@@ -0,0 +1,71 @@
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Adds scan phase/mode tracking and makes candidate uniqueness per scan mode.
+    dependencies = [
+        ("deduplikator_autorow", "0010_add_ignored_author"),
+    ]
+
+    operations = [
+        migrations.AlterField(  # status gains the "partial_completed" choice
+            model_name="duplicatescanrun",
+            name="status",
+            field=models.CharField(
+                choices=[
+                    ("pending", "Oczekuje"),
+                    ("running", "W trakcie"),
+                    ("completed", "Zakończone"),
+                    (
+                        "partial_completed",
+                        "Częściowo zakończone (faza PBN OK, general anulowana)",
+                    ),
+                    ("cancelled", "Anulowane"),
+                    ("failed", "Błąd"),
+                ],
+                db_index=True,
+                default="pending",
+                max_length=20,
+                verbose_name="Status",
+            ),
+        ),
+        migrations.AddField(  # DuplicateScanRun.phase: blank or "pbn"/"general"
+            model_name="duplicatescanrun",
+            name="phase",
+            field=models.CharField(
+                blank=True,
+                choices=[("pbn", "Faza PBN"), ("general", "Faza ogólna")],
+                max_length=20,
+                verbose_name="Aktualna faza",
+            ),
+        ),
+        migrations.AddField(  # DuplicateCandidate.scan_mode, defaulting to "pbn"
+            model_name="duplicatecandidate",
+            name="scan_mode",
+            field=models.CharField(
+                choices=[("pbn", "PBN"), ("general", "Ogólny")],
+                db_index=True,
+                default="pbn",
+                max_length=20,
+                verbose_name="Tryb skanowania",
+            ),
+        ),
+        migrations.RemoveConstraint(  # superseded by the constraint added below
+            model_name="duplicatecandidate",
+            name="unique_scan_main_duplicate",
+        ),
+        migrations.AddIndex(  # composite index over scan_run, scan_mode, status
+            model_name="duplicatecandidate",
+            index=models.Index(
+                fields=["scan_run", "scan_mode", "status"],
+                name="deduplikato_scan_ru_78ad22_idx",
+            ),
+        ),
+        migrations.AddConstraint(  # uniqueness now also keyed on scan_mode
+            model_name="duplicatecandidate",
+            constraint=models.UniqueConstraint(
+                fields=("scan_run", "scan_mode", "main_autor", "duplicate_autor"),
+                name="unique_scan_mode_main_duplicate",
+            ),
+        ),
+    ]
diff --git a/src/deduplikator_autorow/models.py b/src/deduplikator_autorow/models.py
index 7b1bfa6dd..33c299126 100644
--- a/src/deduplikator_autorow/models.py
+++ b/src/deduplikator_autorow/models.py
@@ -259,6 +259,10 @@ class Status(models.TextChoices):
         PENDING = "pending", "Oczekuje"
         RUNNING = "running", "W trakcie"
         COMPLETED = "completed", "Zakończone"
+        PARTIAL_COMPLETED = (
+            "partial_completed",
+            "Częściowo zakończone (faza PBN OK, general anulowana)",
+        )
         CANCELLED = "cancelled", "Anulowane"
         FAILED = "failed", "Błąd"
 
@@ -307,6 +311,13 @@ class Status(models.TextChoices):
         blank=True,
     )
 
+    phase = models.CharField(
+        "Aktualna faza",
+        max_length=20,
+        blank=True,
+        choices=[("pbn", "Faza PBN"), ("general", "Faza ogólna")],
+    )
+
     class Meta:
         verbose_name = "Skanowanie duplikatów"
         verbose_name_plural = "Skanowania duplikatów"
@@ -385,6 +396,14 @@ class Status(models.TextChoices):
         help_text="Priorytet wyświetlania: 100=prace 2022-2025 z dyscyplinami, 50=prace 2022-2025, 0=inne",
     )
 
+    scan_mode = models.CharField(
+        "Tryb skanowania",
+        max_length=20,
+        choices=[("pbn", "PBN"), ("general", "Ogólny")],
+        default="pbn",
+        db_index=True,
+    )
+
     # Status tracking
     status = models.CharField(
         "Status",
@@ -435,11 +454,12 @@ class Meta:
             models.Index(fields=["scan_run", "status"]),
             models.Index(fields=["main_autor", "status"]),
             models.Index(fields=["priority", "confidence_score"]),
+            models.Index(fields=["scan_run", "scan_mode", "status"]),
         ]
         constraints = [
             models.UniqueConstraint(
-                fields=["scan_run", "main_autor", "duplicate_autor"],
-                name="unique_scan_main_duplicate",
+                fields=["scan_run", "scan_mode", "main_autor", "duplicate_autor"],
+                name="unique_scan_mode_main_duplicate",
             ),
         ]
 
diff --git a/src/deduplikator_autorow/tests/test_models_scan_fields.py b/src/deduplikator_autorow/tests/test_models_scan_fields.py
new file mode 100644
index 000000000..5e19fa249
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_models_scan_fields.py
@@ -0,0 +1,113 @@
+"""Tests for the new fields: phase, scan_mode and the PARTIAL_COMPLETED status."""
+
+import pytest
+from model_bakery import baker
+
+from deduplikator_autorow.models import DuplicateCandidate, DuplicateScanRun
+
+
+@pytest.mark.django_db
+def test_scan_run_phase_field_default_blank():  # phase is "" when not given
+    scan = DuplicateScanRun.objects.create()
+    assert scan.phase == ""
+
+
+@pytest.mark.django_db
+def test_scan_run_phase_field_can_be_set():  # value survives a DB round trip
+    scan = DuplicateScanRun.objects.create(phase="general")
+    scan.refresh_from_db()
+    assert scan.phase == "general"
+
+
+@pytest.mark.django_db
+def test_scan_run_partial_completed_status():  # stored value and display label
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.PARTIAL_COMPLETED
+    )
+    scan.refresh_from_db()
+    assert scan.status == "partial_completed"
+    assert scan.get_status_display() == (
+        "Częściowo zakończone (faza PBN OK, general anulowana)"
+    )
+
+
+@pytest.mark.django_db
+def test_candidate_scan_mode_default_pbn():  # scan_mode is omitted on purpose
+    scan = DuplicateScanRun.objects.create()
+    autor1 = baker.make("bpp.Autor")
+    autor2 = baker.make("bpp.Autor")
+    cand = DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=autor1,
+        duplicate_autor=autor2,
+        confidence_score=80,
+        confidence_percent=0.5,
+        main_autor_name="Test Main",
+        duplicate_autor_name="Test Dup",
+    )
+    cand.refresh_from_db()
+    assert cand.scan_mode == "pbn"
+
+
+@pytest.mark.django_db
+def test_candidate_scan_mode_general():  # an explicit scan_mode is persisted
+    scan = DuplicateScanRun.objects.create()
+    autor1 = baker.make("bpp.Autor")
+    autor2 = baker.make("bpp.Autor")
+    cand = DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=autor1,
+        duplicate_autor=autor2,
+        confidence_score=80,
+        confidence_percent=0.5,
+        main_autor_name="Test Main",
+        duplicate_autor_name="Test Dup",
+        scan_mode="general",
+    )
+    cand.refresh_from_db()
+    assert cand.scan_mode == "general"
+
+
+@pytest.mark.django_db
+def test_candidate_unique_constraint_includes_scan_mode():
+    """The same (main, dup) pair may exist in both modes, but not twice in one."""
+    from django.db import IntegrityError, transaction
+
+    scan = DuplicateScanRun.objects.create()
+    autor1 = baker.make("bpp.Autor")
+    autor2 = baker.make("bpp.Autor")
+
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=autor1,
+        duplicate_autor=autor2,
+        confidence_score=80,
+        confidence_percent=0.5,
+        main_autor_name="A",
+        duplicate_autor_name="B",
+        scan_mode="pbn",
+    )
+    # The same pair in general mode is allowed
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=autor1,
+        duplicate_autor=autor2,
+        confidence_score=80,
+        confidence_percent=0.5,
+        main_autor_name="A",
+        duplicate_autor_name="B",
+        scan_mode="general",
+    )
+    # A second row in pbn mode must raise IntegrityError
+    with pytest.raises(IntegrityError):
+        with transaction.atomic():
+            DuplicateCandidate.objects.create(
+                scan_run=scan,
+                main_autor=autor1,
+                duplicate_autor=autor2,
+                confidence_score=80,
+                confidence_percent=0.5,
+                main_autor_name="A",
+                duplicate_autor_name="B",
+                scan_mode="pbn",
+            )

From f3b377e19b71cf8f19dad7a1955ee09d4bb13de3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 09:59:35 +0200
Subject: [PATCH 04/25] =?UTF-8?q?feat(deduplikator):=20utils.cluster=20?=
 =?UTF-8?q?=E2=80=94=20union-find=20dla=20klastr=C3=B3w=20autor=C3=B3w?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tests/test_cluster.py                     | 39 ++++++++++++++++++
 src/deduplikator_autorow/utils/cluster.py     | 41 +++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 src/deduplikator_autorow/tests/test_cluster.py
 create mode 100644 src/deduplikator_autorow/utils/cluster.py

diff --git a/src/deduplikator_autorow/tests/test_cluster.py b/src/deduplikator_autorow/tests/test_cluster.py
new file mode 100644
index 000000000..42b214e35
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_cluster.py
@@ -0,0 +1,39 @@
+"""Tests for union-find (connected components)."""
+
+from deduplikator_autorow.utils.cluster import find_clusters
+
+
+def test_two_disjoint_pairs():  # unrelated pairs stay in separate clusters
+    pairs = [(1, 2), (3, 4)]
+    clusters = sorted(find_clusters(pairs), key=min)
+    assert clusters == [{1, 2}, {3, 4}]
+
+
+def test_transitive_cluster():
+    """A~B and B~C → cluster {A, B, C}."""
+    pairs = [(1, 2), (2, 3)]  # 2 is shared, which links 1 and 3
+    clusters = list(find_clusters(pairs))
+    assert clusters == [{1, 2, 3}]
+
+
+def test_single_pair():  # one pair yields one two-element cluster
+    pairs = [(7, 8)]
+    clusters = list(find_clusters(pairs))
+    assert clusters == [{7, 8}]
+
+
+def test_no_pairs():  # empty input yields no clusters
+    assert list(find_clusters([])) == []
+
+
+def test_isolated_nodes_with_pairs():
+    """Only nodes that have connections end up in clusters."""
+    pairs = [(1, 2), (5, 6), (2, 3)]
+    clusters = sorted(find_clusters(pairs), key=min)
+    assert clusters == [{1, 2, 3}, {5, 6}]
+
+
+def test_duplicate_pairs_are_idempotent():  # repeats and reversed pairs add nothing
+    pairs = [(1, 2), (1, 2), (2, 1)]
+    clusters = list(find_clusters(pairs))
+    assert clusters == [{1, 2}]
diff --git a/src/deduplikator_autorow/utils/cluster.py b/src/deduplikator_autorow/utils/cluster.py
new file mode 100644
index 000000000..219eb8aba
--- /dev/null
+++ b/src/deduplikator_autorow/utils/cluster.py
@@ -0,0 +1,41 @@
+"""Union-find (connected components) dla par autorów.
+
+Dla zbioru par (a, b) zwraca spójne komponenty grafu.
+"""
+
+
+def find_clusters(pairs):
+    """Return the connected components (clusters) formed by the given pairs.
+
+    Args:
+        pairs: iterable of (pk_a, pk_b) tuples; each pair is treated as an edge.
+
+    Returns:
+        list[set[int]]: list of clusters (each cluster is a set of PKs).
+    """
+    parent: dict = {}  # node -> parent node; a root is its own parent
+
+    def find(x):
+        while parent[x] != x:
+            parent[x] = parent[parent[x]]  # path halving: point x at its grandparent
+            x = parent[x]
+        return x
+
+    def union(a, b):
+        ra, rb = find(a), find(b)
+        if ra != rb:
+            parent[ra] = rb  # attach one root under the other
+
+    for a, b in pairs:
+        if a not in parent:
+            parent[a] = a
+        if b not in parent:
+            parent[b] = b
+        union(a, b)
+
+    clusters_by_root: dict = {}  # root -> set of nodes sharing that root
+    for node in parent:
+        root = find(node)
+        clusters_by_root.setdefault(root, set()).add(node)
+
+    return list(clusters_by_root.values())

From b1f61708713de189da0ad017393fcbfc3b00377d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:01:35 +0200
Subject: [PATCH 05/25] =?UTF-8?q?feat(deduplikator):=20utils.main=5Fselect?=
 =?UTF-8?q?ion=20=E2=80=94=20hierarchia=20wyboru=20g=C5=82=C3=B3wnego?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tests/test_main_selection.py              | 75 +++++++++++++++++++
 .../utils/main_selection.py                   | 38 ++++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 src/deduplikator_autorow/tests/test_main_selection.py
 create mode 100644 src/deduplikator_autorow/utils/main_selection.py

diff --git a/src/deduplikator_autorow/tests/test_main_selection.py b/src/deduplikator_autorow/tests/test_main_selection.py
new file mode 100644
index 000000000..6dbeea8d2
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_main_selection.py
@@ -0,0 +1,75 @@
+"""Testy hierarchii wyboru głównego rekordu (hierarchia B)."""
+
+from deduplikator_autorow.utils.main_selection import pick_main_pk
+
+
+def _meta(**kwargs):
+    """Helper — a minimal meta entry; keyword arguments override the defaults."""
+    base = {
+        "ma_orcid": False,
+        "ma_pbn_uid": False,
+        "ma_tytul": False,
+        "ma_dyscypline": False,
+        "publikacje_count": 0,
+        "max_rok": 0,
+    }
+    base.update(kwargs)
+    return base
+
+
+def test_orcid_wins_over_everything():
+    metas = {
+        1: _meta(ma_orcid=False, publikacje_count=100, max_rok=2025),
+        2: _meta(ma_orcid=True, publikacje_count=1, max_rok=2000),
+    }
+    cluster = {1, 2}
+    assert pick_main_pk(cluster, metas) == 2
+
+
+def test_pbn_uid_wins_when_orcid_tied():
+    metas = {
+        1: _meta(ma_orcid=True, ma_pbn_uid=False),
+        2: _meta(ma_orcid=True, ma_pbn_uid=True),
+    }
+    assert pick_main_pk({1, 2}, metas) == 2
+
+
+def test_tytul_wins_when_above_tied():
+    metas = {
+        1: _meta(ma_orcid=True, ma_pbn_uid=True, ma_tytul=False),
+        2: _meta(ma_orcid=True, ma_pbn_uid=True, ma_tytul=True),
+    }
+    assert pick_main_pk({1, 2}, metas) == 2
+
+
+def test_dyscyplina_wins_when_above_tied():
+    metas = {
+        1: _meta(ma_orcid=True, ma_pbn_uid=True, ma_tytul=True, ma_dyscypline=False),
+        2: _meta(ma_orcid=True, ma_pbn_uid=True, ma_tytul=True, ma_dyscypline=True),
+    }
+    assert pick_main_pk({1, 2}, metas) == 2
+
+
+def test_publikacje_count_wins_when_above_tied():
+    metas = {
+        1: _meta(publikacje_count=5),
+        2: _meta(publikacje_count=10),
+    }
+    assert pick_main_pk({1, 2}, metas) == 2
+
+
+def test_max_rok_wins_when_publikacje_tied():
+    metas = {
+        1: _meta(publikacje_count=5, max_rok=2020),
+        2: _meta(publikacje_count=5, max_rok=2025),
+    }
+    assert pick_main_pk({1, 2}, metas) == 2
+
+
+def test_pk_lowest_wins_when_all_tied():
+    metas = {
+        77: _meta(),
+        12: _meta(),
+        99: _meta(),
+    }
+    assert pick_main_pk({77, 12, 99}, metas) == 12
diff --git a/src/deduplikator_autorow/utils/main_selection.py b/src/deduplikator_autorow/utils/main_selection.py
new file mode 100644
index 000000000..ef5fc6ff9
--- /dev/null
+++ b/src/deduplikator_autorow/utils/main_selection.py
@@ -0,0 +1,38 @@
+"""Wybór głównego rekordu (main) w klastrze duplikatów.
+
+Hierarchia (kolejne kryteria odpalają tylko przy remisie):
+1. ma_orcid (DESC)
+2. ma_pbn_uid (DESC)
+3. ma_tytul (DESC)
+4. ma_dyscypline (DESC)
+5. publikacje_count (DESC)
+6. max_rok (DESC)
+7. pk (ASC)
+"""
+
+
+def _selection_key(pk: int, meta: dict) -> tuple:
+    """Sort key — lower values mean a better candidate for the main record."""
+    return (
+        not meta["ma_orcid"],  # False sorts before True, so "has it" wins
+        not meta["ma_pbn_uid"],
+        not meta["ma_tytul"],
+        not meta["ma_dyscypline"],
+        -meta["publikacje_count"],  # negated: more publications sorts first
+        -(meta["max_rok"] or 0),  # a falsy max_rok counts as 0
+        pk,  # final tie-breaker: the lowest pk wins
+    )
+
+
+def pick_main_pk(cluster: set[int], metas: dict[int, dict]) -> int:
+    """Pick the PK of the main record from a cluster (a set of PKs).
+
+    Args:
+        cluster: set of member PKs; must be non-empty (``min`` raises otherwise).
+        metas: {pk -> meta dict with the fields ma_orcid, ma_pbn_uid, ma_tytul,
+                ma_dyscypline, publikacje_count, max_rok}.
+
+    Returns:
+        PK of the record chosen as main.
+    """
+    return min(cluster, key=lambda pk: _selection_key(pk, metas[pk]))

From 552865f32dfe1bf64c07a46c5a5ea8d9cd55483d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:07:05 +0200
Subject: [PATCH 06/25] =?UTF-8?q?feat(deduplikator):=20utils.meta=20?=
 =?UTF-8?q?=E2=80=94=20pre-load=20wszystkich=20autor=C3=B3w=20do=20pami?=
 =?UTF-8?q?=C4=99ci?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/deduplikator_autorow/tests/test_meta.py |  86 +++++++++++
 src/deduplikator_autorow/utils/meta.py      | 150 ++++++++++++++++++++
 2 files changed, 236 insertions(+)
 create mode 100644 src/deduplikator_autorow/tests/test_meta.py
 create mode 100644 src/deduplikator_autorow/utils/meta.py

diff --git a/src/deduplikator_autorow/tests/test_meta.py b/src/deduplikator_autorow/tests/test_meta.py
new file mode 100644
index 000000000..f93602b3f
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_meta.py
@@ -0,0 +1,86 @@
+"""Testy budowniczego meta-cache dla autorów."""
+
+import pytest
+from django.db import connection
+from django.test.utils import CaptureQueriesContext
+from model_bakery import baker
+
+from deduplikator_autorow.utils.meta import build_autor_meta, build_buckets
+
+
+@pytest.mark.django_db
+def test_meta_includes_basic_fields():
+    autor = baker.make(
+        "bpp.Autor",
+        nazwisko="Kowalski",
+        imiona="Jan",
+        orcid="0000-0001-2345-6789",
+    )
+    meta = build_autor_meta()
+    assert autor.pk in meta
+    m = meta[autor.pk]
+    assert m["nazwisko_norm"] == "kowalski"
+    assert m["imiona_norm"] == ["jan"]
+    assert m["ma_orcid"] is True
+    assert m["orcid_value"] == "0000-0001-2345-6789"
+    assert m["ma_pbn_uid"] is False
+    assert m["ma_tytul"] is False
+    assert m["publikacje_count"] == 0
+    assert m["max_rok"] == 0
+    assert m["lata_publikacji"] == set()
+
+
+@pytest.mark.django_db
+def test_meta_compound_lastname_parts():
+    autor = baker.make("bpp.Autor", nazwisko="Gal-Cisoń", imiona="Anna")
+    meta = build_autor_meta()
+    parts = meta[autor.pk]["nazwisko_parts"]
+    assert sorted(parts) == ["cisoń", "gal"]
+
+
+@pytest.mark.django_db
+def test_meta_ma_osoba_z_instytucji_true():
+    # Scientist has no "rekord_w_bpp" field — it is a cached_property on the
+    # Scientist side; the link is defined by Autor.pbn_uid → Scientist.
+    scientist = baker.make("pbn_api.Scientist")
+    autor = baker.make("bpp.Autor", nazwisko="Xtest", pbn_uid=scientist)
+    baker.make("pbn_api.OsobaZInstytucji", personId=scientist)
+
+    meta = build_autor_meta()
+    assert meta[autor.pk]["ma_osoba_z_instytucji"] is True
+
+
+@pytest.mark.django_db
+def test_meta_constant_query_count():
+    """Sanity: dodanie autorów nie zwiększa liczby zapytań (no N+1)."""
+    baker.make("bpp.Autor", _quantity=5, nazwisko="A")
+    with CaptureQueriesContext(connection) as ctx_small:
+        build_autor_meta()
+    n_small = len(ctx_small.captured_queries)
+
+    baker.make("bpp.Autor", _quantity=20, nazwisko="B")
+    with CaptureQueriesContext(connection) as ctx_big:
+        build_autor_meta()
+    n_big = len(ctx_big.captured_queries)
+
+    assert n_small == n_big, (
+        f"N+1 detected: small={n_small} queries, big={n_big} queries"
+    )
+
+
+@pytest.mark.django_db
+def test_buckets_includes_lastname_and_parts():
+    a1 = baker.make("bpp.Autor", nazwisko="Kowalski")
+    a2 = baker.make("bpp.Autor", nazwisko="Gal-Cisoń")
+    meta = build_autor_meta()
+    buckets = build_buckets(meta)
+
+    assert "kowalski" in buckets
+    assert a1.pk in buckets["kowalski"]
+    assert "gal" in buckets
+    assert "cisoń" in buckets
+    assert "gal-cisoń" in buckets
+    # reversed compound:
+    assert "cisoń-gal" in buckets
+    assert a2.pk in buckets["gal-cisoń"]
+    assert a2.pk in buckets["cisoń-gal"]
diff --git a/src/deduplikator_autorow/utils/meta.py b/src/deduplikator_autorow/utils/meta.py
new file mode 100644
index 000000000..fd2b0e824
--- /dev/null
+++ b/src/deduplikator_autorow/utils/meta.py
@@ -0,0 +1,150 @@
+"""Budowniczy meta-cache dla wszystkich autorów BPP.
+
+Pre-loaduje wszystkie metadane autorów potrzebne do fazy ``general``
+deduplikatora w stałej liczbie zapytań SQL — niezależnie od N.
+
+Agregaty publikacji liczone są bezpośrednio na tabelach źródłowych
+(``Wydawnictwo_Ciagle_Autor``, ``Wydawnictwo_Zwarte_Autor``,
+``Patent_Autor``), żeby działać niezależnie od stanu materializowanych
+widoków (``bpp_rekord_mat`` / ``bpp_autorzy_mat``) — które w testach
+mogą nie być odświeżone po ``baker.make``.
+"""
+
+from collections import defaultdict
+
+from django.contrib.postgres.aggregates import ArrayAgg
+from django.db.models import Count, Max
+
+from bpp.models import (
+    Autor,
+    Autor_Dyscyplina,
+    Patent_Autor,
+    Wydawnictwo_Ciagle_Autor,
+    Wydawnictwo_Zwarte_Autor,
+)
+from pbn_api.models import OsobaZInstytucji
+
+
+def _normalize(s: str | None) -> str:
+    return (s or "").strip().lower()  # None/empty -> "", else trimmed lower-case
+
+
+def _split_compound(nazwisko: str | None) -> list[str]:
+    if not nazwisko:  # None or empty surname has no parts
+        return []
+    return [_normalize(p) for p in nazwisko.split("-") if p.strip()]
+
+
+def _aggregate_publications(model, autorzy_meta: dict[int, dict]) -> None:
+    """Add the aggregates from one ``*_Autor`` table to the meta entries in place.
+
+    Runs EXACTLY one query with ``GROUP BY autor_id``.
+    """
+    rows = (
+        model.objects.values("autor_id")
+        .annotate(
+            cnt=Count("id"),
+            max_rok=Max("rekord__rok"),
+            lata=ArrayAgg("rekord__rok", distinct=True),
+        )
+        .filter(autor_id__isnull=False)
+    )
+    for row in rows:
+        pk = row["autor_id"]
+        m = autorzy_meta.get(pk)
+        if m is None:
+            continue  # author id not present in the meta cache
+        m["publikacje_count"] += row["cnt"] or 0
+        rok_max = row["max_rok"] or 0
+        if rok_max > m["max_rok"]:
+            m["max_rok"] = rok_max
+        for r in row["lata"] or []:
+            if r:  # skip NULL (or zero) years from the aggregated array
+                m["lata_publikacji"].add(r)
+
+
+def build_autor_meta() -> dict[int, dict]:
+    """Build the ``{autor_pk -> meta}`` dict in a constant number of SQL queries.
+
+    Queries:
+
+    1. ``Autor.objects.only(...)`` — fetch all authors.
+    2. Publication aggregate from ``Wydawnictwo_Ciagle_Autor`` (GROUP BY).
+    3. Publication aggregate from ``Wydawnictwo_Zwarte_Autor`` (GROUP BY).
+    4. Publication aggregate from ``Patent_Autor`` (GROUP BY).
+    5. ``Autor_Dyscyplina`` — DISTINCT autor_id.
+    6. ``OsobaZInstytucji`` — all ``personId_id`` values.
+
+    6 queries in total, regardless of the number of authors.
+    """
+    autorzy_meta: dict[int, dict] = {}
+    autor_qs = Autor.objects.only(
+        "pk", "nazwisko", "imiona", "orcid", "pbn_uid_id", "tytul_id"
+    )
+    for a in autor_qs.iterator():
+        autorzy_meta[a.pk] = {
+            "obj": a,
+            "nazwisko_norm": _normalize(a.nazwisko),
+            "nazwisko_parts": _split_compound(a.nazwisko),
+            "imiona_norm": [_normalize(i) for i in (a.imiona or "").split() if i],
+            "ma_orcid": bool(a.orcid),
+            "orcid_value": a.orcid or None,
+            "ma_pbn_uid": bool(a.pbn_uid_id),
+            "ma_tytul": bool(a.tytul_id),
+            "tytul_id": a.tytul_id,
+            "ma_osoba_z_instytucji": False,
+            "ma_dyscypline": False,
+            "publikacje_count": 0,
+            "lata_publikacji": set(),
+            "max_rok": 0,
+        }
+
+    # Publication aggregates — one query per record type.
+    for model in (
+        Wydawnictwo_Ciagle_Autor,
+        Wydawnictwo_Zwarte_Autor,
+        Patent_Autor,
+    ):
+        _aggregate_publications(model, autorzy_meta)
+
+    # Disciplines — a single DISTINCT query.
+    for pk in Autor_Dyscyplina.objects.values_list("autor_id", flat=True).distinct():
+        m = autorzy_meta.get(pk)
+        if m is not None:
+            m["ma_dyscypline"] = True
+
+    # OsobaZInstytucji — matched by Autor.pbn_uid_id == Scientist.pk
+    # (Scientist is OneToOne with OsobaZInstytucji via personId).
+    osoba_scientist_ids = set(
+        OsobaZInstytucji.objects.values_list("personId_id", flat=True)
+    )
+    for m in autorzy_meta.values():
+        pbn_uid_id = m["obj"].pbn_uid_id
+        if pbn_uid_id and pbn_uid_id in osoba_scientist_ids:
+            m["ma_osoba_z_instytucji"] = True
+
+    return autorzy_meta
+
+
+def build_buckets(meta: dict[int, dict]) -> dict[str, list[int]]:
+    """Build ``{nazwisko_norm -> [pk1, pk2, ...]}`` buckets for pair generation.
+
+    An author lands in the bucket for their normalized surname, for each
+    part of a compound surname (split on ``-``) and for the reversed
+    compound surname (e.g. ``Gal-Cisoń`` → ``cisoń-gal``).
+    """
+    buckets: dict[str, list[int]] = defaultdict(list)
+    for pk, m in meta.items():
+        nazwisko_norm = m["nazwisko_norm"]
+        if not nazwisko_norm:
+            continue  # authors without a surname are not bucketed at all
+        buckets[nazwisko_norm].append(pk)
+        parts = m["nazwisko_parts"]
+        for part in parts:
+            if len(part) > 2 and part != nazwisko_norm:  # skip 1-2 letter parts
+                buckets[part].append(pk)
+        if len(parts) == 2:  # only two-part surnames get a reversed key
+            reversed_name = "-".join(reversed(parts))
+            if reversed_name != nazwisko_norm:
+                buckets[reversed_name].append(pk)
+    return dict(buckets)

From 8e5710c2cb48bb5a0897062794ecbb43fc0d6620 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:11:23 +0200
Subject: [PATCH 07/25] =?UTF-8?q?feat(deduplikator):=20utils.analysis=5Fme?=
 =?UTF-8?q?ta=20=E2=80=94=20scoring=20par=20bez=20SQL?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tests/test_analysis_meta.py               |  83 ++++++++++++
 .../utils/analysis_meta.py                    | 119 ++++++++++++++++++
 2 files changed, 202 insertions(+)
 create mode 100644 src/deduplikator_autorow/tests/test_analysis_meta.py
 create mode 100644 src/deduplikator_autorow/utils/analysis_meta.py

diff --git a/src/deduplikator_autorow/tests/test_analysis_meta.py b/src/deduplikator_autorow/tests/test_analysis_meta.py
new file mode 100644
index 000000000..e436dcdd3
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_analysis_meta.py
@@ -0,0 +1,83 @@
+"""Testy analiza_pary_meta — scoring par autorów na bazie meta."""
+
+from deduplikator_autorow.utils.analysis_meta import analiza_pary_meta
+
+
+def _meta(
+    nazwisko="kowalski",
+    imiona=("jan",),
+    orcid=None,
+    pbn_uid=False,
+    tytul=False,
+    pubs=0,
+    max_rok=0,
+    lata=None,
+):
+    return {
+        "nazwisko_norm": nazwisko,
+        "nazwisko_parts": nazwisko.split("-"),
+        "imiona_norm": list(imiona),
+        "orcid_value": orcid,
+        "ma_orcid": bool(orcid),
+        "ma_pbn_uid": pbn_uid,
+        "ma_tytul": tytul,
+        "tytul_id": 1 if tytul else None,
+        "publikacje_count": pubs,
+        "max_rok": max_rok,
+        "lata_publikacji": set(lata or []),
+    }
+
+
+def test_identyczne_orcid_dodaje_50():
+    a = _meta(orcid="0000-0001-2345-6789")
+    b = _meta(orcid="0000-0001-2345-6789")
+    score, reasons = analiza_pary_meta(a, b)
+    assert score >= 50
+    assert any("ORCID" in r for r in reasons)
+
+
+def test_rozne_orcid_odejmuje_50():
+    # Different surnames/first names, so that ORCID is the dominant signal.
+    a = _meta(
+        nazwisko="kowalski",
+        imiona=("jan",),
+        orcid="0000-0001-1111-1111",
+    )
+    b = _meta(
+        nazwisko="nowak",
+        imiona=("piotr",),
+        orcid="0000-0002-2222-2222",
+    )
+    score, reasons = analiza_pary_meta(a, b)
+    assert score <= -40  # -50 plus small bonuses from the other criteria
+    assert any("różny ORCID" in r for r in reasons)
+
+
+def test_identyczne_nazwisko_dodaje_40():
+    a = _meta(nazwisko="kowalski")
+    b = _meta(nazwisko="kowalski")
+    score, reasons = analiza_pary_meta(a, b)
+    assert score >= 40
+    assert any("nazwisko" in r.lower() for r in reasons)
+
+
+def test_wspolne_lata_publikacji_dodaje_20():
+    a = _meta(lata=[2020, 2021, 2022])
+    b = _meta(lata=[2021, 2022])
+    score, reasons = analiza_pary_meta(a, b)
+    assert any("wspólne lata" in r.lower() for r in reasons)
+
+
+def test_score_to_int():
+    a = _meta()
+    b = _meta()
+    score, _ = analiza_pary_meta(a, b)
+    assert isinstance(score, int)
+
+
+def test_swap_imienia_z_nazwiskiem_dodaje_50():
+    """Full first name ↔ surname swap: A 'kowalski jan', B 'jan kowalski'."""
+    a = _meta(nazwisko="kowalski", imiona=("jan",))
+    b = _meta(nazwisko="jan", imiona=("kowalski",))
+    score, reasons = analiza_pary_meta(a, b)
+    assert any("zamian" in r.lower() for r in reasons)
diff --git a/src/deduplikator_autorow/utils/analysis_meta.py b/src/deduplikator_autorow/utils/analysis_meta.py
new file mode 100644
index 000000000..d3639e723
--- /dev/null
+++ b/src/deduplikator_autorow/utils/analysis_meta.py
@@ -0,0 +1,119 @@
+"""Analiza pary autorów na bazie wyłącznie meta-cache (bez SQL).
+
+Mirror'uje wagi punktowe z ``utils/analysis.py:analiza_duplikatow`` żeby
+zachować spójność scoringu między fazą PBN i general.
+Pomija tylko analizę płci (która w wersji DB-owej używa
+``Autor.plec`` + heurystyki na imieniu — niepotrzebne w v1 trybu general).
+"""
+
+
+def _common_initials(imiona_a: list[str], imiona_b: list[str]) -> int:
+    initials_a = {x[0] for x in imiona_a if x}  # first letters, empty names skipped
+    initials_b = {x[0] for x in imiona_b if x}
+    return len(initials_a & initials_b)  # number of distinct shared initials
+
+
+def analiza_pary_meta(a: dict, b: dict) -> tuple[int, list[str]]:  # noqa: C901
+    """Return (score, reasons) for the pair (a, b), based on the meta cache only."""
+    score = 0
+    reasons: list[str] = []
+
+    pubs_b = b["publikacje_count"]  # asymmetric: only b (the candidate) counts
+    if pubs_b <= 5:
+        score += 10
+        reasons.append(f"mało publikacji ({pubs_b}) - prawdopodobny duplikat")
+    elif pubs_b <= 10:
+        score -= 10
+        reasons.append(f"średnio publikacji ({pubs_b}) - możliwy duplikat")
+    else:
+        score -= 20
+        reasons.append(f"wiele publikacji ({pubs_b}) - mało prawdopodobny duplikat")
+
+    if not b["ma_tytul"] and a["ma_tytul"]:
+        score += 15
+        reasons.append("brak tytułu naukowego u kandydata - prawdopodobny duplikat")
+    elif b["ma_tytul"] and a["ma_tytul"]:
+        if a.get("tytul_id") == b.get("tytul_id"):
+            score += 10
+            reasons.append("identyczny tytuł naukowy")
+        else:
+            score -= 15
+            reasons.append("różny tytuł naukowy")
+
+    if not b["ma_orcid"] and a["ma_orcid"]:
+        score += 15
+        reasons.append("brak ORCID u kandydata - prawdopodobny duplikat")
+    elif b["ma_orcid"] and a["ma_orcid"]:
+        if a.get("orcid_value") == b.get("orcid_value"):
+            score += 50
+            reasons.append("identyczny ORCID - to ten sam autor")
+        else:
+            score -= 50
+            reasons.append("różny ORCID - to różni autorzy")
+
+    if a["nazwisko_norm"] and b["nazwisko_norm"]:
+        if a["nazwisko_norm"] == b["nazwisko_norm"]:
+            score += 40
+            reasons.append("identyczne nazwisko")
+        elif (
+            a["nazwisko_norm"] in b["nazwisko_norm"]
+            or b["nazwisko_norm"] in a["nazwisko_norm"]
+        ):
+            score += 30
+            reasons.append("podobne nazwisko (zawieranie)")
+
+    if (
+        a["nazwisko_norm"]
+        and b["nazwisko_norm"]
+        and a["imiona_norm"]
+        and b["imiona_norm"]
+    ):
+        if (a["nazwisko_norm"] in b["imiona_norm"]) and (
+            b["nazwisko_norm"] in a["imiona_norm"]
+        ):
+            score += 50
+            reasons.append("wykryto pełną zamianę imienia z nazwiskiem")
+
+    common = set(a["imiona_norm"]) & set(b["imiona_norm"])
+    if common:
+        score += 30 * len(common)
+        reasons.append(f"wspólne imię ({len(common)})")
+
+    similar = 0
+    for ia in a["imiona_norm"]:
+        for ib in b["imiona_norm"]:
+            if len(ia) >= 3 and len(ib) >= 3 and ia != ib:
+                if ia.startswith(ib[:3]) or ib.startswith(ia[:3]):
+                    similar += 1
+    if similar:
+        score += 15 * similar
+        reasons.append(f"podobne imię ({similar})")
+
+    init_count = _common_initials(a["imiona_norm"], b["imiona_norm"])
+    if init_count:
+        score += 5 * init_count
+        reasons.append(f"pasujące inicjały ({init_count})")
+
+    if not b["imiona_norm"] and a["imiona_norm"]:
+        score += 10
+        reasons.append("brak imion u kandydata")
+
+    common_lata = a["lata_publikacji"] & b["lata_publikacji"]
+    if common_lata:
+        score += 20
+        reasons.append(f"wspólne lata publikacji: {sorted(common_lata)}")
+    elif a["lata_publikacji"] and b["lata_publikacji"]:
+        min_dist = min(
+            abs(ra - rb) for ra in a["lata_publikacji"] for rb in b["lata_publikacji"]
+        )
+        if min_dist <= 2:
+            score += 15
+            reasons.append(f"bliskie lata publikacji (różnica {min_dist})")
+        elif min_dist <= 7:
+            score -= 5
+            reasons.append(f"średnia odległość lat publikacji ({min_dist})")
+        else:
+            score -= 20
+            reasons.append(f"duża odległość lat publikacji ({min_dist})")
+
+    return score, reasons

From 97ced1e651753fd813b4b694dd6a56b0c52f7bb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:15:58 +0200
Subject: [PATCH 08/25] =?UTF-8?q?feat(deduplikator):=20utils.search=5Fgene?=
 =?UTF-8?q?ral=20=E2=80=94=20generator=20par=20dla=20trybu=20general?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dodaje in-memory generator par kandydatów oparty o buckety nazwisk:
iteruje po bucketach (skipuje > BUCKET_MAX_SIZE=200 z warningiem),
generuje pary nieuporządkowane (pk_a < pk_b), deduplikuje symetryczne
(autor może trafić do wielu bucketów przez compound nazwisko / reverse),
filtruje przez ignored_pks/notadup_pks i emituje tylko pary
score >= MIN_CONFIDENCE_TO_STORE (50).

Rozszerza analiza_pary_meta o detekcję compound nazwisk po
nazwisko_parts: pełna permutacja członów (np. 'Gal-Cisoń' ↔
'Cisoń-Gal') daje +35, częściowe pokrycie +20 — bez tego sygnał
z bucketu reverse-compound nie miał szans przekroczyć progu.
---
 .../tests/test_search_general.py              | 79 +++++++++++++++++++
 .../utils/analysis_meta.py                    | 16 ++++
 .../utils/search_general.py                   | 55 +++++++++++++
 3 files changed, 150 insertions(+)
 create mode 100644 src/deduplikator_autorow/tests/test_search_general.py
 create mode 100644 src/deduplikator_autorow/utils/search_general.py

diff --git a/src/deduplikator_autorow/tests/test_search_general.py b/src/deduplikator_autorow/tests/test_search_general.py
new file mode 100644
index 000000000..88a0cc162
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_search_general.py
@@ -0,0 +1,79 @@
+"""Testy generowania par kandydatów w fazie general."""
+
+import pytest
+from model_bakery import baker
+
+from deduplikator_autorow.utils.meta import build_autor_meta, build_buckets
+from deduplikator_autorow.utils.search_general import (
+    BUCKET_MAX_SIZE,
+    generate_pairs,
+)
+
+
+@pytest.mark.django_db
+def test_simple_lastname_pair():
+    a1 = baker.make("bpp.Autor", nazwisko="Kowalski", imiona="Jan")
+    a2 = baker.make("bpp.Autor", nazwisko="Kowalski", imiona="Jan")
+    meta = build_autor_meta()
+    buckets = build_buckets(meta)
+    pairs = list(generate_pairs(buckets, meta, ignored_pks=set(), notadup_pks=set()))
+    pks = {(min(p, q), max(p, q)) for p, q, _, _ in pairs}
+    assert (min(a1.pk, a2.pk), max(a1.pk, a2.pk)) in pks
+
+
+@pytest.mark.django_db
+def test_compound_lastname_pair():
+    a1 = baker.make("bpp.Autor", nazwisko="Gal-Cisoń", imiona="Anna")
+    a2 = baker.make("bpp.Autor", nazwisko="Cisoń-Gal", imiona="Anna")
+    meta = build_autor_meta()
+    buckets = build_buckets(meta)
+    pairs = list(generate_pairs(buckets, meta, ignored_pks=set(), notadup_pks=set()))
+    pks = {(min(p, q), max(p, q)) for p, q, _, _ in pairs}
+    assert (min(a1.pk, a2.pk), max(a1.pk, a2.pk)) in pks
+
+
+@pytest.mark.django_db
+def test_pair_dedup():
+    """A pair (a, b) is emitted only once, however many buckets link it."""
+    baker.make("bpp.Autor", nazwisko="Smith", imiona="John")
+    baker.make("bpp.Autor", nazwisko="Smith", imiona="John")
+    meta = build_autor_meta()
+    buckets = build_buckets(meta)
+    pairs = list(generate_pairs(buckets, meta, ignored_pks=set(), notadup_pks=set()))
+    pair_set = [(p, q) for p, q, _, _ in pairs]
+    assert len(pair_set) == len(set(pair_set))
+
+
+@pytest.mark.django_db
+def test_ignored_excluded():
+    a1 = baker.make("bpp.Autor", nazwisko="Brown", imiona="Bob")
+    baker.make("bpp.Autor", nazwisko="Brown", imiona="Bob")
+    meta = build_autor_meta()
+    buckets = build_buckets(meta)
+    pairs = list(generate_pairs(buckets, meta, ignored_pks={a1.pk}, notadup_pks=set()))
+    assert pairs == []
+
+
+@pytest.mark.django_db
+def test_notadup_excluded():
+    a1 = baker.make("bpp.Autor", nazwisko="Wilson", imiona="Tim")
+    baker.make("bpp.Autor", nazwisko="Wilson", imiona="Tim")
+    meta = build_autor_meta()
+    buckets = build_buckets(meta)
+    pairs = list(generate_pairs(buckets, meta, ignored_pks=set(), notadup_pks={a1.pk}))
+    assert pairs == []
+
+
+@pytest.mark.django_db
+def test_oversized_bucket_skipped():
+    """A bucket larger than BUCKET_MAX_SIZE is skipped."""
+    baker.make(
+        "bpp.Autor",
+        nazwisko="PopularName",
+        _quantity=BUCKET_MAX_SIZE + 1,
+    )
+    meta = build_autor_meta()
+    buckets = build_buckets(meta)
+    pairs = list(generate_pairs(buckets, meta, ignored_pks=set(), notadup_pks=set()))
+    # For this bucket (PopularName) no pairs should be emitted
+    assert pairs == []
diff --git a/src/deduplikator_autorow/utils/analysis_meta.py b/src/deduplikator_autorow/utils/analysis_meta.py
index d3639e723..34a6d3244 100644
--- a/src/deduplikator_autorow/utils/analysis_meta.py
+++ b/src/deduplikator_autorow/utils/analysis_meta.py
@@ -61,6 +61,22 @@ def analiza_pary_meta(a: dict, b: dict) -> tuple[int, list[str]]:  # noqa: C901
         ):
             score += 30
             reasons.append("podobne nazwisko (zawieranie)")
+        else:
+            parts_a = set(a.get("nazwisko_parts") or [])
+            parts_b = set(b.get("nazwisko_parts") or [])
+            common_parts = parts_a & parts_b
+            if common_parts and (len(parts_a) > 1 or len(parts_b) > 1):
+                if parts_a == parts_b:
+                    # Pełny zestaw członów się zgadza (np. permutacja
+                    # 'gal-cisoń' ↔ 'cisoń-gal').
+                    score += 35
+                    reasons.append("identyczne człony nazwiska złożonego (permutacja)")
+                else:
+                    score += 20
+                    reasons.append(
+                        f"wspólny człon nazwiska złożonego "
+                        f"({', '.join(sorted(common_parts))})"
+                    )
 
     if (
         a["nazwisko_norm"]
diff --git a/src/deduplikator_autorow/utils/search_general.py b/src/deduplikator_autorow/utils/search_general.py
new file mode 100644
index 000000000..cdd17aac1
--- /dev/null
+++ b/src/deduplikator_autorow/utils/search_general.py
@@ -0,0 +1,55 @@
+"""Generator par kandydatów w fazie general — in-memory bucket comparisons."""
+
+import logging
+
+from .analysis_meta import analiza_pary_meta
+
+logger = logging.getLogger(__name__)
+
+BUCKET_MAX_SIZE = 200
+MIN_CONFIDENCE_TO_STORE = 50
+
+
+def generate_pairs(
+    buckets: dict[str, list[int]],
+    meta: dict[int, dict],
+    ignored_pks: set[int],
+    notadup_pks: set[int],
+    min_confidence: int = MIN_CONFIDENCE_TO_STORE,
+):
+    """Yield (pk_a, pk_b, score, reasons) with pk_a < pk_b and score >= min_confidence.
+
+    Args:
+        buckets: {nazwisko_norm -> [pk1, pk2, ...]} from `build_buckets`.
+        meta: {pk -> meta dict} from `build_autor_meta`.
+        ignored_pks: PKs to skip as pivot/candidate (from IgnoredAuthor).
+        notadup_pks: PKs marked as NotADuplicate (also skipped).
+        min_confidence: score threshold below which a pair is not emitted.
+    """
+    seen_pairs: set[tuple[int, int]] = set()
+    skipped_buckets = 0
+    for bucket_name, pks in buckets.items():
+        if len(pks) > BUCKET_MAX_SIZE:
+            logger.warning(
+                "Skipping oversized bucket '%s' (%d members)",
+                bucket_name,
+                len(pks),
+            )
+            skipped_buckets += 1
+            continue
+        active = [p for p in pks if p not in ignored_pks]
+        for i, pk_a in enumerate(active):
+            for pk_b in active[i + 1 :]:
+                if pk_a == pk_b:
+                    continue
+                key = (min(pk_a, pk_b), max(pk_a, pk_b))  # unordered pair, low pk first
+                if key in seen_pairs:
+                    continue
+                seen_pairs.add(key)  # marked before filtering, so never re-scored
+                if key[0] in notadup_pks or key[1] in notadup_pks:
+                    continue
+                score, reasons = analiza_pary_meta(meta[key[0]], meta[key[1]])
+                if score >= min_confidence:
+                    yield key[0], key[1], score, reasons
+    if skipped_buckets:  # reached only once the generator is fully consumed
+        logger.info("Skipped %d oversized buckets in general phase", skipped_buckets)

From 8726db3f077c6cdd8085ab7a0486beb45ec8f8d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:20:28 +0200
Subject: [PATCH 09/25] =?UTF-8?q?feat(deduplikator):=20=5Frun=5Fgeneral=5F?=
 =?UTF-8?q?phase=20w=20tasks.py=20=E2=80=94=20algorytm=20fazy=20general?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/deduplikator_autorow/tasks.py             |  97 +++++++++++++++
 .../tests/test_general_phase.py               | 111 ++++++++++++++++++
 2 files changed, 208 insertions(+)
 create mode 100644 src/deduplikator_autorow/tests/test_general_phase.py

diff --git a/src/deduplikator_autorow/tasks.py b/src/deduplikator_autorow/tasks.py
index 0fa4a5dd3..2bc456c85 100644
--- a/src/deduplikator_autorow/tasks.py
+++ b/src/deduplikator_autorow/tasks.py
@@ -217,6 +217,103 @@ def _process_author_duplicates(osoba_z_instytucji, scan_run, min_confidence):
     return candidates
 
 
+def _run_general_phase(scan_run, min_confidence=MIN_CONFIDENCE_TO_STORE):
+    """Scan phase 2 — "general" duplicates (no SQL on the hot path).
+
+    Algorithm:
+    1. build_autor_meta + build_buckets — pre-load all authors.
+    2. Read the IgnoredAuthor / NotADuplicate exclusions.
+    3. generate_pairs — pairs with score >= min_confidence.
+    4. find_clusters — connected components.
+    5. Skip a cluster if any of its members has an OsobaZInstytucji.
+    6. Pick the main author via hierarchy B; emit (main, dup) pairs as
+       DuplicateCandidate(scan_mode='general').
+    7. Check scan_run.status == CANCELLED between batches.
+    """
+    from .models import (
+        DuplicateCandidate,
+        DuplicateScanRun,
+        IgnoredAuthor,
+        NotADuplicate,
+    )
+    from .utils.analysis_meta import analiza_pary_meta
+    from .utils.cluster import find_clusters
+    from .utils.main_selection import pick_main_pk
+    from .utils.meta import build_autor_meta, build_buckets
+    from .utils.search_general import generate_pairs
+
+    logger.info("General phase: building meta cache...")
+    meta = build_autor_meta()
+    buckets = build_buckets(meta)
+    logger.info("General phase: %d autorów, %d bucketów", len(meta), len(buckets))
+
+    ignored_pks = set(IgnoredAuthor.objects.values_list("autor_id", flat=True))
+    notadup_pks = set(NotADuplicate.objects.values_list("autor_id", flat=True))
+
+    pairs_data: dict[tuple[int, int], tuple[int, list[str]]] = {}
+    for pk_a, pk_b, score, reasons in generate_pairs(
+        buckets, meta, ignored_pks, notadup_pks, min_confidence
+    ):
+        pairs_data[(pk_a, pk_b)] = (score, reasons)
+    logger.info("General phase: znaleziono %d par", len(pairs_data))
+
+    clusters = find_clusters(list(pairs_data.keys()))
+    logger.info("General phase: %d klastrów wstępnych", len(clusters))
+
+    skipped_count = 0
+    candidates_to_create: list[DuplicateCandidate] = []
+    for cluster in clusters:
+        if any(meta[pk]["ma_osoba_z_instytucji"] for pk in cluster):
+            skipped_count += 1
+            continue
+        main_pk = pick_main_pk(cluster, meta)
+        for dup_pk in cluster - {main_pk}:
+            key = (min(main_pk, dup_pk), max(main_pk, dup_pk))
+            if key in pairs_data:
+                score, reasons = pairs_data[key]
+            else:  # pair not produced by generate_pairs — score it directly
+                score, reasons = analiza_pary_meta(meta[main_pk], meta[dup_pk])
+            main_obj = meta[main_pk]["obj"]
+            dup_obj = meta[dup_pk]["obj"]
+            candidates_to_create.append(
+                DuplicateCandidate(
+                    scan_run=scan_run,
+                    main_autor=main_obj,
+                    duplicate_autor=dup_obj,
+                    confidence_score=score,
+                    confidence_percent=normalize_confidence(score),
+                    reasons=reasons,
+                    priority=calculate_author_priority(dup_obj),
+                    main_autor_name=str(main_obj),
+                    duplicate_autor_name=str(dup_obj),
+                    main_publications_count=meta[main_pk]["publikacje_count"],
+                    duplicate_publications_count=meta[dup_pk]["publikacje_count"],
+                    scan_mode="general",
+                )
+            )
+            if len(candidates_to_create) >= 1000:  # flush in batches of 1000
+                with transaction.atomic():
+                    DuplicateCandidate.objects.bulk_create(
+                        candidates_to_create, ignore_conflicts=True
+                    )
+                candidates_to_create = []
+                scan_run.refresh_from_db()  # re-read status to detect cancellation
+                if scan_run.status == DuplicateScanRun.Status.CANCELLED:
+                    logger.info("General phase cancelled mid-batch")
+                    return
+
+    if candidates_to_create:
+        with transaction.atomic():
+            DuplicateCandidate.objects.bulk_create(
+                candidates_to_create, ignore_conflicts=True
+            )
+
+    logger.info(
+        "General phase: %d klastrów pominiętych (z OsobaZInstytucji)",
+        skipped_count,
+    )
+
+
 @shared_task(bind=True, name="deduplikator_autorow.scan_for_duplicates")
 def scan_for_duplicates(self, user_id=None, min_confidence=MIN_CONFIDENCE_TO_STORE):
     """
diff --git a/src/deduplikator_autorow/tests/test_general_phase.py b/src/deduplikator_autorow/tests/test_general_phase.py
new file mode 100644
index 000000000..76cb48e2b
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_general_phase.py
@@ -0,0 +1,111 @@
+"""Testy fazy general w skanowaniu duplikatów."""
+
+import pytest
+from model_bakery import baker
+
+from deduplikator_autorow.models import (
+    DuplicateCandidate,
+    DuplicateScanRun,
+    IgnoredAuthor,
+    NotADuplicate,
+)
+from deduplikator_autorow.tasks import _run_general_phase
+
+
+@pytest.mark.django_db
+def test_general_finds_simple_pair():
+    """Two authors with the same surname/given name, neither with OsobaZInstytucji."""
+    baker.make("bpp.Autor", nazwisko="Kowalski", imiona="Jan")
+    baker.make("bpp.Autor", nazwisko="Kowalski", imiona="Jan")
+    scan = DuplicateScanRun.objects.create()
+    _run_general_phase(scan, min_confidence=50)
+    cands = DuplicateCandidate.objects.filter(scan_run=scan, scan_mode="general")
+    assert cands.count() == 1
+
+
+@pytest.mark.django_db
+def test_general_skips_cluster_with_osoba_instytucji():
+    """Cluster {A, B, C} where B has an OsobaZInstytucji → cluster skipped."""
+    baker.make("bpp.Autor", nazwisko="Nowak", imiona="Anna")
+    b = baker.make("bpp.Autor", nazwisko="Nowak", imiona="Anna")
+    baker.make("bpp.Autor", nazwisko="Nowak", imiona="Anna")
+    scientist = baker.make("pbn_api.Scientist")
+    b.pbn_uid = scientist
+    b.save()
+    baker.make("pbn_api.OsobaZInstytucji", personId=scientist)
+
+    scan = DuplicateScanRun.objects.create()
+    _run_general_phase(scan, min_confidence=50)
+    cands = DuplicateCandidate.objects.filter(scan_run=scan, scan_mode="general")
+    assert cands.count() == 0
+
+
+@pytest.mark.django_db
+def test_general_main_chosen_by_orcid():
+    """Of two authors, the one with an ORCID wins as main."""
+    a = baker.make("bpp.Autor", nazwisko="Adams", imiona="Eve", orcid=None)
+    b = baker.make(
+        "bpp.Autor",
+        nazwisko="Adams",
+        imiona="Eve",
+        orcid="0000-0001-2345-6789",
+    )
+    scan = DuplicateScanRun.objects.create()
+    _run_general_phase(scan, min_confidence=50)
+    cand = DuplicateCandidate.objects.get(scan_run=scan, scan_mode="general")
+    assert cand.main_autor_id == b.pk
+    assert cand.duplicate_autor_id == a.pk
+
+
+@pytest.mark.django_db
+def test_general_pk_tiebreaker():
+    """All else equal → the lower pk wins as main."""
+    a = baker.make("bpp.Autor", nazwisko="Black", imiona="Carl")
+    b = baker.make("bpp.Autor", nazwisko="Black", imiona="Carl")
+    lower_pk = min(a.pk, b.pk)
+    higher_pk = max(a.pk, b.pk)
+
+    scan = DuplicateScanRun.objects.create()
+    _run_general_phase(scan, min_confidence=50)
+    cand = DuplicateCandidate.objects.get(scan_run=scan, scan_mode="general")
+    assert cand.main_autor_id == lower_pk
+    assert cand.duplicate_autor_id == higher_pk
+
+
+@pytest.mark.django_db
+def test_general_respects_ignored_author():
+    a = baker.make("bpp.Autor", nazwisko="Yellow", imiona="Sun")
+    baker.make("bpp.Autor", nazwisko="Yellow", imiona="Sun")
+    user = baker.make("bpp.BppUser")
+    IgnoredAuthor.objects.create(autor=a, created_by=user)  # `a` is ignored
+
+    scan = DuplicateScanRun.objects.create()
+    _run_general_phase(scan, min_confidence=50)
+    assert DuplicateCandidate.objects.filter(scan_run=scan).count() == 0
+
+
+@pytest.mark.django_db
+def test_general_respects_not_a_duplicate():
+    a = baker.make("bpp.Autor", nazwisko="Green", imiona="Mike")
+    baker.make("bpp.Autor", nazwisko="Green", imiona="Mike")
+    user = baker.make("bpp.BppUser")
+    NotADuplicate.objects.create(autor=a, created_by=user)  # `a` is excluded
+
+    scan = DuplicateScanRun.objects.create()
+    _run_general_phase(scan, min_confidence=50)
+    assert DuplicateCandidate.objects.filter(scan_run=scan).count() == 0
+
+
+@pytest.mark.django_db
+def test_general_transitive_cluster():
+    """Three 'Linker Jan' authors form cluster {A,B,C} → 2 pairs, one main."""
+    a = baker.make("bpp.Autor", nazwisko="Linker", imiona="Jan")
+    b = baker.make("bpp.Autor", nazwisko="Linker", imiona="Jan")
+    c = baker.make("bpp.Autor", nazwisko="Linker", imiona="Jan")
+    scan = DuplicateScanRun.objects.create()
+    _run_general_phase(scan, min_confidence=50)
+    cands = DuplicateCandidate.objects.filter(scan_run=scan, scan_mode="general")
+    assert cands.count() == 2
+    main_pks = {c.main_autor_id for c in cands}
+    assert len(main_pks) == 1
+    assert main_pks == {min(a.pk, b.pk, c.pk)}  # lowest pk is the main

From 6eece6aa699e8cdeb8c46cc40ced140b6ae69b92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:24:35 +0200
Subject: [PATCH 10/25] feat(deduplikator): scan_for_duplicates dwufazowo (PBN
 + general) z PARTIAL_COMPLETED
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wyodrębnia istniejące ciało scan_for_duplicates do _run_pbn_phase
i przepina task na orkiestrację dwóch faz (PBN → general) w jednym
DuplicateScanRun. Cancellation w fazie PBN daje status CANCELLED
(bez wyników general), w fazie general daje PARTIAL_COMPLETED
(wyniki PBN zachowane). Pole `phase` ustawiane na 'pbn'/'general'
w trakcie pracy. Zachowane behaviour PBN: replace mode, polling
scan_run.status między autorami, periodyczny update progress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/deduplikator_autorow/tasks.py             | 207 +++++++++++-------
 .../tests/test_combined_scan.py               | 104 +++++++++
 2 files changed, 230 insertions(+), 81 deletions(-)
 create mode 100644 src/deduplikator_autorow/tests/test_combined_scan.py

diff --git a/src/deduplikator_autorow/tasks.py b/src/deduplikator_autorow/tasks.py
index 2bc456c85..a112dec0a 100644
--- a/src/deduplikator_autorow/tasks.py
+++ b/src/deduplikator_autorow/tasks.py
@@ -314,124 +314,169 @@ def _run_general_phase(scan_run, min_confidence=MIN_CONFIDENCE_TO_STORE):
     )
 
 
-@shared_task(bind=True, name="deduplikator_autorow.scan_for_duplicates")
-def scan_for_duplicates(self, user_id=None, min_confidence=MIN_CONFIDENCE_TO_STORE):
-    """
-    Background task to scan all authors for potential duplicates.
-
-    This task:
-    1. Creates a DuplicateScanRun record
-    2. Deletes all existing DuplicateCandidate records (replace mode)
-    3. Iterates through all OsobaZInstytucji
-    4. For each, calls szukaj_kopii() to find candidates
-    5. For each candidate, calls analiza_duplikatow() and stores in DuplicateCandidate
-    6. Updates progress periodically
-    7. Marks run as completed
+def _run_pbn_phase(scan_run, min_confidence=MIN_CONFIDENCE_TO_STORE):
+    """Faza 1 skanu — duplikaty PBN (OsobaZInstytucji).
 
-    Args:
-        user_id: Optional ID of the user who triggered the scan
-        min_confidence: Minimum confidence score to store a candidate (default: 50)
+    Iteruje przez wszystkie OsobaZInstytucji (z wyjątkiem IgnoredScientist),
+    dla każdej szuka kopii (`szukaj_kopii`), analizuje (`analiza_duplikatow`)
+    i tworzy DuplicateCandidate. Polluje `scan_run.status` między autorami —
+    jeśli zewnętrzny `cancel_scan` ustawił CANCELLED, kończy wcześnie
+    (status pozostaje CANCELLED — caller decyduje o finalizacji).
 
-    Returns:
-        dict: Result with status, scan_run_id, and statistics
+    Aktualizuje pola `total_authors_to_scan`, `authors_scanned` i
+    `duplicates_found` na `scan_run` w trakcie pracy.
     """
     from pbn_api.models import OsobaZInstytucji
 
     from .models import DuplicateCandidate, DuplicateScanRun, IgnoredScientist
 
-    logger.info("Starting duplicate scan task...")
-
-    user = _get_user_by_id(user_id)
-
-    scan_run = DuplicateScanRun.objects.create(
-        status=DuplicateScanRun.Status.RUNNING,
-        created_by=user,
-        celery_task_id=self.request.id or "",
+    ignored_scientist_ids = set(
+        IgnoredScientist.objects.values_list("scientist_id", flat=True)
     )
 
-    try:
-        deleted_count = DuplicateCandidate.objects.all().delete()[0]
-        logger.info(f"Deleted {deleted_count} existing candidates")
+    osoby_query = OsobaZInstytucji.objects.select_related("personId").all()
+    if ignored_scientist_ids:
+        osoby_query = osoby_query.exclude(personId__pk__in=ignored_scientist_ids)
 
-        ignored_scientist_ids = set(
-            IgnoredScientist.objects.values_list("scientist_id", flat=True)
-        )
-
-        osoby_query = OsobaZInstytucji.objects.select_related("personId").all()
-        if ignored_scientist_ids:
-            osoby_query = osoby_query.exclude(personId__pk__in=ignored_scientist_ids)
-
-        total_count = osoby_query.count()
-        scan_run.total_authors_to_scan = total_count
-        scan_run.save(update_fields=["total_authors_to_scan"])
-
-        logger.info(f"Scanning {total_count} authors for duplicates...")
+    total_count = osoby_query.count()
+    scan_run.total_authors_to_scan = total_count
+    scan_run.save(update_fields=["total_authors_to_scan"])
 
-        authors_scanned = 0
-        duplicates_found = 0
-        candidates_to_create = []
+    logger.info(f"PBN phase: scanning {total_count} authors...")
 
-        for osoba_z_instytucji in osoby_query.iterator():
-            scan_run.refresh_from_db()
-            if scan_run.status == DuplicateScanRun.Status.CANCELLED:
-                logger.info("Scan cancelled by user")
-                return {
-                    "status": "cancelled",
-                    "scan_run_id": scan_run.pk,
-                    "authors_scanned": authors_scanned,
-                    "duplicates_found": duplicates_found,
-                }
+    authors_scanned = 0
+    duplicates_found = 0
+    candidates_to_create = []
 
-            authors_scanned += 1
-
-            new_candidates = _process_author_duplicates(
-                osoba_z_instytucji, scan_run, min_confidence
-            )
-            candidates_to_create.extend(new_candidates)
-            duplicates_found += len(new_candidates)
-
-            if len(candidates_to_create) >= 1000:
+    for osoba_z_instytucji in osoby_query.iterator():
+        scan_run.refresh_from_db()
+        if scan_run.status == DuplicateScanRun.Status.CANCELLED:
+            logger.info("PBN phase cancelled by user")
+            if candidates_to_create:
                 with transaction.atomic():
                     DuplicateCandidate.objects.bulk_create(
                         candidates_to_create, ignore_conflicts=True
                     )
-                candidates_to_create = []
+            scan_run.authors_scanned = authors_scanned
+            scan_run.duplicates_found = duplicates_found
+            scan_run.save(update_fields=["authors_scanned", "duplicates_found"])
+            return
 
-            if authors_scanned % PROGRESS_UPDATE_INTERVAL == 0:
-                scan_run.authors_scanned = authors_scanned
-                scan_run.duplicates_found = duplicates_found
-                scan_run.save(update_fields=["authors_scanned", "duplicates_found"])
-                logger.info(
-                    f"Progress: {authors_scanned}/{total_count} authors, "
-                    f"{duplicates_found} duplicates found"
-                )
+        authors_scanned += 1
+
+        new_candidates = _process_author_duplicates(
+            osoba_z_instytucji, scan_run, min_confidence
+        )
+        candidates_to_create.extend(new_candidates)
+        duplicates_found += len(new_candidates)
 
-        if candidates_to_create:
+        if len(candidates_to_create) >= 1000:
             with transaction.atomic():
                 DuplicateCandidate.objects.bulk_create(
                     candidates_to_create, ignore_conflicts=True
                 )
+            candidates_to_create = []
+
+        if authors_scanned % PROGRESS_UPDATE_INTERVAL == 0:
+            scan_run.authors_scanned = authors_scanned
+            scan_run.duplicates_found = duplicates_found
+            scan_run.save(update_fields=["authors_scanned", "duplicates_found"])
+            logger.info(
+                f"PBN progress: {authors_scanned}/{total_count} authors, "
+                f"{duplicates_found} duplicates found"
+            )
+
+    if candidates_to_create:
+        with transaction.atomic():
+            DuplicateCandidate.objects.bulk_create(
+                candidates_to_create, ignore_conflicts=True
+            )
+
+    scan_run.authors_scanned = authors_scanned
+    scan_run.duplicates_found = duplicates_found
+    scan_run.save(update_fields=["authors_scanned", "duplicates_found"])
+
+    logger.info(
+        f"PBN phase done: {authors_scanned} authors scanned, "
+        f"{duplicates_found} duplicates found"
+    )
+
+
+@shared_task(bind=True, name="deduplikator_autorow.scan_for_duplicates")
+def scan_for_duplicates(self, user_id=None, min_confidence=MIN_CONFIDENCE_TO_STORE):
+    """Combined task: faza PBN + faza general w jednym przebiegu.
+
+    Statusy końcowe:
+    - COMPLETED: obie fazy ukończone.
+    - PARTIAL_COMPLETED: faza PBN OK, faza general anulowana → wyniki PBN
+      dostępne.
+    - CANCELLED: faza PBN anulowana → brak wyników.
+    - FAILED: nieobsłużony wyjątek.
+    """
+    from .models import DuplicateCandidate, DuplicateScanRun
+
+    logger.info("Starting duplicate scan task (combined PBN + general)...")
+
+    user = _get_user_by_id(user_id)
+    scan_run = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.RUNNING,
+        created_by=user,
+        celery_task_id=self.request.id or "",
+    )
+
+    try:
+        # Replace mode: clear all previous candidates
+        deleted_count = DuplicateCandidate.objects.all().delete()[0]
+        logger.info(f"Deleted {deleted_count} existing candidates")
+
+        # FAZA 1: PBN
+        scan_run.phase = "pbn"
+        scan_run.save(update_fields=["phase"])
+        _run_pbn_phase(scan_run, min_confidence)
+        scan_run.refresh_from_db()
+        if scan_run.status == DuplicateScanRun.Status.CANCELLED:
+            scan_run.finished_at = timezone.now()
+            scan_run.save(update_fields=["finished_at"])
+            logger.info("Scan cancelled in PBN phase")
+            return {
+                "status": "cancelled",
+                "scan_run_id": scan_run.pk,
+            }
+
+        # FAZA 2: general
+        scan_run.phase = "general"
+        scan_run.save(update_fields=["phase"])
+        _run_general_phase(scan_run, min_confidence)
+        scan_run.refresh_from_db()
+        if scan_run.status == DuplicateScanRun.Status.CANCELLED:
+            scan_run.status = DuplicateScanRun.Status.PARTIAL_COMPLETED
+            scan_run.finished_at = timezone.now()
+            scan_run.save(update_fields=["status", "finished_at"])
+            logger.info("Scan cancelled in general phase → PARTIAL_COMPLETED")
+            return {
+                "status": "partial_completed",
+                "scan_run_id": scan_run.pk,
+            }
 
+        total_cands = DuplicateCandidate.objects.filter(scan_run=scan_run).count()
         scan_run.status = DuplicateScanRun.Status.COMPLETED
         scan_run.finished_at = timezone.now()
-        scan_run.authors_scanned = authors_scanned
-        scan_run.duplicates_found = duplicates_found
+        scan_run.duplicates_found = total_cands
         scan_run.save()
 
         logger.info(
-            f"Scan completed: {authors_scanned} authors scanned, "
-            f"{duplicates_found} duplicates found"
+            f"Scan completed: {scan_run.authors_scanned} authors scanned, "
+            f"{total_cands} duplicates found"
         )
 
         return {
             "status": "success",
             "scan_run_id": scan_run.pk,
-            "authors_scanned": authors_scanned,
-            "duplicates_found": duplicates_found,
+            "duplicates_found": total_cands,
         }
 
     except Exception as e:
-        logger.error(f"Error during duplicate scan: {str(e)}", exc_info=True)
+        logger.exception("Error during duplicate scan")
         scan_run.status = DuplicateScanRun.Status.FAILED
         scan_run.finished_at = timezone.now()
         scan_run.error_message = str(e)
diff --git a/src/deduplikator_autorow/tests/test_combined_scan.py b/src/deduplikator_autorow/tests/test_combined_scan.py
new file mode 100644
index 000000000..b748a0101
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_combined_scan.py
@@ -0,0 +1,104 @@
+"""Testy combined task scan_for_duplicates (PBN + general)."""
+
+from unittest import mock
+
+import pytest
+from model_bakery import baker
+
+from deduplikator_autorow.models import DuplicateCandidate, DuplicateScanRun
+from deduplikator_autorow.tasks import scan_for_duplicates
+
+
+@pytest.mark.django_db
+def test_combined_scan_runs_both_phases_status_completed():
+    """Both phases succeed → status COMPLETED."""
+    result = scan_for_duplicates.apply().result
+    assert result["status"] == "success"
+    scan = DuplicateScanRun.objects.get(pk=result["scan_run_id"])
+    assert scan.status == DuplicateScanRun.Status.COMPLETED
+
+
+@pytest.mark.django_db
+def test_combined_scan_general_finds_duplicates():
+    """The general phase adds DuplicateCandidate(scan_mode='general')."""
+    baker.make("bpp.Autor", nazwisko="Hawkins", imiona="Lee")
+    baker.make("bpp.Autor", nazwisko="Hawkins", imiona="Lee")
+    result = scan_for_duplicates.apply().result
+    scan = DuplicateScanRun.objects.get(pk=result["scan_run_id"])
+    assert (
+        DuplicateCandidate.objects.filter(scan_run=scan, scan_mode="general").count()
+        >= 1
+    )
+
+
+@pytest.mark.django_db
+def test_cancel_during_general_phase_leaves_partial_completed():
+    """Cancellation in phase 2 (general) → PARTIAL_COMPLETED."""
+    baker.make("bpp.Autor", nazwisko="Igor", imiona="Test")
+    baker.make("bpp.Autor", nazwisko="Igor", imiona="Test")
+
+    def fake_general(scan_run, *args, **kwargs):
+        scan_run.status = DuplicateScanRun.Status.CANCELLED
+        scan_run.save(update_fields=["status"])
+
+    with mock.patch(
+        "deduplikator_autorow.tasks._run_general_phase",
+        side_effect=fake_general,
+    ):
+        result = scan_for_duplicates.apply().result
+
+    scan = DuplicateScanRun.objects.get(pk=result["scan_run_id"])
+    assert scan.status == DuplicateScanRun.Status.PARTIAL_COMPLETED
+    assert result["status"] == "partial_completed"
+
+
+@pytest.mark.django_db
+def test_cancel_during_pbn_phase_leaves_cancelled():
+    """Cancellation in phase 1 (PBN) → CANCELLED, phase 2 never starts."""
+
+    def fake_pbn(scan_run, *args, **kwargs):
+        scan_run.status = DuplicateScanRun.Status.CANCELLED
+        scan_run.save(update_fields=["status"])
+
+    with (
+        mock.patch(
+            "deduplikator_autorow.tasks._run_pbn_phase",
+            side_effect=fake_pbn,
+        ),
+        mock.patch("deduplikator_autorow.tasks._run_general_phase") as general_mock,
+    ):
+        result = scan_for_duplicates.apply().result
+        general_mock.assert_not_called()
+
+    scan = DuplicateScanRun.objects.get(pk=result["scan_run_id"])
+    assert scan.status == DuplicateScanRun.Status.CANCELLED
+    assert result["status"] == "cancelled"
+
+
+@pytest.mark.django_db
+def test_phase_field_set_during_run():
+    """Field `phase` is set to 'pbn' in phase 1 and 'general' in phase 2."""
+    phases_seen = []
+
+    from deduplikator_autorow import tasks as deduptasks
+
+    original_pbn = deduptasks._run_pbn_phase
+    original_general = deduptasks._run_general_phase
+
+    def spy_pbn(scan_run, *a, **kw):
+        scan_run.refresh_from_db()
+        phases_seen.append(("pbn", scan_run.phase))
+        return original_pbn(scan_run, *a, **kw)
+
+    def spy_general(scan_run, *a, **kw):
+        scan_run.refresh_from_db()
+        phases_seen.append(("general", scan_run.phase))
+        return original_general(scan_run, *a, **kw)
+
+    with (
+        mock.patch.object(deduptasks, "_run_pbn_phase", spy_pbn),
+        mock.patch.object(deduptasks, "_run_general_phase", spy_general),
+    ):
+        scan_for_duplicates.apply()
+
+    assert phases_seen == [("pbn", "pbn"), ("general", "general")]

From ebcec98ded51dae29d6eea0b4bb327d4bbfb806a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:30:00 +0200
Subject: [PATCH 11/25] feat(deduplikator): mode filter w widoku +
 get_latest_usable_scan (PARTIAL_COMPLETED)

---
 .../tests/test_view_mode_filter.py            | 124 ++++++++++++++++++
 src/deduplikator_autorow/utils/counters.py    |  21 +++
 src/deduplikator_autorow/views.py             |  66 +++++++---
 3 files changed, 192 insertions(+), 19 deletions(-)
 create mode 100644 src/deduplikator_autorow/tests/test_view_mode_filter.py

diff --git a/src/deduplikator_autorow/tests/test_view_mode_filter.py b/src/deduplikator_autorow/tests/test_view_mode_filter.py
new file mode 100644
index 000000000..8027341d5
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_view_mode_filter.py
@@ -0,0 +1,124 @@
+"""Testy filtra mode w widoku duplicate_authors."""
+
+import pytest
+from django.contrib.auth.models import Group
+from django.urls import reverse
+from django.utils import timezone
+from model_bakery import baker
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+from deduplikator_autorow.models import DuplicateCandidate, DuplicateScanRun
+
+
+@pytest.fixture
+def auth_client(client, db):
+    user = baker.make("bpp.BppUser", is_active=True)
+    user.set_password("xx")
+    user.save()
+    grp, _ = Group.objects.get_or_create(name=GR_WPROWADZANIE_DANYCH)
+    user.groups.add(grp)  # put the user in the GR_WPROWADZANIE_DANYCH group
+    client.force_login(user)  # authenticate the test client as this user
+    return client
+
+
+@pytest.fixture
+def scan_with_both_modes(db):
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.COMPLETED,
+        finished_at=timezone.now(),
+    )
+    a1 = baker.make("bpp.Autor", nazwisko="Pbn1", imiona="Jan")
+    a2 = baker.make("bpp.Autor", nazwisko="Pbn1", imiona="Jan")
+    g1 = baker.make("bpp.Autor", nazwisko="Gen1", imiona="Anna")
+    g2 = baker.make("bpp.Autor", nazwisko="Gen1", imiona="Anna")
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=a1,
+        duplicate_autor=a2,
+        confidence_score=80,
+        confidence_percent=0.6,
+        main_autor_name="Pbn1 Jan",
+        duplicate_autor_name="Pbn1 Jan",
+        scan_mode="pbn",  # one candidate in PBN mode
+    )
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=g1,
+        duplicate_autor=g2,
+        confidence_score=80,
+        confidence_percent=0.6,
+        main_autor_name="Gen1 Anna",
+        duplicate_autor_name="Gen1 Anna",
+        scan_mode="general",  # one candidate in general mode
+    )
+    return scan
+
+
+def test_view_mode_filter_pbn(auth_client, scan_with_both_modes):
+    response = auth_client.get(
+        reverse("deduplikator_autorow:duplicate_authors") + "?mode=pbn"
+    )
+    assert response.status_code == 200
+    content = response.content.decode()
+    assert "Pbn1" in content  # PBN-mode candidate is rendered
+    assert "Gen1" not in content  # general-mode candidate is filtered out
+
+
+def test_view_mode_filter_general(auth_client, scan_with_both_modes):
+    response = auth_client.get(
+        reverse("deduplikator_autorow:duplicate_authors") + "?mode=general"
+    )
+    assert response.status_code == 200
+    content = response.content.decode()
+    assert "Gen1" in content  # general-mode candidate is rendered
+    assert "Pbn1" not in content  # PBN-mode candidate is filtered out
+
+
+def test_view_mode_filter_both_default(auth_client, scan_with_both_modes):
+    """Default mode (no GET param) — shows either one (usually first in sort order)."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    assert response.status_code == 200
+    # At least one of the two should be present in the response
+    content = response.content.decode()
+    assert "Pbn1" in content or "Gen1" in content
+
+
+def test_view_pending_counters_split_by_mode(auth_client, scan_with_both_modes):
+    """Per-mode counters."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    assert response.context["pending_pbn_count"] == 1
+    assert response.context["pending_general_count"] == 1
+
+
+def test_view_invalid_mode_falls_back_to_both(auth_client, scan_with_both_modes):
+    response = auth_client.get(
+        reverse("deduplikator_autorow:duplicate_authors") + "?mode=zzzunknown"
+    )
+    assert response.status_code == 200
+    assert response.context["mode"] == "both"  # unknown value → default
+
+
+def test_view_partial_completed_scan_used(auth_client):
+    """get_latest_usable_scan returns a PARTIAL_COMPLETED scan."""
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.PARTIAL_COMPLETED,
+        finished_at=timezone.now(),
+    )
+    a1 = baker.make("bpp.Autor", nazwisko="Sole", imiona="One")
+    a2 = baker.make("bpp.Autor", nazwisko="Sole", imiona="One")
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=a1,
+        duplicate_autor=a2,
+        confidence_score=80,
+        confidence_percent=0.6,
+        main_autor_name="x",
+        duplicate_autor_name="y",
+        scan_mode="pbn",
+    )
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    assert response.status_code == 200
+    # context "completed_scan" should still be set even though status is PARTIAL_COMPLETED
+    # (the field is named completed_scan in the existing view but its semantics are
+    # "scan with usable results")
+    assert response.context.get("completed_scan") is not None
diff --git a/src/deduplikator_autorow/utils/counters.py b/src/deduplikator_autorow/utils/counters.py
index 76657d4bd..c6fd3cbfe 100644
--- a/src/deduplikator_autorow/utils/counters.py
+++ b/src/deduplikator_autorow/utils/counters.py
@@ -21,6 +21,27 @@ def get_latest_completed_scan():
     )
 
 
+def get_latest_usable_scan():
+    """Return the most recent scan that has usable results.
+
+    "Usable" = COMPLETED or PARTIAL_COMPLETED (PBN phase finished,
+    even if the general phase was cancelled).
+
+    Returns:
+        DuplicateScanRun or None
+    """
+    return (
+        DuplicateScanRun.objects.filter(
+            status__in=[
+                DuplicateScanRun.Status.COMPLETED,
+                DuplicateScanRun.Status.PARTIAL_COMPLETED,
+            ]
+        )
+        .order_by("-finished_at")
+        .first()
+    )
+
+
 def get_latest_scan_stats():
     """
     Pobiera statystyki ostatniego skanowania.
diff --git a/src/deduplikator_autorow/views.py b/src/deduplikator_autorow/views.py
index d3819abc5..4c13bb829 100644
--- a/src/deduplikator_autorow/views.py
+++ b/src/deduplikator_autorow/views.py
@@ -33,6 +33,7 @@
     search_author_by_lastname,
     znajdz_pierwszego_autora_z_duplikatami,
 )
+from .utils.counters import get_latest_usable_scan
 
 # Minimalny próg pewności do wyświetlania duplikatów
 # Duplikaty z pewnością poniżej tego progu nie będą pokazywane
@@ -196,7 +197,12 @@ def duplicate_authors_view(request):  # noqa: C901
 
     # Get scan status
     running_scan = get_running_scan()
-    completed_scan = get_latest_completed_scan()
+    completed_scan = get_latest_usable_scan()
+
+    # Filter mode: pbn|general|both (default both)
+    mode = request.GET.get("mode", "both")
+    if mode not in ("pbn", "general", "both"):
+        mode = "both"
 
     # Common context
     not_duplicate_count = NotADuplicate.objects.count()
@@ -232,6 +238,10 @@ def duplicate_authors_view(request):  # noqa: C901
         "completed_scan": completed_scan,
         "no_scan_available": not completed_scan and not running_scan,
         "pending_candidates_count": 0,
+        "pending_pbn_count": 0,
+        "pending_general_count": 0,
+        # Filter mode (pbn|general|both)
+        "mode": mode,
         # Navigation
         "skip_count": 0,
         # PBN data freshness
@@ -257,6 +267,16 @@ def duplicate_authors_view(request):  # noqa: C901
     ).count()
     context["pending_candidates_count"] = pending_count
     context["total_authors_with_duplicates"] = pending_count
+    context["pending_pbn_count"] = DuplicateCandidate.objects.filter(
+        scan_run=completed_scan,
+        status=DuplicateCandidate.Status.PENDING,
+        scan_mode="pbn",
+    ).count()
+    context["pending_general_count"] = DuplicateCandidate.objects.filter(
+        scan_run=completed_scan,
+        status=DuplicateCandidate.Status.PENDING,
+        scan_mode="general",
+    ).count()
 
     # Handle search by lastname
     search_lastname = request.GET.get("search_lastname", "").strip()
@@ -273,6 +293,8 @@ def duplicate_authors_view(request):  # noqa: C901
             .select_related("main_autor", "duplicate_autor")
             .order_by("-priority", "-confidence_score")
         )
+        if mode != "both":
+            candidates = candidates.filter(scan_mode=mode)
 
         context["search_results_count"] = (
             candidates.values("main_autor").distinct().count()
@@ -294,7 +316,7 @@ def duplicate_authors_view(request):  # noqa: C901
 
         # Get next author with pending duplicates using offset
         glowny_autor, candidates_for_author, skip_count = _get_next_candidate_group(
-            completed_scan, skip_count=skip_count
+            completed_scan, skip_count=skip_count, mode=mode
         )
         context["skip_count"] = skip_count
 
@@ -640,15 +662,6 @@ def download_duplicates_xlsx(request):
         return redirect("deduplikator_autorow:duplicate_authors")
 
 
-def get_latest_completed_scan():
-    """Get the most recent completed scan run."""
-    return (
-        DuplicateScanRun.objects.filter(status=DuplicateScanRun.Status.COMPLETED)
-        .order_by("-finished_at")
-        .first()
-    )
-
-
 def get_running_scan():
     """Get the currently running scan, if any."""
     return DuplicateScanRun.objects.filter(
@@ -777,26 +790,41 @@ def _get_pending_candidates_for_main_autor(main_autor_id, scan_run):
     )
 
 
-def _get_next_candidate_group(scan_run, skip_count=0):
+def _get_next_candidate_group(scan_run, skip_count=0, mode="both"):
     """
     Get the next group of candidates (all for the same main author).
-    Returns (main_autor, candidates_queryset, skip_count) or (None, None, 0) if no more pending.
+    Returns (main_autor, candidates_queryset, skip_count) or (None, None, 0)
+    if no more pending.
 
     Args:
         scan_run: The scan run to get candidates from
         skip_count: Number of main authors to skip (offset)
+        mode: Filter by scan_mode ("pbn", "general", or "both"). When "both",
+            PBN candidates are sorted before general (PBN is canonical).
 
     Returns:
         Tuple of (main_autor, candidates_queryset, current_skip_count)
     """
-    # Get distinct main authors with pending candidates, ordered by priority then confidence
-    # We need to find distinct main_autor_ids in priority/confidence order
+    from django.db.models import Case, IntegerField, Value, When
+
+    qs = DuplicateCandidate.objects.filter(
+        scan_run=scan_run,
+        status=DuplicateCandidate.Status.PENDING,
+    )
+    if mode != "both":
+        qs = qs.filter(scan_mode=mode)
+
+    # PBN-first ordering when mode=both
     distinct_main_autor_ids = (
-        DuplicateCandidate.objects.filter(
-            scan_run=scan_run,
-            status=DuplicateCandidate.Status.PENDING,
+        qs.annotate(
+            mode_order=Case(
+                When(scan_mode="pbn", then=Value(0)),
+                When(scan_mode="general", then=Value(1)),
+                default=Value(2),
+                output_field=IntegerField(),
+            )
         )
-        .order_by("-priority", "-confidence_score", "main_autor_id")
+        .order_by("mode_order", "-priority", "-confidence_score", "main_autor_id")
         .values_list("main_autor_id", flat=True)
         .distinct()
     )

From d32be38fe63af65ff5f4005b18df8ffc23dc3af7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:38:31 +0200
Subject: [PATCH 12/25] feat(deduplikator): scal_autorow_view backwards-compat
 + split ignore_autor/scientist

- scal_autorow_view: accept main_autor_id/duplicate_autor_id (preferred) +
  legacy main_scientist_id/duplicate_scientist_id mapped via Scientist.rekord_w_bpp;
  view now calls scal_autora directly with Autor objects (the scal_autorow wrapper
  in utils/merge.py stays untouched for any external callers).
- Split ignore endpoint: ignore_author -> ignore_scientist (writes IgnoredScientist),
  add new ignore_autor (writes IgnoredAuthor with FK->Autor). Reset endpoints
  renamed accordingly: reset_ignored_authors -> reset_ignored_scientists, plus
  new reset_ignored_autorzy. URL names + paths updated; template references
  adjusted in duplicate_authors.html.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../duplicate_authors.html                    |   4 +-
 .../tests/test_ignore_views.py                |  62 +++++++
 .../tests/test_scal_view.py                   |  70 ++++++++
 src/deduplikator_autorow/urls.py              |  14 +-
 src/deduplikator_autorow/views.py             | 166 ++++++++++++++----
 5 files changed, 273 insertions(+), 43 deletions(-)
 create mode 100644 src/deduplikator_autorow/tests/test_ignore_views.py
 create mode 100644 src/deduplikator_autorow/tests/test_scal_view.py

diff --git a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
index 371c3544a..a3f486be2 100644
--- a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
+++ b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
@@ -318,7 +318,7 @@
                                    target="_blank">
                                     <span class="fi-list"></span> Zobacz listę
                                 </a>
-                                <form method="post" action="{% url 'deduplikator_autorow:reset_ignored_authors' %}" class="deduplikator-autorow__form">
+                                <form method="post" action="{% url 'deduplikator_autorow:reset_ignored_scientists' %}" class="deduplikator-autorow__form">
                                     {% csrf_token %}
                                     <button type="submit"
                                             class="button alert expanded small"
@@ -397,7 +397,7 @@ <h1>Deduplikator Autorów PBN <span class="label warning deduplikator-autorow__b
                                 <a href="{% url 'deduplikator_autorow:duplicate_authors' %}?skip_count={{ skip_count|add:"-1" }}"
                                    class="button secondary"><span class="fi-arrow-left"></span> Poprzedni autor</a>
                             {% endif %}
-                            <form method="post" action="{% url 'deduplikator_autorow:ignore_author' %}" class="deduplikator-autorow__form--inline">
+                            <form method="post" action="{% url 'deduplikator_autorow:ignore_scientist' %}" class="deduplikator-autorow__form--inline">
                                 {% csrf_token %}
                                 <input type="hidden" name="scientist_id" value="{{ scientist.pk }}">
                                 <input type="hidden" name="reason" value="Pominięty przez użytkownika">
diff --git a/src/deduplikator_autorow/tests/test_ignore_views.py b/src/deduplikator_autorow/tests/test_ignore_views.py
new file mode 100644
index 000000000..dde4bf9f9
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_ignore_views.py
@@ -0,0 +1,62 @@
+import pytest
+from django.contrib.auth.models import Group
+from django.urls import reverse
+from model_bakery import baker
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+from deduplikator_autorow.models import IgnoredAuthor, IgnoredScientist
+
+
+@pytest.fixture
+def auth_client(client, db):
+    user = baker.make("bpp.BppUser", is_active=True)
+    user.set_password("xx")
+    user.save()
+    grp, _ = Group.objects.get_or_create(name=GR_WPROWADZANIE_DANYCH)
+    user.groups.add(grp)
+    client.force_login(user)
+    return client
+
+
+@pytest.mark.django_db
+def test_ignore_scientist_endpoint(auth_client):
+    sci = baker.make("pbn_api.Scientist")
+    response = auth_client.post(
+        reverse("deduplikator_autorow:ignore_scientist"),
+        {"scientist_id": sci.pk, "reason": "test"},
+    )
+    assert response.status_code == 302
+    assert IgnoredScientist.objects.filter(scientist=sci).exists()
+
+
+@pytest.mark.django_db
+def test_ignore_autor_endpoint(auth_client):
+    autor = baker.make("bpp.Autor")
+    response = auth_client.post(
+        reverse("deduplikator_autorow:ignore_autor"),
+        {"autor_id": autor.pk, "reason": "test"},
+    )
+    assert response.status_code == 302
+    assert IgnoredAuthor.objects.filter(autor=autor).exists()
+
+
+@pytest.mark.django_db
+def test_reset_ignored_autorzy_endpoint(auth_client):
+    autor = baker.make("bpp.Autor")
+    user = baker.make("bpp.BppUser")
+    IgnoredAuthor.objects.create(autor=autor, created_by=user)
+    response = auth_client.post(reverse("deduplikator_autorow:reset_ignored_autorzy"))
+    assert response.status_code == 302
+    assert IgnoredAuthor.objects.count() == 0
+
+
+@pytest.mark.django_db
+def test_reset_ignored_scientists_endpoint(auth_client):
+    sci = baker.make("pbn_api.Scientist")
+    user = baker.make("bpp.BppUser")
+    IgnoredScientist.objects.create(scientist=sci, created_by=user)
+    response = auth_client.post(
+        reverse("deduplikator_autorow:reset_ignored_scientists")
+    )
+    assert response.status_code == 302
+    assert IgnoredScientist.objects.count() == 0
diff --git a/src/deduplikator_autorow/tests/test_scal_view.py b/src/deduplikator_autorow/tests/test_scal_view.py
new file mode 100644
index 000000000..2b2eda2b7
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_scal_view.py
@@ -0,0 +1,70 @@
+"""Testy backwards-compat dla scal_autorow_view."""
+
+import pytest
+from django.contrib.auth.models import Group
+from django.urls import reverse
+from model_bakery import baker
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+
+
+@pytest.fixture
+def auth_client(client, db):
+    user = baker.make("bpp.BppUser", is_active=True)
+    user.set_password("xx")
+    user.save()
+    grp, _ = Group.objects.get_or_create(name=GR_WPROWADZANIE_DANYCH)
+    user.groups.add(grp)
+    client.force_login(user)
+    return client
+
+
+@pytest.mark.django_db
+def test_scal_autorow_accepts_main_autor_id(auth_client):
+    main = baker.make("bpp.Autor")
+    dup = baker.make("bpp.Autor")
+    response = auth_client.post(
+        reverse("deduplikator_autorow:scal_autorow"),
+        {
+            "main_autor_id": main.pk,
+            "duplicate_autor_id": dup.pk,
+            "skip_pbn": "true",
+        },
+    )
+    # 200 OK or 500 on internal merge issues — but NOT 400 "missing params"
+    assert response.status_code in (200, 500)
+    assert b"Brak wymaganych" not in response.content
+
+
+@pytest.mark.django_db
+def test_scal_autorow_backwards_compat_scientist_ids(auth_client):
+    """Legacy scientist_id maps to autor_id via rekord_w_bpp."""
+    main = baker.make("bpp.Autor")
+    dup = baker.make("bpp.Autor")
+    main_sci = baker.make("pbn_api.Scientist")
+    dup_sci = baker.make("pbn_api.Scientist")
+    main.pbn_uid = main_sci
+    main.save()
+    dup.pbn_uid = dup_sci
+    dup.save()
+
+    response = auth_client.post(
+        reverse("deduplikator_autorow:scal_autorow"),
+        {
+            "main_scientist_id": main_sci.pk,
+            "duplicate_scientist_id": dup_sci.pk,
+            "skip_pbn": "true",
+        },
+    )
+    assert response.status_code in (200, 500)
+    assert b"Brak wymaganych" not in response.content
+
+
+@pytest.mark.django_db
+def test_scal_autorow_missing_params_returns_400(auth_client):
+    response = auth_client.post(
+        reverse("deduplikator_autorow:scal_autorow"),
+        {},
+    )
+    assert response.status_code == 400
+    assert b"Brak wymaganych" in response.content
diff --git a/src/deduplikator_autorow/urls.py b/src/deduplikator_autorow/urls.py
index 27c4069e1..7ea91e41f 100644
--- a/src/deduplikator_autorow/urls.py
+++ b/src/deduplikator_autorow/urls.py
@@ -15,11 +15,17 @@
     path(
         "reset-not-duplicates/", views.reset_not_duplicates, name="reset_not_duplicates"
     ),
-    path("ignore-author/", views.ignore_author, name="ignore_author"),
+    path("ignore-scientist/", views.ignore_scientist, name="ignore_scientist"),
     path(
-        "reset-ignored-authors/",
-        views.reset_ignored_authors,
-        name="reset_ignored_authors",
+        "reset-ignored-scientists/",
+        views.reset_ignored_scientists,
+        name="reset_ignored_scientists",
+    ),
+    path("ignore-autor/", views.ignore_autor, name="ignore_autor"),
+    path(
+        "reset-ignored-autorzy/",
+        views.reset_ignored_autorzy,
+        name="reset_ignored_autorzy",
     ),
     path("delete-author/", views.delete_author, name="delete_author"),
     path("scal-autorow/", views.scal_autorow_view, name="scal_autorow"),
diff --git a/src/deduplikator_autorow/views.py b/src/deduplikator_autorow/views.py
index 4c13bb829..42acc5252 100644
--- a/src/deduplikator_autorow/views.py
+++ b/src/deduplikator_autorow/views.py
@@ -22,6 +22,7 @@
 from .models import (
     DuplicateCandidate,
     DuplicateScanRun,
+    IgnoredAuthor,
     IgnoredScientist,
     LogScalania,
     NotADuplicate,
@@ -29,7 +30,7 @@
 from .utils import (
     count_authors_with_lastname,
     export_duplicates_to_xlsx,
-    scal_autorow,
+    scal_autora,
     search_author_by_lastname,
     znajdz_pierwszego_autora_z_duplikatami,
 )
@@ -362,15 +363,49 @@ def duplicate_authors_view(request):  # noqa: C901
     return render(request, "deduplikator_autorow/duplicate_authors.html", context)
 
 
+def _read_param(request, *names):
+    """Return the first non-empty value for names; POST body wins over GET."""
+    for name in names:
+        val = request.POST.get(name) or request.GET.get(name)
+        if val:
+            return val
+    return None
+
+
+def _scientist_id_to_autor_id(scientist_id):
+    """Map Scientist PK to Autor PK via rekord_w_bpp. Returns None if not found."""
+    try:
+        sci = Scientist.objects.get(pk=scientist_id)
+    except Scientist.DoesNotExist:
+        return None
+    autor = sci.rekord_w_bpp
+    return autor.pk if autor is not None else None
+
+
+def _resolve_autor_id(request, autor_param, scientist_param):
+    """Resolve Autor PK from preferred autor_param or legacy scientist_param.
+
+    Preference: explicit autor_id over scientist_id (mapped via rekord_w_bpp).
+    """
+    autor_id = _read_param(request, autor_param)
+    if autor_id:
+        return autor_id
+    sci_id = _read_param(request, scientist_param)
+    if sci_id:
+        return _scientist_id_to_autor_id(sci_id)
+    return None
+
+
 @group_required(GR_WPROWADZANIE_DANYCH)
 @require_http_methods(["GET", "POST"])
 def scal_autorow_view(request):
     """
     Widok do scalania autorów automatycznie.
 
-    Przyjmuje parametry:
-    - main_scientist_id: ID głównego autora (Scientist)
-    - duplicate_scientist_id: ID duplikatu autora (Scientist)
+    Przyjmuje parametry (warianty):
+    - main_autor_id / duplicate_autor_id: ID autorów BPP (preferowane)
+    - main_scientist_id / duplicate_scientist_id: ID Scientist z PBN
+      (mapowane do Autor przez rekord_w_bpp; backwards-compat)
     - skip_pbn: Opcjonalnie, jeśli true nie wysyła publikacji do PBN
     - candidate_id: Opcjonalnie, ID DuplicateCandidate do oznaczenia jako scalony
     - auto_assign_discipline: Opcjonalnie, jeśli true przypisuje główną dyscyplinę
@@ -380,42 +415,44 @@ def scal_autorow_view(request):
     """
     from django.utils import timezone
 
-    if request.method == "GET":
-        main_scientist_id = request.GET.get("main_scientist_id")
-        duplicate_scientist_id = request.GET.get("duplicate_scientist_id")
-        skip_pbn = request.GET.get("skip_pbn", "false").lower() == "true"
-        candidate_id = request.GET.get("candidate_id")
-        auto_assign_discipline = (
-            request.GET.get("auto_assign_discipline", "false").lower() == "true"
-        )
-        use_subdiscipline = (
-            request.GET.get("use_subdiscipline", "false").lower() == "true"
-        )
-    else:
-        main_scientist_id = request.POST.get("main_scientist_id")
-        duplicate_scientist_id = request.POST.get("duplicate_scientist_id")
-        skip_pbn = request.POST.get("skip_pbn", "false").lower() == "true"
-        candidate_id = request.POST.get("candidate_id")
-        auto_assign_discipline = (
-            request.POST.get("auto_assign_discipline", "false").lower() == "true"
-        )
-        use_subdiscipline = (
-            request.POST.get("use_subdiscipline", "false").lower() == "true"
-        )
+    skip_pbn = (_read_param(request, "skip_pbn") or "false").lower() == "true"
+    candidate_id = _read_param(request, "candidate_id")
+    auto_assign_discipline = (
+        _read_param(request, "auto_assign_discipline") or "false"
+    ).lower() == "true"
+    use_subdiscipline = (
+        _read_param(request, "use_subdiscipline") or "false"
+    ).lower() == "true"
+
+    main_autor_id = _resolve_autor_id(request, "main_autor_id", "main_scientist_id")
+    duplicate_autor_id = _resolve_autor_id(
+        request, "duplicate_autor_id", "duplicate_scientist_id"
+    )
 
-    if not main_scientist_id or not duplicate_scientist_id:
+    if not main_autor_id or not duplicate_autor_id:
         return JsonResponse(
             {
                 "success": False,
-                "error": "Brak wymaganych parametrów: main_scientist_id i duplicate_scientist_id",
+                "error": (
+                    "Brak wymaganych parametrów: main_autor_id i duplicate_autor_id"
+                ),
             },
             status=400,
         )
 
     try:
-        result = scal_autorow(
-            main_scientist_id,
-            duplicate_scientist_id,
+        try:
+            main_autor = Autor.objects.get(pk=main_autor_id)
+            duplicate_autor = Autor.objects.get(pk=duplicate_autor_id)
+        except Autor.DoesNotExist as e:
+            return JsonResponse(
+                {"success": False, "error": f"Nie znaleziono autora: {e}"},
+                status=404,
+            )
+
+        result = scal_autora(
+            main_autor,
+            duplicate_autor,
             request.user,
             skip_pbn=skip_pbn,
             auto_assign_discipline=auto_assign_discipline,
@@ -431,7 +468,8 @@ def scal_autorow_view(request):
                 candidate.reviewed_by = request.user
                 candidate.save()
             except DuplicateCandidate.DoesNotExist:
-                pass  # Candidate may have been deleted, that's ok
+                # Candidate may have been deleted in the meantime
+                pass  # not an error - merge already succeeded
 
         return JsonResponse({"success": result.get("success", False), "result": result})
     except NotImplementedError as e:
@@ -513,9 +551,9 @@ def reset_skipped_authors(request):
 
 @group_required(GR_WPROWADZANIE_DANYCH)
 @require_http_methods(["POST"])
-def ignore_author(request):
+def ignore_scientist(request):
     """
-    Mark a scientist as ignored in the deduplication process.
+    Mark a Scientist (PBN) as ignored in the deduplication process.
 
     Parameters:
     - scientist_id: ID of the Scientist to ignore
@@ -561,13 +599,67 @@ def ignore_author(request):
 
 @group_required(GR_WPROWADZANIE_DANYCH)
 @require_http_methods(["POST"])
-def reset_ignored_authors(request):
+def ignore_autor(request):
     """
-    Remove all ignored author markings.
+    Mark a BPP Autor (without PBN-Scientist link) as ignored.
+
+    Parameters:
+    - autor_id: ID of the Autor to ignore
+    - reason: Optional reason for ignoring (from POST)
+    """
+    autor_id = request.POST.get("autor_id")
+    reason = request.POST.get("reason", "")
+
+    if not autor_id:
+        messages.error(request, "Brak wymaganego parametru: autor_id")
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    try:
+        autor = Autor.objects.get(pk=autor_id)
+
+        if IgnoredAuthor.objects.filter(autor=autor).exists():
+            messages.warning(
+                request, f"Autor {autor} jest już oznaczony jako ignorowany."
+            )
+        else:
+            IgnoredAuthor.objects.create(
+                autor=autor, reason=reason, created_by=request.user
+            )
+            messages.success(
+                request, f"Autor {autor} został oznaczony jako ignorowany."
+            )
+
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    except Autor.DoesNotExist:
+        messages.error(request, f"Nie znaleziono autora o ID: {autor_id}")
+        return redirect("deduplikator_autorow:duplicate_authors")
+    except Exception as e:
+        messages.error(request, f"Błąd podczas ignorowania autora: {str(e)}")
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def reset_ignored_scientists(request):
+    """
+    Remove all IgnoredScientist (PBN) markings.
     """
     count = IgnoredScientist.objects.count()
     IgnoredScientist.objects.all().delete()
-    messages.success(request, f"Zresetowano {count} ignorowanych autorów.")
+    messages.success(request, f"Zresetowano {count} ignorowanych autorów (PBN).")
+    return redirect("deduplikator_autorow:duplicate_authors")
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def reset_ignored_autorzy(request):
+    """
+    Remove all IgnoredAuthor (BPP) markings.
+    """
+    count = IgnoredAuthor.objects.count()
+    IgnoredAuthor.objects.all().delete()
+    messages.success(request, f"Zresetowano {count} ignorowanych autorów (BPP).")
     return redirect("deduplikator_autorow:duplicate_authors")
 
 

From 085b0e6c9715a4f94155b83c1966be9d236d7beb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:43:28 +0200
Subject: [PATCH 13/25] =?UTF-8?q?feat(deduplikator):=20UI=20=E2=80=94=20ra?=
 =?UTF-8?q?dio=20mode,=20badges,=20banner=20PARTIAL=5FCOMPLETED,=20fazy=20?=
 =?UTF-8?q?progress?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../scss/deduplikator_autorow.scss            | 38 +++++++++++++++
 .../duplicate_authors.html                    | 46 ++++++++++++++++++-
 .../tests/test_view_mode_filter.py            | 27 +++++++++++
 src/deduplikator_autorow/views.py             |  3 ++
 4 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss b/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
index 45f7d7f94..086113f84 100644
--- a/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
+++ b/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
@@ -481,3 +481,41 @@
 .deduplikator-autorow__confidence-low {
   color: red;
 }
+
+// Mode badges, partial-completed banner, mode filter, scan phase
+.deduplikator-autorow {
+  &__badge {
+    display: inline-block;
+    padding: 2px 8px;
+    border-radius: 3px;
+    font-size: 0.75em;
+    font-weight: 600;
+    color: #fff;
+    margin-left: 0.5em;
+
+    &--pbn {
+      background-color: #2196f3;
+    }
+
+    &--general {
+      background-color: #ff9800;
+    }
+  }
+
+  &__partial-banner {
+    margin: 1em 0;
+  }
+
+  &__mode-filter {
+    margin: 1em 0;
+
+    label {
+      margin-right: 1.5em;
+    }
+  }
+
+  &__scan-phase {
+    margin-top: 0.5em;
+    font-style: italic;
+  }
+}
diff --git a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
index a3f486be2..1884e82fb 100644
--- a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
+++ b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
@@ -46,6 +46,11 @@
                                     <p id="scan-eta-text" class="deduplikator-autorow__status-text--muted">
                                         <span class="fi-clock"></span> ETA: obliczanie...
                                     </p>
+                                    {% if running_scan.phase %}
+                                      <div class="deduplikator-autorow__scan-phase">
+                                        Faza: <strong>{{ running_scan.get_phase_display|default:running_scan.phase }}</strong>
+                                      </div>
+                                    {% endif %}
                                     <form method="post" action="{% url 'deduplikator_autorow:cancel_scan' %}" class="deduplikator-autorow__form">
                                         {% csrf_token %}
                                         <button type="submit" class="button alert expanded small"
@@ -384,6 +389,40 @@
             <!-- Local messages placeholder that respects sidebar layout -->
             <div id="localMessagesPlaceholder" class="deduplikator-autorow__messages"></div>
 
+            {% if completed_scan and completed_scan.status == "partial_completed" %}
+              <div class="callout warning deduplikator-autorow__partial-banner">
+                <strong>Skanowanie częściowo zakończone:</strong>
+                Faza ogólna została anulowana. Wyniki PBN są dostępne, ale duplikaty
+                ogólne nie zostały przeskanowane. Uruchom skan ponownie, aby zobaczyć
+                wszystkie duplikaty.
+              </div>
+            {% endif %}
+
+            {# Mode filter #}
+            <div class="row deduplikator-autorow__mode-filter">
+              <div class="small-12 columns">
+                <strong>Pokaż wyniki:</strong>
+                <label>
+                  <input type="radio" name="mode" value="pbn"
+                         {% if mode == "pbn" %}checked{% endif %}
+                         onchange="location.href='?mode=pbn'">
+                  PBN ({{ pending_pbn_count }})
+                </label>
+                <label>
+                  <input type="radio" name="mode" value="general"
+                         {% if mode == "general" %}checked{% endif %}
+                         onchange="location.href='?mode=general'">
+                  Ogólny ({{ pending_general_count }})
+                </label>
+                <label>
+                  <input type="radio" name="mode" value="both"
+                         {% if mode == "both" or not mode %}checked{% endif %}
+                         onchange="location.href='?mode=both'">
+                  Oba
+                </label>
+              </div>
+            </div>
+
             {% include "includes/pbn_freshness_warning.html" with custom_message="Analiza duplikatów opiera się na lokalnej kopii danych autorów z PBN. Wyniki mogą być nieaktualne." %}
 
             <div class="grid-x grid-padding-x align-middle deduplikator-autorow__header">
@@ -452,6 +491,11 @@ <h3><span class="fi-loop"></span> Skanowanie w toku...</h3>
                     <p id="main-scan-eta-text" class="deduplikator-autorow__status-text--medium">
                         <span class="fi-clock"></span> ETA: obliczanie...
                     </p>
+                    {% if running_scan.phase %}
+                      <div class="deduplikator-autorow__scan-phase">
+                        Faza: <strong>{{ running_scan.get_phase_display|default:running_scan.phase }}</strong>
+                      </div>
+                    {% endif %}
                     <p>Strona odświeży się automatycznie po zakończeniu skanowania.</p>
                 </div>
             {% elif not glowny_autor %}
@@ -470,7 +514,7 @@ <h3><span class="fi-check"></span> Gratulacje!</h3>
             {% else %}
                 {% if glowny_autor %}
                     <div class="callout primary">
-                        <h3>Główny rekord autora</h3>
+                        <h3>Główny rekord autora{% if first_candidate %}{% if first_candidate.scan_mode == "pbn" %}<span class="deduplikator-autorow__badge deduplikator-autorow__badge--pbn">PBN</span>{% else %}<span class="deduplikator-autorow__badge deduplikator-autorow__badge--general">OGÓLNY</span>{% endif %}{% endif %}</h3>
                         <div class="grid-x grid-padding-x">
                             <div class="cell medium-4">
                                 <strong>Imię i nazwisko:</strong><br>
diff --git a/src/deduplikator_autorow/tests/test_view_mode_filter.py b/src/deduplikator_autorow/tests/test_view_mode_filter.py
index 8027341d5..85521284f 100644
--- a/src/deduplikator_autorow/tests/test_view_mode_filter.py
+++ b/src/deduplikator_autorow/tests/test_view_mode_filter.py
@@ -122,3 +122,30 @@ def test_view_partial_completed_scan_used(auth_client):
     # (the field is named completed_scan in the existing view but its semantics are
     # "scan with usable results")
     assert response.context.get("completed_scan") is not None
+
+
+@pytest.mark.django_db
+def test_view_partial_completed_shows_banner(auth_client):
+    """The view renders the warning banner for a PARTIAL_COMPLETED scan."""
+    DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.PARTIAL_COMPLETED,
+        finished_at=timezone.now(),
+    )
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    assert response.status_code == 200
+    # Match the banner's own CSS hook rather than generic words from the page.
+    assert "deduplikator-autorow__partial-banner" in response.content.decode()
+
+
+@pytest.mark.django_db
+def test_view_mode_filter_radio_present(auth_client, scan_with_both_modes):
+    """The mode-filter radio inputs are present in the rendered HTML."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    assert response.status_code == 200
+    content = response.content.decode()
+    # Check the inputs themselves: the page heading already contains "PBN".
+    for value in ("pbn", "general", "both"):
+        assert f'name="mode" value="{value}"' in content
+    # Per-mode pending counters exposed in the context
+    assert response.context["pending_pbn_count"] == 1
+    assert response.context["pending_general_count"] == 1
diff --git a/src/deduplikator_autorow/views.py b/src/deduplikator_autorow/views.py
index 42acc5252..0fde5d5ae 100644
--- a/src/deduplikator_autorow/views.py
+++ b/src/deduplikator_autorow/views.py
@@ -343,6 +343,9 @@ def duplicate_authors_view(request):  # noqa: C901
         duplikaty_z_publikacjami.append(pub_data)
 
     context["duplikaty_z_publikacjami"] = duplikaty_z_publikacjami
+    context["first_candidate"] = (
+        candidates_for_author.first() if candidates_for_author else None
+    )
 
     # Get main author's publications and disciplines
     context["glowny_autor_dyscypliny"] = (

From 245935a1c05e7e3f1fd1a69ba6a204bdb1bd2e53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:49:10 +0200
Subject: [PATCH 14/25] feat(deduplikator): kolumna Tryb w XLSX export

---
 .../tests/test_xlsx_export.py                 | 57 +++++++++++++++++--
 src/deduplikator_autorow/utils/export.py      |  3 +
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/src/deduplikator_autorow/tests/test_xlsx_export.py b/src/deduplikator_autorow/tests/test_xlsx_export.py
index c36122174..29309bc0f 100644
--- a/src/deduplikator_autorow/tests/test_xlsx_export.py
+++ b/src/deduplikator_autorow/tests/test_xlsx_export.py
@@ -3,6 +3,7 @@
 from django.contrib.auth.models import Group
 from django.test import RequestFactory
 from django.urls import reverse
+from model_bakery import baker
 
 from bpp.const import GR_WPROWADZANIE_DANYCH
 from deduplikator_autorow.utils import export_duplicates_to_xlsx
@@ -23,7 +24,7 @@ def test_export_duplicates_to_xlsx_basic():
         assert len(result) > 1000  # At least 1KB for headers and structure
     except Exception as e:
         # Function should not crash even with no data
-        raise AssertionError(f"export_duplicates_to_xlsx crashed: {e}")
+        raise AssertionError(f"export_duplicates_to_xlsx crashed: {e}") from e
 
 
 @pytest.mark.django_db
@@ -47,7 +48,7 @@ def test_download_duplicates_xlsx_view():
         assert response.status_code in [200, 302]
     except Exception as e:
         # Should not crash on valid request
-        raise AssertionError(f"download_duplicates_xlsx view crashed: {e}")
+        raise AssertionError(f"download_duplicates_xlsx view crashed: {e}") from e
 
 
 @pytest.mark.django_db
@@ -107,7 +108,7 @@ def test_xlsx_content_type():
 
 
 @pytest.mark.django_db
-def test_xlsx_structure_and_format():
+def test_xlsx_structure_and_format():  # noqa: C901
     """Test that XLSX has correct structure with new columns and formatting"""
     from io import BytesIO
 
@@ -202,5 +203,53 @@ def test_xlsx_structure_and_format():
         # This test might fail due to missing test data, but should not crash on structure
         # Only fail if it's a structural issue, not data issue
         if "no attribute" in str(e).lower() or "nonetype" in str(e).lower():
-            raise AssertionError(f"XLSX structure test failed: {e}")
+            raise AssertionError(f"XLSX structure test failed: {e}") from e
         # Otherwise, pass - might be due to missing test data
+
+
+@pytest.mark.django_db
+def test_xlsx_export_includes_tryb_column():
+    from io import BytesIO
+
+    from openpyxl import load_workbook
+
+    from deduplikator_autorow.models import DuplicateCandidate, DuplicateScanRun
+    from deduplikator_autorow.utils.export import export_duplicates_to_xlsx
+
+    scan = DuplicateScanRun.objects.create(status=DuplicateScanRun.Status.COMPLETED)
+    a1 = baker.make("bpp.Autor", nazwisko="X", imiona="A")
+    a2 = baker.make("bpp.Autor", nazwisko="X", imiona="A")
+    b1 = baker.make("bpp.Autor", nazwisko="Y", imiona="B")
+    b2 = baker.make("bpp.Autor", nazwisko="Y", imiona="B")
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=a1,
+        duplicate_autor=a2,
+        confidence_score=80,
+        confidence_percent=0.6,
+        main_autor_name="X A",
+        duplicate_autor_name="X A",
+        scan_mode="pbn",
+    )
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=b1,
+        duplicate_autor=b2,
+        confidence_score=80,
+        confidence_percent=0.6,
+        main_autor_name="Y B",
+        duplicate_autor_name="Y B",
+        scan_mode="general",
+    )
+    content = export_duplicates_to_xlsx()
+    wb = load_workbook(BytesIO(content))
+    ws = wb.active
+    headers = [c.value for c in ws[1]]
+    assert "Tryb" in headers, f"Headers do not include 'Tryb': {headers}"
+
+    tryb_col_idx = headers.index("Tryb") + 1
+    tryby = {
+        ws.cell(row=r, column=tryb_col_idx).value for r in range(2, ws.max_row + 1)
+    }
+    assert "PBN" in tryby
+    assert "Ogólny" in tryby
diff --git a/src/deduplikator_autorow/utils/export.py b/src/deduplikator_autorow/utils/export.py
index 68747532b..ffec7ca99 100644
--- a/src/deduplikator_autorow/utils/export.py
+++ b/src/deduplikator_autorow/utils/export.py
@@ -57,6 +57,7 @@ def _build_candidate_row(candidate, site_domain, duplicate_counts):
         _create_pbn_url(dup.pbn_uid_id),
         round(candidate.confidence_percent, 2),
         duplicate_counts[candidate.main_autor_id],
+        "PBN" if candidate.scan_mode == "pbn" else "Ogólny",
     ]
 
 
@@ -94,6 +95,7 @@ def export_duplicates_to_xlsx():
     - Kolumna J: PBN URL duplikatu (kliknij link)
     - Kolumna K: Pewność podobieństwa (0.0-1.0)
     - Kolumna L: Ilość duplikatów
+    - Kolumna M: Tryb (PBN / Ogólny)
 
     Returns:
         bytes: Zawartość pliku XLSX
@@ -138,6 +140,7 @@ def export_duplicates_to_xlsx():
         "PBN URL duplikatu",
         "Pewność podobieństwa",
         "Ilość duplikatów",
+        "Tryb",
     ]
 
     ws.append(headers)

From 73339e6fbbc65a1326d782e5d8566e7476bf547e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 10:51:55 +0200
Subject: [PATCH 15/25] =?UTF-8?q?docs(newsfragment):=20tryb=20og=C3=B3lny?=
 =?UTF-8?q?=20deduplikatora=20autor=C3=B3w?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../newsfragments/+deduplikator-autorow-general.feature.rst | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 src/bpp/newsfragments/+deduplikator-autorow-general.feature.rst

diff --git a/src/bpp/newsfragments/+deduplikator-autorow-general.feature.rst b/src/bpp/newsfragments/+deduplikator-autorow-general.feature.rst
new file mode 100644
index 000000000..2da6d38b7
--- /dev/null
+++ b/src/bpp/newsfragments/+deduplikator-autorow-general.feature.rst
@@ -0,0 +1,6 @@
+Deduplikator autorów: nowy tryb "ogólny" znajdujący duplikaty wśród
+autorów spoza listy pracowników instytucji w PBN. Jeden przycisk
+"Skanuj duplikaty" uruchamia obie fazy (PBN + ogólna) sekwencyjnie.
+Widok pozwala filtrować wyniki radio-button-em (PBN/Ogólny/Oba),
+eksport XLSX zawiera kolumnę "Tryb". Anulowanie fazy ogólnej skutkuje
+statusem "Częściowo zakończone" — wyniki PBN pozostają dostępne.

From 0e1fcacf367d734f2ac222d94f97f657397bfda2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Fri, 1 May 2026 18:54:20 +0200
Subject: [PATCH 16/25] fix(deduplikator): naprawy z final review
 (PARTIAL_COMPLETED, perf, export)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- views.scan_status_view: dodaj PARTIAL_COMPLETED do listy "finished",
  żeby frontend JS poll przestał odpytywać po przejściu skanu w ten stan.
- tasks._run_general_phase: użyj nowego helpera
  _calculate_priority_from_meta zamiast calculate_author_priority
  (2 SQL per candidate). Helper czyta z meta-cache; przybliżenie
  ma_dyscypline (any year vs 2022-2025) udokumentowane jako TODO,
  bo priority to sort hint, nie correctness invariant.
- utils.meta.build_autor_meta: dorzuć poprzednie_nazwiska,
  pokazuj_poprzednie_nazwiska, pseudonim do .only(), bo Autor.__str__
  je czyta — bez tego str(autor) w hot-path emituje 2 deferred-load
  SQL per autor.
- utils.export.export_duplicates_to_xlsx: materializuj queryset raz
  (list(...)), żeby Counter i list-comprehension nie iterowały dwa
  razy (każda iteracja = pełny SQL).
- views._get_next_candidate_group: zamień .values_list().distinct() na
  iterację z dedupe w Pythonie. PostgreSQL DISTINCT + ORDER BY z
  annotacją to tricky semantics — Django może odrzucić annotation
  z SELECT, dając runtime error lub niedeterministyczną kolejność.
- testy: scan_status_view dla PARTIAL_COMPLETED, regresja
  per-candidate SQL w _run_general_phase.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/deduplikator_autorow/tasks.py             | 29 ++++++++++++++-
 .../tests/test_general_phase.py               | 36 +++++++++++++++++++
 .../tests/test_view_mode_filter.py            | 18 ++++++++++
 src/deduplikator_autorow/utils/export.py      |  7 ++--
 src/deduplikator_autorow/utils/meta.py        | 15 +++++++-
 src/deduplikator_autorow/views.py             | 20 ++++++++---
 6 files changed, 115 insertions(+), 10 deletions(-)

diff --git a/src/deduplikator_autorow/tasks.py b/src/deduplikator_autorow/tasks.py
index a112dec0a..3027491d5 100644
--- a/src/deduplikator_autorow/tasks.py
+++ b/src/deduplikator_autorow/tasks.py
@@ -56,6 +56,33 @@ def _get_user_by_id(user_id):
         return None
 
 
+def _calculate_priority_from_meta(meta_entry: dict) -> int:
+    """Compute a candidate's sort priority from its meta-cache entry (no SQL).
+
+    Mirrors :func:`calculate_author_priority` but reads the cached fields
+    produced by ``build_autor_meta``, so ``_run_general_phase`` does not
+    issue per-candidate queries on its hot path.
+
+    Args:
+        meta_entry: dict with ``lata_publikacji`` (iterable of publication
+            years) and ``ma_dyscypline`` (truthy if any discipline exists).
+
+    Returns:
+        100 - has 2022-2025 publications WITH disciplines; 50 - has
+        2022-2025 publications (any); 0 - no recent publications.
+
+    TODO: ``calculate_author_priority`` restricts disciplines to 2022-2025
+    as well; the meta-cache only stores ``ma_dyscypline`` (any year), so
+    100 vs 50 is an approximation. Acceptable because priority is a sort
+    hint, not a correctness invariant.
+    """
+    if not any(2022 <= rok <= 2025 for rok in meta_entry["lata_publikacji"]):
+        return 0
+    if meta_entry["ma_dyscypline"]:
+        return 100
+    return 50
+
+
 def calculate_author_priority(autor):
     """
     Calculate priority based on publication dates and disciplines.
@@ -283,7 +310,7 @@ def _run_general_phase(scan_run, min_confidence=MIN_CONFIDENCE_TO_STORE):
                     confidence_score=score,
                     confidence_percent=normalize_confidence(score),
                     reasons=reasons,
-                    priority=calculate_author_priority(dup_obj),
+                    priority=_calculate_priority_from_meta(meta[dup_pk]),
                     main_autor_name=str(main_obj),
                     duplicate_autor_name=str(dup_obj),
                     main_publications_count=meta[main_pk]["publikacje_count"],
diff --git a/src/deduplikator_autorow/tests/test_general_phase.py b/src/deduplikator_autorow/tests/test_general_phase.py
index 76cb48e2b..586e6ace5 100644
--- a/src/deduplikator_autorow/tests/test_general_phase.py
+++ b/src/deduplikator_autorow/tests/test_general_phase.py
@@ -96,6 +96,42 @@ def test_general_respects_not_a_duplicate():
     assert DuplicateCandidate.objects.filter(scan_run=scan).count() == 0
 
 
+@pytest.mark.django_db
+def test_general_phase_no_sql_per_candidate():
+    """_run_general_phase nie robi SQL per candidate (meta-cache)."""
+    from django.db import connection
+    from django.test.utils import CaptureQueriesContext
+
+    # 5 par z dwoma autorami każda → 5 candidates
+    for nazwisko in ["Aaa", "Bbb", "Ccc", "Ddd", "Eee"]:
+        baker.make("bpp.Autor", nazwisko=nazwisko, imiona="Jan")
+        baker.make("bpp.Autor", nazwisko=nazwisko, imiona="Jan")
+
+    scan = DuplicateScanRun.objects.create()
+    with CaptureQueriesContext(connection) as ctx:
+        _run_general_phase(scan, min_confidence=50)
+    n5 = len(ctx.captured_queries)
+
+    # Drugi run z 10 par
+    for nazwisko in ["Fff", "Ggg", "Hhh", "Iii", "Jjj"]:
+        baker.make("bpp.Autor", nazwisko=nazwisko, imiona="Jan")
+        baker.make("bpp.Autor", nazwisko=nazwisko, imiona="Jan")
+
+    scan2 = DuplicateScanRun.objects.create()
+    with CaptureQueriesContext(connection) as ctx:
+        _run_general_phase(scan2, min_confidence=50)
+    n10 = len(ctx.captured_queries)
+
+    # Liczba zapytań nie powinna rosnąć liniowo z liczbą candidates.
+    # Bulk_create może dodać kilka SAVEPOINT/INSERT, ale 5 nowych par
+    # nie może kosztować łącznie więcej niż 5 dodatkowych zapytań.
+    diff = n10 - n5
+    assert diff <= 5, (
+        f"Per-candidate SQL detected: 5 candidates → {n5} queries, "
+        f"10 candidates → {n10} queries (diff={diff})"
+    )
+
+
 @pytest.mark.django_db
 def test_general_transitive_cluster():
     """Trzech 'Linker Jan' tworzy klaster {A,B,C} → 2 pary z jednym main."""
diff --git a/src/deduplikator_autorow/tests/test_view_mode_filter.py b/src/deduplikator_autorow/tests/test_view_mode_filter.py
index 85521284f..41f2f75bc 100644
--- a/src/deduplikator_autorow/tests/test_view_mode_filter.py
+++ b/src/deduplikator_autorow/tests/test_view_mode_filter.py
@@ -137,6 +137,24 @@ def test_view_partial_completed_shows_banner(auth_client):
     assert "Częściowo zakończone" in content or "anulowana" in content.lower()
 
 
+@pytest.mark.django_db
+def test_scan_status_view_finished_for_partial_completed(auth_client):
+    """scan_status_view zwraca finished=True dla PARTIAL_COMPLETED."""
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.PARTIAL_COMPLETED,
+        finished_at=timezone.now(),
+        total_authors_to_scan=10,
+        authors_scanned=10,
+    )
+    response = auth_client.get(
+        reverse("deduplikator_autorow:scan_status", kwargs={"scan_id": scan.pk})
+    )
+    assert response.status_code == 200
+    data = response.json()
+    assert data["finished"] is True
+    assert data["status"] == "partial_completed"
+
+
 @pytest.mark.django_db
 def test_view_mode_filter_radio_present(auth_client, scan_with_both_modes):
     """Mode-filter radio widoczny w HTML."""
diff --git a/src/deduplikator_autorow/utils/export.py b/src/deduplikator_autorow/utils/export.py
index ffec7ca99..599072569 100644
--- a/src/deduplikator_autorow/utils/export.py
+++ b/src/deduplikator_autorow/utils/export.py
@@ -102,9 +102,10 @@ def export_duplicates_to_xlsx():
     """
     site_domain = _get_site_domain()
 
-    # JEDNO zapytanie zamiast tysięcy!
-    # Pobierz wszystkich kandydatów ze statusem PENDING
-    candidates = (
+    # JEDNO zapytanie zamiast tysięcy! list() wykonuje SQL dokładnie raz,
+    # więc Counter i list-comprehension czytają te same wiersze z pamięci,
+    # niezależnie od tego, w jaki sposób każde z nich iteruje po wyniku.
+    candidates = list(
         DuplicateCandidate.objects.filter(status=DuplicateCandidate.Status.PENDING)
         .select_related(
             "main_autor",
diff --git a/src/deduplikator_autorow/utils/meta.py b/src/deduplikator_autorow/utils/meta.py
index fd2b0e824..218b182a1 100644
--- a/src/deduplikator_autorow/utils/meta.py
+++ b/src/deduplikator_autorow/utils/meta.py
@@ -78,8 +78,21 @@ def build_autor_meta() -> dict[int, dict]:
     Łącznie 6 zapytań, niezależnie od liczby autorów.
     """
     autorzy_meta: dict[int, dict] = {}
+    # NOTE: include `poprzednie_nazwiska`, `pokazuj_poprzednie_nazwiska`
+    # and `pseudonim` because Autor.__str__ reads them — without them
+    # `str(autor)` (used by callers such as
+    # ``_run_general_phase``) triggers a deferred field load per author
+    # (2+ queries per author = O(N) hot-path SQL).
     autor_qs = Autor.objects.only(
-        "pk", "nazwisko", "imiona", "orcid", "pbn_uid_id", "tytul_id"
+        "pk",
+        "nazwisko",
+        "imiona",
+        "orcid",
+        "pbn_uid_id",
+        "tytul_id",
+        "poprzednie_nazwiska",
+        "pokazuj_poprzednie_nazwiska",
+        "pseudonim",
     )
     for a in autor_qs.iterator():
         autorzy_meta[a.pk] = {
diff --git a/src/deduplikator_autorow/views.py b/src/deduplikator_autorow/views.py
index 0fde5d5ae..d027d7537 100644
--- a/src/deduplikator_autorow/views.py
+++ b/src/deduplikator_autorow/views.py
@@ -860,6 +860,7 @@ def scan_status_view(request, scan_id):
                 "finished": scan_run.status
                 in [
                     DuplicateScanRun.Status.COMPLETED,
+                    DuplicateScanRun.Status.PARTIAL_COMPLETED,
                     DuplicateScanRun.Status.CANCELLED,
                     DuplicateScanRun.Status.FAILED,
                 ],
@@ -909,8 +910,13 @@ def _get_next_candidate_group(scan_run, skip_count=0, mode="both"):
     if mode != "both":
         qs = qs.filter(scan_mode=mode)
 
-    # PBN-first ordering when mode=both
-    distinct_main_autor_ids = (
+    # Order first, then dedupe in Python. With .distinct(), Django adds the
+    # order_by() columns to the SELECT (see the QuerySet.distinct() note in
+    # the Django docs), so DISTINCT would apply to the whole ordering tuple
+    # and the same main_autor_id could still come back several times.
+    # Materializing the ordered ids and keeping the first occurrence of
+    # each is simple and deterministic.
+    rows = (
         qs.annotate(
             mode_order=Case(
                 When(scan_mode="pbn", then=Value(0)),
@@ -921,11 +927,15 @@ def _get_next_candidate_group(scan_run, skip_count=0, mode="both"):
         )
         .order_by("mode_order", "-priority", "-confidence_score", "main_autor_id")
         .values_list("main_autor_id", flat=True)
-        .distinct()
     )
 
-    # Convert to list to enable indexing
-    main_autor_ids = list(distinct_main_autor_ids)
+    # Stable dedupe preserving order of first occurrence.
+    seen: set[int] = set()
+    main_autor_ids: list[int] = []
+    for pk in rows:
+        if pk not in seen:
+            seen.add(pk)
+            main_autor_ids.append(pk)
 
     if not main_autor_ids:
         return None, None, 0

From acc8b08a02a06e4563d62663b3a7ac47c3af3648 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sat, 2 May 2026 11:44:08 +0200
Subject: [PATCH 17/25] =?UTF-8?q?fix(deduplikator=5Fautorow):=20UI=20popra?=
 =?UTF-8?q?wki=20=E2=80=94=20layout,=20bold=20publikacji,=20nawigacja=20wy?=
 =?UTF-8?q?szukiwania,=20make=20assets?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Publikacje: nadpisz Foundation .callout a font-weight:bolder na normal
  (tytuł w <b> pozostaje bold, reszta normalna)
- Top bar: justify-content flex-start, szukajka zaraz po przyciskach trybu
- Możliwe duplikaty: nagłówek w osobnym wierszu, przyciski pewności poniżej;
  cały rząd ukryty gdy tylko jedna kategoria kandydatów
- Wyszukiwanie: dodana nawigacja Poprzedni/Następny po wynikach szukania
  (skip_count + search_has_prev/search_has_next)
- Makefile: stamp file zamiast per-CSS-target, SCSS wildcard obejmuje
  src/*/static/*/scss/*.scss, stamp usuwany w make clean
---
 Makefile                                      |  22 +-
 .../scss/deduplikator_autorow.scss            | 263 +++++-
 .../duplicate_authors.html                    | 824 +++++++++++++-----
 src/deduplikator_autorow/views.py             | 294 +++++--
 4 files changed, 1101 insertions(+), 302 deletions(-)

diff --git a/Makefile b/Makefile
index 001e82d0f..fac15cb9e 100644
--- a/Makefile
+++ b/Makefile
@@ -119,13 +119,13 @@ clean-pycache: ## Usuń __pycache__, *.pyc oraz .eggs/.cache
 	rm -rf .eggs .cache
 
 clean: clean-pycache ## Szersze czyszczenie: egg-info, logi, build, dist, staticroot/CACHE, .tox
+	rm -f .grunt-build-stamp
 	find . -type d -name \*egg-info -print0 | xargs -0 rm -rf
 	find . -name \*~ -print0 | xargs -0 rm -f
 	find . -name \*.prof -print0 | xargs -0 rm -f
 	rm -rf prof/
 	find . -name \*\\.log -print0 | xargs -0 rm -f
-	find . -name \*\\.log -print0 | xargs -0 rm -f
-	find . -name \#\* -print0 | xargs -0 rm -f
+	find . -name \#\* -not -path './node_modules/*' -print0 | xargs -0 rm -rf
 	rm -rf build dist/*django_bpp*whl dist/*bpp_iplweb*whl *.log dist
 	rm -rf src/django_bpp/staticroot/CACHE
 	rm -rf .tox
@@ -149,11 +149,13 @@ distclean: clean ## Pełne czyszczenie: + node_modules, staticroot, media, dist,
 grunt-build: ## Uruchom `grunt build` (SCSS → CSS, bundling JS)
 	grunt build
 
-# CSS output files (targets)
-CSS_TARGETS := src/bpp/static/scss/app-blue.css src/bpp/static/scss/app-green.css src/bpp/static/scss/app-orange.css
+# grunt build kompiluje WSZYSTKIE SCSS → CSS za jednym odpaleniem.
+# Zwykła (jawna) reguła $(CSS_TARGETS): $(SCSS_SOURCES) odpalałaby grunt N razy
+# (raz per out-of-date target). Zamiast tego: jeden stamp file zależy od
+# wszystkich SCSS + node_modules; recepta dotyka stampu po udanym grunt build.
 
-# SCSS source files
-SCSS_SOURCES := $(wildcard src/bpp/static/scss/*.scss)
+SCSS_SOURCES := $(wildcard src/bpp/static/scss/*.scss) \
+                $(wildcard src/*/static/*/scss/*.scss)
 
 # Node modules dependency
 NODE_MODULES := node_modules/.installed
@@ -166,14 +168,16 @@ $(NODE_MODULES): package.json yarn.lock
 	export PUPPETEER_SKIP_CHROME_DOWNLOAD=true PUPPETEER_SKIP_CHROME_HEADLESS_SHELL_DOWNLOAD=true && $(YARN_CMD) install  --no-progress --emoji false -s
 	touch $(NODE_MODULES)
 
-$(CSS_TARGETS): $(SCSS_SOURCES) $(NODE_MODULES)
+CSS_STAMP := .grunt-build-stamp
+
+$(CSS_STAMP): $(SCSS_SOURCES) $(NODE_MODULES)
 	grunt build
+	@touch $(CSS_STAMP)
 
 $(MO_FILES): $(PO_FILES)
-	# cd src &&  django-admin compilemessages
 	uv run python src/manage.py compilemessages --locale=pl --ignore=site-packages
 
-assets: $(CSS_TARGETS) $(MO_FILES) ## Zbuduj frontend (CSS + .mo); uruchamia `yarn install` jeśli trzeba
+assets: $(CSS_STAMP) $(MO_FILES) ## Zbuduj frontend (CSS + .mo); uruchamia `yarn install` jeśli trzeba
 
 yarn: $(NODE_MODULES) ## Zainstaluj zależności Node.js (yarn install)
 
diff --git a/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss b/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
index 086113f84..271550d39 100644
--- a/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
+++ b/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
@@ -1,6 +1,17 @@
 // Deduplikator Autorow - Styles
 // BEM convention: .deduplikator-autorow__element--modifier
 
+// Foundation .label ma domyślnie kwadratowe rogi - w obrębie deduplikatora
+// chcemy jednolitą "pigułkową" estetykę (zgodną z chipami powodów
+// podobieństwa). Wrapper .deduplikator-autorow-page ogranicza override
+// tylko do tej strony, żeby nie wpływać globalnie na inne widoki BPP.
+.deduplikator-autorow-page .label {
+  border-radius: 999px;
+  padding: 3px 12px;
+  font-weight: 600;
+  letter-spacing: 0.02em;
+}
+
 // =============================================================================
 // SIDEBAR ACCORDION
 // =============================================================================
@@ -352,6 +363,28 @@
 
 .deduplikator-autorow__duplicate-card {
   margin-bottom: 20px;
+  // Niektóre nadrzędne layouty BPP wymuszają text-align: center w obrębie
+  // .callout (zaobserwowane w warningowych callout-ach panelu duplikatów —
+  // imiona/nazwiska autorów wyświetlały się wycentrowane). Wymuszamy
+  // domyślne wyrównanie do lewej dla całej karty.
+  text-align: left;
+}
+
+// Stan "wyłączone" dla przycisków "Scal wszystkie" gdy w grupie jest kandydat
+// poniżej progu pewności. Trzymamy je klikalne (do wyświetlenia komunikatu)
+// dlatego nie używamy [disabled] - tylko aria-disabled + klasa wizualna.
+.deduplikator-autorow__merge-all-btn--disabled,
+.button.deduplikator-autorow__merge-all-btn--disabled {
+  opacity: 0.55;
+  cursor: not-allowed;
+  background-color: #b5b5b5 !important;
+  color: #fff !important;
+
+  &:hover,
+  &:focus {
+    background-color: #b5b5b5 !important;
+    box-shadow: none;
+  }
 }
 
 .deduplikator-autorow__duplicate-header {
@@ -406,6 +439,10 @@
 
 .deduplikator-autorow__duplicates-header {
   margin-bottom: 15px;
+
+  .grid-x + .grid-x {
+    margin-top: 0.5em;
+  }
 }
 
 .deduplikator-autorow__duplicates-title {
@@ -484,21 +521,42 @@
 
 // Mode badges, partial-completed banner, mode filter, scan phase
 .deduplikator-autorow {
+  &__main-record-title {
+    display: flex;
+    align-items: center;
+    gap: 0.6em;
+    flex-wrap: wrap;
+  }
+
   &__badge {
-    display: inline-block;
-    padding: 2px 8px;
-    border-radius: 3px;
-    font-size: 0.75em;
-    font-weight: 600;
+    display: inline-flex;
+    align-items: center;
+    gap: 0.35em;
+    padding: 4px 10px 4px 9px;
+    border-radius: 999px;
+    font-size: 0.7em;
+    font-weight: 700;
+    letter-spacing: 0.04em;
+    text-transform: uppercase;
     color: #fff;
-    margin-left: 0.5em;
+    line-height: 1;
+    box-shadow: 0 1px 2px rgba(0, 0, 0, 0.15);
+    vertical-align: middle;
+
+    .fi-link,
+    .fi-magnifying-glass {
+      font-size: 1em;
+      line-height: 1;
+    }
 
     &--pbn {
-      background-color: #2196f3;
+      background: linear-gradient(180deg, #42a5f5 0%, #1976d2 100%);
+      border: 1px solid #1565c0;
     }
 
     &--general {
-      background-color: #ff9800;
+      background: linear-gradient(180deg, #ffb74d 0%, #f57c00 100%);
+      border: 1px solid #ef6c00;
     }
   }
 
@@ -506,11 +564,93 @@
     margin: 1em 0;
   }
 
-  &__mode-filter {
+  // Top bar — filtr trybu po lewej, szybkie wyszukiwanie po prawej.
+  // Wymuszamy jeden wiersz (nowrap); search ma flex 1 + min-width: 0,
+  // żeby mógł zwężać się poniżej intrinsic width na wąskich ekranach,
+  // zamiast wyskakiwać do nowej linii i rozciągać się na 100% szerokości.
+  &__top-bar {
+    display: flex;
+    align-items: center;
+    justify-content: flex-start;
+    gap: 1em;
     margin: 1em 0;
+    flex-wrap: nowrap;
+  }
+
+  &__top-search {
+    flex: 1 1 280px;
+    min-width: 0;
+    max-width: 420px;
+    margin: 0;
+  }
+
+  &__top-search-group {
+    margin: 0;
+  }
+
+  &__top-search-info {
+    display: block;
+    margin-top: 0.4em;
+    color: #555;
+  }
+
+  &__confidence-filter {
+    flex: 0 0 auto;
+    margin: 0;
+    display: flex;
+    align-items: center;
+  }
+
+  &__confidence-buttons {
+    margin: 0;
+
+    .button {
+      margin: 0;
+      display: inline-flex;
+      align-items: center;
+      gap: 0.4em;
+    }
+  }
+
+  // Mode filter (Pokaż wyniki: PBN/Ogólny/Oba) — Foundation button-group based
+  &__mode-filter {
+    flex: 0 0 auto;
+    margin: 0;
+    display: flex;
+    align-items: center;
+    gap: 0.75em;
+    flex-wrap: nowrap;
+  }
+
+  &__mode-filter-label {
+    font-weight: 600;
+    color: #4a4a4a;
+  }
+
+  &__mode-buttons {
+    margin: 0;
+
+    .button {
+      margin: 0;
+      display: inline-flex;
+      align-items: center;
+      gap: 0.4em;
+    }
+  }
 
-    label {
-      margin-right: 1.5em;
+  &__mode-count {
+    display: inline-block;
+    margin-left: 0.3em;
+    padding: 1px 7px;
+    border-radius: 999px;
+    background: rgba(0, 0, 0, 0.18);
+    color: inherit;
+    font-size: 0.8em;
+    font-weight: 700;
+    line-height: 1.4;
+
+    .hollow & {
+      background: rgba(0, 0, 0, 0.08);
     }
   }
 
@@ -518,4 +658,105 @@
     margin-top: 0.5em;
     font-style: italic;
   }
+
+  // Action group sections within each duplicate card (Podgląd / Decyzja / Scalanie)
+  &__actions {
+    display: flex;
+    flex-direction: column;
+    gap: 14px;
+  }
+
+  &__action-group {
+    background: rgba(0, 0, 0, 0.03);
+    border: 1px solid rgba(0, 0, 0, 0.08);
+    border-radius: 6px;
+    padding: 10px 12px;
+  }
+
+  &__action-group-title {
+    font-size: 0.72rem;
+    font-weight: 700;
+    text-transform: uppercase;
+    letter-spacing: 0.06em;
+    color: #555;
+    margin: 0 0 8px 0;
+    display: flex;
+    align-items: center;
+    gap: 0.4em;
+
+    .fi-eye,
+    .fi-checkbox,
+    .fi-arrows-compress {
+      color: #888;
+    }
+  }
+
+  // Opisy bibliograficzne renderują <b>/<strong> wokół tytułów — to jest OK,
+  // tytuł ma być boldem. Problem: Foundation daje .callout a:not(.close-button)
+  // { font-weight: bolder }, więc cały tekst w <a> wewnątrz .callout jest
+  // bold. Resetujemy font-weight na <a> w obrębie itemów publikacji, ale
+  // zostawiamy <b>/<strong> z ich domyślnym bold, żeby tytuł nadal był
+  // wytłuszczony.
+  &__publication-list,
+  &__publication-item,
+  &__publication-item--duplicate,
+  &__publication-description {
+    font-weight: normal;
+  }
+
+  &__publication-item a,
+  &__publication-item--duplicate a {
+    font-weight: normal;
+  }
+
+  // Reason chips — small pills with icon + text
+  &__reasons-chips {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 6px;
+    padding: 0;
+    margin: 0;
+  }
+
+  &__reason-chip {
+    display: inline-flex;
+    align-items: center;
+    gap: 0.35em;
+    padding: 3px 9px;
+    border-radius: 999px;
+    font-size: 0.78rem;
+    line-height: 1.5;
+    border: 1px solid transparent;
+    white-space: nowrap;
+    max-width: 100%;
+
+    .deduplikator-autorow__reason-chip-text {
+      overflow: hidden;
+      text-overflow: ellipsis;
+    }
+
+    &--match {
+      background: #e6f4ea;
+      border-color: #b6dec0;
+      color: #1b5e20;
+    }
+
+    &--info {
+      background: #e7f0fc;
+      border-color: #b9d4f0;
+      color: #0d3a73;
+    }
+
+    &--weak {
+      background: #f0f0f0;
+      border-color: #d8d8d8;
+      color: #555;
+    }
+
+    &--warn {
+      background: #fff4e0;
+      border-color: #f5d49a;
+      color: #8a4b00;
+    }
+  }
 }
diff --git a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
index 1884e82fb..426a73bd5 100644
--- a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
+++ b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
@@ -6,15 +6,15 @@
       href="{% static 'deduplikator_autorow/css/deduplikator_autorow.css' %}">
 {% endblock %}
 
-{% block title %}Deduplikator Autorów PBN{% endblock %}
+{% block title %}Deduplikator autorów{% endblock %}
 
 {% block breadcrumbs %}
     <li><a href="/">Strona główna</a></li>
-    <li><a href="{% url "deduplikator_autorow:duplicate_authors" %}">Deduplikator autorów PBN</a></li>
+    <li><a href="{% url "deduplikator_autorow:duplicate_authors" %}">Deduplikator autorów</a></li>
 {% endblock %}
 
 {% block content %}
-    <div id="left-menu-container">
+    <div id="left-menu-container" class="deduplikator-autorow-page">
         <!-- Left Sidebar Menu -->
         <div class="left-menu-sidebar">
             <div class="left-menu-menu">
@@ -224,37 +224,6 @@
                     </li>
                     {% endif %}
 
-                    <!-- Wyszukiwanie autora -->
-                    <li class="accordion-item {% if search_lastname %}is-active{% endif %}" data-accordion-item>
-                        <a class="accordion-title" href="#" {% if search_lastname %}aria-expanded="true"{% endif %}>
-                            <span class="fi-magnifying-glass" aria-hidden="true"></span> Szukaj autora
-                        </a>
-                        <div class="accordion-content{% if search_lastname %} deduplikator-autorow__accordion-content{% endif %}" data-tab-content>
-                            <form method="get" action="{% url 'deduplikator_autorow:duplicate_authors' %}" class="deduplikator-autorow__form">
-                                <label for="search_lastname" class="deduplikator-autorow__label">Po nazwisku:</label>
-                                <input type="text" id="search_lastname" name="search_lastname"
-                                       value="{{ search_lastname|default:'' }}"
-                                       placeholder="Wpisz część nazwiska..."
-                                       class="deduplikator-autorow__input">
-                                <button type="submit" class="button primary expanded small">
-                                    <span class="fi-magnifying-glass"></span> Szukaj
-                                </button>
-                                {% if search_lastname %}
-                                    <a href="{% url 'deduplikator_autorow:duplicate_authors' %}"
-                                       class="button secondary expanded small deduplikator-autorow__button--margin-top">
-                                        Wyczyść
-                                    </a>
-                                    <div class="deduplikator-autorow__search-results">
-                                        <strong>Szukano:</strong> "{{ search_lastname }}"
-                                        {% if search_results_count is not None %}
-                                            <br><strong>Znaleziono:</strong> {{ search_results_count }} autor{% if search_results_count != 1 %}ów{% endif %}
-                                        {% endif %}
-                                    </div>
-                                {% endif %}
-                            </form>
-                        </div>
-                    </li>
-
                     <!-- Pobierz XLSX -->
                     <li class="accordion-item" data-accordion-item>
                         <a class="accordion-title" href="#">
@@ -269,43 +238,35 @@
                         </div>
                     </li>
 
-                    <!-- Obejrz nie-duplikaty -->
+                    <!-- Nie-duplikaty: obejrzenie + reset w jednym panelu -->
                     <li class="accordion-item {% if not_duplicate_count > 0 %}is-active{% endif %}" data-accordion-item>
                         <a class="accordion-title" href="#" {% if not_duplicate_count > 0 %}aria-expanded="true"{% endif %}>
-                            <span class="fi-eye" aria-hidden="true"></span> Obejrz nie-duplikaty
+                            <span class="fi-eye" aria-hidden="true"></span> Nie-duplikaty {% if not_duplicate_count > 0 %}({{ not_duplicate_count }}){% endif %}
                         </a>
                         <div class="accordion-content{% if not_duplicate_count > 0 %} deduplikator-autorow__accordion-content{% endif %}" data-tab-content>
-                            <p class="deduplikator-autorow__status-text--spaced">Przejrzyj autorów oznaczonych jako nie będących duplikatami.</p>
-                            <a href="/admin/deduplikator_autorow/notaduplicate/"
-                               class="button secondary expanded deduplikator-autorow__button"
-                               target="_blank">
-                                <span class="fi-magnifying-glass"></span> Otwórz listę
-                            </a>
-                        </div>
-                    </li>
-
-                    <!-- Reset nie-duplikatów -->
-                    {% if not_duplicate_count > 0 %}
-                    <li class="accordion-item" data-accordion-item>
-                        <a class="accordion-title" href="#">
-                            <span class="fi-refresh" aria-hidden="true"></span> Zresetuj nie-duplikaty ({{ not_duplicate_count }})
-                        </a>
-                        <div class="accordion-content" data-tab-content>
-                            <p class="deduplikator-autorow__status-text--spaced">Wyczyść wszystkie oznaczenia nie-duplikatów.</p>
-                            <p class="deduplikator-autorow__status-warning deduplikator-autorow__status-text--spaced">
-                                <span class="fi-alert"></span> Obecnie: {{ not_duplicate_count }} oznaczonych
-                            </p>
-                            <form method="post" action="{% url 'deduplikator_autorow:reset_not_duplicates' %}" class="deduplikator-autorow__form">
-                                {% csrf_token %}
-                                <button type="submit"
-                                        class="button alert expanded deduplikator-autorow__button"
-                                        data-confirm="Czy na pewno chcesz zresetować wszystkie oznaczenia nie-duplikatów? Tej operacji nie można cofnąć.">
-                                    <span class="fi-trash"></span> Zresetuj wszystkie
-                                </button>
-                            </form>
+                            {% if not_duplicate_count > 0 %}
+                                <p class="deduplikator-autorow__status-text--spaced">Autorzy oznaczeni jako nie będący duplikatami.</p>
+                                <p class="deduplikator-autorow__status-warning deduplikator-autorow__status-text--spaced">
+                                    <span class="fi-alert"></span> Obecnie: {{ not_duplicate_count }} oznaczonych
+                                </p>
+                                <a href="/admin/deduplikator_autorow/notaduplicate/"
+                                   class="button secondary expanded small deduplikator-autorow__button-margin-bottom"
+                                   target="_blank">
+                                    <span class="fi-magnifying-glass"></span> Otwórz listę
+                                </a>
+                                <form method="post" action="{% url 'deduplikator_autorow:reset_not_duplicates' %}" class="deduplikator-autorow__form">
+                                    {% csrf_token %}
+                                    <button type="submit"
+                                            class="button alert expanded small"
+                                            data-confirm="Czy na pewno chcesz zresetować wszystkie oznaczenia nie-duplikatów? Tej operacji nie można cofnąć.">
+                                        <span class="fi-trash"></span> Zresetuj wszystkie
+                                    </button>
+                                </form>
+                            {% else %}
+                                <p class="deduplikator-autorow__status-text--muted">Brak autorów oznaczonych jako nie-duplikaty.</p>
+                            {% endif %}
                         </div>
                     </li>
-                    {% endif %}
 
                     <!-- Ignorowani autorzy -->
                     <li class="accordion-item {% if ignored_authors_count > 0 %}is-active{% endif %}" data-accordion-item>
@@ -398,36 +359,94 @@
               </div>
             {% endif %}
 
-            {# Mode filter #}
-            <div class="row deduplikator-autorow__mode-filter">
-              <div class="small-12 columns">
-                <strong>Pokaż wyniki:</strong>
-                <label>
-                  <input type="radio" name="mode" value="pbn"
-                         {% if mode == "pbn" %}checked{% endif %}
-                         onchange="location.href='?mode=pbn'">
-                  PBN ({{ pending_pbn_count }})
-                </label>
-                <label>
-                  <input type="radio" name="mode" value="general"
-                         {% if mode == "general" %}checked{% endif %}
-                         onchange="location.href='?mode=general'">
-                  Ogólny ({{ pending_general_count }})
-                </label>
-                <label>
-                  <input type="radio" name="mode" value="both"
-                         {% if mode == "both" or not mode %}checked{% endif %}
-                         onchange="location.href='?mode=both'">
-                  Oba
-                </label>
+            {# Top bar: mode-filter po lewej, szybkie wyszukiwanie po prawej. #}
+            {# Filtr pewności (Wszyscy/Pewniaki/Słabe) NIE jest tutaj - dotyczy #}
+            {# tylko kandydatów aktualnego głównego autora i wisi przy #}
+            {# nagłówku 'Możliwe duplikaty'. #}
+            {# Top-bar pokazujemy tylko gdy mamy co filtrować — czyli mamy #}
+            {# głównego autora albo wynik wyszukiwania. Na ekranie postępu #}
+            {# skanowania ('running_scan and not glowny_autor') schowane. #}
+            {% with q_search=search_lastname %}
+            {% if glowny_autor or search_lastname %}
+            <div class="deduplikator-autorow__top-bar">
+              <div class="deduplikator-autorow__mode-filter">
+                <span class="deduplikator-autorow__mode-filter-label">Pokaż wyniki:</span>
+                <div class="button-group deduplikator-autorow__mode-buttons" role="tablist" aria-label="Filtr trybu wyników">{# search term is urlencoded so an ampersand, hash or plus in it cannot break the query string #}
+                  <a href="?mode=pbn{% if q_search %}&amp;search_lastname={{ q_search|urlencode }}{% endif %}"
+                     role="tab"
+                     aria-selected="{% if mode == 'pbn' %}true{% else %}false{% endif %}"
+                     class="button{% if mode == 'pbn' %} primary{% else %} hollow secondary{% endif %}">
+                    <span class="fi-link" aria-hidden="true"></span>
+                    PBN
+                    <span class="deduplikator-autorow__mode-count">{{ pending_pbn_count }}</span>
+                  </a>
+                  <a href="?mode=general{% if q_search %}&amp;search_lastname={{ q_search|urlencode }}{% endif %}"
+                     role="tab"
+                     aria-selected="{% if mode == 'general' %}true{% else %}false{% endif %}"
+                     class="button{% if mode == 'general' %} primary{% else %} hollow secondary{% endif %}">
+                    <span class="fi-magnifying-glass" aria-hidden="true"></span>
+                    Ogólny
+                    <span class="deduplikator-autorow__mode-count">{{ pending_general_count }}</span>
+                  </a>
+                  <a href="?mode=both{% if q_search %}&amp;search_lastname={{ q_search|urlencode }}{% endif %}"
+                     role="tab"
+                     aria-selected="{% if mode == 'both' or not mode %}true{% else %}false{% endif %}"
+                     class="button{% if mode == 'both' or not mode %} primary{% else %} hollow secondary{% endif %}">
+                    <span class="fi-list" aria-hidden="true"></span>
+                    Oba
+                  </a>
+                </div>
               </div>
+
+              <form method="get"
+                    action="{% url 'deduplikator_autorow:duplicate_authors' %}"
+                    class="deduplikator-autorow__top-search"
+                    role="search"
+                    autocomplete="off">
+                <input type="hidden" name="mode" value="{{ mode }}">
+                <label for="search_lastname" class="show-for-sr">Szukaj autora po nazwisku</label>
+                <div class="input-group deduplikator-autorow__top-search-group">
+                  <span class="input-group-label">
+                    <span class="fi-magnifying-glass" aria-hidden="true"></span>
+                  </span>
+                  <input type="text"
+                         id="search_lastname"
+                         name="search_lastname"
+                         value="{{ q_search|default:'' }}"
+                         placeholder="Szukaj autora po nazwisku..."
+                         class="input-group-field"
+                         list="lastname-suggestions"
+                         autocomplete="off">
+                  <datalist id="lastname-suggestions"></datalist>
+                  <div class="input-group-button">
+                    <button type="submit" class="button primary">Szukaj</button>
+                  </div>
+                  {% if q_search %}
+                  <div class="input-group-button">
+                    <a href="?mode={{ mode }}"
+                       class="button secondary"
+                       title="Wyczyść wyszukiwanie">
+                      <span class="fi-x" aria-hidden="true"></span>
+                    </a>
+                  </div>
+                  {% endif %}
+                </div>
+                {% if q_search and search_results_count is not None %}
+                <small class="deduplikator-autorow__top-search-info">
+                  Wynik dla "<strong>{{ q_search }}</strong>":
+                  {{ search_results_count }} autor{% if search_results_count != 1 %}ów{% endif %}
+                </small>
+                {% endif %}
+              </form>
             </div>
+            {% endif %}
+            {% endwith %}
 
             {% include "includes/pbn_freshness_warning.html" with custom_message="Analiza duplikatów opiera się na lokalnej kopii danych autorów z PBN. Wyniki mogą być nieaktualne." %}
 
             <div class="grid-x grid-padding-x align-middle deduplikator-autorow__header">
                 <div class="cell auto">
-                    <h1>Deduplikator Autorów PBN <span class="label warning deduplikator-autorow__beta-label">BETA</span></h1>
+                    <h1>Deduplikator autorów</h1>
                 </div>
                 <div class="cell shrink">
                     {% if not search_lastname and scientist %}
@@ -458,6 +477,17 @@ <h1>Deduplikator Autorów PBN <span class="label warning deduplikator-autorow__b
                             <a href="{% url 'deduplikator_autorow:duplicate_authors' %}?skip_count={{ skip_count|add:"1" }}"
                                class="button primary">Następny autor <span class="fi-arrow-right"></span></a>
                         </div>
+                    {% elif search_lastname and glowny_autor %}
+                        <div class="button-group">{# searched surname is urlencoded so an ampersand, hash or plus in it cannot break the query string #}
+                            {% if search_has_prev %}
+                                <a href="?search_lastname={{ search_lastname|urlencode }}&amp;mode={{ mode }}&amp;skip_count={{ skip_count|add:"-1" }}"
+                                   class="button secondary"><span class="fi-arrow-left"></span> Poprzedni</a>
+                            {% endif %}
+                            {% if search_has_next %}
+                            <a href="?search_lastname={{ search_lastname|urlencode }}&amp;mode={{ mode }}&amp;skip_count={{ skip_count|add:"1" }}"
+                               class="button primary">Następny <span class="fi-arrow-right"></span></a>
+                            {% endif %}
+                        </div>
                     {% endif %}
                 </div>
             </div>
@@ -514,7 +544,22 @@ <h3><span class="fi-check"></span> Gratulacje!</h3>
             {% else %}
                 {% if glowny_autor %}
                     <div class="callout primary">
-                        <h3>Główny rekord autora{% if first_candidate %}{% if first_candidate.scan_mode == "pbn" %}<span class="deduplikator-autorow__badge deduplikator-autorow__badge--pbn">PBN</span>{% else %}<span class="deduplikator-autorow__badge deduplikator-autorow__badge--general">OGÓLNY</span>{% endif %}{% endif %}</h3>
+                        <h3 class="deduplikator-autorow__main-record-title">
+                            <span>Główny rekord autora</span>
+                            {% if first_candidate %}
+                                {% if first_candidate.scan_mode == "pbn" %}
+                                    <span class="deduplikator-autorow__badge deduplikator-autorow__badge--pbn"
+                                          title="Duplikat wykryty na podstawie danych PBN">
+                                        <span class="fi-link" aria-hidden="true"></span> PBN
+                                    </span>
+                                {% else %}
+                                    <span class="deduplikator-autorow__badge deduplikator-autorow__badge--general"
+                                          title="Duplikat wykryty przez analizę ogólną">
+                                        <span class="fi-magnifying-glass" aria-hidden="true"></span> ogólny
+                                    </span>
+                                {% endif %}
+                            {% endif %}
+                        </h3>
                         <div class="grid-x grid-padding-x">
                             <div class="cell medium-4">
                                 <strong>Imię i nazwisko:</strong><br>
@@ -596,62 +641,116 @@ <h4>Dyscypliny głównego autora (2022-2025):</h4>
                         </div>
 
                         {% if glowne_publikacje %}
-                            <h4>Publikacje głównego autora:{% if glowne_publikacje_year_range %}
+                            <h4>{% if glowne_publikacje_count == 1 %}Publikacja{% else %}Publikacje{% endif %} głównego autora:{% if glowne_publikacje_year_range %}
                                 <small class="deduplikator-autorow__year-hint">({{ glowne_publikacje_year_range }})</small>{% endif %}</h4>
                             <div class="deduplikator-autorow__publication-list">
                                 {% for publikacja in glowne_publikacje %}
                                     <div class="deduplikator-autorow__publication-item">
-                                        <small><strong>
+                                        <small>
                                             <a href="{% url 'bpp:browse_praca_old' publikacja.content_type.model publikacja.pk.1 %}"
                                                target="_blank" class="deduplikator-autorow__publication-link">
-                                                {{ publikacja.tytul_oryginalny }}
+                                                {{ publikacja.opis_bibliograficzny_cache|truncatewords_html:30|safe }}
                                             </a>
-                                            {% if publikacja.rok %}
-                                                <span class="deduplikator-autorow__publication-year"> ({{ publikacja.rok }})</span>{% endif %}
-                                        </strong></small><br>
-                                        <small>{{ publikacja.opis_bibliograficzny_cache|truncatewords:20 }}</small>
+                                        </small>
                                     </div>
                                 {% endfor %}
                             </div>
                         {% endif %}
                     </div>
 
-                    <div class="grid-x grid-padding-x align-middle deduplikator-autorow__duplicates-header">
-                        <div class="cell auto">
-                            <h3 class="deduplikator-autorow__duplicates-title">Możliwe duplikaty ({{ duplikaty_z_publikacjami|length }})</h3>
-                        </div>
-                        {% if duplikaty_z_publikacjami|length > 0 %}
-                        <div class="cell shrink">
-                            <div id="merge-all-progress" class="deduplikator-autorow__merge-progress">
-                                <span class="label warning">
-                                    <span class="fi-loop deduplikator-autorow__spinner"></span>
-                                    Scalanie... <span id="merge-progress-text">0/0</span> duplikatów
-                                </span>
+                    <div id="mozliwe-duplikaty" class="deduplikator-autorow__duplicates-header">
+                        {# Wiersz 1: nagłówek + wskaźnik postępu + przyciski scalania. #}
+                        <div class="grid-x grid-padding-x align-middle">
+                            <div class="cell auto">
+                                <h3 class="deduplikator-autorow__duplicates-title">
+                                    Możliwe duplikaty
+                                    {% if confidence_band != 'all' and duplikaty_z_publikacjami|length != candidates_total_for_main %}
+                                        ({{ duplikaty_z_publikacjami|length }} z {{ candidates_total_for_main }})
+                                    {% else %}
+                                        ({{ duplikaty_z_publikacjami|length }})
+                                    {% endif %}
+                                </h3>
                             </div>
+                            {% if duplikaty_z_publikacjami|length > 0 %}
+                            {# Wskaźnik postępu - ukryty domyślnie, JS pokazuje go w trakcie scalania. #}
+                            <div class="cell shrink" id="merge-all-progress-cell" hidden style="display: none;">
+                                <div id="merge-all-progress" class="deduplikator-autorow__merge-progress">
+                                    <span class="label warning">
+                                        <span class="fi-loop deduplikator-autorow__spinner"></span>
+                                        Scalanie... <span id="merge-progress-text">0/0</span> duplikatów
+                                    </span>
+                                </div>
+                            </div>
+                            <div class="cell shrink">
+                                <div class="button-group"
+                                     {% if not allow_merge_all %}data-low-confidence-names="{{ low_confidence_names|join:'||' }}"{% endif %}>
+                                    <button type="button"
+                                            id="merge-all-btn"
+                                            class="button small success{% if not allow_merge_all %} deduplikator-autorow__merge-all-btn--disabled{% endif %}"
+                                            data-merge-all="true"
+                                            data-skip-pbn="false"
+                                            {% if not allow_merge_all %}aria-disabled="true"{% endif %}>
+                                        Scal wszystkie automatycznie
+                                    </button>
+                                    <button type="button"
+                                            id="merge-all-no-pbn-btn"
+                                            class="button small warning{% if not allow_merge_all %} deduplikator-autorow__merge-all-btn--disabled{% endif %}"
+                                            data-merge-all="true"
+                                            data-skip-pbn="true"
+                                            {% if not allow_merge_all %}aria-disabled="true"{% endif %}>
+                                        Scal wszystkie, nie wysyłaj do PBN
+                                    </button>
+                                </div>
+                            </div>
+                            {% endif %}
                         </div>
-                        <div class="cell shrink">
-                            <div class="button-group">
-                                <button type="button"
-                                        id="merge-all-btn"
-                                        class="button small success"
-                                        data-merge-all="true"
-                                        data-skip-pbn="false">
-                                    Scal wszystkie automatycznie
-                                </button>
-                                <button type="button"
-                                        id="merge-all-no-pbn-btn"
-                                        class="button small warning"
-                                        data-merge-all="true"
-                                        data-skip-pbn="true">
-                                    Scal wszystkie, nie wysyłaj do PBN
-                                </button>
+
+                        {# Row 2: per-author confidence filter — Wszyscy / Pewniaki / Słabe. #}
+                        {# The counters cover ONLY the candidates of the current main #}
+                        {# author; the whole database is not scanned. #}
+                        {# The row is rendered only when both categories have candidates; #}
+                        {# when just one category is > 0 there is nothing to filter. #}
+                        {% if candidates_high_for_main > 0 and candidates_low_for_main > 0 %}
+                        <div class="grid-x grid-padding-x">
+                            <div class="cell shrink">
+                                <div class="button-group deduplikator-autorow__confidence-buttons" role="tablist" aria-label="Filtr pewności (per autor)">{# links carry skip_count in search mode too, so changing the band stays on the same main author; the surname is urlencoded #}
+                                    <a href="?{% if search_lastname %}search_lastname={{ search_lastname|urlencode }}&amp;{% endif %}skip_count={{ skip_count }}&amp;mode={{ mode }}&amp;confidence=all#mozliwe-duplikaty"
+                                       role="tab"
+                                       aria-selected="{% if confidence_band == 'all' %}true{% else %}false{% endif %}"
+                                       class="button small{% if confidence_band == 'all' %} primary{% else %} hollow secondary{% endif %}"
+                                       title="Pokaż wszystkich kandydatów tego autora">
+                                        Wszyscy
+                                        <span class="deduplikator-autorow__mode-count">{{ candidates_total_for_main }}</span>
+                                    </a>
+                                    <a href="?{% if search_lastname %}search_lastname={{ search_lastname|urlencode }}&amp;{% endif %}skip_count={{ skip_count }}&amp;mode={{ mode }}&amp;confidence=high#mozliwe-duplikaty"
+                                       role="tab"
+                                       aria-selected="{% if confidence_band == 'high' %}true{% else %}false{% endif %}"
+                                       class="button small{% if confidence_band == 'high' %} primary{% else %} hollow secondary{% endif %}"
+                                       title="Tylko kandydaci z pewnością ≥ 50%">
+                                        <span class="fi-checkbox" aria-hidden="true"></span>
+                                        Pewniaki
+                                        <span class="deduplikator-autorow__mode-count">{{ candidates_high_for_main }}</span>
+                                    </a>
+                                    <a href="?{% if search_lastname %}search_lastname={{ search_lastname|urlencode }}&amp;{% endif %}skip_count={{ skip_count }}&amp;mode={{ mode }}&amp;confidence=low#mozliwe-duplikaty"
+                                       role="tab"
+                                       aria-selected="{% if confidence_band == 'low' %}true{% else %}false{% endif %}"
+                                       class="button small{% if confidence_band == 'low' %} primary{% else %} hollow secondary{% endif %}"
+                                       title="Tylko kandydaci z pewnością < 50%">
+                                        <span class="fi-alert" aria-hidden="true"></span>
+                                        Słabe
+                                        <span class="deduplikator-autorow__mode-count">{{ candidates_low_for_main }}</span>
+                                    </a>
+                                </div>
                             </div>
                         </div>
                         {% endif %}
                     </div>
 
                     {% for duplikat_data in duplikaty_z_publikacjami %}
-                        <div id="duplicate-card-{{ duplikat_data.autor.pk }}" class="callout deduplikator-autorow__duplicate-card {% if duplikat_data.analiza.pewnosc > 50 %}success{% elif duplikat_data.analiza.pewnosc > 0 %}warning{% else %}alert{% endif %}">
+                        <div id="duplicate-card-{{ duplikat_data.autor.pk }}"
+                             data-pewnosc="{{ duplikat_data.analiza.pewnosc }}"
+                             data-author-name="{{ duplikat_data.autor }}"
+                             class="callout deduplikator-autorow__duplicate-card {% if duplikat_data.analiza.pewnosc > 50 %}success{% elif duplikat_data.analiza.pewnosc > 0 %}warning{% else %}alert{% endif %}">
                             <div class="grid-x grid-padding-x">
                                 <div class="cell medium-8">
                                     <div class="deduplikator-autorow__duplicate-header">
@@ -698,105 +797,154 @@ <h3 class="deduplikator-autorow__duplicates-title">Możliwe duplikaty ({{ duplik
 
                                     <div class="deduplikator-autorow__reasons-section">
                                         <h5>Powody podobieństwa:</h5>
-                                        <ul>
+                                        <div class="deduplikator-autorow__reasons-chips">
                                             {% for powod in duplikat_data.analiza.powody_podobienstwa %}
-                                                <li>{{ powod }}</li>
+                                                <span class="deduplikator-autorow__reason-chip deduplikator-autorow__reason-chip--{{ powod.tone }}"
+                                                      title="{{ powod.text }}">
+                                                    <span class="{{ powod.icon }}" aria-hidden="true"></span>
+                                                    <span class="deduplikator-autorow__reason-chip-text">{{ powod.text }}</span>
+                                                </span>
                                                 {% empty %}
-                                                <li>Brak szczegółowych powodów</li>
+                                                <span class="deduplikator-autorow__reason-chip deduplikator-autorow__reason-chip--weak">
+                                                    <span class="fi-info" aria-hidden="true"></span>
+                                                    Brak szczegółowych powodów
+                                                </span>
                                             {% endfor %}
-                                        </ul>
+                                        </div>
                                     </div>
                                 </div>
 
-                                <div class="cell medium-4 text-right">
-                                    <div class="button-group stacked">
-                                        <button type="button"
-                                                id="merge-with-disc-btn-{{ duplikat_data.autor.pk }}"
-                                                class="button small success deduplikator-autorow__button--full-width"
-                                                data-merge-action="with-discipline"
-                                                data-main-author="{{ glowny_autor.pk }}"
-                                                data-duplicate-author="{{ duplikat_data.autor.pk }}"
-                                                data-candidate-id="{% if duplikat_data.candidate_id %}{{ duplikat_data.candidate_id }}{% endif %}">
-                                            <span class="button-text">Scal + ustaw dyscyplinę</span>
-                                            <span class="spinner" style="display: none;">
-                                                <i class="fi-loop deduplikator-autorow__spinner"></i> Scalanie...
-                                            </span>
-                                        </button>
-                                        <button type="button"
-                                                id="merge-with-subdisc-btn-{{ duplikat_data.autor.pk }}"
-                                                class="button small success deduplikator-autorow__button--full-width"
-                                                data-merge-action="with-subdiscipline"
-                                                data-main-author="{{ glowny_autor.pk }}"
-                                                data-duplicate-author="{{ duplikat_data.autor.pk }}"
-                                                data-candidate-id="{% if duplikat_data.candidate_id %}{{ duplikat_data.candidate_id }}{% endif %}">
-                                            <span class="button-text">Scal + ustaw subdyscyplinę</span>
-                                            <span class="spinner" style="display: none;">
-                                                <i class="fi-loop deduplikator-autorow__spinner"></i> Scalanie...
-                                            </span>
-                                        </button>
-                                        <button type="button"
-                                                id="merge-btn-{{ duplikat_data.autor.pk }}"
-                                                class="button small primary deduplikator-autorow__button--full-width"
-                                                data-merge-action="standard"
-                                                data-main-author="{{ glowny_autor.pk }}"
-                                                data-duplicate-author="{{ duplikat_data.autor.pk }}"
-                                                data-candidate-id="{% if duplikat_data.candidate_id %}{{ duplikat_data.candidate_id }}{% endif %}">
-                                            <span class="button-text">Scal bez zmian dyscyplin</span>
-                                            <span class="spinner" style="display: none;">
-                                                <i class="fi-loop deduplikator-autorow__spinner"></i> Scalanie...
-                                            </span>
-                                        </button>
-                                        <button type="button"
-                                                id="merge-no-pbn-btn-{{ duplikat_data.autor.pk }}"
-                                                class="button small warning deduplikator-autorow__button--full-width"
-                                                data-merge-action="no-pbn"
-                                                data-main-author="{{ glowny_autor.pk }}"
-                                                data-duplicate-author="{{ duplikat_data.autor.pk }}"
-                                                data-candidate-id="{% if duplikat_data.candidate_id %}{{ duplikat_data.candidate_id }}{% endif %}">
-                                            <span class="button-text">Scal, nie wysyłaj do PBN</span>
-                                            <span class="spinner" style="display: none;">
-                                                <i class="fi-loop deduplikator-autorow__spinner"></i> Scalanie...
-                                            </span>
-                                        </button>
-                                        <a href="#"
-                                           class="button small secondary deduplikator-autorow__button--full-width"
-                                           data-open-wydawnictwo="ciagle"
-                                           data-autor-id="{{ duplikat_data.autor.pk }}">
-                                            <span class="fi-pencil"></span> Pokaż wyd. ciągłe
-                                        </a>
-                                        <a href="#"
-                                           class="button small secondary deduplikator-autorow__button--full-width"
-                                           data-open-wydawnictwo="zwarte"
-                                           data-autor-id="{{ duplikat_data.autor.pk }}">
-                                            <span class="fi-pencil"></span> Pokaż wyd. zwarte
-                                        </a>
-                                        {% if duplikat_data.candidate_id %}
-                                        <form method="post" action="{% url 'deduplikator_autorow:mark_candidate_not_duplicate' %}"
-                                              class="deduplikator-autorow__form--block">
-                                            {% csrf_token %}
-                                            <input type="hidden" name="candidate_id"
-                                                   value="{{ duplikat_data.candidate_id }}">
-                                            <button type="submit" class="button small alert deduplikator-autorow__button--full-width">
-                                                Nie są duplikatami
+                                <div class="cell medium-4 deduplikator-autorow__actions">
+                                    {# 1. PODGLĄD — bez konsekwencji, tylko otwiera nowe karty #}
+                                    <div class="deduplikator-autorow__action-group">
+                                        <h6 class="deduplikator-autorow__action-group-title">
+                                            <span class="fi-eye" aria-hidden="true"></span> Podgląd
+                                        </h6>
+                                        <div class="button-group stacked">
+                                            <a href="#"
+                                               class="button small secondary deduplikator-autorow__button--full-width"
+                                               data-open-wydawnictwo="ciagle"
+                                               data-autor-id="{{ duplikat_data.autor.pk }}">
+                                                <span class="fi-page-multiple"></span> Pokaż wyd. ciągłe
+                                            </a>
+                                            <a href="#"
+                                               class="button small secondary deduplikator-autorow__button--full-width"
+                                               data-open-wydawnictwo="zwarte"
+                                               data-autor-id="{{ duplikat_data.autor.pk }}">
+                                                <span class="fi-book"></span> Pokaż wyd. zwarte
+                                            </a>
+                                            {# Admin change URL resolved by name, so it follows wherever the admin site is mounted #}
+                                            <a href="{% url 'admin:bpp_autor_change' duplikat_data.autor.pk %}" target="_blank"
+                                               class="button small secondary deduplikator-autorow__button--full-width">
+                                                <span class="fi-pencil"></span> Otwórz w redagowaniu
+                                            </a>
+                                            <a href="{% url 'bpp:browse_autor' duplikat_data.autor.pk %}"
+                                               target="_blank"
+                                               class="button small secondary deduplikator-autorow__button--full-width">
+                                                <span class="fi-link"></span> Otwórz na stronie głównej
+                                            </a>
+                                            {% if duplikat_data.autor.pbn_uid_id %}
+                                            <a href="{{ duplikat_data.autor.link_do_pbn }}"
+                                               target="_blank"
+                                               class="button small secondary deduplikator-autorow__button--full-width">
+                                                <span class="fi-arrow-right"></span> Otwórz w PBN
+                                            </a>
+                                            {% endif %}
+                                        </div>
+                                    </div>
+
+                                    {# 2. DECYZJA — czy w ogóle są duplikatami? #}
+                                    <div class="deduplikator-autorow__action-group">
+                                        <h6 class="deduplikator-autorow__action-group-title">
+                                            <span class="fi-checkbox" aria-hidden="true"></span> Decyzja
+                                        </h6>
+                                        <div class="button-group stacked">
+                                            {% if duplikat_data.candidate_id %}
+                                            <button type="button"
+                                                    class="button small alert deduplikator-autorow__button--full-width"
+                                                    data-mark-not-duplicate="candidate"
+                                                    data-candidate-id="{{ duplikat_data.candidate_id }}"
+                                                    data-duplicate-author="{{ duplikat_data.autor.pk }}">
+                                                <span class="fi-x" aria-hidden="true"></span>
+                                                Nie jest duplikatem głównego autora
                                             </button>
-                                        </form>
-                                        {% else %}
-                                        <form method="post" action="{% url 'deduplikator_autorow:mark_non_duplicate' %}"
-                                              class="deduplikator-autorow__form--block">
-                                            {% csrf_token %}
-                                            <input type="hidden" name="scientist_pk"
-                                                   value="{{ duplikat_data.autor.pk }}">
-                                            <button type="submit" class="button small alert deduplikator-autorow__button--full-width">
-                                                Nie są duplikatami
+                                            {% else %}
+                                            <button type="button"
+                                                    class="button small alert deduplikator-autorow__button--full-width"
+                                                    data-mark-not-duplicate="autor"
+                                                    data-scientist-pk="{{ duplikat_data.autor.pk }}"
+                                                    data-duplicate-author="{{ duplikat_data.autor.pk }}">
+                                                <span class="fi-x" aria-hidden="true"></span>
+                                                Nie jest duplikatem głównego autora
                                             </button>
-                                        </form>
-                                        {% endif %}
-                                        {% if duplikat_data.publikacje_count == 0 %}
+                                            {% endif %}
+                                            {% if duplikat_data.publikacje_count == 0 %}
                                             <button class="button small alert deduplikator-autorow__button--full-width"
                                                     data-delete-author="{{ duplikat_data.autor.pk }}">
+                                                <span class="fi-trash" aria-hidden="true"></span>
                                                 Usuń autora (brak publikacji)
                                             </button>
-                                        {% endif %}
+                                            {% endif %}
+                                        </div>
+                                    </div>
+
+                                    {# 3. SCALANIE — co zrobić, jeśli to duplikat #}
+                                    <div class="deduplikator-autorow__action-group">
+                                        <h6 class="deduplikator-autorow__action-group-title">
+                                            <span class="fi-arrows-compress" aria-hidden="true"></span> Scalanie
+                                        </h6>
+                                        <div class="button-group stacked">
+                                            {% if glowny_autor_dyscypliny %}
+                                            <button type="button"
+                                                    id="merge-with-disc-btn-{{ duplikat_data.autor.pk }}"
+                                                    class="button small success deduplikator-autorow__button--full-width"
+                                                    data-merge-action="with-discipline"
+                                                    data-main-author="{{ glowny_autor.pk }}"
+                                                    data-duplicate-author="{{ duplikat_data.autor.pk }}"
+                                                    data-candidate-id="{% if duplikat_data.candidate_id %}{{ duplikat_data.candidate_id }}{% endif %}">
+                                                <span class="button-text">Scal + ustaw dyscyplinę</span>
+                                                <span class="spinner" style="display: none;">
+                                                    <i class="fi-loop deduplikator-autorow__spinner"></i> Scalanie...
+                                                </span>
+                                            </button>
+                                            <button type="button"
+                                                    id="merge-with-subdisc-btn-{{ duplikat_data.autor.pk }}"
+                                                    class="button small success deduplikator-autorow__button--full-width"
+                                                    data-merge-action="with-subdiscipline"
+                                                    data-main-author="{{ glowny_autor.pk }}"
+                                                    data-duplicate-author="{{ duplikat_data.autor.pk }}"
+                                                    data-candidate-id="{% if duplikat_data.candidate_id %}{{ duplikat_data.candidate_id }}{% endif %}">
+                                                <span class="button-text">Scal + ustaw subdyscyplinę</span>
+                                                <span class="spinner" style="display: none;">
+                                                    <i class="fi-loop deduplikator-autorow__spinner"></i> Scalanie...
+                                                </span>
+                                            </button>
+                                            {% endif %}
+                                            <button type="button"
+                                                    id="merge-btn-{{ duplikat_data.autor.pk }}"
+                                                    class="button small primary deduplikator-autorow__button--full-width"
+                                                    data-merge-action="standard"
+                                                    data-main-author="{{ glowny_autor.pk }}"
+                                                    data-duplicate-author="{{ duplikat_data.autor.pk }}"
+                                                    data-candidate-id="{% if duplikat_data.candidate_id %}{{ duplikat_data.candidate_id }}{% endif %}">
+                                                <span class="button-text">Scal bez zmian dyscyplin</span>
+                                                <span class="spinner" style="display: none;">
+                                                    <i class="fi-loop deduplikator-autorow__spinner"></i> Scalanie...
+                                                </span>
+                                            </button>
+                                            <button type="button"
+                                                    id="merge-no-pbn-btn-{{ duplikat_data.autor.pk }}"
+                                                    class="button small warning deduplikator-autorow__button--full-width"
+                                                    data-merge-action="no-pbn"
+                                                    data-main-author="{{ glowny_autor.pk }}"
+                                                    data-duplicate-author="{{ duplikat_data.autor.pk }}"
+                                                    data-candidate-id="{% if duplikat_data.candidate_id %}{{ duplikat_data.candidate_id }}{% endif %}">
+                                                <span class="button-text">Scal, nie wysyłaj do PBN</span>
+                                                <span class="spinner" style="display: none;">
+                                                    <i class="fi-loop deduplikator-autorow__spinner"></i> Scalanie...
+                                                </span>
+                                            </button>
+                                        </div>
                                     </div>
                                 </div>
                             </div>
@@ -846,23 +994,19 @@ <h5 class="deduplikator-autorow__section-title">Dyscypliny duplikatu (2022-2025)
                             </div>
 
                             {% if duplikat_data.publikacje %}
-                                <h5 class="deduplikator-autorow__section-title">Publikacje duplikatu:
+                                <h5 class="deduplikator-autorow__section-title">{% if duplikat_data.publikacje_count == 1 %}Publikacja{% else %}Publikacje{% endif %} duplikatu:
                                     {% if duplikat_data.publikacje_year_range %}
                                         <small class="deduplikator-autorow__year-hint">({{ duplikat_data.publikacje_year_range }})</small>{% endif %}
                                 </h5>
                                 <div class="deduplikator-autorow__publication-list deduplikator-autorow__publication-list--short">
                                     {% for publikacja in duplikat_data.publikacje %}
                                         <div class="deduplikator-autorow__publication-item--duplicate">
-                                            <small><strong>
+                                            <small class="deduplikator-autorow__publication-description">
                                                 <a href="{% url 'bpp:browse_praca' publikacja.pk.0 publikacja.pk.1 %}"
                                                    target="_blank" class="deduplikator-autorow__publication-link--muted">
-                                                    {{ publikacja.tytul_oryginalny }}
+                                                    {{ publikacja.opis_bibliograficzny_cache|truncatewords_html:25|safe }}
                                                 </a>
-                                                {% if publikacja.rok %}
-                                                    <span class="deduplikator-autorow__publication-year"> ({{ publikacja.rok }})</span>{% endif %}
-                                            </strong></small><br>
-                                            <small class="deduplikator-autorow__publication-description">
-                                                {{ publikacja.opis_bibliograficzny_cache|truncatewords:15 }}</small>
+                                            </small>
                                         </div>
                                     {% endfor %}
                                 </div>
@@ -965,8 +1109,25 @@ <h5 class="deduplikator-autorow__section-title">Publikacje duplikatu:
         document.addEventListener('DOMContentLoaded', function() {
             adjustSidebarPosition();
             relocateMessages(); // Move messages to respect sidebar layout
+            scrollToHashTarget(); // After a confidence-filter click, scroll to #mozliwe-duplikaty
         });
 
+        // After navigating to a URL with a fragment (e.g. ?confidence=high#mozliwe-duplikaty)
+        // the browser jumps to the element, but (per the original note) the sticky BPP header
+        // hides it. Use bpp.scrollToVisible when available, else native smooth scrollIntoView.
+        function scrollToHashTarget() {
+            var fragment = window.location.hash;
+            if (!fragment || fragment.length < 2) return;
+            var anchorTarget = document.getElementById(fragment.substring(1));
+            if (!anchorTarget) return;
+            var hasHelper = !!window.bpp && typeof window.bpp.scrollToVisible === 'function';
+            if (!hasHelper) {
+                anchorTarget.scrollIntoView({ behavior: 'smooth', block: 'start' });
+            } else {
+                window.bpp.scrollToVisible(anchorTarget);
+            }
+        }
+
         // Adjust on window resize and scroll
         window.addEventListener('resize', adjustSidebarPosition);
         window.addEventListener('scroll', adjustSidebarPosition);
@@ -995,10 +1156,10 @@ <h5 class="deduplikator-autorow__section-title">Publikacje duplikatu:
                 button.querySelector('.spinner').style.display = 'inline';
             }
 
-            // Build request data
+            // Build request data — use Autor PKs directly (template provides Autor PKs).
             var requestData = {
-                'main_scientist_id': mainAuthorId,
-                'duplicate_scientist_id': duplicateAuthorId
+                'main_autor_id': mainAuthorId,
+                'duplicate_autor_id': duplicateAuthorId
             };
             if (candidateId) {
                 requestData['candidate_id'] = candidateId;
@@ -1183,6 +1344,185 @@ <h5 class="deduplikator-autorow__section-title">Publikacje duplikatu:
             e.preventDefault();
         });
 
+        // Autocomplete for the last-name search field: 200 ms debounce, suggestions come
+        // from the lastname_suggestions endpoint (original note: top-10 last names of
+        // pending DuplicateCandidate). Under 2 characters the datalist is cleared.
+        (function setupLastnameAutocomplete() {
+            var input = document.getElementById('search_lastname');
+            var datalist = document.getElementById('lastname-suggestions');
+            if (!input || !datalist) return;
+
+            var debounceTimer = null;
+            var lastQuery = '';
+
+            function setOptions(names) {
+                while (datalist.firstChild) datalist.removeChild(datalist.firstChild);
+                names.forEach(function(name) {
+                    var opt = document.createElement('option');
+                    opt.value = name;
+                    datalist.appendChild(opt);
+                });
+            }
+
+            input.addEventListener('input', function() {
+                var q = input.value.trim();
+                if (q.length < 2) {
+                    clearTimeout(debounceTimer); // cancel a queued lookup so it cannot refill the list
+                    setOptions([]);
+                    lastQuery = '';
+                    return;
+                }
+                if (q === lastQuery) return;
+                lastQuery = q;
+                clearTimeout(debounceTimer);
+                debounceTimer = setTimeout(function() {
+                    fetch('{% url "deduplikator_autorow:lastname_suggestions" %}?q=' + encodeURIComponent(q), {
+                        headers: { 'X-Requested-With': 'XMLHttpRequest' }
+                    })
+                    .then(function(r) { return r.json(); })
+                    .then(function(data) {
+                        if (q === lastQuery && data && Array.isArray(data.results)) { // ignore stale replies
+                            setOptions(data.results);
+                        }
+                    })
+                    .catch(function(err) {
+                        console.error('Autocomplete failed:', err);
+                    });
+                }, 200);
+            });
+        })();
+
+        // After a duplicate card is removed/merged, recompute whether the "merge all"
+        // buttons ([data-merge-all]) may be active: they are enabled only when no
+        // remaining card has data-pewnosc below MIN_PEWNOSC_THRESHOLD and are marked
+        // aria-disabled otherwise. Does nothing when no duplicate cards are left.
+        var MIN_PEWNOSC_THRESHOLD = {{ MIN_PEWNOSC_DO_WYSWIETLENIA|default:50 }};
+        function refreshMergeAllAvailability() {
+            var cards = document.querySelectorAll('[id^="duplicate-card-"]');
+            if (cards.length === 0) return;
+
+            // "name (NN%)" labels of below-threshold cards; stored '||'-joined in
+            // data-low-confidence-names. Cards with a non-numeric data-pewnosc are skipped.
+            var lowConfidence = [];
+            cards.forEach(function(card) {
+                var p = parseInt(card.dataset.pewnosc, 10);
+                if (isNaN(p)) return;
+                if (p < MIN_PEWNOSC_THRESHOLD) {
+                    lowConfidence.push((card.dataset.authorName || 'autor') + ' (' + p + '%)');
+                }
+            });
+
+            var allowMergeAll = lowConfidence.length === 0;
+            var buttons = document.querySelectorAll('[data-merge-all]');
+            var group = document.querySelector('[data-low-confidence-names]');
+
+            buttons.forEach(function(btn) {
+                if (allowMergeAll) {
+                    btn.classList.remove('deduplikator-autorow__merge-all-btn--disabled');
+                    btn.removeAttribute('aria-disabled');
+                } else {
+                    btn.classList.add('deduplikator-autorow__merge-all-btn--disabled');
+                    btn.setAttribute('aria-disabled', 'true');
+                }
+            });
+
+            if (group) {
+                if (allowMergeAll) {
+                    group.removeAttribute('data-low-confidence-names');
+                } else {
+                    group.setAttribute('data-low-confidence-names', lowConfidence.join('||'));
+                }
+            } else if (!allowMergeAll && buttons.length > 0) {
+                // No element carries data-low-confidence-names yet: attach it to the first
+                // button's closest .button-group so the names can be read from there.
+                var bg = buttons[0].closest('.button-group');
+                if (bg) bg.setAttribute('data-low-confidence-names', lowConfidence.join('||'));
+            }
+        }
+
+        // Event delegation for [data-mark-not-duplicate] buttons: AJAX POST, then fade out the card
+        document.addEventListener('click', function(e) {
+            var target = e.target.closest('[data-mark-not-duplicate]');
+            if (!target) return;
+            e.preventDefault();
+
+            var kind = target.dataset.markNotDuplicate;
+            var duplicateAuthorId = target.dataset.duplicateAuthor;
+            var card = document.getElementById('duplicate-card-' + duplicateAuthorId);
+
+            var url, payload;
+            if (kind === 'candidate') {
+                url = '{% url "deduplikator_autorow:mark_candidate_not_duplicate" %}';
+                payload = { candidate_id: target.dataset.candidateId };
+            } else {
+                url = '{% url "deduplikator_autorow:mark_non_duplicate" %}';
+                payload = { scientist_pk: target.dataset.scientistPk };
+            }
+            // typeof guard: a bare reference to an undeclared global throws, skipping the alert() fallback
+            var notifier = (typeof bppNotifications !== 'undefined') ? bppNotifications : null;
+            target.disabled = true;
+            $.ajax({
+                url: url,
+                type: 'POST',
+                dataType: 'json',
+                headers: {
+                    'X-CSRFToken': '{{ csrf_token }}',
+                    'X-Requested-With': 'XMLHttpRequest'
+                },
+                data: payload,
+                success: function(response) {
+                    if (response && response.success) {
+                        if (notifier && notifier.addMessage && response.message) {
+                            notifier.addMessage({
+                                cssClass: 'success',
+                                text: response.message,
+                                sound: false
+                            });
+                        }
+                        if (card) {
+                            $(card).fadeOut(400, function() {
+                                $(this).remove();
+                                var countElement = document.querySelector('.deduplikator-autorow__duplicates-title');
+                                if (countElement) {
+                                    var match = countElement.textContent.match(/\((\d+)\)/);
+                                    if (match) {
+                                        var newCount = parseInt(match[1], 10) - 1;
+                                        countElement.textContent = 'Możliwe duplikaty (' + newCount + ')';
+                                        if (newCount === 0) {
+                                            setTimeout(function() { window.location.reload(); }, 500);
+                                            return;
+                                        }
+                                    }
+                                }
+                                refreshMergeAllAvailability();
+                            });
+                        }
+                    } else {
+                        target.disabled = false;
+                        var msg = (response && response.message) || 'Operacja nie powiodła się.';
+                        if (notifier && notifier.addMessage) {
+                            notifier.addMessage({ cssClass: 'alert', text: msg, sound: true });
+                        } else {
+                            alert(msg);
+                        }
+                    }
+                },
+                error: function(xhr) {
+                    target.disabled = false;
+                    var msg = 'Błąd serwera podczas oznaczania jako nie-duplikat.';
+                    try {
+                        var resp = xhr.responseJSON || JSON.parse(xhr.responseText);
+                        if (resp && resp.message) msg = resp.message;
+                    } catch (err) { /* ignore parse */ }
+                    if (notifier && notifier.addMessage) {
+                        notifier.addMessage({ cssClass: 'alert', text: msg, sound: true });
+                    } else {
+                        alert(msg);
+                    }
+                }
+            });
+        });
+
         // Event delegation for delete author (data-delete-author)
         document.addEventListener('click', function(e) {
             var target = e.target.closest('[data-delete-author]');
@@ -1218,10 +1558,34 @@ <h5 class="deduplikator-autorow__section-title">Publikacje duplikatu:
         document.addEventListener('click', function(e) {
             var target = e.target.closest('[data-merge-all]');
             if (!target) return;
+            e.preventDefault();
+
+            // Wyszarzony przycisk (są kandydaci z pewnością < 50%) -> komunikat,
+            // zamiast uruchamiać scalanie zbiorcze.
+            if (target.getAttribute('aria-disabled') === 'true') {
+                var group = target.closest('[data-low-confidence-names]');
+                var namesAttr = group ? group.dataset.lowConfidenceNames : '';
+                var names = namesAttr ? namesAttr.split('||') : [];
+                var listHtml = names.length
+                    ? '\n\nAutorzy poniżej progu 50%:\n  • ' + names.join('\n  • ')
+                    : '';
+                alert(
+                    'Scalanie zbiorcze jest wyłączone dla tego głównego autora, '
+                    + 'ponieważ co najmniej jeden potencjalny duplikat ma pewność '
+                    + 'poniżej 50%.' + listHtml + '\n\n'
+                    + 'Co możesz zrobić, żeby przyciski wróciły:\n'
+                    + '  • Dodaj niepewnych autorów do "ignorowanych" (przycisk '
+                    + '"Ignoruj autora" w nagłówku),\n'
+                    + '  • lub scal/odrzuć ich ręcznie (przyciski w sekcji '
+                    + '"Decyzja" / "Scalanie" przy każdej karcie).\n\n'
+                    + 'Scalanie zbiorcze jest dostępne tylko, gdy WSZYSCY kandydaci '
+                    + 'mają pewność ≥ 50%.'
+                );
+                return;
+            }
 
             var skipPbn = target.dataset.skipPbn === 'true';
             handleMergeAll(skipPbn);
-            e.preventDefault();
         });
 
         function handleMergeAll(skipPbn) {
@@ -1256,6 +1620,11 @@ <h5 class="deduplikator-autorow__section-title">Publikacje duplikatu:
             }
 
             // Show progress, disable buttons
+            var progressCell = document.getElementById('merge-all-progress-cell');
+            if (progressCell) {
+                progressCell.removeAttribute('hidden');
+                progressCell.style.display = '';
+            }
             document.getElementById('merge-all-progress').style.display = 'block';
             document.getElementById('merge-all-btn').disabled = true;
             document.getElementById('merge-all-no-pbn-btn').disabled = true;
@@ -1291,8 +1660,8 @@ <h5 class="deduplikator-autorow__section-title">Publikacje duplikatu:
                     type: 'GET',
                     dataType: 'json',
                     data: {
-                        'main_scientist_id': mainAuthorId,
-                        'duplicate_scientist_id': duplicateId,
+                        'main_autor_id': mainAuthorId,
+                        'duplicate_autor_id': duplicateId,
                         'skip_pbn': skipPbn
                     },
                     complete: function(xhr, status) {
@@ -1321,6 +1690,11 @@ <h5 class="deduplikator-autorow__section-title">Publikacje duplikatu:
 
             function handleMergeAllError(response, duplicateId) {
                 // Hide progress
+                var progressCell = document.getElementById('merge-all-progress-cell');
+                if (progressCell) {
+                    progressCell.setAttribute('hidden', '');
+                    progressCell.style.display = 'none';
+                }
                 document.getElementById('merge-all-progress').style.display = 'none';
 
                 // Re-enable main buttons
diff --git a/src/deduplikator_autorow/views.py b/src/deduplikator_autorow/views.py
index d027d7537..646588e96 100644
--- a/src/deduplikator_autorow/views.py
+++ b/src/deduplikator_autorow/views.py
@@ -35,6 +35,7 @@
     znajdz_pierwszego_autora_z_duplikatami,
 )
 from .utils.counters import get_latest_usable_scan
+from .utils.reason_display import enrich_reasons
 
 # Minimalny próg pewności do wyświetlania duplikatów
 # Duplikaty z pewnością poniżej tego progu nie będą pokazywane
@@ -172,6 +173,12 @@ def _build_context_from_candidate(candidate, glowny_autor):
         Rekord.objects.prace_autora(candidate.duplicate_autor)
     )
 
+    # Display percent: ułamek 0..1 → procenty, zaokrąglone i przycięte do 0..100.
+    # Surowy confidence_score może być < 0 lub > 100 i historycznie pokazywał
+    # użytkownikom wartości w rodzaju 140% — confidence_percent jest jedynym
+    # polem, które gwarantuje sensowny zakres do prezentacji.
+    pewnosc_display = max(0, min(100, round((candidate.confidence_percent or 0) * 100)))
+
     return {
         "autor": candidate.duplicate_autor,
         "publikacje": publikacje,
@@ -179,8 +186,8 @@ def _build_context_from_candidate(candidate, glowny_autor):
         "publikacje_year_range": year_range,
         "analiza": {
             "autor": candidate.duplicate_autor,
-            "pewnosc": candidate.confidence_score,
-            "powody_podobienstwa": candidate.reasons,
+            "pewnosc": pewnosc_display,
+            "powody_podobienstwa": enrich_reasons(candidate.reasons),
         },
         "candidate_id": candidate.pk,  # For marking as not duplicate
     }
@@ -205,6 +212,14 @@ def duplicate_authors_view(request):  # noqa: C901
     if mode not in ("pbn", "general", "both"):
         mode = "both"
 
+    # Filter confidence band: all|high|low (default all). high: >= 50%, low: < 50%.
+    # Próg porównujemy do confidence_percent jako ułamka, bo display % jest
+    # liczone z confidence_percent * 100 z przycięciem do zakresu 0..100.
+    confidence_band = request.GET.get("confidence", "all")
+    if confidence_band not in ("all", "high", "low"):
+        confidence_band = "all"
+    confidence_threshold_frac = MIN_PEWNOSC_DO_WYSWIETLENIA / 100.0
+
     # Common context
     not_duplicate_count = NotADuplicate.objects.count()
     ignored_authors_count = IgnoredScientist.objects.count()
@@ -262,29 +277,27 @@ def duplicate_authors_view(request):  # noqa: C901
         return render(request, "deduplikator_autorow/duplicate_authors.html", context)
 
     # Count pending candidates
-    pending_count = DuplicateCandidate.objects.filter(
+    base_pending_qs = DuplicateCandidate.objects.filter(
         scan_run=completed_scan,
         status=DuplicateCandidate.Status.PENDING,
-    ).count()
+    )
+    pending_count = base_pending_qs.count()
     context["pending_candidates_count"] = pending_count
     context["total_authors_with_duplicates"] = pending_count
-    context["pending_pbn_count"] = DuplicateCandidate.objects.filter(
-        scan_run=completed_scan,
-        status=DuplicateCandidate.Status.PENDING,
-        scan_mode="pbn",
-    ).count()
-    context["pending_general_count"] = DuplicateCandidate.objects.filter(
-        scan_run=completed_scan,
-        status=DuplicateCandidate.Status.PENDING,
-        scan_mode="general",
+    context["pending_pbn_count"] = base_pending_qs.filter(scan_mode="pbn").count()
+    context["pending_general_count"] = base_pending_qs.filter(
+        scan_mode="general"
     ).count()
+    context["confidence_band"] = confidence_band
 
     # Handle search by lastname
     search_lastname = request.GET.get("search_lastname", "").strip()
     context["search_lastname"] = search_lastname
 
     if search_lastname:
-        # Search within stored candidates
+        # Search within stored candidates - confidence_band celowo NIE filtruje
+        # wyboru głównego autora (filtr per-autor stosujemy niżej, na liście
+        # candidates_for_author).
         candidates = (
             DuplicateCandidate.objects.filter(
                 scan_run=completed_scan,
@@ -302,9 +315,24 @@ def duplicate_authors_view(request):  # noqa: C901
         )
 
         if candidates.exists():
-            first_candidate = candidates.first()
-            glowny_autor = first_candidate.main_autor
+            search_author_ids = list(
+                candidates.values_list("main_autor", flat=True)
+                .distinct()
+                .order_by("main_autor")
+            )
+            try:
+                skip_count = int(request.GET.get("skip_count", 0))
+            except (ValueError, TypeError):
+                skip_count = 0
+            if skip_count >= len(search_author_ids):
+                skip_count = 0
+            glowny_autor_id = search_author_ids[skip_count]
+            glowny_autor = Autor.objects.get(pk=glowny_autor_id)
             candidates_for_author = candidates.filter(main_autor=glowny_autor)
+            context["skip_count"] = skip_count
+            context["search_total_authors"] = len(search_author_ids)
+            context["search_has_prev"] = skip_count > 0
+            context["search_has_next"] = skip_count < len(search_author_ids) - 1
         else:
             glowny_autor = None
             candidates_for_author = DuplicateCandidate.objects.none()
@@ -315,12 +343,41 @@ def duplicate_authors_view(request):  # noqa: C901
         except (ValueError, TypeError):
             skip_count = 0
 
-        # Get next author with pending duplicates using offset
+        # Get next author with pending duplicates using offset.
+        # confidence_band NIE jest tu przekazywane — chcemy iterować po
+        # WSZYSTKICH głównych autorach niezależnie od pewności ich kandydatów,
+        # filtr stosujemy niżej tylko na widocznym podzbiorze.
         glowny_autor, candidates_for_author, skip_count = _get_next_candidate_group(
-            completed_scan, skip_count=skip_count, mode=mode
+            completed_scan,
+            skip_count=skip_count,
+            mode=mode,
         )
         context["skip_count"] = skip_count
 
+    # Filter per-author by confidence band (NOT main author selection).
+    # Liczniki "X / Y" oraz per-band wyliczamy, zanim nałożymy filtr.
+    if glowny_autor:
+        candidates_total_for_main = candidates_for_author.count()
+        candidates_high_for_main = candidates_for_author.filter(
+            confidence_percent__gte=confidence_threshold_frac
+        ).count()
+        candidates_low_for_main = candidates_total_for_main - candidates_high_for_main
+    else:
+        candidates_total_for_main = 0
+        candidates_high_for_main = 0
+        candidates_low_for_main = 0
+    if confidence_band == "high":
+        candidates_for_author = candidates_for_author.filter(
+            confidence_percent__gte=confidence_threshold_frac
+        )
+    elif confidence_band == "low":
+        candidates_for_author = candidates_for_author.filter(
+            confidence_percent__lt=confidence_threshold_frac
+        )
+    context["candidates_total_for_main"] = candidates_total_for_main
+    context["candidates_high_for_main"] = candidates_high_for_main
+    context["candidates_low_for_main"] = candidates_low_for_main
+
     if not glowny_autor:
         if pending_count == 0:
             messages.info(
@@ -347,6 +404,21 @@ def duplicate_authors_view(request):  # noqa: C901
         candidates_for_author.first() if candidates_for_author else None
     )
 
+    # "Scal wszystkie" jest aktywne tylko wtedy, gdy KAŻDY kandydat ma pewność
+    # ≥ MIN_PEWNOSC_DO_WYSWIETLENIA. Przy słabych trafieniach przyciski
+    # renderujemy w stanie wyszarzonym i klik pokazuje komunikat tłumaczący,
+    # co zrobić dalej (lista nazwisk z niską pewnością).
+    low_confidence_names = [
+        f"{d['autor']} ({d['analiza']['pewnosc']}%)"
+        for d in duplikaty_z_publikacjami
+        if d["analiza"]["pewnosc"] < MIN_PEWNOSC_DO_WYSWIETLENIA
+    ]
+    context["allow_merge_all"] = (
+        bool(duplikaty_z_publikacjami) and not low_confidence_names
+    )
+    context["low_confidence_names"] = low_confidence_names
+    context["MIN_PEWNOSC_DO_WYSWIETLENIA"] = MIN_PEWNOSC_DO_WYSWIETLENIA
+
     # Get main author's publications and disciplines
     context["glowny_autor_dyscypliny"] = (
         Autor_Dyscyplina.objects.filter(
@@ -433,6 +505,18 @@ def scal_autorow_view(request):
     )
 
     if not main_autor_id or not duplicate_autor_id:
+        # Sygnalizujemy do Rollbar — to nie powinno się zdarzać przy poprawnym
+        # wywołaniu z UI; raczej oznacza błąd JS-a lub niespójne dane (np.
+        # scientist_id wskazujący na rekord, którego rekord_w_bpp == None).
+        try:
+            raise ValueError(
+                "scal_autorow_view: missing required params after resolution. "
+                f"GET={dict(request.GET)} POST_keys={list(request.POST.keys())} "
+                f"resolved main={main_autor_id} duplicate={duplicate_autor_id}"
+            )
+        except ValueError:
+            traceback.print_exc()
+            rollbar.report_exc_info(sys.exc_info())
         return JsonResponse(
             {
                 "success": False,
@@ -495,37 +579,44 @@ def mark_non_duplicate(request):
     Przyjmuje parametry:
     - scientist_pk: Primary key Scientist do zapisania jako nie-duplikat
 
-    Zapisuje w modelu NotADuplicate i przekierowuje do następnego autora.
+    Zwraca JSON dla AJAX (X-Requested-With), w przeciwnym razie redirect.
     """
+    is_ajax = request.headers.get("X-Requested-With") == "XMLHttpRequest"
     scientist_pk = request.POST.get("scientist_pk")
 
-    if not scientist_pk:
-        messages.error(request, "Brak wymaganego parametru: scientist_pk")
+    def _respond(success, message, status=200, level="success"):
+        if is_ajax:
+            return JsonResponse({"success": success, "message": message}, status=status)
+        if level == "info":
+            messages.info(request, message)
+        elif success:
+            messages.success(request, message)
+        else:
+            messages.error(request, message)
         return redirect("deduplikator_autorow:duplicate_authors")
 
+    if not scientist_pk:
+        return _respond(False, "Brak wymaganego parametru: scientist_pk", status=400)
+
     try:
-        # Sprawdź czy Scientist istnieje
         autor = Autor.objects.get(pk=scientist_pk)
 
-        # Zapisz jako nie-duplikat (get_or_create zapobiega duplikatom)
         not_duplicate, created = NotADuplicate.objects.update_or_create(
             autor=autor, defaults=dict(created_by=request.user)
         )
 
         if created:
-            messages.success(request, f"Autor {autor} oznaczony jako nie-duplikat.")
-        else:
-            messages.info(
-                request, f"Autor {autor} był już oznaczony jako nie-duplikat."
-            )
+            return _respond(True, f"Autor {autor} oznaczony jako nie-duplikat.")
+        return _respond(
+            True, f"Autor {autor} był już oznaczony jako nie-duplikat.", level="info"
+        )
 
     except Autor.DoesNotExist:
-        messages.error(request, "Nie znaleziono autora o podanym ID.")
+        return _respond(False, "Nie znaleziono autora o podanym ID.", status=404)
     except Exception as e:
-        messages.error(request, f"Błąd podczas oznaczania autora: {str(e)}")
-
-    # Przekieruj do następnego autora z duplikatami
-    return redirect("deduplikator_autorow:duplicate_authors")
+        traceback.print_exc()
+        rollbar.report_exc_info(sys.exc_info())
+        return _respond(False, f"Błąd podczas oznaczania autora: {str(e)}", status=500)
 
 
 @group_required(GR_WPROWADZANIE_DANYCH)
@@ -642,41 +733,75 @@ def ignore_autor(request):
         return redirect("deduplikator_autorow:duplicate_authors")
 
 
+def _trigger_rescan_after_reset(request, reset_label):
+    """Próbuje uruchomić nowe skanowanie po resecie list ignorowanych/nie-duplikatów.
+
+    Reset zmienia zbiór wykluczeń, więc cache kandydatów (DuplicateCandidate)
+    przestaje być spójny z tym, co użytkownik widzi w UI. Bez rescanu mogą
+    pojawiać się duplikaty, które po resecie powinny zniknąć (lub odwrotnie:
+    brakować takich, które wcześniej były ignorowane). Wywołujemy delay()
+    w trybie best-effort — jeżeli skanowanie już trwa albo dane PBN są stare,
+    informujemy użytkownika, ale nie blokujemy operacji resetu.
+    """
+    from .tasks import scan_for_duplicates
+
+    if get_running_scan():
+        messages.info(
+            request,
+            f"{reset_label}. Skanowanie duplikatów jest już w trakcie — "
+            "wyniki uwzględnią reset po jego zakończeniu.",
+        )
+        return
+
+    pbn_data_fresh, pbn_stale_message, _ = is_pbn_people_data_fresh()
+    if not pbn_data_fresh:
+        messages.warning(
+            request,
+            f"{reset_label}. Nie udało się automatycznie uruchomić skanowania "
+            f"({pbn_stale_message}); uruchom je ręcznie po pobraniu danych PBN.",
+        )
+        return
+
+    scan_for_duplicates.delay(user_id=request.user.pk)
+    messages.success(
+        request,
+        f"{reset_label}. Uruchomiono nowe skanowanie duplikatów w tle — "
+        "odśwież stronę za chwilę, aby zobaczyć postęp.",
+    )
+
+
 @group_required(GR_WPROWADZANIE_DANYCH)
 @require_http_methods(["POST"])
 def reset_ignored_scientists(request):
-    """
-    Remove all IgnoredScientist (PBN) markings.
-    """
+    """Remove all IgnoredScientist (PBN) markings and re-trigger scan."""
     count = IgnoredScientist.objects.count()
     IgnoredScientist.objects.all().delete()
-    messages.success(request, f"Zresetowano {count} ignorowanych autorów (PBN).")
+    _trigger_rescan_after_reset(
+        request, f"Zresetowano {count} ignorowanych autorów (PBN)"
+    )
     return redirect("deduplikator_autorow:duplicate_authors")
 
 
 @group_required(GR_WPROWADZANIE_DANYCH)
 @require_http_methods(["POST"])
 def reset_ignored_autorzy(request):
-    """
-    Remove all IgnoredAuthor (BPP) markings.
-    """
+    """Remove all IgnoredAuthor (BPP) markings and re-trigger scan."""
     count = IgnoredAuthor.objects.count()
     IgnoredAuthor.objects.all().delete()
-    messages.success(request, f"Zresetowano {count} ignorowanych autorów (BPP).")
+    _trigger_rescan_after_reset(
+        request, f"Zresetowano {count} ignorowanych autorów (BPP)"
+    )
     return redirect("deduplikator_autorow:duplicate_authors")
 
 
 @group_required(GR_WPROWADZANIE_DANYCH)
 def reset_not_duplicates(request):
-    """
-    Widok do resetowania (usuwania) wszystkich rekordów NotADuplicate.
-    """
+    """Widok do resetowania (usuwania) wszystkich rekordów NotADuplicate."""
     if request.method == "POST":
         count = NotADuplicate.objects.count()
         NotADuplicate.objects.all().delete()
-        messages.success(
-            request,
-            f"Zresetowano {count} autorów oznaczonych jako nie-duplikat.",
+        _trigger_rescan_after_reset(
+            request, f"Zresetowano {count} autorów oznaczonych jako nie-duplikat"
         )
     return redirect("deduplikator_autorow:duplicate_authors")
 
@@ -886,7 +1011,13 @@ def _get_pending_candidates_for_main_autor(main_autor_id, scan_run):
     )
 
 
-def _get_next_candidate_group(scan_run, skip_count=0, mode="both"):
+def _get_next_candidate_group(
+    scan_run,
+    skip_count=0,
+    mode="both",
+    confidence_band="all",
+    confidence_threshold_frac=0.5,
+):
     """
     Get the next group of candidates (all for the same main author).
     Returns (main_autor, candidates_queryset, skip_count) or (None, None, 0)
@@ -897,6 +1028,9 @@ def _get_next_candidate_group(scan_run, skip_count=0, mode="both"):
         skip_count: Number of main authors to skip (offset)
         mode: Filter by scan_mode ("pbn", "general", or "both"). When "both",
             PBN candidates are sorted before general (PBN is canonical).
+        confidence_band: "all" / "high" / "low". high = confidence_percent
+            >= threshold; low = strictly below threshold.
+        confidence_threshold_frac: threshold as a fraction 0..1 (e.g. 0.5 for 50%).
 
     Returns:
         Tuple of (main_autor, candidates_queryset, current_skip_count)
@@ -909,6 +1043,10 @@ def _get_next_candidate_group(scan_run, skip_count=0, mode="both"):
     )
     if mode != "both":
         qs = qs.filter(scan_mode=mode)
+    if confidence_band == "high":
+        qs = qs.filter(confidence_percent__gte=confidence_threshold_frac)
+    elif confidence_band == "low":
+        qs = qs.filter(confidence_percent__lt=confidence_threshold_frac)
 
     # Annotate then iterate to dedupe in stable order. PostgreSQL's
     # DISTINCT + ORDER BY semantics require ordering columns in SELECT,
@@ -958,20 +1096,61 @@ def _get_next_candidate_group(scan_run, skip_count=0, mode="both"):
     return main_autor, candidates, skip_count
 
 
+@group_required(GR_WPROWADZANIE_DANYCH)
+def lastname_suggestions(request):
+    """Autocomplete dla wyszukiwarki nazwisk w deduplikatorze.
+
+    Zwraca do 10 unikalnych nazwisk autorów głównych z oczekujących (PENDING)
+    rekordów DuplicateCandidate, filtrowanych po prefiksie (min. 2 znaki); bez
+    użytecznego skanu zwraca pustą listę. Używane przez datalist na pasku górnym.
+    """
+    q = (request.GET.get("q") or "").strip()
+    if not q or len(q) < 2:
+        return JsonResponse({"results": []})
+
+    completed_scan = get_latest_usable_scan()
+    if not completed_scan:
+        return JsonResponse({"results": []})
+
+    nazwiska = (
+        DuplicateCandidate.objects.filter(
+            scan_run=completed_scan,
+            status=DuplicateCandidate.Status.PENDING,
+            main_autor__nazwisko__istartswith=q,
+        )
+        .values_list("main_autor__nazwisko", flat=True)
+        .distinct()
+        .order_by("main_autor__nazwisko")[:10]
+    )
+    return JsonResponse({"results": list(nazwiska)})
+
+
 @group_required(GR_WPROWADZANIE_DANYCH)
 @require_http_methods(["POST"])
 def mark_candidate_not_duplicate(request):
     """
     Mark a DuplicateCandidate as not a duplicate.
+
+    Returns JSON when called via AJAX (X-Requested-With: XMLHttpRequest),
+    otherwise redirects.
     """
     from django.utils import timezone
 
+    is_ajax = request.headers.get("X-Requested-With") == "XMLHttpRequest"
     candidate_id = request.POST.get("candidate_id")
 
-    if not candidate_id:
-        messages.error(request, "Brak wymaganego parametru: candidate_id")
+    def _respond(success, message, status=200):
+        if is_ajax:
+            return JsonResponse({"success": success, "message": message}, status=status)
+        if success:
+            messages.success(request, message)
+        else:
+            messages.error(request, message)
         return redirect("deduplikator_autorow:duplicate_authors")
 
+    if not candidate_id:
+        return _respond(False, "Brak wymaganego parametru: candidate_id", status=400)
+
     try:
         candidate = DuplicateCandidate.objects.get(pk=candidate_id)
         candidate.status = DuplicateCandidate.Status.NOT_DUPLICATE
@@ -979,19 +1158,20 @@ def mark_candidate_not_duplicate(request):
         candidate.reviewed_by = request.user
         candidate.save()
 
-        # Also mark the duplicate author in NotADuplicate (existing model)
         NotADuplicate.objects.update_or_create(
             autor=candidate.duplicate_autor, defaults={"created_by": request.user}
         )
 
-        messages.success(
-            request,
+        return _respond(
+            True,
             f"Autor {candidate.duplicate_autor_name} oznaczony jako nie-duplikat.",
         )
 
     except DuplicateCandidate.DoesNotExist:
-        messages.error(request, "Nie znaleziono kandydata o podanym ID.")
+        return _respond(False, "Nie znaleziono kandydata o podanym ID.", status=404)
     except Exception as e:
-        messages.error(request, f"Błąd podczas oznaczania kandydata: {str(e)}")
-
-    return redirect("deduplikator_autorow:duplicate_authors")
+        traceback.print_exc()
+        rollbar.report_exc_info(sys.exc_info())
+        return _respond(
+            False, f"Błąd podczas oznaczania kandydata: {str(e)}", status=500
+        )

From 3cbbe011ceb7fda48563f294bfbc6eb847a312ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sat, 2 May 2026 11:53:52 +0200
Subject: [PATCH 18/25] feat(autocomplete): log Autor creation via autocomplete
 as Django admin LogEntry

When a new author is created through the autocomplete 'create' dialog,
a LogEntry (ADDITION) is now recorded in Django admin history with
change_message='Utworzono z formularza autocomplete', making it possible
to trace who created the author and from where.
---
 .../test_autocomplete_authors.py              | 27 ++++++++++++++++---
 src/bpp/views/autocomplete/authors.py         | 19 ++++++++++++-
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/bpp/tests/test_autocomplete/test_autocomplete_authors.py b/src/bpp/tests/test_autocomplete/test_autocomplete_authors.py
index b2c5cc003..8deefa593 100644
--- a/src/bpp/tests/test_autocomplete/test_autocomplete_authors.py
+++ b/src/bpp/tests/test_autocomplete/test_autocomplete_authors.py
@@ -20,7 +20,6 @@
 )
 
 
-
 def test_dyscyplina_naukowa_przypisanie_autocomplete(
     app, autor_jan_kowalski, dyscyplina1, dyscyplina2, rok
 ):
@@ -75,7 +74,6 @@ def test_dyscyplina_naukowa_przypisanie_autocomplete(
     assert res.json["results"][0]["text"] == "memetyka stosowana"
 
 
-
 def test_dyscyplina_naukowa_przypisanie_autocomplete_brak_autora(
     app,
 ):
@@ -90,7 +88,6 @@ def test_dyscyplina_naukowa_przypisanie_autocomplete_brak_autora(
     assert res.json["results"][0]["text"] == "Podaj autora"
 
 
-
 def test_dyscyplina_naukowa_przypisanie_autocomplete_brak_drugiej(
     app, autor_jan_kowalski, dyscyplina1, dyscyplina2, rok
 ):
@@ -133,6 +130,30 @@ def autocomplete(s):
     assert Autor.objects.first().imiona == "Baz Quux"
 
 
+@pytest.mark.django_db
+def test_AutorAutocomplete_create_object_creates_log_entry(rf, admin_user, db):
+    from django.contrib.admin.models import ADDITION, LogEntry
+    from django.contrib.contenttypes.models import ContentType
+
+    autor_count_before = Autor.objects.count()
+
+    ac = AutorAutocomplete()
+    ac.request = rf.post("/", data={"text": "Kowalski Jan"})
+    ac.request.user = admin_user
+
+    obj = ac.create_object("Kowalski Jan")
+
+    assert obj.pk != -1
+    assert Autor.objects.count() == autor_count_before + 1
+
+    ct = ContentType.objects.get_for_model(Autor)
+    log = LogEntry.objects.get(
+        content_type=ct, object_id=str(obj.pk), action_flag=ADDITION
+    )
+    assert log.user == admin_user
+    assert "autocomplete" in log.change_message
+
+
 @pytest.mark.django_db
 def test_Status_KorektyAutocomplete(statusy_korekt):
     """Test status korekty autocomplete filtering."""
diff --git a/src/bpp/views/autocomplete/authors.py b/src/bpp/views/autocomplete/authors.py
index 1cea8c001..0373b49a6 100644
--- a/src/bpp/views/autocomplete/authors.py
+++ b/src/bpp/views/autocomplete/authors.py
@@ -94,10 +94,27 @@ class AutorAutocomplete(GroupRequiredMixin, AutorAutocompleteBase):
 
     def create_object(self, text):
         try:
-            return Autor.objects.create_from_string(text)
+            obj = Autor.objects.create_from_string(text)
         except ValueError:
             return self.err
 
+        from django.contrib.admin.models import ADDITION, LogEntry
+        from django.contrib.contenttypes.models import ContentType
+
+        try:
+            LogEntry.objects.create(
+                user_id=self.request.user.pk,
+                content_type_id=ContentType.objects.get_for_model(Autor).pk,
+                object_id=str(obj.pk),
+                object_repr=str(obj)[:200],
+                action_flag=ADDITION,
+                change_message="Utworzono z formularza autocomplete",
+            )
+        except (AttributeError, TypeError):
+            pass
+
+        return obj
+
 
 class PublicAutorAutocomplete(AutorAutocompleteBase):
     """Public autocomplete for authors (no create, no PBN/MNISW markers)."""

From a371624f3128fc2f940cd121df897417589214c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sat, 2 May 2026 17:47:33 +0200
Subject: [PATCH 19/25] =?UTF-8?q?fix(deduplikator=5Fautorow):=20UI=20popra?=
 =?UTF-8?q?wki=20cz.=202=20=E2=80=94=20CSS=20loading,=20layout,=20publikac?=
 =?UTF-8?q?je,=20nie-duplikaty?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Napraw ładowanie CSS: block extra_css nie istniał w hierarchii template'ów
  (bare.html->base.html), CSS nigdy się nie ładował. Zmieniono na
  {% block extrahead %} z {{ block.super }}.
- Bold publikacji: selektor .callout .deduplikator-autorow__publication-item a
  (spec. 0,2,1) nadpisuje Foundation .callout a:not(.close-button) — też 0,2,1,
  bo :not() liczy się jak jego argument — o ile nasz CSS ładuje się później
- Lista publikacji: usunięte kolorowe border-left, obramowanie i padding —
  teraz zwykła lista tekstowa
- Top bar: usunięta etykieta 'Pokaż wyniki:', same przyciski trybu po lewej,
  wyszukiwarka po prawej (space-between)
- Przycisk 'Szukaj': border-radius: 0 po prawej gdy widoczny przycisk 'X'
  (czyszczenie wyszukiwania)
- Nie-duplikaty: licznik w sidebarze aktualizuje się po AJAX-owym oznaczeniu
  kandydata jako nie-duplikat (span#not-duplicate-count + JS increment)
- Empty state: gdy search_lastname aktywny i brak wyników — 'Nie znaleziono
  takich autorów' zamiast 'Gratulacje'
---
 .../scss/deduplikator_autorow.scss            | 54 +++++++++----------
 .../duplicate_authors.html                    | 31 +++++++++--
 2 files changed, 51 insertions(+), 34 deletions(-)

diff --git a/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss b/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
index 271550d39..4d3ca06e1 100644
--- a/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
+++ b/src/deduplikator_autorow/static/deduplikator_autorow/scss/deduplikator_autorow.scss
@@ -263,6 +263,16 @@
   color: #666;
 }
 
+.deduplikator-autorow__search-btn-flat-right {
+  border-top-right-radius: 0;
+  border-bottom-right-radius: 0;
+}
+
+.deduplikator-autorow__search-btn-flat-right + .input-group-button .button {
+  border-top-left-radius: 0;
+  border-bottom-left-radius: 0;
+}
+
 // =============================================================================
 // DISCIPLINE TABLE
 // =============================================================================
@@ -317,25 +327,21 @@
 .deduplikator-autorow__publication-list {
   max-height: 300px;
   overflow-y: auto;
-  border: 1px solid #e1e1e1;
-  padding: 10px;
+  padding: 0;
 }
 
 .deduplikator-autorow__publication-list--short {
   max-height: 250px;
-  background-color: #f9f9f9;
 }
 
 .deduplikator-autorow__publication-item {
-  margin-bottom: 8px;
-  padding: 5px;
-  border-left: 3px solid #1779ba;
+  margin-bottom: 4px;
+  padding: 0;
 }
 
 .deduplikator-autorow__publication-item--duplicate {
-  margin-bottom: 6px;
-  padding: 3px;
-  border-left: 2px solid #8a8a8a;
+  margin-bottom: 2px;
+  padding: 0;
 }
 
 .deduplikator-autorow__publication-link {
@@ -564,23 +570,19 @@
     margin: 1em 0;
   }
 
-  // Top bar — filtr trybu po lewej, szybkie wyszukiwanie po prawej.
-  // Wymuszamy jeden wiersz (nowrap); search ma flex 1 + min-width: 0,
-  // żeby mógł zwężać się poniżej intrinsic width na wąskich ekranach,
-  // zamiast wyskakiwać do nowej linii i rozciągać się na 100% szerokości.
+  // Top bar — przyciski trybu po lewej, wyszukiwarka po prawej.
   &__top-bar {
     display: flex;
     align-items: center;
-    justify-content: flex-start;
+    justify-content: space-between;
     gap: 1em;
     margin: 1em 0;
     flex-wrap: nowrap;
   }
 
   &__top-search {
-    flex: 1 1 280px;
-    min-width: 0;
-    max-width: 420px;
+    flex: 0 1 auto;
+    min-width: 200px;
     margin: 0;
   }
 
@@ -694,18 +696,12 @@
   // Opisy bibliograficzne renderują <b>/<strong> wokół tytułów — to jest OK,
   // tytuł ma być boldem. Problem: Foundation daje .callout a:not(.close-button)
   // { font-weight: bolder }, więc cały tekst w <a> wewnątrz .callout jest
-  // bold. Resetujemy font-weight na <a> w obrębie itemów publikacji, ale
-  // zostawiamy <b>/<strong> z ich domyślnym bold, żeby tytuł nadal był
-  // wytłuszczony.
-  &__publication-list,
-  &__publication-item,
-  &__publication-item--duplicate,
-  &__publication-description {
-    font-weight: normal;
-  }
-
-  &__publication-item a,
-  &__publication-item--duplicate a {
+  // bold. Resetujemy font-weight na <a> w obrębie itemów publikacji z
+  // wyższą specyficznością niż .callout a:not(.close-button), żeby wygrać
+  // kaskadę. Zostawiamy <b>/<strong> z ich domyślnym bold, żeby tytuł nadal
+  // był wytłuszczony.
+  .callout &__publication-item a,
+  .callout &__publication-item--duplicate a {
     font-weight: normal;
   }
 
diff --git a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
index 426a73bd5..1ac032b10 100644
--- a/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
+++ b/src/deduplikator_autorow/templates/deduplikator_autorow/duplicate_authors.html
@@ -1,7 +1,8 @@
 {% extends "base.html" %}
 {% load static %}
 
-{% block extra_css %}
+{% block extrahead %}
+{{ block.super }}
 <link rel="stylesheet"
       href="{% static 'deduplikator_autorow/css/deduplikator_autorow.css' %}">
 {% endblock %}
@@ -241,13 +242,13 @@
                     <!-- Nie-duplikaty: obejrzenie + reset w jednym panelu -->
                     <li class="accordion-item {% if not_duplicate_count > 0 %}is-active{% endif %}" data-accordion-item>
                         <a class="accordion-title" href="#" {% if not_duplicate_count > 0 %}aria-expanded="true"{% endif %}>
-                            <span class="fi-eye" aria-hidden="true"></span> Nie-duplikaty {% if not_duplicate_count > 0 %}({{ not_duplicate_count }}){% endif %}
+                            <span class="fi-eye" aria-hidden="true"></span> Nie-duplikaty {% if not_duplicate_count > 0 %}(<span id="not-duplicate-count" data-count="{{ not_duplicate_count }}">{{ not_duplicate_count }}</span>){% else %}<span id="not-duplicate-count" data-count="0" style="display:none"></span>{% endif %}
                         </a>
                         <div class="accordion-content{% if not_duplicate_count > 0 %} deduplikator-autorow__accordion-content{% endif %}" data-tab-content>
                             {% if not_duplicate_count > 0 %}
                                 <p class="deduplikator-autorow__status-text--spaced">Autorzy oznaczeni jako nie będący duplikatami.</p>
                                 <p class="deduplikator-autorow__status-warning deduplikator-autorow__status-text--spaced">
-                                    <span class="fi-alert"></span> Obecnie: {{ not_duplicate_count }} oznaczonych
+                                    <span class="fi-alert"></span> Obecnie: <span id="not-duplicate-count-text">{{ not_duplicate_count }}</span> oznaczonych
                                 </p>
                                 <a href="/admin/deduplikator_autorow/notaduplicate/"
                                    class="button secondary expanded small deduplikator-autorow__button-margin-bottom"
@@ -370,7 +371,6 @@
             {% if glowny_autor or search_lastname %}
             <div class="deduplikator-autorow__top-bar">
               <div class="deduplikator-autorow__mode-filter">
-                <span class="deduplikator-autorow__mode-filter-label">Pokaż wyniki:</span>
                 <div class="button-group deduplikator-autorow__mode-buttons" role="tablist" aria-label="Filtr trybu wyników">
                   <a href="?mode=pbn{% if q_search %}&amp;search_lastname={{ q_search }}{% endif %}"
                      role="tab"
@@ -419,7 +419,7 @@
                          autocomplete="off">
                   <datalist id="lastname-suggestions"></datalist>
                   <div class="input-group-button">
-                    <button type="submit" class="button primary">Szukaj</button>
+                    <button type="submit" class="button primary{% if q_search %} deduplikator-autorow__search-btn-flat-right{% endif %}">Szukaj</button>
                   </div>
                   {% if q_search %}
                   <div class="input-group-button">
@@ -529,6 +529,15 @@ <h3><span class="fi-loop"></span> Skanowanie w toku...</h3>
                     <p>Strona odświeży się automatycznie po zakończeniu skanowania.</p>
                 </div>
             {% elif not glowny_autor %}
+                {% if search_lastname %}
+                <div class="callout warning deduplikator-autorow__callout-centered">
+                    <h3><span class="fi-magnifying-glass"></span> Nie znaleziono takich autorów</h3>
+                    <p>Nie znaleziono kandydatów dla nazwiska „{{ search_lastname }}".</p>
+                    <a href="?mode={{ mode }}" class="button secondary deduplikator-autorow__callout-margin-top">
+                        <span class="fi-x"></span> Wyczyść wyszukiwanie
+                    </a>
+                </div>
+                {% else %}
                 <div class="callout success deduplikator-autorow__callout-centered">
                     <h3><span class="fi-check"></span> Gratulacje!</h3>
                     <p><strong>Wszystkie duplikaty zostały już przetworzone.</strong></p>
@@ -541,6 +550,7 @@ <h3><span class="fi-check"></span> Gratulacje!</h3>
                         </button>
                     </form>
                 </div>
+                {% endif %}
             {% else %}
                 {% if glowny_autor %}
                     <div class="callout primary">
@@ -1479,6 +1489,17 @@ <h5 class="deduplikator-autorow__section-title">{% if duplikat_data.publikacje_c
                                 sound: false
                             });
                         }
+                        {# Aktualizuj licznik nie-duplikatów w sidebarze #}
+                        var ndBadge = document.getElementById('not-duplicate-count');
+                        var ndText = document.getElementById('not-duplicate-count-text');
+                        if (ndBadge) {
+                            var ndCount = parseInt(ndBadge.dataset.count || '0') + 1;
+                            ndBadge.dataset.count = ndCount;
+                            ndBadge.textContent = ndCount;
+                        }
+                        if (ndText) {
+                            ndText.textContent = (parseInt(ndText.textContent) || 0) + 1;
+                        }
                         if (card) {
                             $(card).fadeOut(400, function() {
                                 $(this).remove();

From 12ff646876f4246ce84f6b71bd6bc7e42382aafb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sat, 2 May 2026 18:02:10 +0200
Subject: [PATCH 20/25] =?UTF-8?q?feat(deduplikator=5Fautorow):=20hard=20re?=
 =?UTF-8?q?jection=20roz=C5=82=C4=85cznych=20imion,=20ORCID=20w=20XLSX,=20?=
 =?UTF-8?q?naprawa=20top=5Fbar=20HTML,=20testy?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Hard reject kandydatów z rozłącznymi imionami (analysis_meta + analysis)
- Kolumny ORCID i PBN URL w eksporcie XLSX
- Endpoint lastname-suggestions do autouzupełniania
- reason_display.py — utils do wyświetlania powodów duplikacji
- Naprawa osieroconego <li> w top_bar.html (djlint H025)
- Zmiana etykiety 'deduplikator autorów PBN' → 'deduplikator autorów'
- Dodanie 'deduplikator autorów' do menu narzędzia
- .gitignore: .grunt-build-stamp
- CLAUDE.md: dodano reguły komentarzy Django i uv run
- Nowe testy: merge_all_refresh, reasons_display, xlsx_orcid_and_pbn_url
---
 .gitignore                                    |   2 +
 CLAUDE.md                                     |  16 ++
 ...duplikator-autorow-ui-overhaul.feature.rst |  28 +++
 .../tests/test_analysis_meta.py               | 164 ++++++++++++++-
 .../tests/test_merge_all_refresh.py           | 194 ++++++++++++++++++
 .../tests/test_reasons_display.py             | 105 ++++++++++
 .../tests/test_xlsx_export.py                 |  36 ++--
 .../tests/test_xlsx_orcid_and_pbn_url.py      | 108 ++++++++++
 src/deduplikator_autorow/urls.py              |   5 +
 src/deduplikator_autorow/utils/analysis.py    |  67 +++++-
 .../utils/analysis_meta.py                    | 113 +++++++---
 src/deduplikator_autorow/utils/export.py      |  58 ++++--
 .../utils/reason_display.py                   |  73 +++++++
 src/django_bpp/templates/top_bar.html         |  18 +-
 14 files changed, 905 insertions(+), 82 deletions(-)
 create mode 100644 src/bpp/newsfragments/+deduplikator-autorow-ui-overhaul.feature.rst
 create mode 100644 src/deduplikator_autorow/tests/test_merge_all_refresh.py
 create mode 100644 src/deduplikator_autorow/tests/test_reasons_display.py
 create mode 100644 src/deduplikator_autorow/tests/test_xlsx_orcid_and_pbn_url.py
 create mode 100644 src/deduplikator_autorow/utils/reason_display.py

diff --git a/.gitignore b/.gitignore
index 3dceac2d7..3f59bd0ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -175,6 +175,8 @@ dump.rdb
 .worktrees/
 .claude/
 
+.grunt-build-stamp
+
 # Local TODO / audit notes (not committed)
 TODO-*.txt
 TODO-*.md
diff --git a/CLAUDE.md b/CLAUDE.md
index 9b5291de7..99272aca9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -25,6 +25,22 @@ management system built with Django. Python >=3.10,<3.15.
   - Public frontend (Foundation CSS): monochrome Foundation-Icons
     (`<span class="fi-icon"/>`)
   - Django admin (`templates/admin/`): use emoji (no Foundation Icons)
+- **Django template comments `{# ... #}` są jednoliniowe — KAŻDA LINIA
+  MUSI mieć własne otwarcie `{#` i zamknięcie `#}` na tej samej linii.**
+  Po `\n` w środku komentarza parser przestaje go widzieć i tekst wycieka
+  do wyrenderowanego HTML-u. Powtarzający się błąd. Reguła:
+  - ❌ ZABRONIONE wieloliniowe komentarze typu:
+    ```django
+    {# linia 1
+       linia 2 #}
+    ```
+  - ✅ ZAWSZE każda linia z osobnym `{# ... #}`:
+    ```django
+    {# linia 1 #}
+    {# linia 2 #}
+    ```
+  - Alternatywa dla bloków: `{% comment %}...{% endcomment %}` (też OK,
+    ale per-line `{# #}` jest preferowane przez użytkownika).
 
 ## Python and Django Execution
 
diff --git a/src/bpp/newsfragments/+deduplikator-autorow-ui-overhaul.feature.rst b/src/bpp/newsfragments/+deduplikator-autorow-ui-overhaul.feature.rst
new file mode 100644
index 000000000..cc19ee26b
--- /dev/null
+++ b/src/bpp/newsfragments/+deduplikator-autorow-ui-overhaul.feature.rst
@@ -0,0 +1,28 @@
+Deduplikator autorów: gruntowna przebudowa UI. Tytuł i pozycje
+menu uproszczone z "Deduplikator autorów PBN" na "Deduplikator
+autorów" (bez znacznika BETA), wpis dodany dodatkowo do podmenu
+"Operacje". Tryb skanowania (PBN/ogólny) prezentowany jest jako
+kolorowy badge przy "Główny rekord autora", filtr "Pokaż wyniki"
+zmieniony z radio-buttonów na poziomy button-group.
+
+Przyciski na karcie każdego potencjalnego duplikatu pogrupowane
+w trzy logiczne sekcje: Podgląd (otwórz wyd. ciągłe/zwarte,
+redagowanie, stronę główną, PBN), Decyzja ("Nie jest duplikatem
+głównego autora", usuń autora bez publikacji), Scalanie (cztery
+warianty scalania). Przyciski "Scal + ustaw dyscyplinę" oraz
+"Scal + ustaw subdyscyplinę" są ukryte, gdy główny autor nie ma
+żadnej dyscypliny.
+
+Powody podobieństwa renderowane są jako kolorowe chipy z ikonami
+Foundation, z tonami match/info/weak/warn dobranymi do siły
+przesłanki. Procent pewności jest ograniczony do zakresu 0–100%
+(wcześniej widoczne były wartości typu 140% wynikające z surowego
+score).
+
+Naprawione: oznaczenie autora jako nie-duplikat (przycisk
+"Nie jest duplikatem głównego autora") wykonuje się teraz przez
+AJAX z fadeOut karty, zamiast przeładowywać widok i przeskakiwać
+do kolejnego głównego autora. Naprawiono też "Scal wszystkie",
+który dla kandydatów z trybu ogólnego zwracał błąd 400 (JS
+wysyłał ``main_scientist_id`` zamiast ``main_autor_id``); brakujące
+parametry trafiają teraz dodatkowo do Rollbara.
diff --git a/src/deduplikator_autorow/tests/test_analysis_meta.py b/src/deduplikator_autorow/tests/test_analysis_meta.py
index e436dcdd3..e4273250c 100644
--- a/src/deduplikator_autorow/tests/test_analysis_meta.py
+++ b/src/deduplikator_autorow/tests/test_analysis_meta.py
@@ -37,7 +37,8 @@ def test_identyczne_orcid_dodaje_50():
 
 
 def test_rozne_orcid_odejmuje_50():
-    # Różne nazwiska/imiona, żeby ORCID był dominującym sygnałem.
+    # Identyczne imiona (żeby hard-rejection nie zadziałał i ORCID-mismatch
+    # mógł być widoczny jako dominujący sygnał).
     a = _meta(
         nazwisko="kowalski",
         imiona=("jan",),
@@ -45,12 +46,16 @@ def test_rozne_orcid_odejmuje_50():
     )
     b = _meta(
         nazwisko="nowak",
-        imiona=("piotr",),
+        imiona=("jan",),
         orcid="0000-0002-2222-2222",
     )
     score, reasons = analiza_pary_meta(a, b)
-    assert score <= -40  # -50 plus drobne plusy z innych kryteriów
+    # +30 wspólne imię, -50 różny ORCID, +10 mało publikacji = -10 raw,
+    # ale plus inne drobne. Sprawdzamy że ORCID-mismatch wypłynął jako
+    # negatywny element (nie samo +30 dominuje).
     assert any("różny ORCID" in r for r in reasons)
+    # Sumarycznie score powinien być wyraźnie obniżony przez ORCID
+    assert score < 30
 
 
 def test_identyczne_nazwisko_dodaje_40():
@@ -81,3 +86,156 @@ def test_swap_imienia_z_nazwiskiem_dodaje_50():
     b = _meta(nazwisko="jan", imiona=("kowalski",))
     score, reasons = analiza_pary_meta(a, b)
     assert any("zamian" in r.lower() for r in reasons)
+
+
+# --- Penalty za różne imiona (rozłączne, brak overlap-u w żadnym wymiarze) ----
+
+
+def test_rozne_imiona_bez_zadnego_overlap_odejmuje_punkty():
+    """'Jan' vs 'Stefan': brak common, brak similar (3-prefix), brak inicjału.
+
+    Realny case użytkownika: 'Jan Kowalski' vs 'Stefan
+    Kowalski-Nowak' — system dawał ~49% mimo zupełnie innych imion.
+    Penalty ma zniwelować inne przesłanki tak, żeby raw score spadał poniżej
+    progu MIN_CONFIDENCE_TO_STORE.
+    """
+    a = _meta(nazwisko="kowalski", imiona=("jan",))
+    b = _meta(nazwisko="kowalski-nowak", imiona=("stefan",))
+    score, reasons = analiza_pary_meta(a, b)
+    assert any("różne imiona" in r.lower() for r in reasons), (
+        f"Brak powodu 'różne imiona' w {reasons}"
+    )
+    # Bez penalty: +30 (zawieranie nazwiska) + drobne plusy ≈ 30-50 raw.
+    # Rozłączne imiona muszą zbić wynik poniżej progu 50 wymaganego do zapisu.
+    assert score < 50, (
+        f"Score {score} >= 50 mimo zupełnie różnych imion (powody: {reasons})"
+    )
+
+
+def test_jedno_wspolne_imie_nie_powoduje_penalty():
+    """'Jan Maria' vs 'Maria Kasia' mają wspólne 'maria' — bez penalty."""
+    a = _meta(imiona=("jan", "maria"))
+    b = _meta(imiona=("maria", "kasia"))
+    _, reasons = analiza_pary_meta(a, b)
+    assert not any("różne imiona" in r.lower() for r in reasons)
+
+
+def test_podobne_imie_nie_powoduje_penalty():
+    """'Jan' vs 'Janusz' — startsWith(3) wystarcza, brak penalty."""
+    a = _meta(imiona=("jan",))
+    b = _meta(imiona=("janusz",))
+    _, reasons = analiza_pary_meta(a, b)
+    assert not any("różne imiona" in r.lower() for r in reasons)
+
+
+def test_pasujacy_inicjal_nie_powoduje_penalty():
+    """Wspólny inicjał (J vs J) traktowany jako sygnał - bez penalty."""
+    # _common_initials w meta bierze pierwszy znak imienia. "jan" i "jakub"
+    # mają wspólny inicjał "j" — i jednocześnie startsWith(3) NIE pasuje
+    # ('jan' vs 'jakub' — różne 3-literowe prefiksy 'jan' vs 'jak'). Sam wspólny
+    # inicjał ma wystarczyć, żeby penalty się nie pojawił.
+    a = _meta(imiona=("jan",))
+    b = _meta(imiona=("jakub",))
+    _, reasons = analiza_pary_meta(a, b)
+    assert not any("różne imiona" in r.lower() for r in reasons)
+
+
+def test_brak_imion_po_jednej_stronie_nie_aktywuje_penalty():
+    """Hard-rejection wymaga, by OBIE strony miały imiona — w przeciwnym razie
+    nie ma o czym mówić, że są 'różne'."""
+    a = _meta(imiona=("jan",))
+    b = _meta(imiona=())
+    _, reasons = analiza_pary_meta(a, b)
+    assert not any("różne imiona" in r.lower() for r in reasons)
+
+
+# --- Hard rejection: rozłączne imiona = NIE jest duplikatem (regardless of all) ----
+
+
+def test_zupelnie_rozne_imiona_jest_hard_rejected():
+    """Jan vs Agnieszka — różne imiona, brak swap → score mocno ujemny,
+    żeby pair na pewno NIE przeszedł progu zapisu."""
+    a = _meta(nazwisko="kowalski", imiona=("jan",))
+    b = _meta(nazwisko="kowalski", imiona=("agnieszka",))
+    score, reasons = analiza_pary_meta(a, b)
+    assert score < 0, f"Score {score} - pair powinna być twardo odrzucona"
+    assert score <= -1000, f"Score {score} - powinien być sentinel ≤ -1000"
+    assert any("odrzucono" in r.lower() for r in reasons)
+
+
+def test_hard_rejection_wygrywa_z_identycznym_orcid():
+    """Nawet identyczny ORCID (+50) nie ratuje pary z totalnie różnymi imionami.
+
+    Jeżeli ORCID jest taki sam ale imiona zupełnie różne, system nadal
+    odrzuca - to bardziej prawdopodobnie błąd w ORCID/imionach niż realny
+    duplikat (bo imiona człowieka raczej nie zmieniają się tak drastycznie).
+    """
+    a = _meta(nazwisko="kowalski", imiona=("jan",), orcid="0000-0001-1111-1111")
+    b = _meta(
+        nazwisko="kowalski-nowak", imiona=("stefan",), orcid="0000-0001-1111-1111"
+    )
+    score, reasons = analiza_pary_meta(a, b)
+    assert score <= -1000
+    assert any("odrzucono" in r.lower() for r in reasons)
+
+
+def test_hard_rejection_nie_blokuje_swap():
+    """Klasyczny swap 'Jan Kowalski' ↔ 'Kowalski Jan' nie jest hard-rejected,
+    mimo że wartości imion się nie pokrywają z imionami drugiego."""
+    a = _meta(nazwisko="kowalski", imiona=("jan",))
+    b = _meta(nazwisko="jan", imiona=("kowalski",))
+    score, reasons = analiza_pary_meta(a, b)
+    assert score > 0
+    assert any("zamian" in r.lower() for r in reasons)
+
+
+# --- Inicjały: kiedy MOŻE być duplikatem, kiedy NIE MOŻE ----------------------
+
+
+def test_jan_kowalski_vs_j_kropka_kowalski_moze_byc_duplikatem():
+    """'Jan Kowalski' vs 'J. Kowalski' — to samo nazwisko, inicjał J się
+    zgadza → kandydat MOŻE być duplikatem (nie hard-reject)."""
+    a = _meta(nazwisko="kowalski", imiona=("jan",))
+    b = _meta(nazwisko="kowalski", imiona=("j.",))
+    score, reasons = analiza_pary_meta(a, b)
+    assert score > 0, (
+        f"Para 'Jan' vs 'J.' powinna być akceptowalna, score={score}, reasons={reasons}"
+    )
+    assert not any("odrzucono" in r.lower() for r in reasons)
+
+
+def test_jan_kowalski_vs_a_kropka_kowalski_NIE_moze_byc_duplikatem():
+    """'Jan Kowalski' vs 'A. Kowalski' — to samo nazwisko, ale inicjał A != J
+    → hard-reject (różne osoby)."""
+    a = _meta(nazwisko="kowalski", imiona=("jan",))
+    b = _meta(nazwisko="kowalski", imiona=("a.",))
+    score, reasons = analiza_pary_meta(a, b)
+    assert score <= -1000, (
+        f"Inicjał A != J — para powinna być twardo odrzucona, score={score}"
+    )
+    assert any("odrzucono" in r.lower() for r in reasons)
+
+
+def test_jan_kowalski_vs_kowalski_j_swap_z_inicjalem_moze_byc():
+    """'Jan Kowalski' vs 'Kowalski J.' — swap z inicjałem (database
+    swap: imiona='Kowalski', nazwisko='J.'). Inicjał J pasuje do 'Jan' →
+    MOŻE być duplikatem."""
+    a = _meta(nazwisko="kowalski", imiona=("jan",))
+    b = _meta(nazwisko="j.", imiona=("kowalski",))
+    score, reasons = analiza_pary_meta(a, b)
+    assert score > 0, (
+        f"Swap z pasującym inicjałem powinien przejść, score={score}, reasons={reasons}"
+    )
+    assert not any("odrzucono" in r.lower() for r in reasons)
+
+
+def test_jan_kowalski_vs_kowalski_a_swap_z_innym_inicjalem_NIE_moze_byc():
+    """'Jan Kowalski' vs 'Kowalski A.' — swap-like layout, ale 'A.' nie
+    pasuje do 'Jan' (różne inicjały) → hard-reject."""
+    a = _meta(nazwisko="kowalski", imiona=("jan",))
+    b = _meta(nazwisko="a.", imiona=("kowalski",))
+    score, reasons = analiza_pary_meta(a, b)
+    assert score <= -1000, (
+        f"Swap-shape z różnym inicjałem powinien być odrzucony, score={score}"
+    )
+    assert any("odrzucono" in r.lower() for r in reasons)
diff --git a/src/deduplikator_autorow/tests/test_merge_all_refresh.py b/src/deduplikator_autorow/tests/test_merge_all_refresh.py
new file mode 100644
index 000000000..d10f9ff86
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_merge_all_refresh.py
@@ -0,0 +1,194 @@
+"""Testy stanu "Scal wszystkie" — gating po pewności + dane potrzebne JS-owi
+do odświeżenia stanu po AJAX-owym usunięciu karty (refreshMergeAllAvailability).
+
+Testy są server-side: sprawdzają dane, które view eksportuje do template-a,
+a template do DOM-u. Zapewniają, że klient ma wszystko czego potrzebuje, żeby
+prawidłowo przeliczyć stan przycisków bez przeładowania strony.
+
+Test E2E (kliknięcie "Nie jest duplikatem" + sprawdzenie odblokowania)
+żyje w `test_merge_all_refresh_e2e.py` (Playwright).
+"""
+
+import pytest
+from django.contrib.auth.models import Group
+from django.urls import reverse
+from django.utils import timezone
+from model_bakery import baker
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+from deduplikator_autorow.models import DuplicateCandidate, DuplicateScanRun
+from deduplikator_autorow.views import MIN_PEWNOSC_DO_WYSWIETLENIA
+
+
+@pytest.fixture
+def auth_client(client, db):
+    user = baker.make("bpp.BppUser", is_active=True)
+    user.set_password("xx")
+    user.save()
+    grp, _ = Group.objects.get_or_create(name=GR_WPROWADZANIE_DANYCH)
+    user.groups.add(grp)
+    client.force_login(user)
+    return client
+
+
+def _create_candidate(scan, main, dup, confidence_percent, mode="pbn"):
+    """confidence_percent in 0..1 - musi przejść przez display=round(*100)."""
+    return DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=main,
+        duplicate_autor=dup,
+        confidence_score=int(confidence_percent * 100),  # nieistotne dla display
+        confidence_percent=confidence_percent,
+        main_autor_name=str(main),
+        duplicate_autor_name=str(dup),
+        scan_mode=mode,
+    )
+
+
+@pytest.fixture
+def scan_with_mixed_confidence(db):
+    """Scan: 1 main author + 2 duplikaty (jeden 80%, jeden 30%)."""
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.COMPLETED,
+        finished_at=timezone.now(),
+    )
+    main = baker.make("bpp.Autor", nazwisko="Kowalski", imiona="Jan")
+    high = baker.make("bpp.Autor", nazwisko="Kowalski", imiona="Jan")
+    low = baker.make("bpp.Autor", nazwisko="Kowal", imiona="Janusz")
+    high_cand = _create_candidate(scan, main, high, 0.80)
+    low_cand = _create_candidate(scan, main, low, 0.30)
+    return scan, main, high, low, high_cand, low_cand
+
+
+@pytest.fixture
+def scan_only_high_confidence(db):
+    """Scan: 1 main + 2 duplikaty - oba ≥ 50%."""
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.COMPLETED,
+        finished_at=timezone.now(),
+    )
+    main = baker.make("bpp.Autor", nazwisko="Nowak", imiona="Anna")
+    a = baker.make("bpp.Autor", nazwisko="Nowak", imiona="Anna")
+    b = baker.make("bpp.Autor", nazwisko="Nowak", imiona="Ania")
+    _create_candidate(scan, main, a, 0.85)
+    _create_candidate(scan, main, b, 0.65)
+    return scan, main
+
+
+def test_view_exposes_pewnosc_threshold_to_template(
+    auth_client, scan_only_high_confidence
+):
+    """JS musi znać próg, żeby przeliczyć po fadeOut. Testujemy że jest w DOM-ie."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    assert response.status_code == 200
+    content = response.content.decode()
+    # Wartość MIN_PEWNOSC_DO_WYSWIETLENIA (50) powinna być wstawiona do JS-a
+    # jako var MIN_PEWNOSC_THRESHOLD = 50;
+    assert f"MIN_PEWNOSC_THRESHOLD = {MIN_PEWNOSC_DO_WYSWIETLENIA}" in content
+
+
+def test_card_has_data_pewnosc_attribute(auth_client, scan_with_mixed_confidence):
+    """Każda karta musi mieć data-pewnosc - JS po fadeOut iteruje po nich."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    content = response.content.decode()
+    # Wysokopewny: 0.80 * 100 = 80
+    assert 'data-pewnosc="80"' in content
+    # Niskopewny: 0.30 * 100 = 30
+    assert 'data-pewnosc="30"' in content
+
+
+def test_card_has_data_author_name_attribute(auth_client, scan_with_mixed_confidence):
+    """Każda karta ma data-author-name dla aktualizowanej listy w alercie."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    content = response.content.decode()
+    # Sprawdzamy że atrybut jest, z dowolnym sensownym tekstem reprezentującym autora
+    assert "data-author-name=" in content
+
+
+def test_merge_all_disabled_when_low_confidence_present(
+    auth_client, scan_with_mixed_confidence
+):
+    """Z 30% kandydatem przyciski 'Scal wszystkie' są wyszarzone."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    content = response.content.decode()
+    assert 'aria-disabled="true"' in content
+    assert "deduplikator-autorow__merge-all-btn--disabled" in content
+
+
+def test_low_confidence_names_in_data_attribute(
+    auth_client, scan_with_mixed_confidence
+):
+    """data-low-confidence-names przekazuje listę nazwisk do alertu JS."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    content = response.content.decode()
+    assert "data-low-confidence-names=" in content
+    # Zawiera autora 30% (Kowal Janusz)
+    assert "30%" in content
+
+
+def test_merge_all_enabled_when_all_high_confidence(
+    auth_client, scan_only_high_confidence
+):
+    """Wszyscy kandydaci ≥ 50% - przyciski aktywne, brak klasy disabled."""
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    content = response.content.decode()
+    # Przyciski merge-all renderują się
+    assert 'data-merge-all="true"' in content
+    # Atrybut HTML aria-disabled="true" w button-tagach NIE występuje obok
+    # przycisków merge-all. Klasa __merge-all-btn--disabled występuje też jako
+    # string w JS-ie (dla manipulacji classList), więc asercję robimy
+    # znajdując każdy <button data-merge-all="true" ...> i sprawdzając, że
+    # nie zawiera klasy disabled w atrybucie class.
+    import re
+
+    button_tags = re.findall(r'<button[^>]*data-merge-all="true"[^>]*>', content)
+    assert button_tags, "Nie znalazłem ani jednego przycisku merge-all w HTML"
+    for tag in button_tags:
+        assert "merge-all-btn--disabled" not in tag, (
+            f"Przycisk ma klasę disabled mimo że wszyscy ≥ 50%: {tag}"
+        )
+        assert 'aria-disabled="true"' not in tag, (
+            f"Przycisk ma aria-disabled mimo że wszyscy ≥ 50%: {tag}"
+        )
+
+
+def test_view_does_not_render_merge_all_when_no_candidates(auth_client, db):
+    """Gdy nie ma kandydatów pending, sekcja merge-all w ogóle się nie renderuje."""
+    DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.COMPLETED,
+        finished_at=timezone.now(),
+    )
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    content = response.content.decode()
+    # `data-merge-all` jako string występuje w JS-ie; szukamy konkretnego
+    # atrybutu HTML w button-tagu.
+    import re
+
+    button_tags = re.findall(r'<button[^>]*data-merge-all="true"[^>]*>', content)
+    assert not button_tags, (
+        f"Nie powinno być przycisków merge-all bez kandydatów, znalazłem: {button_tags}"
+    )
+
+
+def test_pewnosc_display_is_clamped_to_0_100(auth_client, db):
+    """confidence_percent > 1.0 (historyczne dane) musi być sklampowany do 100%."""
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.COMPLETED,
+        finished_at=timezone.now(),
+    )
+    main = baker.make("bpp.Autor", nazwisko="X", imiona="Y")
+    dup = baker.make("bpp.Autor", nazwisko="X", imiona="Y")
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=main,
+        duplicate_autor=dup,
+        confidence_score=300,  # Surowy score > MAX_PEWNOSC
+        confidence_percent=1.4,  # Powinno być sklampowane przy display
+        main_autor_name="x",
+        duplicate_autor_name="x",
+        scan_mode="pbn",
+    )
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    content = response.content.decode()
+    assert 'data-pewnosc="100"' in content
+    assert 'data-pewnosc="140"' not in content
diff --git a/src/deduplikator_autorow/tests/test_reasons_display.py b/src/deduplikator_autorow/tests/test_reasons_display.py
new file mode 100644
index 000000000..fece4f689
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_reasons_display.py
@@ -0,0 +1,105 @@
+"""Testy wzbogacania powodów podobieństwa o ikony i ton.
+
+Logika mapowania text -> (icon, tone) musi siedzieć w warstwie Pythona
+(views/utils), a nie w nowo utworzonym tag-library `dedup_tags`. Powód:
+auto-discovery template-tagów Django wykonuje się raz przy starcie procesu.
+Świeżo dodany pakiet `templatetags/` nie jest skanowany ponownie po reloadzie
+plików (auto-reloader ładuje moduły, ale nie odświeża cache template-engine).
+W rezultacie {% load dedup_tags %} wywala TemplateSyntaxError aż do pełnego
+restartu serwera. Trzymanie logiki w views/utils omija ten cache całkowicie.
+"""
+
+import pytest
+from django.contrib.auth.models import Group
+from django.urls import reverse
+from django.utils import timezone
+from model_bakery import baker
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+from deduplikator_autorow.models import DuplicateCandidate, DuplicateScanRun
+
+
+@pytest.fixture
+def auth_client(client, db):
+    user = baker.make("bpp.BppUser", is_active=True)
+    user.set_password("xx")
+    user.save()
+    grp, _ = Group.objects.get_or_create(name=GR_WPROWADZANIE_DANYCH)
+    user.groups.add(grp)
+    client.force_login(user)
+    return client
+
+
+@pytest.fixture
+def scan_with_orcid_reason(db):
+    """Scan z jednym kandydatem zawierającym powód "identyczny ORCID"."""
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.COMPLETED,
+        finished_at=timezone.now(),
+    )
+    a1 = baker.make("bpp.Autor", nazwisko="Kowalski", imiona="Jan")
+    a2 = baker.make("bpp.Autor", nazwisko="Kowalski", imiona="Jan")
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=a1,
+        duplicate_autor=a2,
+        confidence_score=80,
+        confidence_percent=0.6,
+        main_autor_name="Kowalski Jan",
+        duplicate_autor_name="Kowalski Jan",
+        scan_mode="pbn",
+        reasons=[
+            "identyczny ORCID - to ten sam autor",
+            "identyczne nazwisko",
+            "wspólne lata publikacji: [2022, 2023]",
+        ],
+    )
+    return scan
+
+
+def test_enrich_reason_returns_icon_and_tone():
+    """Helper enrich_reason zwraca dict z text/icon/tone."""
+    from deduplikator_autorow.utils.reason_display import enrich_reason
+
+    result = enrich_reason("identyczny ORCID - to ten sam autor")
+    assert result["text"] == "identyczny ORCID - to ten sam autor"
+    assert result["icon"] == "fi-key"
+    assert result["tone"] == "match"
+
+
+def test_enrich_reason_orcid_difference_is_warn():
+    """Różny ORCID to mocna negatywna przesłanka -> warn."""
+    from deduplikator_autorow.utils.reason_display import enrich_reason
+
+    result = enrich_reason("różny ORCID - to różni autorzy")
+    assert result["icon"] == "fi-x-circle"
+    assert result["tone"] == "warn"
+
+
+def test_enrich_reason_unknown_text_falls_back():
+    """Nieznany tekst dostaje neutralną ikonę i ton info."""
+    from deduplikator_autorow.utils.reason_display import enrich_reason
+
+    result = enrich_reason("zupełnie nieznany powód xyz")
+    assert result["icon"] == "fi-info"
+    assert result["tone"] == "info"
+
+
+def test_view_renders_reason_chips_with_icons(auth_client, scan_with_orcid_reason):
+    """Widok renderuje powody jako chipy z ikonami Foundation.
+
+    Ten test by w praktyce złapał regresję typu
+    'TemplateSyntaxError: dedup_tags is not a registered tag library' —
+    bo wywalenie szablonu na {% load %} zwróciłoby 500, nie 200, a brak ikony
+    fi-key dla powodu z ORCID-em sygnalizuje że enrichment nie zadziałał.
+    """
+    response = auth_client.get(reverse("deduplikator_autorow:duplicate_authors"))
+    assert response.status_code == 200, (
+        "Widok zwrócił błąd — najczęstsza przyczyna to "
+        "TemplateSyntaxError z {% load dedup_tags %}"
+    )
+    content = response.content.decode()
+    assert "fi-key" in content, "Brak ikony ORCID (fi-key) w wyrenderowanej stronie"
+    assert "deduplikator-autorow__reason-chip--match" in content, (
+        "Brak chipa w tonie 'match' — enrichment powodów nie działa"
+    )
diff --git a/src/deduplikator_autorow/tests/test_xlsx_export.py b/src/deduplikator_autorow/tests/test_xlsx_export.py
index 29309bc0f..1a5618d09 100644
--- a/src/deduplikator_autorow/tests/test_xlsx_export.py
+++ b/src/deduplikator_autorow/tests/test_xlsx_export.py
@@ -124,11 +124,13 @@ def test_xlsx_structure_and_format():  # noqa: C901
         # Check headers (first row)
         expected_headers = [
             "Główny autor",
+            "ORCID głównego autora",
             "BPP ID głównego autora",
             "BPP URL głównego autora",
             "PBN UID głównego autora",
             "PBN URL głównego autora",
             "Duplikat",
+            "ORCID duplikatu",
             "BPP ID duplikatu",
             "BPP URL duplikatu",
             "PBN UID duplikatu",
@@ -158,38 +160,40 @@ def test_xlsx_structure_and_format():  # noqa: C901
             assert len(data_row) == len(expected_headers)
 
             # Check that BPP URLs are full URLs with HTTPS
-            if len(data_row) > 2 and data_row[2]:  # BPP URL column (C)
-                bpp_url = str(data_row[2])
+            if len(data_row) > 3 and data_row[3]:  # BPP URL column (D, 0-idx 3)
+                bpp_url = str(data_row[3])
                 assert bpp_url.startswith("https://")
                 assert "/bpp/autor/" in bpp_url
 
-            # Check that PBN URLs are properly formatted
-            if len(data_row) > 4 and data_row[4]:  # PBN URL column (E)
-                pbn_url = str(data_row[4])
-                assert pbn_url.startswith("https://pbn.nauka.gov.pl/")
-                assert "/persons/details/" in pbn_url
+            # Check that PBN URLs are properly formatted (LINK_PBN_DO_AUTORA pattern)
+            if len(data_row) > 5 and data_row[5]:  # PBN URL column (F, 0-idx 5)
+                pbn_url = str(data_row[5])
+                assert pbn_url.startswith("https://"), pbn_url
+                assert "/core/#/person/view/" in pbn_url, pbn_url
 
             # Check that similarity is a decimal number (not percentage)
-            if len(data_row) > 10 and data_row[10] is not None:  # Similarity column (K)
-                similarity = data_row[10]
+            if (
+                len(data_row) > 12 and data_row[12] is not None
+            ):  # Similarity (M, idx 12)
+                similarity = data_row[12]
                 assert isinstance(similarity, (int, float))
                 assert 0 <= similarity <= 1  # Should be between 0 and 1
 
             # Check that duplicate count is a positive integer
             if (
-                len(data_row) > 11 and data_row[11] is not None
-            ):  # Duplicate count column (L)
-                duplicate_count = data_row[11]
+                len(data_row) > 13 and data_row[13] is not None
+            ):  # Duplicate count (N, idx 13)
+                duplicate_count = data_row[13]
                 assert isinstance(duplicate_count, int)
                 assert duplicate_count >= 1  # Should be at least 1 duplicate
 
             # Check that URL cells have hyperlinks (if data exists)
             url_columns = [
-                3,
-                5,
-                8,
+                4,
+                6,
                 10,
-            ]  # BPP and PBN URL columns (1-indexed: C, E, H, J)
+                12,
+            ]  # BPP and PBN URL columns (1-indexed: D, F, J, L)
             for col_idx in url_columns:
                 if len(data_row) > col_idx - 1 and data_row[col_idx - 1]:
                     cell = ws.cell(row=2, column=col_idx)  # Check actual cell
diff --git a/src/deduplikator_autorow/tests/test_xlsx_orcid_and_pbn_url.py b/src/deduplikator_autorow/tests/test_xlsx_orcid_and_pbn_url.py
new file mode 100644
index 000000000..33ee44e50
--- /dev/null
+++ b/src/deduplikator_autorow/tests/test_xlsx_orcid_and_pbn_url.py
@@ -0,0 +1,108 @@
+"""Eksport XLSX: kolumny ORCID i poprawny URL do PBN."""
+
+from io import BytesIO
+
+import pytest
+from django.utils import timezone
+from model_bakery import baker
+from openpyxl import load_workbook
+
+from deduplikator_autorow.models import DuplicateCandidate, DuplicateScanRun
+from deduplikator_autorow.utils import export_duplicates_to_xlsx
+
+
+@pytest.fixture
+def candidate_with_orcid_and_pbn(db):
+    """Para autorów z ORCID i PBN UID, oraz Uczelnia z pbn_api_root."""
+    from bpp.models import Uczelnia
+
+    uczelnia, _ = Uczelnia.objects.get_or_create(
+        nazwa="Test U",
+        defaults={
+            "skrot": "TU",
+            "slug": "test-u",
+            "pbn_api_root": "https://pbn-micro-alpha.opi.org.pl",
+        },
+    )
+    if not uczelnia.pbn_api_root:
+        uczelnia.pbn_api_root = "https://pbn-micro-alpha.opi.org.pl"
+        uczelnia.save()
+
+    sci_main = baker.make("pbn_api.Scientist", mongoId="abcd-1234")
+    sci_dup = baker.make("pbn_api.Scientist", mongoId="efgh-5678")
+    main = baker.make(
+        "bpp.Autor",
+        nazwisko="Kowalski",
+        imiona="Jan",
+        orcid="0000-0001-2345-6789",
+        pbn_uid=sci_main,
+    )
+    dup = baker.make(
+        "bpp.Autor",
+        nazwisko="Kowalski",
+        imiona="Jan",
+        orcid="0000-0002-3456-7890",
+        pbn_uid=sci_dup,
+    )
+    scan = DuplicateScanRun.objects.create(
+        status=DuplicateScanRun.Status.COMPLETED,
+        finished_at=timezone.now(),
+    )
+    DuplicateCandidate.objects.create(
+        scan_run=scan,
+        main_autor=main,
+        duplicate_autor=dup,
+        confidence_score=80,
+        confidence_percent=0.6,
+        main_autor_name="Kowalski Jan",
+        duplicate_autor_name="Kowalski Jan",
+        scan_mode="pbn",
+    )
+    return main, dup
+
+
+def _load_xlsx_first_data_row():
+    result = export_duplicates_to_xlsx()
+    wb = load_workbook(BytesIO(result))
+    ws = wb.active
+    headers = [cell.value for cell in ws[1]]
+    data = [cell.value for cell in ws[2]]
+    return headers, data
+
+
+@pytest.mark.django_db
+def test_xlsx_has_orcid_columns(candidate_with_orcid_and_pbn):
+    """Eksport XLSX zawiera ORCID głównego autora i ORCID duplikatu."""
+    headers, _ = _load_xlsx_first_data_row()
+    assert "ORCID głównego autora" in headers, (
+        f"Brak kolumny 'ORCID głównego autora' w eksporcie. Nagłówki: {headers}"
+    )
+    assert "ORCID duplikatu" in headers, (
+        f"Brak kolumny 'ORCID duplikatu' w eksporcie. Nagłówki: {headers}"
+    )
+
+
+@pytest.mark.django_db
+def test_xlsx_orcid_values_filled(candidate_with_orcid_and_pbn):
+    """Wartości ORCID są obecne w wyeksportowanym wierszu."""
+    main, dup = candidate_with_orcid_and_pbn
+    headers, data = _load_xlsx_first_data_row()
+    main_idx = headers.index("ORCID głównego autora")
+    dup_idx = headers.index("ORCID duplikatu")
+    assert data[main_idx] == main.orcid
+    assert data[dup_idx] == dup.orcid
+
+
+@pytest.mark.django_db
+def test_xlsx_pbn_url_uses_pbn_api_root(candidate_with_orcid_and_pbn):
+    """PBN URL korzysta z LINK_PBN_DO_AUTORA + pbn_api_root, nie sedno-webapp."""
+    headers, data = _load_xlsx_first_data_row()
+    main_pbn_idx = headers.index("PBN URL głównego autora")
+    pbn_url = data[main_pbn_idx] or ""
+    assert "sedno-webapp" not in pbn_url, (
+        f"URL nadal wskazuje na stary sedno-webapp: {pbn_url}"
+    )
+    assert "/core/#/person/view/" in pbn_url, (
+        f"URL nie pasuje do wzorca LINK_PBN_DO_AUTORA: {pbn_url}"
+    )
+    assert "abcd-1234" in pbn_url, f"PBN UID nieobecne w URL: {pbn_url}"
diff --git a/src/deduplikator_autorow/urls.py b/src/deduplikator_autorow/urls.py
index 7ea91e41f..614d20661 100644
--- a/src/deduplikator_autorow/urls.py
+++ b/src/deduplikator_autorow/urls.py
@@ -43,4 +43,9 @@
         views.mark_candidate_not_duplicate,
         name="mark_candidate_not_duplicate",
     ),
+    path(
+        "lastname-suggestions/",
+        views.lastname_suggestions,
+        name="lastname_suggestions",
+    ),
 ]
diff --git a/src/deduplikator_autorow/utils/analysis.py b/src/deduplikator_autorow/utils/analysis.py
index 3039baebe..8c0f39840 100644
--- a/src/deduplikator_autorow/utils/analysis.py
+++ b/src/deduplikator_autorow/utils/analysis.py
@@ -6,6 +6,7 @@
 from bpp.models.cache import Rekord
 from pbn_api.models import OsobaZInstytucji
 
+from .analysis_meta import _name_or_initial_match
 from .gender import Gender, plcie_sa_rozne, zgadnij_plec_autora
 from .search import szukaj_kopii
 
@@ -42,6 +43,61 @@ def analiza_duplikatow(osoba_z_instytucji: OsobaZInstytucji) -> dict:  # noqa: C
     plec_glowny = zgadnij_plec_autora(glowny_autor.imiona, glowny_autor.plec)
 
     for duplikat in duplikaty:
+        # HARD REJECTION: imiona zupełnie rozłączne (brak common, similar,
+        # initial overlap) i to NIE swap → nie jest tym samym autorem.
+        # 'Jan' nie może być duplikatem 'Agnieszki' niezależnie od ORCID,
+        # nazwiska czy lat publikacji. Pomijamy całkowicie.
+        if duplikat.imiona and glowny_autor.imiona:
+            imiona_glowny_pre = glowny_autor.imiona.split()
+            imiona_duplikat_pre = duplikat.imiona.split()
+            common_pre = sum(
+                1
+                for ig in imiona_glowny_pre
+                for id_ in imiona_duplikat_pre
+                if ig.lower() == id_.lower()
+            )
+            similar_pre = sum(
+                1
+                for ig in imiona_glowny_pre
+                for id_ in imiona_duplikat_pre
+                if len(ig) >= 3
+                and len(id_) >= 3
+                and ig.lower() != id_.lower()
+                and (
+                    ig.lower().startswith(id_.lower()[:3])
+                    or id_.lower().startswith(ig.lower()[:3])
+                )
+            )
+            initials_g_pre = {ig[0].upper() for ig in imiona_glowny_pre if ig}
+            initials_d_pre = set()
+            for token in imiona_duplikat_pre:
+                if not token:
+                    continue
+                # 'J.' / 'J' / 'Jan' wszystkie dają inicjał na pierwszym znaku
+                initials_d_pre.add(token[0].upper())
+            init_count_pre = len(initials_g_pre & initials_d_pre)
+            # Inicjały też liczone do swap-detection: 'Jan Kowalski' ↔
+            # 'Kowalski J.' (database swap z inicjałem) musi przejść.
+            swap_pre = (
+                bool(duplikat.nazwisko)
+                and bool(glowny_autor.nazwisko)
+                and any(
+                    _name_or_initial_match(duplikat.nazwisko, ig)
+                    for ig in imiona_glowny_pre
+                )
+                and any(
+                    _name_or_initial_match(glowny_autor.nazwisko, id_)
+                    for id_ in imiona_duplikat_pre
+                )
+            )
+            if (
+                common_pre == 0
+                and similar_pre == 0
+                and init_count_pre == 0
+                and not swap_pre
+            ):
+                continue
+
         analiza = {"autor": duplikat, "powody_podobienstwa": [], "pewnosc": 0}  # 0-100%
 
         # Analiza płci - jeśli płcie są na pewno różne, to NIE mogą być duplikatami
@@ -144,14 +200,17 @@ def analiza_duplikatow(osoba_z_instytucji: OsobaZInstytucji) -> dict:  # noqa: C
             imiona_glowny = glowny_autor.imiona.split()
             imiona_duplikat = duplikat.imiona.split()
 
-            # Sprawdź czy nazwisko duplikatu = imię głównego (dokładnie)
+            # Sprawdź czy nazwisko duplikatu pasuje do imienia głównego
+            # (dokładnie LUB jako inicjał, np. 'J.' do 'Jan').
             dokladna_zamiana_nazwisko_duplikat = any(
-                duplikat.nazwisko.lower() == imie_g.lower() for imie_g in imiona_glowny
+                _name_or_initial_match(duplikat.nazwisko, imie_g)
+                for imie_g in imiona_glowny
             )
 
-            # Sprawdź czy nazwisko głównego = imię duplikatu (dokładnie)
+            # Sprawdź czy nazwisko głównego pasuje do imienia duplikatu
+            # (dokładnie LUB jako inicjał).
             dokladna_zamiana_nazwisko_glowny = any(
-                glowny_autor.nazwisko.lower() == imie_d.lower()
+                _name_or_initial_match(glowny_autor.nazwisko, imie_d)
                 for imie_d in imiona_duplikat
             )
 
diff --git a/src/deduplikator_autorow/utils/analysis_meta.py b/src/deduplikator_autorow/utils/analysis_meta.py
index 34a6d3244..7cc89291a 100644
--- a/src/deduplikator_autorow/utils/analysis_meta.py
+++ b/src/deduplikator_autorow/utils/analysis_meta.py
@@ -13,8 +13,77 @@ def _common_initials(imiona_a: list[str], imiona_b: list[str]) -> int:
     return len(initials_a & initials_b)
 
 
+def _name_or_initial_match(a: str, b: str) -> bool:
+    """True jeśli ``a == b`` albo jedna strona jest inicjałem drugiej.
+
+    Inicjał = pojedyncza litera (ewentualnie z kropką, jak 'J.'). Tym samym
+    'jan' i 'j.' są dopasowaniem, ale 'jan' i 'ja' już nie.
+    """
+    if not a or not b:
+        return False
+    a_clean = a.lower().rstrip(".")
+    b_clean = b.lower().rstrip(".")
+    if a_clean == b_clean:
+        return True
+    if len(a_clean) == 1 and b_clean.startswith(a_clean):
+        return True
+    if len(b_clean) == 1 and a_clean.startswith(b_clean):
+        return True
+    return False
+
+
 def analiza_pary_meta(a: dict, b: dict) -> tuple[int, list[str]]:  # noqa: C901
-    """Zwraca (score, reasons) dla pary (a, b) na bazie meta-cache."""
+    """Zwraca (score, reasons) dla pary (a, b) na bazie meta-cache.
+
+    HARD REJECTION: jeżeli obie strony mają imiona, ale nie ma między nimi
+    żadnego punktu wspólnego (ani pełne imię, ani 3-prefix, ani inicjał),
+    a jednocześnie NIE wykryto pełnej zamiany imię↔nazwisko, kandydat jest
+    natychmiast odrzucany. Zwracamy mocno ujemny score, którego nie przepuści
+    filtr `score >= min_confidence` w `search_general.generate_pairs`. To nie jest
+    duplikat — żaden bonus z innych kryteriów (ORCID, nazwisko, lata) nie
+    może tego nadpisać. 'Jan' i 'Agnieszka' to różne osoby.
+    """
+    # Wczesne policzenie sygnałów dopasowania imion + ewentualnego swap-a.
+    common_imie = set(a["imiona_norm"]) & set(b["imiona_norm"])
+    similar_imie = 0
+    for ia in a["imiona_norm"]:
+        for ib in b["imiona_norm"]:
+            if len(ia) >= 3 and len(ib) >= 3 and ia != ib:
+                if ia.startswith(ib[:3]) or ib.startswith(ia[:3]):
+                    similar_imie += 1
+    init_count_imie = _common_initials(a["imiona_norm"], b["imiona_norm"])
+    # Swap obejmuje też przypadek z inicjałem: 'Jan Kowalski' ↔ 'Kowalski J.'
+    # gdzie database swap (imiona='Kowalski', nazwisko='J.') ma inicjał 'J'
+    # pasujący do imienia 'Jan' z drugiej strony.
+    wykryto_swap = (
+        bool(a["nazwisko_norm"])
+        and bool(b["nazwisko_norm"])
+        and bool(a["imiona_norm"])
+        and bool(b["imiona_norm"])
+        and any(
+            _name_or_initial_match(a["nazwisko_norm"], imie)
+            for imie in b["imiona_norm"]
+        )
+        and any(
+            _name_or_initial_match(b["nazwisko_norm"], imie)
+            for imie in a["imiona_norm"]
+        )
+    )
+
+    if (
+        a["imiona_norm"]
+        and b["imiona_norm"]
+        and not common_imie
+        and similar_imie == 0
+        and init_count_imie == 0
+        and not wykryto_swap
+    ):
+        return -1000, [
+            f"odrzucono: zupełnie różne imiona "
+            f"('{' '.join(a['imiona_norm'])}' vs "
+            f"'{' '.join(b['imiona_norm'])}') — to różni autorzy"
+        ]
+
     score = 0
     reasons: list[str] = []
 
@@ -78,37 +147,21 @@ def analiza_pary_meta(a: dict, b: dict) -> tuple[int, list[str]]:  # noqa: C901
                         f"({', '.join(sorted(common_parts))})"
                     )
 
-    if (
-        a["nazwisko_norm"]
-        and b["nazwisko_norm"]
-        and a["imiona_norm"]
-        and b["imiona_norm"]
-    ):
-        if (a["nazwisko_norm"] in b["imiona_norm"]) and (
-            b["nazwisko_norm"] in a["imiona_norm"]
-        ):
-            score += 50
-            reasons.append("wykryto pełną zamianę imienia z nazwiskiem")
+    if wykryto_swap:
+        score += 50
+        reasons.append("wykryto pełną zamianę imienia z nazwiskiem")
 
-    common = set(a["imiona_norm"]) & set(b["imiona_norm"])
-    if common:
-        score += 30 * len(common)
-        reasons.append(f"wspólne imię ({len(common)})")
+    if common_imie:
+        score += 30 * len(common_imie)
+        reasons.append(f"wspólne imię ({len(common_imie)})")
 
-    similar = 0
-    for ia in a["imiona_norm"]:
-        for ib in b["imiona_norm"]:
-            if len(ia) >= 3 and len(ib) >= 3 and ia != ib:
-                if ia.startswith(ib[:3]) or ib.startswith(ia[:3]):
-                    similar += 1
-    if similar:
-        score += 15 * similar
-        reasons.append(f"podobne imię ({similar})")
-
-    init_count = _common_initials(a["imiona_norm"], b["imiona_norm"])
-    if init_count:
-        score += 5 * init_count
-        reasons.append(f"pasujące inicjały ({init_count})")
+    if similar_imie:
+        score += 15 * similar_imie
+        reasons.append(f"podobne imię ({similar_imie})")
+
+    if init_count_imie:
+        score += 5 * init_count_imie
+        reasons.append(f"pasujące inicjały ({init_count_imie})")
 
     if not b["imiona_norm"] and a["imiona_norm"]:
         score += 10
diff --git a/src/deduplikator_autorow/utils/export.py b/src/deduplikator_autorow/utils/export.py
index 599072569..67a378117 100644
--- a/src/deduplikator_autorow/utils/export.py
+++ b/src/deduplikator_autorow/utils/export.py
@@ -22,11 +22,17 @@ def _get_site_domain():
         return "https://bpp.iplweb.pl"
 
 
-def _create_pbn_url(pbn_uid):
-    """Tworzy URL do profilu autora w PBN."""
-    if pbn_uid:
-        return f"https://pbn.nauka.gov.pl/sedno-webapp/persons/details/{pbn_uid}"
-    return ""
+def _create_pbn_url(autor):
+    """Zwraca aktualny URL do profilu autora w PBN.
+
+    Używa Autor.link_do_pbn(), która łączy LINK_PBN_DO_AUTORA z pbn_api_root
+    z konfiguracji Uczelni - dotychczas zaszyty hardcoded https://pbn.nauka.gov.pl/
+    sedno-webapp/persons/details/{uid} prowadził do martwego/pustego endpointu.
+    """
+    if not autor or not autor.pbn_uid_id:
+        return ""
+    url = autor.link_do_pbn()
+    return url or ""
 
 
 def _get_author_name(candidate_name, autor):
@@ -46,15 +52,17 @@ def _build_candidate_row(candidate, site_domain, duplicate_counts):
 
     return [
         main_name,
+        main.orcid or "",
         main.pk,
         f"{site_domain}/bpp/autor/{main.pk}/",
         main.pbn_uid_id or "",
-        _create_pbn_url(main.pbn_uid_id),
+        _create_pbn_url(main),
         dup_name,
+        dup.orcid or "",
         dup.pk,
         f"{site_domain}/bpp/autor/{dup.pk}/",
         dup.pbn_uid_id or "",
-        _create_pbn_url(dup.pbn_uid_id),
+        _create_pbn_url(dup),
         round(candidate.confidence_percent, 2),
         duplicate_counts[candidate.main_autor_id],
         "PBN" if candidate.scan_mode == "pbn" else "Ogólny",
@@ -63,8 +71,12 @@ def _build_candidate_row(candidate, site_domain, duplicate_counts):
 
 def _format_url_hyperlinks(ws, data_rows_count):
     """Formatuje kolumny URL jako klikalne linki."""
-    # Kolumny z URL-ami: C (BPP główny), E (PBN główny), H (BPP duplikat), J (PBN duplikat)
-    url_columns = [3, 5, 8, 10]  # 1-indexed dla Excel
+    # Kolumny z URL-ami (1-indexed):
+    #   D = BPP URL głównego autora
+    #   F = PBN URL głównego autora
+    #   J = BPP URL duplikatu
+    #   L = PBN URL duplikatu
+    url_columns = [4, 6, 10, 12]
 
     for row_idx in range(2, data_rows_count + 2):  # Start from row 2 (after header)
         for col_idx in url_columns:
@@ -84,18 +96,20 @@ def export_duplicates_to_xlsx():
 
     Struktura pliku XLSX:
     - Kolumna A: Główny autor (NAZWISKO IMIĘ)
-    - Kolumna B: BPP ID głównego autora
-    - Kolumna C: BPP URL głównego autora (kliknij link)
-    - Kolumna D: PBN UID głównego autora
-    - Kolumna E: PBN URL głównego autora (kliknij link)
-    - Kolumna F: Duplikat (NAZWISKO IMIĘ)
-    - Kolumna G: BPP ID duplikatu
-    - Kolumna H: BPP URL duplikatu (kliknij link)
-    - Kolumna I: PBN UID duplikatu
-    - Kolumna J: PBN URL duplikatu (kliknij link)
-    - Kolumna K: Pewność podobieństwa (0.0-1.0)
-    - Kolumna L: Ilość duplikatów
-    - Kolumna M: Tryb (PBN / Ogólny)
+    - Kolumna B: ORCID głównego autora
+    - Kolumna C: BPP ID głównego autora
+    - Kolumna D: BPP URL głównego autora (kliknij link)
+    - Kolumna E: PBN UID głównego autora
+    - Kolumna F: PBN URL głównego autora (kliknij link)
+    - Kolumna G: Duplikat (NAZWISKO IMIĘ)
+    - Kolumna H: ORCID duplikatu
+    - Kolumna I: BPP ID duplikatu
+    - Kolumna J: BPP URL duplikatu (kliknij link)
+    - Kolumna K: PBN UID duplikatu
+    - Kolumna L: PBN URL duplikatu (kliknij link)
+    - Kolumna M: Pewność podobieństwa (0.0-1.0)
+    - Kolumna N: Ilość duplikatów
+    - Kolumna O: Tryb (PBN / Ogólny)
 
     Returns:
         bytes: Zawartość pliku XLSX
@@ -130,11 +144,13 @@ def export_duplicates_to_xlsx():
     # Nagłówki
     headers = [
         "Główny autor",
+        "ORCID głównego autora",
         "BPP ID głównego autora",
         "BPP URL głównego autora",
         "PBN UID głównego autora",
         "PBN URL głównego autora",
         "Duplikat",
+        "ORCID duplikatu",
         "BPP ID duplikatu",
         "BPP URL duplikatu",
         "PBN UID duplikatu",
diff --git a/src/deduplikator_autorow/utils/reason_display.py b/src/deduplikator_autorow/utils/reason_display.py
new file mode 100644
index 000000000..8124a3299
--- /dev/null
+++ b/src/deduplikator_autorow/utils/reason_display.py
@@ -0,0 +1,73 @@
+"""Mapowanie powodów podobieństwa autorów na ikony Foundation i ton koloru.
+
+Logika żyje w module Pythona zamiast w Django template-tag library, bo
+auto-discovery template-tagów wykonuje się raz, przy starcie procesu —
+świeżo dodany pakiet `templatetags/` nie zostaje zauważony bez restartu.
+Zwykły moduł utils ładuje się przy każdym auto-reloadzie pliku.
+"""
+
+# (fragment, icon_class, tone)
+# tone: match (zielony, mocna przesłanka), info (niebieski, neutralna),
+#       weak (szary, słaba), warn (pomarańczowy, ostrożnie).
+# Kolejność ma znaczenie — pierwszy pasujący wzorzec wygrywa, więc
+# specyficzne frazy idą przed ogólnymi.
+_PATTERNS: list[tuple[str, str, str]] = [
+    # ORCID — najmocniejsze przesłanki tożsamości
+    ("identyczny ORCID", "fi-key", "match"),
+    ("różny ORCID", "fi-x-circle", "warn"),
+    ("brak ORCID", "fi-key", "weak"),
+    # Nazwiska
+    ("identyczne nazwisko", "fi-checkbox", "match"),
+    ("identyczne człony nazwiska", "fi-checkbox", "match"),
+    ("podobne nazwisko", "fi-magnifying-glass", "info"),
+    # Imiona
+    ("zamianę imienia z nazwiskiem", "fi-loop", "warn"),
+    ("wspólne imię", "fi-torsos-female-male", "match"),
+    ("podobne imię", "fi-torso", "info"),
+    ("pasujące inicjały", "fi-text-color", "info"),
+    ("brak imion", "fi-prohibited", "weak"),
+    # Tytuł naukowy
+    ("identyczny tytuł naukowy", "fi-trophy", "match"),
+    ("różny tytuł naukowy", "fi-trophy", "warn"),
+    ("brak tytułu naukowego", "fi-trophy", "weak"),
+    # Liczba publikacji
+    ("mało publikacji", "fi-page", "info"),
+    ("średnio publikacji", "fi-page-multiple", "info"),
+    ("wiele publikacji", "fi-page-copy", "weak"),
+    # Lata publikacji
+    ("wspólne lata publikacji", "fi-calendar", "match"),
+    ("bliskie lata publikacji", "fi-calendar", "info"),
+    ("średnia odległość lat", "fi-calendar", "weak"),
+    ("duża odległość lat", "fi-calendar", "warn"),
+    # Fallbacki - szersze frazy
+    ("ORCID", "fi-key", "info"),
+    ("nazwisk", "fi-magnifying-glass", "info"),
+    ("imię", "fi-torsos-female-male", "info"),
+    ("imion", "fi-torsos-female-male", "info"),
+    ("publikacj", "fi-page", "info"),
+    ("lata", "fi-calendar", "info"),
+    ("tytuł", "fi-trophy", "info"),
+]
+
+
+def enrich_reason(reason: str) -> dict:
+    """Zwraca dict {text, icon, tone} dla pojedynczego powodu podobieństwa.
+
+    Dla pustego/None reason zwraca neutralny chip z ikoną fi-info.
+    """
+    text = (reason or "").strip()
+    if not text:
+        return {"text": "", "icon": "fi-info", "tone": "info"}
+
+    lowered = text.lower()
+    for needle, icon, tone in _PATTERNS:
+        if needle.lower() in lowered:
+            return {"text": text, "icon": icon, "tone": tone}
+    return {"text": text, "icon": "fi-info", "tone": "info"}
+
+
+def enrich_reasons(reasons) -> list[dict]:
+    """Wzbogaca listę powodów. Akceptuje None, listę stringów lub iterowalne."""
+    if not reasons:
+        return []
+    return [enrich_reason(r) for r in reasons]
diff --git a/src/django_bpp/templates/top_bar.html b/src/django_bpp/templates/top_bar.html
index 19f95182e..553075a31 100644
--- a/src/django_bpp/templates/top_bar.html
+++ b/src/django_bpp/templates/top_bar.html
@@ -108,7 +108,7 @@
                     raportMenu.parentElement.style.display = 'none';
                 }
             </script>
-
+            <!-- djlint:disable=H025 -->
             <li class="has-submenu">
                 <a href="#" data-toggle aria-haspopup="menu"><i class="fi-graph-bar" aria-hidden="true"></i> ewaluacja</a>
                 <div class="menu-columns-wrapper" data-submenu id="top-bar-menu-ewaluacji">
@@ -135,12 +135,11 @@
                         {% czy_pokazywac raport_slotow_uczelnia %}
                             <li><a href="{% url "raport_slotow:lista-raport-slotow-uczelnia" %}"><i class="fi-foundation"></i> raport slotów -
                                 uczelnia</a></li>
-                            <li><a href="{% url "raport_slotow:index-ewaluacja" %}"><i class="fi-clipboard-pencil"></i> raport slotów - ewaluacja</a>
+                            <li><a href="{% url "raport_slotow:index-ewaluacja" %}"><i class="fi-clipboard-pencil"></i> raport slotów - ewaluacja</a></li>
                             <li><a href="{% url "raport_slotow:index-upowaznienia" %}"><i class="fi-shield"></i> raport ewaluacja -
-                                upoważnienia</a>
+                                upoważnienia</a></li>
                             {# Disabled 3N report #}
-                            {# <li><a href="{% url "ewaluacja2021:lista-raportow3n" %}"><i class="fi-page-multiple" style="font-size: 1.6rem; margin-right: 0.8rem;"></i> raport ewaluacja - 3N</a> #}
-                            </li>
+                            {# <li><a href="{% url "ewaluacja2021:lista-raportow3n" %}"><i class="fi-page-multiple" style="font-size: 1.6rem; margin-right: 0.8rem;"></i> raport ewaluacja - 3N</a></li> #}
                         {% end_czy_pokazywac %}
                     </ul>
                 </div>
@@ -178,7 +177,7 @@
                                         <li><a href="{% url "pbn_wysylka_oswiadczen:main" %}"><i class="fi-upload"></i> wysylka oswiadczen do PBN</a></li>
                                         <li><a href="{% url "pbn_export_queue:export-queue-list" %}"><i class="fi-list"></i> kolejka eksportu do PBN</a></li>
                                         <li><a href="{% url "importer_autorow_pbn:main" %}"><i class="fi-torsos-female-male"></i> importer autorów PBN</a></li>
-                                        <li><a href="{% url "deduplikator_autorow:duplicate_authors" %}"><i class="fi-filter"></i> deduplikator autorów PBN</a></li>
+                                        <li><a href="{% url "deduplikator_autorow:duplicate_authors" %}"><i class="fi-filter"></i> deduplikator autorów</a></li>
                                     </ul>
                                     <ul class="menu vertical column-2">
                                         <li><a href="{% url "komparator_pbn:main" %}"><i class="fi-arrows-compress"></i> komparator PBN</a></li>
@@ -221,6 +220,8 @@
                                     <li><a href="{% url "rozbieznosci_pk:index" %}"><i class="fi-graph-pie"></i> rozbieżności
                                         punktacji MNiSW</a></li>
                                     <hr>
+                                    <li><a href="{% url "deduplikator_autorow:duplicate_authors" %}">
+                                        <i class="fi-filter"></i> deduplikator autorów</a></li>
                                     <li><a href="{% url "deduplikator_publikacji:duplicate_publications" %}">
                                         <i class="fi-page-copy"></i> deduplikator publikacji</a></li>
                                     <li><a href="{% url "deduplikator_zrodel:duplicate_sources" %}">
@@ -311,9 +312,10 @@
                             <li><a href="/admin/bpp/bppuser/" target="_blank"><i class="fi-torso-business"></i> zaloguj jako inny użytkownik</a></li>
                         {% endif %}
                     </ul>
-                </li>
+            </li>
+            <!-- djlint:enable=H025 -->
 
-                <script type="text/javascript">
+            <script type="text/javascript">
                     // Check if wyloguj menu has only one item and simplify if so
                     document.addEventListener('DOMContentLoaded', function() {
                         var wylogujMenuItem = document.getElementById('wyloguj-menu-item');

From afc1c57d2bd85126a8e8a22c6b52a52bfdddc5c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sun, 3 May 2026 08:55:08 +0200
Subject: [PATCH 21/25] refactor(deduplikator_autorow): split views.py (1177L)
 into views/ package
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Powtórzenie splitu z dev (commit 917909206) z uwzględnieniem nowych
funkcji wprowadzonych na feature branchu:
- helpers.py: + _read_param, _scientist_id_to_autor_id, _resolve_autor_id,
  get_running_scan, _get_pending_candidates_for_main_autor,
  _get_next_candidate_group
- duplicates.py: + lastname_suggestions
- ignore.py: split ignore_author → ignore_scientist + ignore_autor;
  reset_ignored_authors → reset_ignored_scientists + reset_ignored_autorzy;
  + _trigger_rescan_after_reset
- merge.py: scal_autorow_view + delete_author (bez zmian funkcjonalnych)
- scan.py: start_scan_view, cancel_scan_view, scan_status_view
- export.py: download_duplicates_xlsx
- __init__.py: re-export wszystkich symboli (backward compat z urls.py i
  testami importującymi z deduplikator_autorow.views)

views.py usunięty.
---
 src/deduplikator_autorow/views.py            | 1177 ------------------
 src/deduplikator_autorow/views/__init__.py   |   91 ++
 src/deduplikator_autorow/views/duplicates.py |  451 +++++++
 src/deduplikator_autorow/views/export.py     |   52 +
 src/deduplikator_autorow/views/helpers.py    |  325 +++++
 src/deduplikator_autorow/views/ignore.py     |  171 +++
 src/deduplikator_autorow/views/merge.py      |  161 +++
 src/deduplikator_autorow/views/scan.py       |  124 ++
 8 files changed, 1375 insertions(+), 1177 deletions(-)
 delete mode 100644 src/deduplikator_autorow/views.py
 create mode 100644 src/deduplikator_autorow/views/__init__.py
 create mode 100644 src/deduplikator_autorow/views/duplicates.py
 create mode 100644 src/deduplikator_autorow/views/export.py
 create mode 100644 src/deduplikator_autorow/views/helpers.py
 create mode 100644 src/deduplikator_autorow/views/ignore.py
 create mode 100644 src/deduplikator_autorow/views/merge.py
 create mode 100644 src/deduplikator_autorow/views/scan.py

diff --git a/src/deduplikator_autorow/views.py b/src/deduplikator_autorow/views.py
deleted file mode 100644
index 646588e96..000000000
--- a/src/deduplikator_autorow/views.py
+++ /dev/null
@@ -1,1177 +0,0 @@
-import sys
-import traceback
-from datetime import timedelta
-from functools import wraps
-
-import rollbar
-from django.contrib import messages
-from django.contrib.auth.decorators import login_required
-from django.core.exceptions import PermissionDenied
-from django.http import JsonResponse
-from django.shortcuts import redirect, render
-from django.utils import timezone
-from django.views.decorators.http import require_http_methods
-
-from bpp.const import GR_WPROWADZANIE_DANYCH
-from bpp.models import Autor
-from bpp.models.cache import Rekord
-from pbn_api.models import Scientist
-from pbn_downloader_app.freshness import is_pbn_people_data_fresh
-from pbn_downloader_app.models import PbnDownloadTask
-
-from .models import (
-    DuplicateCandidate,
-    DuplicateScanRun,
-    IgnoredAuthor,
-    IgnoredScientist,
-    LogScalania,
-    NotADuplicate,
-)
-from .utils import (
-    count_authors_with_lastname,
-    export_duplicates_to_xlsx,
-    scal_autora,
-    search_author_by_lastname,
-    znajdz_pierwszego_autora_z_duplikatami,
-)
-from .utils.counters import get_latest_usable_scan
-from .utils.reason_display import enrich_reasons
-
-# Minimalny próg pewności do wyświetlania duplikatów
-# Duplikaty z pewnością poniżej tego progu nie będą pokazywane
-MIN_PEWNOSC_DO_WYSWIETLENIA = 50
-
-
-def group_required(group_name):
-    """
-    Decorator that requires user to be logged in and belong to a specific group.
-    """
-
-    def decorator(view_func):
-        @wraps(view_func)
-        @login_required
-        def wrapper(request, *args, **kwargs):
-            if (
-                not request.user.is_superuser
-                and not request.user.groups.filter(name=group_name).exists()
-            ):
-                raise PermissionDenied
-            return view_func(request, *args, **kwargs)
-
-        return wrapper
-
-    return decorator
-
-
-def _get_excluded_authors_from_session(request):
-    """Get excluded authors from session as Scientist objects."""
-    skipped_authors_ids = request.session.get("skipped_authors", [])
-    if skipped_authors_ids:
-        return list(Scientist.objects.filter(pk__in=skipped_authors_ids))
-    return []
-
-
-def _handle_search_request(search_lastname):
-    """Handle search request and return scientist and count."""
-    scientist = search_author_by_lastname(search_lastname, excluded_authors=None)
-    search_results_count = count_authors_with_lastname(search_lastname)
-    return scientist, search_results_count
-
-
-def _clear_navigation_session(request):
-    """Clear skipped authors and navigation history from session."""
-    if "skipped_authors" in request.session:
-        del request.session["skipped_authors"]
-    if "navigation_history" in request.session:
-        del request.session["navigation_history"]
-    request.session.modified = True
-
-
-def _handle_go_previous(request, navigation_history, excluded_authors):
-    """Handle 'go previous' navigation action."""
-    if not navigation_history:
-        return znajdz_pierwszego_autora_z_duplikatami(excluded_authors)
-
-    previous_scientist_id = navigation_history.pop()
-    request.session["navigation_history"] = navigation_history
-    request.session.modified = True
-
-    try:
-        return Scientist.objects.get(pk=previous_scientist_id)
-    except Scientist.DoesNotExist:
-        return znajdz_pierwszego_autora_z_duplikatami(excluded_authors)
-
-
-def _handle_skip_current(request, scientist, excluded_authors):
-    """Handle 'skip current' navigation action."""
-    if not scientist:
-        return scientist
-
-    # Save to navigation history
-    if "navigation_history" not in request.session:
-        request.session["navigation_history"] = []
-    request.session["navigation_history"].append(scientist.pk)
-
-    # Add to skipped
-    if "skipped_authors" not in request.session:
-        request.session["skipped_authors"] = []
-    if scientist.pk not in request.session["skipped_authors"]:
-        request.session["skipped_authors"].append(scientist.pk)
-    request.session.modified = True
-
-    # Find next author
-    excluded_authors.append(scientist)
-    return znajdz_pierwszego_autora_z_duplikatami(excluded_authors)
-
-
-def _calculate_year_range(queryset):
-    """Calculate year range from a queryset with 'rok' field."""
-    lata = queryset.filter(rok__isnull=False).values_list("rok", flat=True)
-    if not lata:
-        return None
-
-    min_rok = min(lata)
-    max_rok = max(lata)
-    if min_rok == max_rok:
-        return str(min_rok)
-    return f"{min_rok}-{max_rok}"
-
-
-def _build_duplicate_publication_data(autor, metryka):
-    """Build publication data for a duplicate author."""
-    publikacje = Rekord.objects.prace_autora(autor)[:500]
-    publikacje_count = Rekord.objects.prace_autora(autor).count()
-    year_range = _calculate_year_range(Rekord.objects.prace_autora(autor))
-
-    return {
-        "autor": autor,
-        "publikacje": publikacje,
-        "publikacje_count": publikacje_count,
-        "publikacje_year_range": year_range,
-    }
-
-
-def _add_dyscypliny_to_duplicates(duplikaty_z_publikacjami):
-    """Add discipline information to duplicate authors."""
-    from bpp.models import Autor_Dyscyplina
-
-    for duplikat_data in duplikaty_z_publikacjami:
-        duplikat_data["dyscypliny"] = (
-            Autor_Dyscyplina.objects.filter(
-                autor=duplikat_data["autor"], rok__gte=2022, rok__lte=2025
-            )
-            .select_related("dyscyplina_naukowa", "subdyscyplina_naukowa")
-            .order_by("rok")
-        )
-
-
-def _build_context_from_candidate(candidate, glowny_autor):
-    """Build publication data for a duplicate from stored DuplicateCandidate."""
-    publikacje = Rekord.objects.prace_autora(candidate.duplicate_autor)[:500]
-    publikacje_count = candidate.duplicate_publications_count
-    year_range = _calculate_year_range(
-        Rekord.objects.prace_autora(candidate.duplicate_autor)
-    )
-
-    # Display percent: znormalizowane 0..1 → 0..100, zaokrąglone i sklampowane.
-    # Surowy confidence_score może być < 0 lub > 100 i historycznie pokazywał
-    # użytkownikom wartości w rodzaju 140% — confidence_percent jest jedynym
-    # polem, które gwarantuje sensowny zakres do prezentacji.
-    pewnosc_display = max(0, min(100, round((candidate.confidence_percent or 0) * 100)))
-
-    return {
-        "autor": candidate.duplicate_autor,
-        "publikacje": publikacje,
-        "publikacje_count": publikacje_count,
-        "publikacje_year_range": year_range,
-        "analiza": {
-            "autor": candidate.duplicate_autor,
-            "pewnosc": pewnosc_display,
-            "powody_podobienstwa": enrich_reasons(candidate.reasons),
-        },
-        "candidate_id": candidate.pk,  # For marking as not duplicate
-    }
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-def duplicate_authors_view(request):  # noqa: C901
-    """
-    Widok pokazujący główny rekord autora wraz z możliwymi duplikatami
-    i ich publikacjami (do 500 na duplikat).
-
-    Uses pre-computed duplicates from DuplicateCandidate table.
-    """
-    from bpp.models import Autor_Dyscyplina
-
-    # Get scan status
-    running_scan = get_running_scan()
-    completed_scan = get_latest_usable_scan()
-
-    # Filter mode: pbn|general|both (default both)
-    mode = request.GET.get("mode", "both")
-    if mode not in ("pbn", "general", "both"):
-        mode = "both"
-
-    # Filter confidence band: all|high|low (default all). high=>=50%, low=<50%.
-    # Próg porównujemy do confidence_percent jako ułamka, bo display % jest
-    # liczone z confidence_percent * 100 z klampem.
-    confidence_band = request.GET.get("confidence", "all")
-    if confidence_band not in ("all", "high", "low"):
-        confidence_band = "all"
-    confidence_threshold_frac = MIN_PEWNOSC_DO_WYSWIETLENIA / 100.0
-
-    # Common context
-    not_duplicate_count = NotADuplicate.objects.count()
-    ignored_authors_count = IgnoredScientist.objects.count()
-    latest_pbn_download = PbnDownloadTask.get_latest_task()
-
-    # Check PBN people data freshness
-    pbn_data_fresh, pbn_stale_message, pbn_last_download = is_pbn_people_data_fresh()
-
-    recent_merges = (
-        LogScalania.objects.filter(created_by=request.user)
-        .select_related("main_autor", "dyscyplina_before", "dyscyplina_after")
-        .order_by("-created_on")[:10]
-    )
-
-    # Base context for all scenarios
-    context = {
-        "scientist": None,
-        "glowny_autor": None,
-        "latest_pbn_download": latest_pbn_download,
-        "duplikaty_z_publikacjami": [],
-        "analiza": None,
-        "has_skipped_authors": False,
-        "has_previous_authors": False,
-        "total_authors_with_duplicates": 0,
-        "not_duplicate_count": not_duplicate_count,
-        "ignored_authors_count": ignored_authors_count,
-        "search_lastname": "",
-        "search_results_count": None,
-        "recent_merges": recent_merges,
-        # New scan-related context
-        "running_scan": running_scan,
-        "completed_scan": completed_scan,
-        "no_scan_available": not completed_scan and not running_scan,
-        "pending_candidates_count": 0,
-        "pending_pbn_count": 0,
-        "pending_general_count": 0,
-        # Filter mode (pbn|general|both)
-        "mode": mode,
-        # Navigation
-        "skip_count": 0,
-        # PBN data freshness
-        "pbn_data_fresh": pbn_data_fresh,
-        "pbn_stale_message": pbn_stale_message,
-        "pbn_last_download": pbn_last_download,
-    }
-
-    # If no completed scan, show "run scan first" message
-    if not completed_scan:
-        if running_scan:
-            messages.info(
-                request,
-                f"Skanowanie w toku: {running_scan.progress_percent}% "
-                f"({running_scan.authors_scanned}/{running_scan.total_authors_to_scan} autorów)",
-            )
-        return render(request, "deduplikator_autorow/duplicate_authors.html", context)
-
-    # Count pending candidates
-    base_pending_qs = DuplicateCandidate.objects.filter(
-        scan_run=completed_scan,
-        status=DuplicateCandidate.Status.PENDING,
-    )
-    pending_count = base_pending_qs.count()
-    context["pending_candidates_count"] = pending_count
-    context["total_authors_with_duplicates"] = pending_count
-    context["pending_pbn_count"] = base_pending_qs.filter(scan_mode="pbn").count()
-    context["pending_general_count"] = base_pending_qs.filter(
-        scan_mode="general"
-    ).count()
-    context["confidence_band"] = confidence_band
-
-    # Handle search by lastname
-    search_lastname = request.GET.get("search_lastname", "").strip()
-    context["search_lastname"] = search_lastname
-
-    if search_lastname:
-        # Search within stored candidates - confidence_band celowo NIE filtruje
-        # wyboru głównego autora (filtr per-autor stosujemy niżej, na liście
-        # candidates_for_author).
-        candidates = (
-            DuplicateCandidate.objects.filter(
-                scan_run=completed_scan,
-                status=DuplicateCandidate.Status.PENDING,
-                main_autor__nazwisko__icontains=search_lastname,
-            )
-            .select_related("main_autor", "duplicate_autor")
-            .order_by("-priority", "-confidence_score")
-        )
-        if mode != "both":
-            candidates = candidates.filter(scan_mode=mode)
-
-        context["search_results_count"] = (
-            candidates.values("main_autor").distinct().count()
-        )
-
-        if candidates.exists():
-            search_author_ids = list(
-                candidates.values_list("main_autor", flat=True)
-                .distinct()
-                .order_by("main_autor")
-            )
-            try:
-                skip_count = int(request.GET.get("skip_count", 0))
-            except (ValueError, TypeError):
-                skip_count = 0
-            if skip_count >= len(search_author_ids):
-                skip_count = 0
-            glowny_autor_id = search_author_ids[skip_count]
-            glowny_autor = Autor.objects.get(pk=glowny_autor_id)
-            candidates_for_author = candidates.filter(main_autor=glowny_autor)
-            context["skip_count"] = skip_count
-            context["search_total_authors"] = len(search_author_ids)
-            context["search_has_prev"] = skip_count > 0
-            context["search_has_next"] = skip_count < len(search_author_ids) - 1
-        else:
-            glowny_autor = None
-            candidates_for_author = DuplicateCandidate.objects.none()
-    else:
-        # Handle navigation - use skip_count as offset
-        try:
-            skip_count = int(request.GET.get("skip_count", 0))
-        except (ValueError, TypeError):
-            skip_count = 0
-
-        # Get next author with pending duplicates using offset.
-        # confidence_band NIE jest tu przekazywane — chcemy iterować po
-        # WSZYSTKICH głównych autorach niezależnie od pewności ich kandydatów,
-        # filtr stosujemy niżej tylko na widocznym podzbiorze.
-        glowny_autor, candidates_for_author, skip_count = _get_next_candidate_group(
-            completed_scan,
-            skip_count=skip_count,
-            mode=mode,
-        )
-        context["skip_count"] = skip_count
-
-    # Filter per-author by confidence band (NOT main author selection).
-    # Liczniki "X / Y" oraz per-band wyliczamy zanim podstawimy filtr.
-    if glowny_autor:
-        candidates_total_for_main = candidates_for_author.count()
-        candidates_high_for_main = candidates_for_author.filter(
-            confidence_percent__gte=confidence_threshold_frac
-        ).count()
-        candidates_low_for_main = candidates_total_for_main - candidates_high_for_main
-    else:
-        candidates_total_for_main = 0
-        candidates_high_for_main = 0
-        candidates_low_for_main = 0
-    if confidence_band == "high":
-        candidates_for_author = candidates_for_author.filter(
-            confidence_percent__gte=confidence_threshold_frac
-        )
-    elif confidence_band == "low":
-        candidates_for_author = candidates_for_author.filter(
-            confidence_percent__lt=confidence_threshold_frac
-        )
-    context["candidates_total_for_main"] = candidates_total_for_main
-    context["candidates_high_for_main"] = candidates_high_for_main
-    context["candidates_low_for_main"] = candidates_low_for_main
-
-    if not glowny_autor:
-        if pending_count == 0:
-            messages.info(
-                request,
-                "Brak duplikatów do sprawdzenia. Wszystkie zostały już przetworzone.",
-            )
-        return render(request, "deduplikator_autorow/duplicate_authors.html", context)
-
-    # Build context for the main author
-    context["glowny_autor"] = glowny_autor
-
-    # Try to get scientist for main author (for backward compatibility)
-    if glowny_autor.pbn_uid:
-        context["scientist"] = glowny_autor.pbn_uid
-
-    # Build duplicate list from stored candidates
-    duplikaty_z_publikacjami = []
-    for candidate in candidates_for_author:
-        pub_data = _build_context_from_candidate(candidate, glowny_autor)
-        duplikaty_z_publikacjami.append(pub_data)
-
-    context["duplikaty_z_publikacjami"] = duplikaty_z_publikacjami
-    context["first_candidate"] = (
-        candidates_for_author.first() if candidates_for_author else None
-    )
-
-    # "Scal wszystkie" jest aktywne tylko wtedy, gdy KAŻDY kandydat ma pewność
-    # ≥ MIN_PEWNOSC_DO_WYSWIETLENIA. Przy słabych trafieniach przyciski
-    # renderujemy w stanie wyszarzonym i klik pokazuje komunikat tłumaczący,
-    # co zrobić dalej (lista nazwisk z niską pewnością).
-    low_confidence_names = [
-        f"{d['autor']} ({d['analiza']['pewnosc']}%)"
-        for d in duplikaty_z_publikacjami
-        if d["analiza"]["pewnosc"] < MIN_PEWNOSC_DO_WYSWIETLENIA
-    ]
-    context["allow_merge_all"] = (
-        bool(duplikaty_z_publikacjami) and not low_confidence_names
-    )
-    context["low_confidence_names"] = low_confidence_names
-    context["MIN_PEWNOSC_DO_WYSWIETLENIA"] = MIN_PEWNOSC_DO_WYSWIETLENIA
-
-    # Get main author's publications and disciplines
-    context["glowny_autor_dyscypliny"] = (
-        Autor_Dyscyplina.objects.filter(
-            autor=glowny_autor, rok__gte=2022, rok__lte=2025
-        )
-        .select_related("dyscyplina_naukowa", "subdyscyplina_naukowa")
-        .order_by("rok")
-    )
-
-    _add_dyscypliny_to_duplicates(duplikaty_z_publikacjami)
-
-    glowny_autor_qs = Rekord.objects.prace_autora(glowny_autor)
-    context["glowne_publikacje"] = glowny_autor_qs[:500]
-    context["glowne_publikacje_count"] = glowny_autor_qs.count()
-    context["glowne_publikacje_year_range"] = _calculate_year_range(glowny_autor_qs)
-
-    return render(request, "deduplikator_autorow/duplicate_authors.html", context)
-
-
-def _read_param(request, *names):
-    """Read first non-empty param from GET/POST by trying multiple names."""
-    for name in names:
-        val = request.GET.get(name) or request.POST.get(name)
-        if val:
-            return val
-    return None
-
-
-def _scientist_id_to_autor_id(scientist_id):
-    """Map Scientist PK to Autor PK via rekord_w_bpp. Returns None if not found."""
-    try:
-        sci = Scientist.objects.get(pk=scientist_id)
-    except Scientist.DoesNotExist:
-        return None
-    autor = sci.rekord_w_bpp
-    return autor.pk if autor is not None else None
-
-
-def _resolve_autor_id(request, autor_param, scientist_param):
-    """Resolve Autor PK from preferred autor_param or legacy scientist_param.
-
-    Preference: explicit autor_id over scientist_id (mapped via rekord_w_bpp).
-    """
-    autor_id = _read_param(request, autor_param)
-    if autor_id:
-        return autor_id
-    sci_id = _read_param(request, scientist_param)
-    if sci_id:
-        return _scientist_id_to_autor_id(sci_id)
-    return None
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["GET", "POST"])
-def scal_autorow_view(request):
-    """
-    Widok do scalania autorów automatycznie.
-
-    Przyjmuje parametry (warianty):
-    - main_autor_id / duplicate_autor_id: ID autorów BPP (preferowane)
-    - main_scientist_id / duplicate_scientist_id: ID Scientist z PBN
-      (mapowane do Autor przez rekord_w_bpp; backwards-compat)
-    - skip_pbn: Opcjonalnie, jeśli true nie wysyła publikacji do PBN
-    - candidate_id: Opcjonalnie, ID DuplicateCandidate do oznaczenia jako scalony
-    - auto_assign_discipline: Opcjonalnie, jeśli true przypisuje główną dyscyplinę
-    - use_subdiscipline: Opcjonalnie, jeśli true używa subdyscypliny jako dyscypliny
-
-    Zwraca wynik operacji w formacie JSON.
-    """
-    from django.utils import timezone
-
-    skip_pbn = (_read_param(request, "skip_pbn") or "false").lower() == "true"
-    candidate_id = _read_param(request, "candidate_id")
-    auto_assign_discipline = (
-        _read_param(request, "auto_assign_discipline") or "false"
-    ).lower() == "true"
-    use_subdiscipline = (
-        _read_param(request, "use_subdiscipline") or "false"
-    ).lower() == "true"
-
-    main_autor_id = _resolve_autor_id(request, "main_autor_id", "main_scientist_id")
-    duplicate_autor_id = _resolve_autor_id(
-        request, "duplicate_autor_id", "duplicate_scientist_id"
-    )
-
-    if not main_autor_id or not duplicate_autor_id:
-        # Sygnalizujemy do Rollbar — to nie powinno się zdarzać przy poprawnym
-        # wywołaniu z UI; raczej oznacza błąd JS-a lub niespójne dane (np.
-        # scientist_id wskazujący na rekord, którego rekord_w_bpp == None).
-        try:
-            raise ValueError(
-                "scal_autorow_view: missing required params after resolution. "
-                f"GET={dict(request.GET)} POST_keys={list(request.POST.keys())} "
-                f"resolved main={main_autor_id} duplicate={duplicate_autor_id}"
-            )
-        except ValueError:
-            traceback.print_exc()
-            rollbar.report_exc_info(sys.exc_info())
-        return JsonResponse(
-            {
-                "success": False,
-                "error": (
-                    "Brak wymaganych parametrów: main_autor_id i duplicate_autor_id"
-                ),
-            },
-            status=400,
-        )
-
-    try:
-        try:
-            main_autor = Autor.objects.get(pk=main_autor_id)
-            duplicate_autor = Autor.objects.get(pk=duplicate_autor_id)
-        except Autor.DoesNotExist as e:
-            return JsonResponse(
-                {"success": False, "error": f"Nie znaleziono autora: {e}"},
-                status=404,
-            )
-
-        result = scal_autora(
-            main_autor,
-            duplicate_autor,
-            request.user,
-            skip_pbn=skip_pbn,
-            auto_assign_discipline=auto_assign_discipline,
-            use_subdiscipline=use_subdiscipline,
-        )
-
-        # Mark candidate as merged if provided
-        if candidate_id and result.get("success"):
-            try:
-                candidate = DuplicateCandidate.objects.get(pk=candidate_id)
-                candidate.status = DuplicateCandidate.Status.MERGED
-                candidate.reviewed_at = timezone.now()
-                candidate.reviewed_by = request.user
-                candidate.save()
-            except DuplicateCandidate.DoesNotExist:
-                # Candidate may have been deleted in the meantime
-                pass  # not an error - merge already succeeded
-
-        return JsonResponse({"success": result.get("success", False), "result": result})
-    except NotImplementedError as e:
-        return JsonResponse({"success": False, "error": str(e)}, status=501)
-    except Exception as e:
-        traceback.print_exc()
-        rollbar.report_exc_info(sys.exc_info())
-        return JsonResponse(
-            {"success": False, "error": f"Błąd podczas scalania autorów: {str(e)}"},
-            status=500,
-        )
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def mark_non_duplicate(request):
-    """
-    Widok do oznaczania autora jako nie-duplikatu.
-
-    Przyjmuje parametry:
-    - scientist_pk: Primary key Scientist do zapisania jako nie-duplikat
-
-    Zwraca JSON dla AJAX (X-Requested-With), w przeciwnym razie redirect.
-    """
-    is_ajax = request.headers.get("X-Requested-With") == "XMLHttpRequest"
-    scientist_pk = request.POST.get("scientist_pk")
-
-    def _respond(success, message, status=200, level="success"):
-        if is_ajax:
-            return JsonResponse({"success": success, "message": message}, status=status)
-        if level == "info":
-            messages.info(request, message)
-        elif success:
-            messages.success(request, message)
-        else:
-            messages.error(request, message)
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    if not scientist_pk:
-        return _respond(False, "Brak wymaganego parametru: scientist_pk", status=400)
-
-    try:
-        autor = Autor.objects.get(pk=scientist_pk)
-
-        not_duplicate, created = NotADuplicate.objects.update_or_create(
-            autor=autor, defaults=dict(created_by=request.user)
-        )
-
-        if created:
-            return _respond(True, f"Autor {autor} oznaczony jako nie-duplikat.")
-        return _respond(
-            True, f"Autor {autor} był już oznaczony jako nie-duplikat.", level="info"
-        )
-
-    except Autor.DoesNotExist:
-        return _respond(False, "Nie znaleziono autora o podanym ID.", status=404)
-    except Exception as e:
-        traceback.print_exc()
-        rollbar.report_exc_info(sys.exc_info())
-        return _respond(False, f"Błąd podczas oznaczania autora: {str(e)}", status=500)
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-def reset_skipped_authors(request):
-    """
-    Widok do resetowania listy pominiętych autorów i rozpoczęcia od początku.
-    """
-    # Wyczyść sesję z pominiętymi autorami i historią nawigacji
-    session_keys_to_clear = ["skipped_authors", "navigation_history"]
-    cleared_any = False
-
-    for key in session_keys_to_clear:
-        if key in request.session:
-            del request.session[key]
-            cleared_any = True
-
-    if cleared_any:
-        request.session.modified = True
-        messages.success(
-            request,
-            "Lista pominiętych autorów i historia nawigacji zostały wyczyszczone. Rozpoczynasz od początku.",
-        )
-
-    return redirect("deduplikator_autorow:duplicate_authors")
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def ignore_scientist(request):
-    """
-    Mark a Scientist (PBN) as ignored in the deduplication process.
-
-    Parameters:
-    - scientist_id: ID of the Scientist to ignore
-    - reason: Optional reason for ignoring (from POST)
-    """
-    scientist_id = request.POST.get("scientist_id")
-    reason = request.POST.get("reason", "")
-
-    if not scientist_id:
-        messages.error(request, "Brak wymaganego parametru: scientist_id")
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    try:
-        scientist = Scientist.objects.get(pk=scientist_id)
-
-        # Check if already ignored
-        if IgnoredScientist.objects.filter(scientist=scientist).exists():
-            messages.warning(
-                request, f"Autor {scientist} jest już oznaczony jako ignorowany."
-            )
-        else:
-            # Get the BPP author if available
-            autor = None
-            if hasattr(scientist, "rekord_w_bpp"):
-                autor = scientist.rekord_w_bpp
-
-            IgnoredScientist.objects.create(
-                scientist=scientist, autor=autor, reason=reason, created_by=request.user
-            )
-            messages.success(
-                request, f"Autor {scientist} został oznaczony jako ignorowany."
-            )
-
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    except Scientist.DoesNotExist:
-        messages.error(request, f"Nie znaleziono Scientist o ID: {scientist_id}")
-        return redirect("deduplikator_autorow:duplicate_authors")
-    except Exception as e:
-        messages.error(request, f"Błąd podczas ignorowania autora: {str(e)}")
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def ignore_autor(request):
-    """
-    Mark a BPP Autor (without PBN-Scientist link) as ignored.
-
-    Parameters:
-    - autor_id: ID of the Autor to ignore
-    - reason: Optional reason for ignoring (from POST)
-    """
-    autor_id = request.POST.get("autor_id")
-    reason = request.POST.get("reason", "")
-
-    if not autor_id:
-        messages.error(request, "Brak wymaganego parametru: autor_id")
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    try:
-        autor = Autor.objects.get(pk=autor_id)
-
-        if IgnoredAuthor.objects.filter(autor=autor).exists():
-            messages.warning(
-                request, f"Autor {autor} jest już oznaczony jako ignorowany."
-            )
-        else:
-            IgnoredAuthor.objects.create(
-                autor=autor, reason=reason, created_by=request.user
-            )
-            messages.success(
-                request, f"Autor {autor} został oznaczony jako ignorowany."
-            )
-
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    except Autor.DoesNotExist:
-        messages.error(request, f"Nie znaleziono autora o ID: {autor_id}")
-        return redirect("deduplikator_autorow:duplicate_authors")
-    except Exception as e:
-        messages.error(request, f"Błąd podczas ignorowania autora: {str(e)}")
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-
-def _trigger_rescan_after_reset(request, reset_label):
-    """Próbuje uruchomić nowe skanowanie po resecie list ignorowanych/nie-duplikatów.
-
-    Reset zmienia zbiór wykluczeń, więc cache kandydatów (DuplicateCandidate)
-    przestaje być spójny z tym, co użytkownik widzi w UI. Bez rescanu mogą
-    pojawiać się duplikaty, które po reset-cie powinny zniknąć (lub odwrotnie:
-    brakować takich, które wcześniej były ignorowane). Wywołujemy delay()
-    w trybie best-effort — jeżeli scan już biegnie albo dane PBN są stare,
-    informujemy użytkownika ale nie blokujemy operacji resetu.
-    """
-    from .tasks import scan_for_duplicates
-
-    if get_running_scan():
-        messages.info(
-            request,
-            f"{reset_label}. Skanowanie duplikatów jest już w trakcie — "
-            "wyniki uwzględnią reset po jego zakończeniu.",
-        )
-        return
-
-    pbn_data_fresh, pbn_stale_message, _ = is_pbn_people_data_fresh()
-    if not pbn_data_fresh:
-        messages.warning(
-            request,
-            f"{reset_label}. Nie udało się automatycznie uruchomić skanowania "
-            f"({pbn_stale_message}); uruchom je ręcznie po pobraniu danych PBN.",
-        )
-        return
-
-    scan_for_duplicates.delay(user_id=request.user.pk)
-    messages.success(
-        request,
-        f"{reset_label}. Uruchomiono nowe skanowanie duplikatów w tle — "
-        "odśwież stronę za chwilę, aby zobaczyć postęp.",
-    )
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def reset_ignored_scientists(request):
-    """Remove all IgnoredScientist (PBN) markings and re-trigger scan."""
-    count = IgnoredScientist.objects.count()
-    IgnoredScientist.objects.all().delete()
-    _trigger_rescan_after_reset(
-        request, f"Zresetowano {count} ignorowanych autorów (PBN)"
-    )
-    return redirect("deduplikator_autorow:duplicate_authors")
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def reset_ignored_autorzy(request):
-    """Remove all IgnoredAuthor (BPP) markings and re-trigger scan."""
-    count = IgnoredAuthor.objects.count()
-    IgnoredAuthor.objects.all().delete()
-    _trigger_rescan_after_reset(
-        request, f"Zresetowano {count} ignorowanych autorów (BPP)"
-    )
-    return redirect("deduplikator_autorow:duplicate_authors")
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-def reset_not_duplicates(request):
-    """Widok do resetowania (usuwania) wszystkich rekordów NotADuplicate."""
-    if request.method == "POST":
-        count = NotADuplicate.objects.count()
-        NotADuplicate.objects.all().delete()
-        _trigger_rescan_after_reset(
-            request, f"Zresetowano {count} autorów oznaczonych jako nie-duplikat"
-        )
-    return redirect("deduplikator_autorow:duplicate_authors")
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def delete_author(request):
-    """
-    Widok do usuwania autora (tylko jeśli nie ma publikacji).
-    """
-    author_id = request.POST.get("author_id")
-
-    if not author_id:
-        messages.error(request, "Brak wymaganego parametru: author_id")
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    try:
-        # Sprawdź czy autor istnieje
-        autor = Autor.objects.get(pk=author_id)
-
-        # Sprawdź czy autor ma publikacje
-        publikacje_count = Rekord.objects.prace_autora(autor).count()
-
-        if publikacje_count > 0:
-            messages.error(
-                request,
-                f"Nie można usunąć autora {autor} - ma {publikacje_count} publikacji.",
-            )
-        else:
-            # Usuń autora
-            autor_name = str(autor)
-            autor.delete()
-            messages.success(
-                request, f"Usunięto autora {autor_name} (brak publikacji)."
-            )
-
-    except Autor.DoesNotExist:
-        messages.error(request, "Nie znaleziono autora o podanym ID.")
-    except Exception as e:
-        messages.error(request, f"Błąd podczas usuwania autora: {str(e)}")
-
-    return redirect("deduplikator_autorow:duplicate_authors")
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-def download_duplicates_xlsx(request):
-    """
-    Widok do pobierania listy duplikatów w formacie XLSX.
-
-    Generuje plik XLSX ze wszystkimi autorami z duplikatami,
-    zawierający głównego autora, jego PBN UID, duplikat i jego PBN UID.
-    """
-    import datetime
-
-    from django.http import HttpResponse
-
-    try:
-        # Generuj plik XLSX
-        xlsx_content = export_duplicates_to_xlsx()
-
-        # Stwórz odpowiedź HTTP z plikiem
-        response = HttpResponse(
-            xlsx_content,
-            content_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-        )
-
-        # Nazwa pliku z datą
-        filename = (
-            f"duplikaty_autorow_{datetime.date.today().strftime('%Y-%m-%d')}.xlsx"
-        )
-        response["Content-Disposition"] = f'attachment; filename="{filename}"'
-
-        return response
-
-    except Exception as e:
-        rollbar.report_exc_info(sys.exc_info())
-        messages.error(request, f"Błąd podczas generowania pliku XLSX: {str(e)}")
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-
-def get_running_scan():
-    """Get the currently running scan, if any."""
-    return DuplicateScanRun.objects.filter(
-        status=DuplicateScanRun.Status.RUNNING
-    ).first()
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def start_scan_view(request):
-    """
-    Start a new duplicate scan task.
-    """
-    from .tasks import scan_for_duplicates
-
-    # Check if PBN people data is fresh
-    pbn_data_fresh, pbn_stale_message, _ = is_pbn_people_data_fresh()
-    if not pbn_data_fresh:
-        messages.error(
-            request,
-            f"Nie można uruchomić skanowania: {pbn_stale_message}. "
-            "Pobierz aktualne dane z PBN.",
-        )
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    # Check if scan is already running
-    if get_running_scan():
-        messages.warning(
-            request, "Skanowanie jest już w trakcie. Poczekaj na jego zakończenie."
-        )
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    # Start new scan
-    scan_for_duplicates.delay(user_id=request.user.pk)
-    messages.success(
-        request,
-        "Skanowanie duplikatów zostało uruchomione w tle. "
-        "Odśwież stronę za chwilę, aby zobaczyć postęp.",
-    )
-    return redirect("deduplikator_autorow:duplicate_authors")
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def cancel_scan_view(request):
-    """
-    Cancel the currently running scan.
-    """
-    from .tasks import cancel_scan
-
-    running_scan = get_running_scan()
-    if not running_scan:
-        messages.warning(request, "Brak aktywnego skanowania do anulowania.")
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    cancel_scan.delay(running_scan.pk)
-    messages.success(request, "Skanowanie zostało oznaczone do anulowania.")
-    return redirect("deduplikator_autorow:duplicate_authors")
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-def scan_status_view(request, scan_id):
-    """
-    Return scan status as JSON (for AJAX polling).
-    """
-    try:
-        scan_run = DuplicateScanRun.objects.get(pk=scan_id)
-
-        # Calculate ETA
-        eta_seconds = None
-        eta_time = None
-        elapsed_seconds = None
-
-        if (
-            scan_run.status == DuplicateScanRun.Status.RUNNING
-            and scan_run.authors_scanned > 0
-            and scan_run.total_authors_to_scan > 0
-        ):
-            now = timezone.now()
-            elapsed = now - scan_run.started_at
-            elapsed_seconds = int(elapsed.total_seconds())
-
-            remaining_authors = (
-                scan_run.total_authors_to_scan - scan_run.authors_scanned
-            )
-            if remaining_authors > 0:
-                time_per_author = elapsed.total_seconds() / scan_run.authors_scanned
-                eta_seconds = int(time_per_author * remaining_authors)
-                eta_datetime = now + timedelta(seconds=eta_seconds)
-                eta_time = eta_datetime.strftime("%H:%M:%S")
-
-        return JsonResponse(
-            {
-                "id": scan_run.pk,
-                "status": scan_run.status,
-                "status_display": scan_run.get_status_display(),
-                "progress_percent": scan_run.progress_percent,
-                "authors_scanned": scan_run.authors_scanned,
-                "total_authors_to_scan": scan_run.total_authors_to_scan,
-                "duplicates_found": scan_run.duplicates_found,
-                "finished": scan_run.status
-                in [
-                    DuplicateScanRun.Status.COMPLETED,
-                    DuplicateScanRun.Status.PARTIAL_COMPLETED,
-                    DuplicateScanRun.Status.CANCELLED,
-                    DuplicateScanRun.Status.FAILED,
-                ],
-                "eta_seconds": eta_seconds,
-                "eta_time": eta_time,
-                "elapsed_seconds": elapsed_seconds,
-            }
-        )
-    except DuplicateScanRun.DoesNotExist:
-        return JsonResponse({"error": "Scan not found"}, status=404)
-
-
-def _get_pending_candidates_for_main_autor(main_autor_id, scan_run):
-    """Get pending duplicate candidates for a specific main author."""
-    return (
-        DuplicateCandidate.objects.filter(
-            scan_run=scan_run,
-            main_autor_id=main_autor_id,
-            status=DuplicateCandidate.Status.PENDING,
-        )
-        .select_related("main_autor", "duplicate_autor")
-        .order_by("-priority", "-confidence_score")
-    )
-
-
-def _get_next_candidate_group(
-    scan_run,
-    skip_count=0,
-    mode="both",
-    confidence_band="all",
-    confidence_threshold_frac=0.5,
-):
-    """
-    Get the next group of candidates (all for the same main author).
-    Returns (main_autor, candidates_queryset, skip_count) or (None, None, 0)
-    if no more pending.
-
-    Args:
-        scan_run: The scan run to get candidates from
-        skip_count: Number of main authors to skip (offset)
-        mode: Filter by scan_mode ("pbn", "general", or "both"). When "both",
-            PBN candidates are sorted before general (PBN is canonical).
-        confidence_band: "all" / "high" / "low". high = confidence_percent
-            >= threshold; low = strictly below threshold.
-        confidence_threshold_frac: próg jako ułamek 0..1 (np. 0.5 dla 50%).
-
-    Returns:
-        Tuple of (main_autor, candidates_queryset, current_skip_count)
-    """
-    from django.db.models import Case, IntegerField, Value, When
-
-    qs = DuplicateCandidate.objects.filter(
-        scan_run=scan_run,
-        status=DuplicateCandidate.Status.PENDING,
-    )
-    if mode != "both":
-        qs = qs.filter(scan_mode=mode)
-    if confidence_band == "high":
-        qs = qs.filter(confidence_percent__gte=confidence_threshold_frac)
-    elif confidence_band == "low":
-        qs = qs.filter(confidence_percent__lt=confidence_threshold_frac)
-
-    # Annotate then iterate to dedupe in stable order. PostgreSQL's
-    # DISTINCT + ORDER BY semantics require ordering columns in SELECT,
-    # which Django's .values_list().distinct() may strip when an
-    # annotation is involved — leading to runtime errors or
-    # non-deterministic ordering. Materialize to Python and dedupe
-    # explicitly: simple, deterministic, side-effect free.
-    rows = (
-        qs.annotate(
-            mode_order=Case(
-                When(scan_mode="pbn", then=Value(0)),
-                When(scan_mode="general", then=Value(1)),
-                default=Value(2),
-                output_field=IntegerField(),
-            )
-        )
-        .order_by("mode_order", "-priority", "-confidence_score", "main_autor_id")
-        .values_list("main_autor_id", flat=True)
-    )
-
-    # Stable dedupe preserving order of first occurrence.
-    seen: set[int] = set()
-    main_autor_ids: list[int] = []
-    for pk in rows:
-        if pk not in seen:
-            seen.add(pk)
-            main_autor_ids.append(pk)
-
-    if not main_autor_ids:
-        return None, None, 0
-
-    # Ensure skip_count is within bounds
-    if skip_count >= len(main_autor_ids):
-        skip_count = 0  # Wrap around to beginning
-
-    # Get the main author at the skip_count position
-    main_autor_id = main_autor_ids[skip_count]
-
-    try:
-        main_autor = Autor.objects.get(pk=main_autor_id)
-    except Autor.DoesNotExist:
-        return None, None, 0
-
-    # Get all pending candidates for this main author
-    candidates = _get_pending_candidates_for_main_autor(main_autor_id, scan_run)
-
-    return main_autor, candidates, skip_count
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-def lastname_suggestions(request):
-    """Autocomplete dla wyszukiwarki nazwisk w deduplikatorze.
-
-    Zwraca top-10 unikalnych nazwisk autorów-głównych z PENDING-ujących
-    DuplicateCandidate filtrowanych po prefiksie. Bez aktywnego skanu
-    zwraca pustą listę. Wykorzystywane przez datalist na pasku górnym.
-    """
-    q = (request.GET.get("q") or "").strip()
-    if not q or len(q) < 2:
-        return JsonResponse({"results": []})
-
-    completed_scan = get_latest_usable_scan()
-    if not completed_scan:
-        return JsonResponse({"results": []})
-
-    nazwiska = (
-        DuplicateCandidate.objects.filter(
-            scan_run=completed_scan,
-            status=DuplicateCandidate.Status.PENDING,
-            main_autor__nazwisko__istartswith=q,
-        )
-        .values_list("main_autor__nazwisko", flat=True)
-        .distinct()
-        .order_by("main_autor__nazwisko")[:10]
-    )
-    return JsonResponse({"results": list(nazwiska)})
-
-
-@group_required(GR_WPROWADZANIE_DANYCH)
-@require_http_methods(["POST"])
-def mark_candidate_not_duplicate(request):
-    """
-    Mark a DuplicateCandidate as not a duplicate.
-
-    Returns JSON when called via AJAX (X-Requested-With: XMLHttpRequest),
-    otherwise redirects.
-    """
-    from django.utils import timezone
-
-    is_ajax = request.headers.get("X-Requested-With") == "XMLHttpRequest"
-    candidate_id = request.POST.get("candidate_id")
-
-    def _respond(success, message, status=200):
-        if is_ajax:
-            return JsonResponse({"success": success, "message": message}, status=status)
-        if success:
-            messages.success(request, message)
-        else:
-            messages.error(request, message)
-        return redirect("deduplikator_autorow:duplicate_authors")
-
-    if not candidate_id:
-        return _respond(False, "Brak wymaganego parametru: candidate_id", status=400)
-
-    try:
-        candidate = DuplicateCandidate.objects.get(pk=candidate_id)
-        candidate.status = DuplicateCandidate.Status.NOT_DUPLICATE
-        candidate.reviewed_at = timezone.now()
-        candidate.reviewed_by = request.user
-        candidate.save()
-
-        NotADuplicate.objects.update_or_create(
-            autor=candidate.duplicate_autor, defaults={"created_by": request.user}
-        )
-
-        return _respond(
-            True,
-            f"Autor {candidate.duplicate_autor_name} oznaczony jako nie-duplikat.",
-        )
-
-    except DuplicateCandidate.DoesNotExist:
-        return _respond(False, "Nie znaleziono kandydata o podanym ID.", status=404)
-    except Exception as e:
-        traceback.print_exc()
-        rollbar.report_exc_info(sys.exc_info())
-        return _respond(
-            False, f"Błąd podczas oznaczania kandydata: {str(e)}", status=500
-        )
diff --git a/src/deduplikator_autorow/views/__init__.py b/src/deduplikator_autorow/views/__init__.py
new file mode 100644
index 000000000..2cb8a95a2
--- /dev/null
+++ b/src/deduplikator_autorow/views/__init__.py
@@ -0,0 +1,91 @@
+"""Public view interface for the ``deduplikator_autorow`` app.
+
+This package was split out of a single ``views.py`` module. All previously
+public symbols are re-exported here so existing imports (urls.py, tests, etc.)
+continue to work via ``from deduplikator_autorow.views import ...``.
+
+Module map:
+- :mod:`.helpers` — decorators, session helpers, candidate/scan query helpers,
+  param resolution helpers
+- :mod:`.duplicates` — main duplicate-browsing/resolution views and lastname
+  autocomplete
+- :mod:`.merge` — merge & delete-author views
+- :mod:`.ignore` — ignored-author management views (PBN + BPP)
+- :mod:`.scan` — scan-task lifecycle (start/cancel/status) views
+- :mod:`.export` — XLSX export view
+"""
+
+from .duplicates import (
+    duplicate_authors_view,
+    lastname_suggestions,
+    mark_candidate_not_duplicate,
+    mark_non_duplicate,
+    reset_not_duplicates,
+    reset_skipped_authors,
+)
+from .export import download_duplicates_xlsx
+from .helpers import (
+    MIN_PEWNOSC_DO_WYSWIETLENIA,
+    _add_dyscypliny_to_duplicates,
+    _build_context_from_candidate,
+    _build_duplicate_publication_data,
+    _calculate_year_range,
+    _clear_navigation_session,
+    _get_excluded_authors_from_session,
+    _get_next_candidate_group,
+    _get_pending_candidates_for_main_autor,
+    _handle_go_previous,
+    _handle_search_request,
+    _handle_skip_current,
+    _read_param,
+    _resolve_autor_id,
+    _scientist_id_to_autor_id,
+    get_running_scan,
+    group_required,
+)
+from .ignore import (
+    _trigger_rescan_after_reset,
+    ignore_autor,
+    ignore_scientist,
+    reset_ignored_autorzy,
+    reset_ignored_scientists,
+)
+from .merge import delete_author, scal_autorow_view
+from .scan import cancel_scan_view, scan_status_view, start_scan_view
+
+__all__ = [
+    "MIN_PEWNOSC_DO_WYSWIETLENIA",
+    "_add_dyscypliny_to_duplicates",
+    "_build_context_from_candidate",
+    "_build_duplicate_publication_data",
+    "_calculate_year_range",
+    "_clear_navigation_session",
+    "_get_excluded_authors_from_session",
+    "_get_next_candidate_group",
+    "_get_pending_candidates_for_main_autor",
+    "_handle_go_previous",
+    "_handle_search_request",
+    "_handle_skip_current",
+    "_read_param",
+    "_resolve_autor_id",
+    "_scientist_id_to_autor_id",
+    "_trigger_rescan_after_reset",
+    "cancel_scan_view",
+    "delete_author",
+    "download_duplicates_xlsx",
+    "duplicate_authors_view",
+    "get_running_scan",
+    "group_required",
+    "ignore_autor",
+    "ignore_scientist",
+    "lastname_suggestions",
+    "mark_candidate_not_duplicate",
+    "mark_non_duplicate",
+    "reset_ignored_autorzy",
+    "reset_ignored_scientists",
+    "reset_not_duplicates",
+    "reset_skipped_authors",
+    "scal_autorow_view",
+    "scan_status_view",
+    "start_scan_view",
+]
diff --git a/src/deduplikator_autorow/views/duplicates.py b/src/deduplikator_autorow/views/duplicates.py
new file mode 100644
index 000000000..64a0ac52c
--- /dev/null
+++ b/src/deduplikator_autorow/views/duplicates.py
@@ -0,0 +1,451 @@
+"""Main duplicate-browsing/resolution views.
+
+Includes the primary list view (``duplicate_authors_view``), the
+mark-as-not-duplicate endpoints, navigation reset, and the lastname
+autocomplete used by the search bar.
+"""
+
+import sys
+import traceback
+
+import rollbar
+from django.contrib import messages
+from django.http import JsonResponse
+from django.shortcuts import redirect, render
+from django.views.decorators.http import require_http_methods
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+from bpp.models import Autor
+from bpp.models.cache import Rekord
+from pbn_downloader_app.freshness import is_pbn_people_data_fresh
+from pbn_downloader_app.models import PbnDownloadTask
+
+from ..models import (
+    DuplicateCandidate,
+    IgnoredScientist,
+    LogScalania,
+    NotADuplicate,
+)
+from ..utils.counters import get_latest_usable_scan
+from .helpers import (
+    MIN_PEWNOSC_DO_WYSWIETLENIA,
+    _add_dyscypliny_to_duplicates,
+    _build_context_from_candidate,
+    _calculate_year_range,
+    _get_next_candidate_group,
+    get_running_scan,
+    group_required,
+)
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+def duplicate_authors_view(request):  # noqa: C901
+    """
+    Show one main author with their pending duplicate candidates and
+    publications, read from pre-computed DuplicateCandidate rows.
+
+    GET params: mode, confidence, search_lastname, skip_count (bad values reset).
+    """
+    from bpp.models import Autor_Dyscyplina
+
+    # Get scan status
+    running_scan = get_running_scan()
+    completed_scan = get_latest_usable_scan()
+
+    # Filter mode: pbn|general|both (default both)
+    mode = request.GET.get("mode", "both")
+    if mode not in ("pbn", "general", "both"):
+        mode = "both"
+
+    # Confidence band: all|high|low (default all); high means >= the threshold.
+    # The threshold is compared against confidence_percent as a fraction, since
+    # the displayed % is computed as confidence_percent * 100 with clamping.
+    confidence_band = request.GET.get("confidence", "all")
+    if confidence_band not in ("all", "high", "low"):
+        confidence_band = "all"
+    confidence_threshold_frac = MIN_PEWNOSC_DO_WYSWIETLENIA / 100.0
+
+    # Common context
+    not_duplicate_count = NotADuplicate.objects.count()
+    ignored_authors_count = IgnoredScientist.objects.count()
+    latest_pbn_download = PbnDownloadTask.get_latest_task()
+
+    # Check PBN people data freshness
+    pbn_data_fresh, pbn_stale_message, pbn_last_download = is_pbn_people_data_fresh()
+
+    recent_merges = (
+        LogScalania.objects.filter(created_by=request.user)
+        .select_related("main_autor", "dyscyplina_before", "dyscyplina_after")
+        .order_by("-created_on")[:10]
+    )
+
+    # Base context for all scenarios
+    context = {
+        "scientist": None,
+        "glowny_autor": None,
+        "latest_pbn_download": latest_pbn_download,
+        "duplikaty_z_publikacjami": [],
+        "analiza": None,
+        "has_skipped_authors": False,
+        "has_previous_authors": False,
+        "total_authors_with_duplicates": 0,
+        "not_duplicate_count": not_duplicate_count,
+        "ignored_authors_count": ignored_authors_count,
+        "search_lastname": "",
+        "search_results_count": None,
+        "recent_merges": recent_merges,
+        # New scan-related context
+        "running_scan": running_scan,
+        "completed_scan": completed_scan,
+        "no_scan_available": not completed_scan and not running_scan,
+        "pending_candidates_count": 0,
+        "pending_pbn_count": 0,
+        "pending_general_count": 0,
+        # Filter mode (pbn|general|both)
+        "mode": mode,
+        # Navigation
+        "skip_count": 0,
+        # PBN data freshness
+        "pbn_data_fresh": pbn_data_fresh,
+        "pbn_stale_message": pbn_stale_message,
+        "pbn_last_download": pbn_last_download,
+    }
+
+    # If no completed scan, show "run scan first" message
+    if not completed_scan:
+        if running_scan:
+            messages.info(
+                request,
+                f"Skanowanie w toku: {running_scan.progress_percent}% "
+                f"({running_scan.authors_scanned}/{running_scan.total_authors_to_scan} autorów)",
+            )
+        return render(request, "deduplikator_autorow/duplicate_authors.html", context)
+
+    # Count pending candidates
+    base_pending_qs = DuplicateCandidate.objects.filter(
+        scan_run=completed_scan,
+        status=DuplicateCandidate.Status.PENDING,
+    )
+    pending_count = base_pending_qs.count()
+    context["pending_candidates_count"] = pending_count
+    context["total_authors_with_duplicates"] = pending_count
+    context["pending_pbn_count"] = base_pending_qs.filter(scan_mode="pbn").count()
+    context["pending_general_count"] = base_pending_qs.filter(
+        scan_mode="general"
+    ).count()
+    context["confidence_band"] = confidence_band
+
+    # Handle search by lastname
+    search_lastname = request.GET.get("search_lastname", "").strip()
+    context["search_lastname"] = search_lastname
+
+    if search_lastname:
+        # Search within stored candidates - confidence_band deliberately does NOT
+        # filter main-author selection (the per-author filter is applied below,
+        # on candidates_for_author).
+        candidates = (
+            DuplicateCandidate.objects.filter(
+                scan_run=completed_scan,
+                status=DuplicateCandidate.Status.PENDING,
+                main_autor__nazwisko__icontains=search_lastname,
+            )
+            .select_related("main_autor", "duplicate_autor")
+            .order_by("-priority", "-confidence_score")
+        )
+        if mode != "both":
+            candidates = candidates.filter(scan_mode=mode)
+
+        context["search_results_count"] = (
+            candidates.values("main_autor").distinct().count()
+        )
+
+        if candidates.exists():
+            search_author_ids = list(
+                candidates.values_list("main_autor", flat=True)
+                .distinct()
+                .order_by("main_autor")
+            )
+            try:
+                skip_count = int(request.GET.get("skip_count", 0))
+            except (ValueError, TypeError):
+                skip_count = 0
+            if not 0 <= skip_count < len(search_author_ids):  # also rejects negatives
+                skip_count = 0
+            glowny_autor_id = search_author_ids[skip_count]
+            glowny_autor = Autor.objects.get(pk=glowny_autor_id)
+            candidates_for_author = candidates.filter(main_autor=glowny_autor)
+            context["skip_count"] = skip_count
+            context["search_total_authors"] = len(search_author_ids)
+            context["search_has_prev"] = skip_count > 0
+            context["search_has_next"] = skip_count < len(search_author_ids) - 1
+        else:
+            glowny_autor = None
+            candidates_for_author = DuplicateCandidate.objects.none()
+    else:
+        # Handle navigation - use skip_count as a non-negative offset
+        try:
+            skip_count = max(int(request.GET.get("skip_count", 0)), 0)
+        except (ValueError, TypeError):
+            skip_count = 0
+
+        # Get next author with pending duplicates using offset.
+        # confidence_band is NOT passed here — we want to iterate over ALL
+        # main authors regardless of their candidates' confidence; the
+        # filter is applied below only to the visible subset.
+        glowny_autor, candidates_for_author, skip_count = _get_next_candidate_group(
+            completed_scan,
+            skip_count=skip_count,
+            mode=mode,
+        )
+        context["skip_count"] = skip_count
+
+    # Per-author confidence-band filter (NOT main-author selection); counters are
+    # computed first. Guarded: candidates_for_author may be None without an author.
+    if glowny_autor:
+        candidates_total_for_main = candidates_for_author.count()
+        candidates_high_for_main = candidates_for_author.filter(
+            confidence_percent__gte=confidence_threshold_frac
+        ).count()
+        candidates_low_for_main = candidates_total_for_main - candidates_high_for_main
+    else:
+        candidates_total_for_main = 0
+        candidates_high_for_main = 0
+        candidates_low_for_main = 0
+    if glowny_autor and confidence_band == "high":
+        candidates_for_author = candidates_for_author.filter(
+            confidence_percent__gte=confidence_threshold_frac
+        )
+    elif glowny_autor and confidence_band == "low":
+        candidates_for_author = candidates_for_author.filter(
+            confidence_percent__lt=confidence_threshold_frac
+        )
+    context["candidates_total_for_main"] = candidates_total_for_main
+    context["candidates_high_for_main"] = candidates_high_for_main
+    context["candidates_low_for_main"] = candidates_low_for_main
+
+    if not glowny_autor:
+        if pending_count == 0:
+            messages.info(
+                request,
+                "Brak duplikatów do sprawdzenia. Wszystkie zostały już przetworzone.",
+            )
+        return render(request, "deduplikator_autorow/duplicate_authors.html", context)
+
+    # Build context for the main author
+    context["glowny_autor"] = glowny_autor
+
+    # Try to get scientist for main author (for backward compatibility)
+    if glowny_autor.pbn_uid:
+        context["scientist"] = glowny_autor.pbn_uid
+
+    # Build duplicate list from stored candidates
+    duplikaty_z_publikacjami = []
+    for candidate in candidates_for_author:
+        pub_data = _build_context_from_candidate(candidate, glowny_autor)
+        duplikaty_z_publikacjami.append(pub_data)
+
+    context["duplikaty_z_publikacjami"] = duplikaty_z_publikacjami
+    context["first_candidate"] = (
+        candidates_for_author.first() if candidates_for_author else None
+    )
+
+    # "Merge all" is enabled only when EVERY candidate has confidence
+    # >= MIN_PEWNOSC_DO_WYSWIETLENIA. For weak matches the buttons are
+    # rendered greyed out and a click shows a message explaining what to
+    # do next (the list of low-confidence names).
+    low_confidence_names = [
+        f"{d['autor']} ({d['analiza']['pewnosc']}%)"
+        for d in duplikaty_z_publikacjami
+        if d["analiza"]["pewnosc"] < MIN_PEWNOSC_DO_WYSWIETLENIA
+    ]
+    context["allow_merge_all"] = (
+        bool(duplikaty_z_publikacjami) and not low_confidence_names
+    )
+    context["low_confidence_names"] = low_confidence_names
+    context["MIN_PEWNOSC_DO_WYSWIETLENIA"] = MIN_PEWNOSC_DO_WYSWIETLENIA
+
+    # Get main author's publications and disciplines
+    context["glowny_autor_dyscypliny"] = (
+        Autor_Dyscyplina.objects.filter(
+            autor=glowny_autor, rok__gte=2022, rok__lte=2025
+        )
+        .select_related("dyscyplina_naukowa", "subdyscyplina_naukowa")
+        .order_by("rok")
+    )
+
+    _add_dyscypliny_to_duplicates(duplikaty_z_publikacjami)
+
+    glowny_autor_qs = Rekord.objects.prace_autora(glowny_autor)
+    context["glowne_publikacje"] = glowny_autor_qs[:500]
+    context["glowne_publikacje_count"] = glowny_autor_qs.count()
+    context["glowne_publikacje_year_range"] = _calculate_year_range(glowny_autor_qs)
+
+    return render(request, "deduplikator_autorow/duplicate_authors.html", context)
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def mark_non_duplicate(request):
+    """
+    Mark an author as "not a duplicate" (POST only).
+
+    POST parameters:
+    - scientist_pk: despite the name, looked up as the primary key of ``Autor``
+
+    Returns JSON for AJAX (X-Requested-With), otherwise redirects.
+    """
+    is_ajax = request.headers.get("X-Requested-With") == "XMLHttpRequest"
+    scientist_pk = request.POST.get("scientist_pk")
+
+    def _respond(success, message, status=200, level="success"):
+        if is_ajax:
+            return JsonResponse({"success": success, "message": message}, status=status)
+        if level == "info":
+            messages.info(request, message)
+        elif success:
+            messages.success(request, message)
+        else:
+            messages.error(request, message)
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    if not scientist_pk:
+        return _respond(False, "Brak wymaganego parametru: scientist_pk", status=400)
+
+    try:
+        autor = Autor.objects.get(pk=scientist_pk)
+
+        not_duplicate, created = NotADuplicate.objects.update_or_create(
+            autor=autor, defaults=dict(created_by=request.user)
+        )
+
+        if created:
+            return _respond(True, f"Autor {autor} oznaczony jako nie-duplikat.")
+        return _respond(
+            True, f"Autor {autor} był już oznaczony jako nie-duplikat.", level="info"
+        )
+
+    except Autor.DoesNotExist:
+        return _respond(False, "Nie znaleziono autora o podanym ID.", status=404)
+    except Exception as e:
+        traceback.print_exc()
+        rollbar.report_exc_info(sys.exc_info())
+        return _respond(False, f"Błąd podczas oznaczania autora: {str(e)}", status=500)
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+def reset_skipped_authors(request):
+    """
+    Clear the session's skipped-author list and navigation history, then redirect.
+    """
+    # Drop the skipped-author and navigation-history keys from the session
+    session_keys_to_clear = ["skipped_authors", "navigation_history"]
+    cleared_any = False
+
+    for key in session_keys_to_clear:
+        if key in request.session:
+            del request.session[key]
+            cleared_any = True
+
+    if cleared_any:
+        request.session.modified = True
+        messages.success(
+            request,
+            "Lista pominiętych autorów i historia nawigacji zostały wyczyszczone. Rozpoczynasz od początku.",
+        )
+
+    return redirect("deduplikator_autorow:duplicate_authors")
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+def reset_not_duplicates(request):
+    """On POST, delete all NotADuplicate rows; always redirect to the list."""
+    from .ignore import _trigger_rescan_after_reset
+
+    if request.method == "POST":
+        count = NotADuplicate.objects.count()
+        NotADuplicate.objects.all().delete()
+        _trigger_rescan_after_reset(
+            request, f"Zresetowano {count} autorów oznaczonych jako nie-duplikat"
+        )
+    return redirect("deduplikator_autorow:duplicate_authors")
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+def lastname_suggestions(request):
+    """Autocomplete for the lastname search box in the deduplicator.
+
+    Returns up to 10 distinct main-author lastnames from PENDING
+    DuplicateCandidate rows matching prefix ``q`` (case-insensitive); empty
+    for queries under 2 chars or no usable scan. Used by the top-bar datalist.
+    """
+    q = (request.GET.get("q") or "").strip()
+    if not q or len(q) < 2:
+        return JsonResponse({"results": []})
+
+    completed_scan = get_latest_usable_scan()
+    if not completed_scan:
+        return JsonResponse({"results": []})
+
+    nazwiska = (
+        DuplicateCandidate.objects.filter(
+            scan_run=completed_scan,
+            status=DuplicateCandidate.Status.PENDING,
+            main_autor__nazwisko__istartswith=q,
+        )
+        .values_list("main_autor__nazwisko", flat=True)
+        .distinct()
+        .order_by("main_autor__nazwisko")[:10]
+    )
+    return JsonResponse({"results": list(nazwiska)})
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def mark_candidate_not_duplicate(request):
+    """
+    Mark a DuplicateCandidate (POST ``candidate_id``) as not a duplicate.
+
+    Also upserts NotADuplicate for the candidate's duplicate_autor. Returns
+    JSON for AJAX (X-Requested-With: XMLHttpRequest), otherwise redirects.
+    """
+    from django.utils import timezone
+
+    is_ajax = request.headers.get("X-Requested-With") == "XMLHttpRequest"
+    candidate_id = request.POST.get("candidate_id")
+
+    def _respond(success, message, status=200):
+        if is_ajax:
+            return JsonResponse({"success": success, "message": message}, status=status)
+        if success:
+            messages.success(request, message)
+        else:
+            messages.error(request, message)
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    if not candidate_id:
+        return _respond(False, "Brak wymaganego parametru: candidate_id", status=400)
+
+    try:
+        candidate = DuplicateCandidate.objects.get(pk=candidate_id)
+        candidate.status = DuplicateCandidate.Status.NOT_DUPLICATE
+        candidate.reviewed_at = timezone.now()
+        candidate.reviewed_by = request.user
+        candidate.save()
+
+        NotADuplicate.objects.update_or_create(
+            autor=candidate.duplicate_autor, defaults={"created_by": request.user}
+        )
+
+        return _respond(
+            True,
+            f"Autor {candidate.duplicate_autor_name} oznaczony jako nie-duplikat.",
+        )
+
+    except DuplicateCandidate.DoesNotExist:
+        return _respond(False, "Nie znaleziono kandydata o podanym ID.", status=404)
+    except Exception as e:
+        traceback.print_exc()
+        rollbar.report_exc_info(sys.exc_info())
+        return _respond(
+            False, f"Błąd podczas oznaczania kandydata: {str(e)}", status=500
+        )
diff --git a/src/deduplikator_autorow/views/export.py b/src/deduplikator_autorow/views/export.py
new file mode 100644
index 000000000..fcb78ec28
--- /dev/null
+++ b/src/deduplikator_autorow/views/export.py
@@ -0,0 +1,52 @@
+"""XLSX export view.
+
+``download_duplicates_xlsx`` returns the full duplicates list as an XLSX
+attachment.
+"""
+
+import datetime
+import sys
+
+import rollbar
+from django.contrib import messages
+from django.http import HttpResponse
+from django.shortcuts import redirect
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+
+from ..utils import export_duplicates_to_xlsx
+from .helpers import group_required
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+def download_duplicates_xlsx(request):
+    """
+    Serve the full list of duplicate authors as an XLSX attachment.
+
+    The sheet is built by export_duplicates_to_xlsx() (main author, its PBN
+    UID, the duplicate and its PBN UID); on error: Rollbar, message, redirect.
+    """
+    try:
+        # Generate the XLSX payload
+        xlsx_content = export_duplicates_to_xlsx()
+
+        # Wrap the payload in an HTTP response
+        response = HttpResponse(
+            xlsx_content,
+            content_type=(
+                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+            ),
+        )
+
+        # File name carries today's date
+        filename = (
+            f"duplikaty_autorow_{datetime.date.today().strftime('%Y-%m-%d')}.xlsx"
+        )
+        response["Content-Disposition"] = f'attachment; filename="{filename}"'
+
+        return response
+
+    except Exception as e:
+        rollbar.report_exc_info(sys.exc_info())
+        messages.error(request, f"Błąd podczas generowania pliku XLSX: {str(e)}")
+        return redirect("deduplikator_autorow:duplicate_authors")
diff --git a/src/deduplikator_autorow/views/helpers.py b/src/deduplikator_autorow/views/helpers.py
new file mode 100644
index 000000000..e827e3d61
--- /dev/null
+++ b/src/deduplikator_autorow/views/helpers.py
@@ -0,0 +1,325 @@
+"""Helper utilities used across deduplikator_autorow views.
+
+Contains:
+- ``group_required`` decorator (auth + group membership check)
+- session navigation helpers (``_get_excluded_authors_from_session``,
+  ``_handle_*``)
+- duplicate-data builders (``_build_*``, ``_add_dyscypliny_to_duplicates``)
+- param resolution helpers (``_read_param``, ``_resolve_autor_id``,
+  ``_scientist_id_to_autor_id``)
+- scan/candidate query helpers (``get_running_scan``,
+  ``_get_pending_candidates_for_main_autor``, ``_get_next_candidate_group``)
+
+Public symbols re-exported via ``deduplikator_autorow.views`` for backward
+compatibility.
+"""
+
+from functools import wraps
+
+from django.contrib.auth.decorators import login_required
+from django.core.exceptions import PermissionDenied
+
+from bpp.models import Autor
+from bpp.models.cache import Rekord
+from pbn_api.models import Scientist
+
+from ..models import DuplicateCandidate, DuplicateScanRun
+from ..utils import (
+    count_authors_with_lastname,
+    search_author_by_lastname,
+    znajdz_pierwszego_autora_z_duplikatami,
+)
+from ..utils.reason_display import enrich_reasons
+
+# Minimum confidence threshold for displaying duplicates.
+# Duplicates with confidence below this threshold will not be shown.
+MIN_PEWNOSC_DO_WYSWIETLENIA = 50
+
+
+def group_required(group_name):
+    """
+    Require login; non-superusers outside ``group_name`` get PermissionDenied.
+    """
+
+    def decorator(view_func):
+        @wraps(view_func)
+        @login_required
+        def wrapper(request, *args, **kwargs):
+            if (
+                not request.user.is_superuser
+                and not request.user.groups.filter(name=group_name).exists()
+            ):
+                raise PermissionDenied
+            return view_func(request, *args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
+def _get_excluded_authors_from_session(request):
+    """Return a list of Scientists whose PKs sit in session["skipped_authors"]."""
+    skipped_authors_ids = request.session.get("skipped_authors", [])
+    if skipped_authors_ids:
+        return list(Scientist.objects.filter(pk__in=skipped_authors_ids))
+    return []
+
+
+def _handle_search_request(search_lastname):
+    """Look up a scientist by last name; return (scientist, matching-author count)."""
+    scientist = search_author_by_lastname(search_lastname, excluded_authors=None)
+    search_results_count = count_authors_with_lastname(search_lastname)
+    return scientist, search_results_count
+
+
+def _clear_navigation_session(request):
+    """Remove the skip list and navigation history keys from the session, if present."""
+    if "skipped_authors" in request.session:
+        del request.session["skipped_authors"]
+    if "navigation_history" in request.session:
+        del request.session["navigation_history"]
+    request.session.modified = True
+
+
+def _handle_go_previous(request, navigation_history, excluded_authors):
+    """Go back one step via the history stack; fall back to a fresh lookup if needed."""
+    if not navigation_history:
+        return znajdz_pierwszego_autora_z_duplikatami(excluded_authors)
+
+    previous_scientist_id = navigation_history.pop()
+    request.session["navigation_history"] = navigation_history
+    request.session.modified = True
+
+    try:
+        return Scientist.objects.get(pk=previous_scientist_id)
+    except Scientist.DoesNotExist:
+        return znajdz_pierwszego_autora_z_duplikatami(excluded_authors)
+
+
+def _handle_skip_current(request, scientist, excluded_authors):
+    """Record ``scientist`` as skipped (history + skip list) and return the next one."""
+    if not scientist:
+        return scientist
+
+    # Push onto the history list so "go previous" can return here
+    if "navigation_history" not in request.session:
+        request.session["navigation_history"] = []
+    request.session["navigation_history"].append(scientist.pk)
+
+    # Remember the skip in the session (no duplicate PKs)
+    if "skipped_authors" not in request.session:
+        request.session["skipped_authors"] = []
+    if scientist.pk not in request.session["skipped_authors"]:
+        request.session["skipped_authors"].append(scientist.pk)
+    request.session.modified = True
+
+    # Exclude it from this lookup too (mutates the caller's list in place)
+    excluded_authors.append(scientist)
+    return znajdz_pierwszego_autora_z_duplikatami(excluded_authors)
+
+
+def _calculate_year_range(queryset):
+    """Format non-null ``rok`` values as "min-max" ("min" if equal); None if empty."""
+    lata = queryset.filter(rok__isnull=False).values_list("rok", flat=True)
+    if not lata:
+        return None
+
+    min_rok = min(lata)
+    max_rok = max(lata)
+    if min_rok == max_rok:
+        return str(min_rok)
+    return f"{min_rok}-{max_rok}"
+
+
+def _build_duplicate_publication_data(autor, metryka):
+    """Build the publication dict for ``autor`` (``metryka`` is accepted but unused)."""
+    publikacje = Rekord.objects.prace_autora(autor)[:500]
+    publikacje_count = Rekord.objects.prace_autora(autor).count()
+    year_range = _calculate_year_range(Rekord.objects.prace_autora(autor))
+
+    return {
+        "autor": autor,
+        "publikacje": publikacje,
+        "publikacje_count": publikacje_count,
+        "publikacje_year_range": year_range,
+    }
+
+
+def _add_dyscypliny_to_duplicates(duplikaty_z_publikacjami):
+    """Set each dict's "dyscypliny" to the author's 2022-2025 disciplines (in place)."""
+    from bpp.models import Autor_Dyscyplina
+
+    for duplikat_data in duplikaty_z_publikacjami:
+        duplikat_data["dyscypliny"] = (
+            Autor_Dyscyplina.objects.filter(
+                autor=duplikat_data["autor"], rok__gte=2022, rok__lte=2025
+            )
+            .select_related("dyscyplina_naukowa", "subdyscyplina_naukowa")
+            .order_by("rok")
+        )
+
+
+def _build_context_from_candidate(candidate, glowny_autor):
+    """Build one duplicate's dict from a DuplicateCandidate; glowny_autor is unused."""
+    publikacje = Rekord.objects.prace_autora(candidate.duplicate_autor)[:500]
+    publikacje_count = candidate.duplicate_publications_count
+    year_range = _calculate_year_range(
+        Rekord.objects.prace_autora(candidate.duplicate_autor)
+    )
+
+    # Display percent: normalized 0..1 → 0..100, rounded and clamped.
+    # The raw confidence_score may be < 0 or > 100 and historically showed
+    # users values such as 140% — confidence_percent is the only field
+    # that guarantees a sensible range for presentation.
+    pewnosc_display = max(0, min(100, round((candidate.confidence_percent or 0) * 100)))
+
+    return {
+        "autor": candidate.duplicate_autor,
+        "publikacje": publikacje,
+        "publikacje_count": publikacje_count,
+        "publikacje_year_range": year_range,
+        "analiza": {
+            "autor": candidate.duplicate_autor,
+            "pewnosc": pewnosc_display,
+            "powody_podobienstwa": enrich_reasons(candidate.reasons),
+        },
+        "candidate_id": candidate.pk,  # For marking as not duplicate
+    }
+
+
+def _read_param(request, *names):
+    """Return the first non-empty value for ``names`` (GET before POST), else None."""
+    for name in names:
+        val = request.GET.get(name) or request.POST.get(name)
+        if val:
+            return val
+    return None
+
+
+def _scientist_id_to_autor_id(scientist_id):
+    """Scientist PK -> Autor PK via rekord_w_bpp; None if no Scientist or no Autor."""
+    try:
+        sci = Scientist.objects.get(pk=scientist_id)
+    except Scientist.DoesNotExist:
+        return None
+    autor = sci.rekord_w_bpp
+    return autor.pk if autor is not None else None
+
+
+def _resolve_autor_id(request, autor_param, scientist_param):
+    """Resolve an Autor PK from ``autor_param`` or the legacy ``scientist_param``.
+
+    An explicit autor id wins (returned as received); else map the scientist id.
+    """
+    autor_id = _read_param(request, autor_param)
+    if autor_id:
+        return autor_id
+    sci_id = _read_param(request, scientist_param)
+    if sci_id:
+        return _scientist_id_to_autor_id(sci_id)
+    return None
+
+
+def get_running_scan():
+    """Return the first DuplicateScanRun with status RUNNING, or None."""
+    return DuplicateScanRun.objects.filter(
+        status=DuplicateScanRun.Status.RUNNING
+    ).first()
+
+
+def _get_pending_candidates_for_main_autor(main_autor_id, scan_run):
+    """Pending candidates for one main author, highest priority/confidence first."""
+    return (
+        DuplicateCandidate.objects.filter(
+            scan_run=scan_run,
+            main_autor_id=main_autor_id,
+            status=DuplicateCandidate.Status.PENDING,
+        )
+        .select_related("main_autor", "duplicate_autor")
+        .order_by("-priority", "-confidence_score")
+    )
+
+
+def _get_next_candidate_group(
+    scan_run,
+    skip_count=0,
+    mode="both",
+    confidence_band="all",
+    confidence_threshold_frac=0.5,
+):
+    """
+    Get the next group of candidates (all for the same main author).
+    Returns (main_autor, candidates_queryset, skip_count) or (None, None, 0)
+    if nothing is pending or the selected main Autor row no longer exists.
+
+    Args:
+        scan_run: The scan run to get candidates from
+        skip_count: Number of main authors to skip (offset)
+        mode: Filter by scan_mode ("pbn", "general", or "both"). When "both",
+            PBN candidates are sorted before general (PBN is canonical).
+        confidence_band: "all" / "high" / "low". high = confidence_percent
+            >= threshold; low = strictly below threshold.
+        confidence_threshold_frac: threshold as a 0..1 fraction (e.g. 0.5 for 50%).
+
+    Returns:
+        Tuple of (main_autor, candidates_queryset, current_skip_count)
+    """
+    from django.db.models import Case, IntegerField, Value, When
+
+    qs = DuplicateCandidate.objects.filter(
+        scan_run=scan_run,
+        status=DuplicateCandidate.Status.PENDING,
+    )
+    if mode != "both":
+        qs = qs.filter(scan_mode=mode)
+    if confidence_band == "high":
+        qs = qs.filter(confidence_percent__gte=confidence_threshold_frac)
+    elif confidence_band == "low":
+        qs = qs.filter(confidence_percent__lt=confidence_threshold_frac)
+
+    # Annotate then iterate to dedupe in stable order. PostgreSQL's
+    # DISTINCT + ORDER BY semantics require ordering columns in SELECT,
+    # which Django's .values_list().distinct() may strip when an
+    # annotation is involved — leading to runtime errors or
+    # non-deterministic ordering. Materialize to Python and dedupe
+    # explicitly: simple, deterministic, side-effect free.
+    rows = (
+        qs.annotate(
+            mode_order=Case(
+                When(scan_mode="pbn", then=Value(0)),
+                When(scan_mode="general", then=Value(1)),
+                default=Value(2),
+                output_field=IntegerField(),
+            )
+        )
+        .order_by("mode_order", "-priority", "-confidence_score", "main_autor_id")
+        .values_list("main_autor_id", flat=True)
+    )
+
+    # Stable dedupe preserving order of first occurrence.
+    seen: set[int] = set()
+    main_autor_ids: list[int] = []
+    for pk in rows:
+        if pk not in seen:
+            seen.add(pk)
+            main_autor_ids.append(pk)
+
+    if not main_autor_ids:
+        return None, None, 0
+
+    # Ensure skip_count is within bounds
+    if skip_count >= len(main_autor_ids):
+        skip_count = 0  # Wrap around to beginning
+
+    # Get the main author at the skip_count position
+    main_autor_id = main_autor_ids[skip_count]
+
+    try:
+        main_autor = Autor.objects.get(pk=main_autor_id)
+    except Autor.DoesNotExist:
+        return None, None, 0
+
+    # Get all pending candidates for this main author
+    candidates = _get_pending_candidates_for_main_autor(main_autor_id, scan_run)
+
+    return main_autor, candidates, skip_count
diff --git a/src/deduplikator_autorow/views/ignore.py b/src/deduplikator_autorow/views/ignore.py
new file mode 100644
index 000000000..abc3c9d06
--- /dev/null
+++ b/src/deduplikator_autorow/views/ignore.py
@@ -0,0 +1,171 @@
+"""Ignored-author management views.
+
+- ``ignore_scientist`` — mark a PBN Scientist as ignored
+- ``ignore_autor`` — mark a BPP Autor (no PBN link) as ignored
+- ``reset_ignored_scientists`` / ``reset_ignored_autorzy`` — clear lists and
+  re-trigger scan
+- ``_trigger_rescan_after_reset`` — shared helper for reset endpoints
+"""
+
+from django.contrib import messages
+from django.shortcuts import redirect
+from django.views.decorators.http import require_http_methods
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+from bpp.models import Autor
+from pbn_api.models import Scientist
+from pbn_downloader_app.freshness import is_pbn_people_data_fresh
+
+from ..models import IgnoredAuthor, IgnoredScientist
+from .helpers import get_running_scan, group_required
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def ignore_scientist(request):
+    """
+    Mark a Scientist (PBN) as ignored in the deduplication process.
+
+    POST parameters:
+    - scientist_id: ID of the Scientist to ignore (required)
+    - reason: optional free-text reason; defaults to ""
+    """
+    scientist_id = request.POST.get("scientist_id")
+    reason = request.POST.get("reason", "")
+
+    if not scientist_id:
+        messages.error(request, "Brak wymaganego parametru: scientist_id")
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    try:
+        scientist = Scientist.objects.get(pk=scientist_id)
+
+        # Already ignored: warn instead of creating a second row
+        if IgnoredScientist.objects.filter(scientist=scientist).exists():
+            messages.warning(
+                request, f"Autor {scientist} jest już oznaczony jako ignorowany."
+            )
+        else:
+            # Store the linked BPP author too, when the attribute exists
+            autor = None
+            if hasattr(scientist, "rekord_w_bpp"):
+                autor = scientist.rekord_w_bpp
+
+            IgnoredScientist.objects.create(
+                scientist=scientist, autor=autor, reason=reason, created_by=request.user
+            )
+            messages.success(
+                request, f"Autor {scientist} został oznaczony jako ignorowany."
+            )
+
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    except Scientist.DoesNotExist:
+        messages.error(request, f"Nie znaleziono Scientist o ID: {scientist_id}")
+        return redirect("deduplikator_autorow:duplicate_authors")
+    except Exception as e:
+        messages.error(request, f"Błąd podczas ignorowania autora: {str(e)}")
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def ignore_autor(request):
+    """
+    Mark a BPP Autor (without PBN-Scientist link) as ignored.
+
+    POST parameters:
+    - autor_id: ID of the Autor to ignore (required)
+    - reason: optional free-text reason; defaults to ""
+    """
+    autor_id = request.POST.get("autor_id")
+    reason = request.POST.get("reason", "")
+
+    if not autor_id:
+        messages.error(request, "Brak wymaganego parametru: autor_id")
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    try:
+        autor = Autor.objects.get(pk=autor_id)
+
+        if IgnoredAuthor.objects.filter(autor=autor).exists():
+            messages.warning(
+                request, f"Autor {autor} jest już oznaczony jako ignorowany."
+            )
+        else:
+            IgnoredAuthor.objects.create(
+                autor=autor, reason=reason, created_by=request.user
+            )
+            messages.success(
+                request, f"Autor {autor} został oznaczony jako ignorowany."
+            )
+
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    except Autor.DoesNotExist:
+        messages.error(request, f"Nie znaleziono autora o ID: {autor_id}")
+        return redirect("deduplikator_autorow:duplicate_authors")
+    except Exception as e:
+        messages.error(request, f"Błąd podczas ignorowania autora: {str(e)}")
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+
+def _trigger_rescan_after_reset(request, reset_label):
+    """Try to start a new scan after resetting the ignored/not-duplicate lists.
+
+    A reset changes the exclusion set, so the candidate cache (DuplicateCandidate)
+    is no longer consistent with what the user sees in the UI. Without a rescan,
+    duplicates that should vanish after the reset may still appear (or, conversely,
+    previously ignored ones may be missing). delay() is called on a best-effort
+    basis — if a scan is already running or the PBN data is stale, the user
+    is informed but the reset itself is not blocked.
+    """
+    from ..tasks import scan_for_duplicates
+
+    if get_running_scan():
+        messages.info(
+            request,
+            f"{reset_label}. Skanowanie duplikatów jest już w trakcie — "
+            "wyniki uwzględnią reset po jego zakończeniu.",
+        )
+        return
+
+    pbn_data_fresh, pbn_stale_message, _ = is_pbn_people_data_fresh()
+    if not pbn_data_fresh:
+        messages.warning(
+            request,
+            f"{reset_label}. Nie udało się automatycznie uruchomić skanowania "
+            f"({pbn_stale_message}); uruchom je ręcznie po pobraniu danych PBN.",
+        )
+        return
+
+    scan_for_duplicates.delay(user_id=request.user.pk)
+    messages.success(
+        request,
+        f"{reset_label}. Uruchomiono nowe skanowanie duplikatów w tle — "
+        "odśwież stronę za chwilę, aby zobaczyć postęp.",
+    )
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def reset_ignored_scientists(request):
+    """Delete every IgnoredScientist (PBN) row, then attempt a rescan and redirect."""
+    count = IgnoredScientist.objects.count()
+    IgnoredScientist.objects.all().delete()
+    _trigger_rescan_after_reset(
+        request, f"Zresetowano {count} ignorowanych autorów (PBN)"
+    )
+    return redirect("deduplikator_autorow:duplicate_authors")
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def reset_ignored_autorzy(request):
+    """Delete every IgnoredAuthor (BPP) row, then attempt a rescan and redirect."""
+    count = IgnoredAuthor.objects.count()
+    IgnoredAuthor.objects.all().delete()
+    _trigger_rescan_after_reset(
+        request, f"Zresetowano {count} ignorowanych autorów (BPP)"
+    )
+    return redirect("deduplikator_autorow:duplicate_authors")
diff --git a/src/deduplikator_autorow/views/merge.py b/src/deduplikator_autorow/views/merge.py
new file mode 100644
index 000000000..185f02f8a
--- /dev/null
+++ b/src/deduplikator_autorow/views/merge.py
@@ -0,0 +1,161 @@
+"""Merge & delete-author views.
+
+``scal_autorow_view`` performs the actual merge of two authors;
+``delete_author`` removes an author when they have no publications.
+"""
+
+import sys
+import traceback
+
+import rollbar
+from django.contrib import messages
+from django.http import JsonResponse
+from django.shortcuts import redirect
+from django.views.decorators.http import require_http_methods
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+from bpp.models import Autor
+from bpp.models.cache import Rekord
+
+from ..models import DuplicateCandidate
+from ..utils import scal_autora
+from .helpers import _read_param, _resolve_autor_id, group_required
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["GET", "POST"])
+def scal_autorow_view(request):
+    """
+    View that merges two authors automatically.
+
+    Accepted parameters (variants):
+    - main_autor_id / duplicate_autor_id: BPP author IDs (preferred)
+    - main_scientist_id / duplicate_scientist_id: PBN Scientist IDs
+      (mapped to Autor via rekord_w_bpp; backwards-compat)
+    - skip_pbn: optional; if true, publications are not sent to PBN
+    - candidate_id: optional; DuplicateCandidate ID to mark as merged
+    - auto_assign_discipline: optional; if true, assigns the main discipline
+    - use_subdiscipline: optional; if true, uses the subdiscipline as discipline
+
+    Returns the operation result as JSON.
+    """
+    from django.utils import timezone
+
+    skip_pbn = (_read_param(request, "skip_pbn") or "false").lower() == "true"
+    candidate_id = _read_param(request, "candidate_id")
+    auto_assign_discipline = (
+        _read_param(request, "auto_assign_discipline") or "false"
+    ).lower() == "true"
+    use_subdiscipline = (
+        _read_param(request, "use_subdiscipline") or "false"
+    ).lower() == "true"
+
+    main_autor_id = _resolve_autor_id(request, "main_autor_id", "main_scientist_id")
+    duplicate_autor_id = _resolve_autor_id(
+        request, "duplicate_autor_id", "duplicate_scientist_id"
+    )
+
+    if not main_autor_id or not duplicate_autor_id:
+        # Report to Rollbar — this should not happen on a correct call from
+        # the UI; it more likely means a JS bug or inconsistent data (e.g.
+        # a scientist_id pointing at a record whose rekord_w_bpp == None).
+        try:
+            raise ValueError(
+                "scal_autorow_view: missing required params after resolution. "
+                f"GET={dict(request.GET)} POST_keys={list(request.POST.keys())} "
+                f"resolved main={main_autor_id} duplicate={duplicate_autor_id}"
+            )
+        except ValueError:
+            traceback.print_exc()
+            rollbar.report_exc_info(sys.exc_info())
+        return JsonResponse(
+            {
+                "success": False,
+                "error": (
+                    "Brak wymaganych parametrów: main_autor_id i duplicate_autor_id"
+                ),
+            },
+            status=400,
+        )
+
+    try:
+        try:
+            main_autor = Autor.objects.get(pk=main_autor_id)
+            duplicate_autor = Autor.objects.get(pk=duplicate_autor_id)
+        except Autor.DoesNotExist as e:
+            return JsonResponse(
+                {"success": False, "error": f"Nie znaleziono autora: {e}"},
+                status=404,
+            )
+
+        result = scal_autora(
+            main_autor,
+            duplicate_autor,
+            request.user,
+            skip_pbn=skip_pbn,
+            auto_assign_discipline=auto_assign_discipline,
+            use_subdiscipline=use_subdiscipline,
+        )
+
+        # Mark candidate as merged if provided
+        if candidate_id and result.get("success"):
+            try:
+                candidate = DuplicateCandidate.objects.get(pk=candidate_id)
+                candidate.status = DuplicateCandidate.Status.MERGED
+                candidate.reviewed_at = timezone.now()
+                candidate.reviewed_by = request.user
+                candidate.save()
+            except DuplicateCandidate.DoesNotExist:
+                # Candidate may have been deleted in the meantime
+                pass  # not an error - merge already succeeded
+
+        return JsonResponse({"success": result.get("success", False), "result": result})
+    except NotImplementedError as e:
+        return JsonResponse({"success": False, "error": str(e)}, status=501)
+    except Exception as e:
+        traceback.print_exc()
+        rollbar.report_exc_info(sys.exc_info())
+        return JsonResponse(
+            {"success": False, "error": f"Błąd podczas scalania autorów: {str(e)}"},
+            status=500,
+        )
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def delete_author(request):
+    """
+    View that deletes an author (only when they have no publications).
+    """
+    author_id = request.POST.get("author_id")
+
+    if not author_id:
+        messages.error(request, "Brak wymaganego parametru: author_id")
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    try:
+        # Check that the author exists
+        autor = Autor.objects.get(pk=author_id)
+
+        # Check whether the author has publications
+        publikacje_count = Rekord.objects.prace_autora(autor).count()
+
+        if publikacje_count > 0:
+            messages.error(
+                request,
+                f"Nie można usunąć autora {autor} - ma {publikacje_count} publikacji.",
+            )
+        else:
+            # Capture the name first: it is used in the message after deletion
+            autor_name = str(autor)
+            autor.delete()
+            messages.success(
+                request, f"Usunięto autora {autor_name} (brak publikacji)."
+            )
+
+    except Autor.DoesNotExist:
+        messages.error(request, "Nie znaleziono autora o podanym ID.")
+    except Exception as e:
+        messages.error(request, f"Błąd podczas usuwania autora: {str(e)}")
+
+    return redirect("deduplikator_autorow:duplicate_authors")
diff --git a/src/deduplikator_autorow/views/scan.py b/src/deduplikator_autorow/views/scan.py
new file mode 100644
index 000000000..0f2ab7fa1
--- /dev/null
+++ b/src/deduplikator_autorow/views/scan.py
@@ -0,0 +1,124 @@
+"""Scan-task lifecycle views (start / cancel / status)."""
+
+from datetime import timedelta
+
+from django.contrib import messages
+from django.http import JsonResponse
+from django.shortcuts import redirect
+from django.utils import timezone
+from django.views.decorators.http import require_http_methods
+
+from bpp.const import GR_WPROWADZANIE_DANYCH
+from pbn_downloader_app.freshness import is_pbn_people_data_fresh
+
+from ..models import DuplicateScanRun
+from .helpers import get_running_scan, group_required
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def start_scan_view(request):
+    """
+    Start a duplicate scan unless PBN people data is stale or a scan is running.
+    """
+    from ..tasks import scan_for_duplicates
+
+    # Refuse to scan when the PBN people data is not fresh
+    pbn_data_fresh, pbn_stale_message, _ = is_pbn_people_data_fresh()
+    if not pbn_data_fresh:
+        messages.error(
+            request,
+            f"Nie można uruchomić skanowania: {pbn_stale_message}. "
+            "Pobierz aktualne dane z PBN.",
+        )
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    # Do not start a second scan while one is already running
+    if get_running_scan():
+        messages.warning(
+            request, "Skanowanie jest już w trakcie. Poczekaj na jego zakończenie."
+        )
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    # Hand the scan off via .delay(), tagged with the requesting user's PK
+    scan_for_duplicates.delay(user_id=request.user.pk)
+    messages.success(
+        request,
+        "Skanowanie duplikatów zostało uruchomione w tle. "
+        "Odśwież stronę za chwilę, aby zobaczyć postęp.",
+    )
+    return redirect("deduplikator_autorow:duplicate_authors")
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+@require_http_methods(["POST"])
+def cancel_scan_view(request):
+    """
+    Request cancellation of the running scan; warn when no scan is running.
+    """
+    from ..tasks import cancel_scan
+
+    running_scan = get_running_scan()
+    if not running_scan:
+        messages.warning(request, "Brak aktywnego skanowania do anulowania.")
+        return redirect("deduplikator_autorow:duplicate_authors")
+
+    cancel_scan.delay(running_scan.pk)
+    messages.success(request, "Skanowanie zostało oznaczone do anulowania.")
+    return redirect("deduplikator_autorow:duplicate_authors")
+
+
+@group_required(GR_WPROWADZANIE_DANYCH)
+def scan_status_view(request, scan_id):
+    """
+    Return the status of scan ``scan_id`` as JSON (AJAX polling); 404 if absent.
+    """
+    try:
+        scan_run = DuplicateScanRun.objects.get(pk=scan_id)
+
+        # ETA = average time per scanned author so far x authors still to scan
+        eta_seconds = None
+        eta_time = None
+        elapsed_seconds = None
+
+        if (
+            scan_run.status == DuplicateScanRun.Status.RUNNING
+            and scan_run.authors_scanned > 0
+            and scan_run.total_authors_to_scan > 0
+        ):
+            now = timezone.now()
+            elapsed = now - scan_run.started_at
+            elapsed_seconds = int(elapsed.total_seconds())
+
+            remaining_authors = (
+                scan_run.total_authors_to_scan - scan_run.authors_scanned
+            )
+            if remaining_authors > 0:
+                time_per_author = elapsed.total_seconds() / scan_run.authors_scanned
+                eta_seconds = int(time_per_author * remaining_authors)
+                eta_datetime = now + timedelta(seconds=eta_seconds)
+                eta_time = eta_datetime.strftime("%H:%M:%S")
+
+        return JsonResponse(
+            {
+                "id": scan_run.pk,
+                "status": scan_run.status,
+                "status_display": scan_run.get_status_display(),
+                "progress_percent": scan_run.progress_percent,
+                "authors_scanned": scan_run.authors_scanned,
+                "total_authors_to_scan": scan_run.total_authors_to_scan,
+                "duplicates_found": scan_run.duplicates_found,
+                "finished": scan_run.status
+                in [
+                    DuplicateScanRun.Status.COMPLETED,
+                    DuplicateScanRun.Status.PARTIAL_COMPLETED,
+                    DuplicateScanRun.Status.CANCELLED,
+                    DuplicateScanRun.Status.FAILED,
+                ],
+                "eta_seconds": eta_seconds,
+                "eta_time": eta_time,
+                "elapsed_seconds": elapsed_seconds,
+            }
+        )
+    except DuplicateScanRun.DoesNotExist:
+        return JsonResponse({"error": "Scan not found"}, status=404)

From a0a6b6a51c708a6bcb4163644ab3ebdcb4aea2eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sun, 3 May 2026 21:56:49 +0200
Subject: [PATCH 22/25] Potential fix for pull request finding 'CodeQL /
 Information exposure through an exception'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
---
 src/deduplikator_autorow/views/duplicates.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/deduplikator_autorow/views/duplicates.py b/src/deduplikator_autorow/views/duplicates.py
index 64a0ac52c..7e3722be7 100644
--- a/src/deduplikator_autorow/views/duplicates.py
+++ b/src/deduplikator_autorow/views/duplicates.py
@@ -329,7 +329,11 @@ def _respond(success, message, status=200, level="success"):
     except Exception as e:
         traceback.print_exc()
         rollbar.report_exc_info(sys.exc_info())
-        return _respond(False, f"Błąd podczas oznaczania autora: {str(e)}", status=500)
+        return _respond(
+            False,
+            "Wystąpił wewnętrzny błąd podczas oznaczania autora. Spróbuj ponownie później.",
+            status=500,
+        )
 
 
 @group_required(GR_WPROWADZANIE_DANYCH)

From 0e2f7bc4e35f3a9854f512b5e7d24ac301a4e786 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sun, 3 May 2026 21:57:00 +0200
Subject: [PATCH 23/25] Potential fix for pull request finding 'CodeQL /
 Information exposure through an exception'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
---
 src/deduplikator_autorow/views/duplicates.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/deduplikator_autorow/views/duplicates.py b/src/deduplikator_autorow/views/duplicates.py
index 7e3722be7..259ab5dac 100644
--- a/src/deduplikator_autorow/views/duplicates.py
+++ b/src/deduplikator_autorow/views/duplicates.py
@@ -451,5 +451,5 @@ def _respond(success, message, status=200):
         traceback.print_exc()
         rollbar.report_exc_info(sys.exc_info())
         return _respond(
-            False, f"Błąd podczas oznaczania kandydata: {str(e)}", status=500
+            False, "Wystąpił wewnętrzny błąd podczas oznaczania kandydata.", status=500
         )

From 461731a0a29f6405fa9fbe514b9d68dc83e1221c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sun, 3 May 2026 21:57:15 +0200
Subject: [PATCH 24/25] Potential fix for pull request finding 'CodeQL /
 Information exposure through an exception'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
---
 src/deduplikator_autorow/views/merge.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/deduplikator_autorow/views/merge.py b/src/deduplikator_autorow/views/merge.py
index 185f02f8a..0dea1b2ea 100644
--- a/src/deduplikator_autorow/views/merge.py
+++ b/src/deduplikator_autorow/views/merge.py
@@ -83,8 +83,9 @@ def scal_autorow_view(request):
             main_autor = Autor.objects.get(pk=main_autor_id)
             duplicate_autor = Autor.objects.get(pk=duplicate_autor_id)
         except Autor.DoesNotExist as e:
+            rollbar.report_exc_info(sys.exc_info())
             return JsonResponse(
-                {"success": False, "error": f"Nie znaleziono autora: {e}"},
+                {"success": False, "error": "Nie znaleziono autora."},
                 status=404,
             )
 

From 998ed8e863b1fe467199ea0a01700299f3d33892 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Pasternak?= <michal.dtz@gmail.com>
Date: Sun, 3 May 2026 21:57:29 +0200
Subject: [PATCH 25/25] Potential fix for pull request finding 'CodeQL /
 Information exposure through an exception'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
---
 src/deduplikator_autorow/views/merge.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/deduplikator_autorow/views/merge.py b/src/deduplikator_autorow/views/merge.py
index 0dea1b2ea..89f0f5af3 100644
--- a/src/deduplikator_autorow/views/merge.py
+++ b/src/deduplikator_autorow/views/merge.py
@@ -117,7 +117,10 @@ def scal_autorow_view(request):
         traceback.print_exc()
         rollbar.report_exc_info(sys.exc_info())
         return JsonResponse(
-            {"success": False, "error": f"Błąd podczas scalania autorów: {str(e)}"},
+            {
+                "success": False,
+                "error": "Wystąpił wewnętrzny błąd podczas scalania autorów.",
+            },
             status=500,
         )