From 6b0b2c61658a82a1bdf34fcce9a37ce347855bad Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:08:46 +0000 Subject: [PATCH 01/17] feat(parsing): support bulk telegram exports --- src/accxus/platforms/telegram/parsing.py | 262 ++++++++++++++++++++++- src/accxus/types/telegram.py | 10 +- 2 files changed, 264 insertions(+), 8 deletions(-) diff --git a/src/accxus/platforms/telegram/parsing.py b/src/accxus/platforms/telegram/parsing.py index 1b8341e..fb22ea6 100644 --- a/src/accxus/platforms/telegram/parsing.py +++ b/src/accxus/platforms/telegram/parsing.py @@ -1,5 +1,6 @@ from __future__ import annotations +import contextlib import json import logging from collections.abc import Callable @@ -12,6 +13,36 @@ log = logging.getLogger(__name__) +def _clean_filename(value: str) -> str: + cleaned = "".join(ch if ch.isalnum() or ch in ("-", "_", ".") else "_" for ch in value) + return cleaned.strip("._") or "chat" + + +def _chat_ref(chat: dict[str, Any]) -> str: + username = str(chat.get("username") or "").strip() + if username: + return f"@{username}" + return str(chat["id"]) + + +def _format_optional(value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return value + if hasattr(value, "isoformat"): + return str(value.isoformat()) + return str(value) + + +def _stringify_list(values: Any) -> list[str]: + if not values: + return [] + if not isinstance(values, list | tuple): + values = [values] + return [_format_optional(v) for v in values if v is not None] + + async def export_chat_history( session_name: str, chat: str, @@ -45,6 +76,7 @@ async def save_chat_history( on_progress: Callable[[int], None] | None = None, ) -> int: messages = await export_chat_history(session_name, chat, limit, on_progress) + dest.parent.mkdir(parents=True, exist_ok=True) if fmt == "txt": lines = [f"[{m['date']}] {m['from'] or 'unknown'}: {m['text']}" for m in messages] dest.write_text("\n".join(lines), encoding="utf-8") @@ -54,22 +86,169 @@ async def save_chat_history( return len(messages) +async def save_chats_history( + session_name: str, + chats: list[str], + dest_dir: Path, + fmt: str = "json", + limit: int = 0, + on_progress: Callable[[str, int], None] | None = None, +) -> dict[str, int]: + dest_dir.mkdir(parents=True, exist_ok=True) + exported: dict[str, int] = {} + + for chat in chats: + chat_key = _clean_filename(chat.lstrip("@")) + + def _progress(count: int, chat_ref: str = chat) -> None: + if on_progress: + on_progress(chat_ref, count) + + dest = dest_dir / f"{chat_key}.{fmt}" + exported[chat] = await save_chat_history( + session_name, + chat, + dest, + fmt=fmt, + limit=limit, + on_progress=_progress, + ) + + log.info("[parse] exported %d chat histories to %s", len(exported), dest_dir) + return exported + + +async def save_all_dialog_histories( + session_name: str, + dest_dir: Path, + *, + kind: str = "all", + selected_chats: list[str] | None = None, + fmt: str = "json", + limit: int = 0, + on_progress: Callable[[str, int], None] | None = None, +) -> dict[str, int]: + chats = selected_chats or [ + _chat_ref(chat) for chat in await list_dialogs(session_name, kind=kind, limit=0) + ] + return await save_chats_history( + session_name, + chats, + dest_dir, + fmt=fmt, + limit=limit, + on_progress=on_progress, + ) + + +async def _download_user_avatar(client: Any, user: Any, avatar_dir: Path | None) -> str: + if avatar_dir is None: + return "" + photo = getattr(user, "photo", None) + file_id = getattr(photo, "big_file_id", "") or getattr(photo, "small_file_id", "") + if not file_id: + return "" + + avatar_dir.mkdir(parents=True, exist_ok=True) + dest = avatar_dir / f"{user.id}.jpg" + try: + downloaded = await client.download_media(file_id, file_name=str(dest)) + return str(downloaded or dest) + except Exception as exc: + log.debug("[parse] avatar download failed for %s: %s", user.id, exc) + return "" + + +async def _load_user_extras(client: Any, user_id: int) -> dict[str, Any]: + extras: dict[str, Any] = { + "bio": "", + "song": "", + "birthday": "", + "gifts": [], + } + with contextlib.suppress(Exception): + chat = await client.get_chat(user_id) + extras["bio"] = getattr(chat, "bio", "") or getattr(chat, "description", "") or "" + extras["song"] = _format_optional(getattr(chat, "profile_song", "")) + extras["birthday"] = _format_optional(getattr(chat, "birthday", "")) + extras["gifts"] = _stringify_list(getattr(chat, "gifts", [])) + + with contextlib.suppress(Exception): + from pyrogram.raw.functions.users import GetFullUser # type: ignore[import-untyped] + + peer = await client.resolve_peer(user_id) + full = await client.invoke(GetFullUser(id=peer)) + full_user = getattr(full, "full_user", full) + extras["bio"] = extras["bio"] or getattr(full_user, "about", "") or "" + extras["song"] = extras["song"] or _format_optional(getattr(full_user, "profile_song", "")) + extras["birthday"] = extras["birthday"] or _format_optional( + getattr(full_user, "birthday", "") + ) + extras["gifts"] = extras["gifts"] or _stringify_list( + getattr(full_user, "gifts", None) or getattr(full_user, "premium_gifts", None) + ) + + return extras + + +async def _parsed_user_from_member( + client: Any, + member: Any, + *, + chat_info: dict[str, Any], + avatar_dir: Path | None, +) -> ParsedUser: + u = member.user + extras = await _load_user_extras(client, u.id) + return ParsedUser( + id=u.id, + username=u.username or "", + first_name=u.first_name or "", + last_name=u.last_name or "", + phone=u.phone_number or "", + avatar_path=await _download_user_avatar(client, u, avatar_dir), + bio=extras["bio"], + song=extras["song"], + birthday=extras["birthday"], + gifts=extras["gifts"], + source_chat_id=chat_info.get("id"), + source_chat_title=chat_info.get("title", ""), + source_chat_username=chat_info.get("username", ""), + ) + + async def parse_chat_members( session_name: str, chat: str, on_progress: Callable[[int], None] | None = None, + avatar_dir: Path | None = None, ) -> list[ParsedUser]: users: list[ParsedUser] = [] async with connected(session_name) as client: + chat_obj = await client.get_chat(chat) + chat_info = { + "id": chat_obj.id, + "title": ( + getattr(chat_obj, "title", None) + or " ".join( + p + for p in [ + getattr(chat_obj, "first_name", ""), + getattr(chat_obj, "last_name", ""), + ] + if p + ) + or str(chat_obj.id) + ), + "username": getattr(chat_obj, "username", "") or "", + } async for member in client.get_chat_members(chat): # type: ignore[reportGeneralTypeIssues] - u = member.user users.append( - ParsedUser( - id=u.id, - username=u.username or "", - first_name=u.first_name or "", - last_name=u.last_name or "", - phone=u.phone_number or "", + await _parsed_user_from_member( + client, + member, + chat_info=chat_info, + avatar_dir=avatar_dir, ) ) if on_progress and len(users) % 50 == 0: @@ -78,6 +257,75 @@ async def parse_chat_members( return users +async def parse_chats_members( + session_name: str, + chats: list[str], + *, + avatar_dir: Path | None = None, + on_progress: Callable[[str, int], None] | None = None, +) -> list[ParsedUser]: + users_by_id: dict[int, ParsedUser] = {} + async with connected(session_name) as client: + for chat in chats: + chat_obj = await client.get_chat(chat) + chat_info = { + "id": chat_obj.id, + "title": ( + getattr(chat_obj, "title", None) + or " ".join( + p + for p in [ + getattr(chat_obj, "first_name", ""), + getattr(chat_obj, "last_name", ""), + ] + if p + ) + or str(chat_obj.id) + ), + "username": getattr(chat_obj, "username", "") or "", + } + count = 0 + async for member in client.get_chat_members(chat): # type: ignore[reportGeneralTypeIssues] + parsed = await _parsed_user_from_member( + client, + member, + chat_info=chat_info, + avatar_dir=avatar_dir, + ) + if parsed.id not in users_by_id: + users_by_id[parsed.id] = parsed + count += 1 + if on_progress and count % 50 == 0: + on_progress(chat, count) + if on_progress: + on_progress(chat, count) + + users = list(users_by_id.values()) + log.info("[parse] parsed %d unique members from %d chats", len(users), len(chats)) + return users + + +async def save_chats_members( + session_name: str, + chats: list[str], + dest: Path, + *, + avatar_dir: Path | None = None, + on_progress: Callable[[str, int], None] | None = None, +) -> int: + users = await parse_chats_members( + session_name, + chats, + avatar_dir=avatar_dir, + on_progress=on_progress, + ) + payload = [u.model_dump() for u in users] + dest.parent.mkdir(parents=True, exist_ok=True) + dest.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8") + log.info("[parse] saved %d parsed members to %s", len(users), dest) + return len(users) + + async def list_dialogs( session_name: str, kind: str = "all", diff --git a/src/accxus/types/telegram.py b/src/accxus/types/telegram.py index fbe741a..e24a9bb 100644 --- a/src/accxus/types/telegram.py +++ b/src/accxus/types/telegram.py @@ -2,7 +2,7 @@ from enum import Enum -from pydantic import BaseModel, computed_field +from pydantic import BaseModel, Field, computed_field class SessionKind(str, Enum): @@ -41,6 +41,14 @@ class ParsedUser(BaseModel): first_name: str = "" last_name: str = "" phone: str = "" + avatar_path: str = "" + bio: str = "" + song: str = "" + birthday: str = "" + gifts: list[str] = Field(default_factory=list) + source_chat_id: int | None = None + source_chat_title: str = "" + source_chat_username: str = "" @computed_field # type: ignore[prop-decorator] @property From 671d44b6f5790329c2859db54e90a3e3cc099008 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:08:59 +0000 Subject: [PATCH 02/17] feat(ui): add selective parsing controls --- src/accxus/ui/tg/parsing.py | 170 ++++++++++++++++++++++++++++++++---- 1 file changed, 155 insertions(+), 15 deletions(-) diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index f0ed4b4..b3d8b18 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -77,6 +77,15 @@ def _get_session(widget: Widget, sel_id: str) -> str | None: return val +def _split_refs(value: str) -> list[str]: + refs: list[str] = [] + for chunk in value.replace("\n", ",").split(","): + ref = chunk.strip() + if ref: + refs.append(ref) + return refs + + class ParsingTab(Widget): DEFAULT_CSS = """ ParsingTab { height: 100%; width: 100%; } @@ -105,8 +114,16 @@ def compose(self) -> ComposeResult: yield Label("[bold]Chat List[/bold]") yield Select(choices, id="chats_sess", prompt="Select session") yield Select(_KIND_LABELS, value="all", id="chats_kind") + yield Input( + placeholder="Selected chats: @chat, -100123 (blank = fetched)", + id="chats_selected", + ) + yield Input(placeholder="Output dir (default: exported_chats)", id="chats_out") + yield Input(placeholder="History limit per chat (blank = all)", id="chats_limit") with Widget(classes="prow"): yield Button("Fetch Chats", id="btn_fetch_chats", variant="primary") + yield Button("Export Selected JSON", id="btn_export_chats", variant="success") + yield Button("Parse Users", id="btn_parse_chats", variant="success") yield Static("", id="chats_status") yield DataTable(id="chats_table", cursor_type="row", zebra_stripes=True) with TabPane("Export Chat", id="tp_export"), Widget(classes="pform", id="export_pane"): @@ -123,9 +140,12 @@ def compose(self) -> ComposeResult: with TabPane("Parse Users", id="tp_parse"), Widget(classes="pform", id="parse_pane"): yield Label("[bold]Parse Group Members[/bold]") yield Select(choices, id="pu_sess", prompt="Select session") - yield Input(placeholder="Group: @group / username / ID", id="pu_chat") + yield Input(placeholder="Groups: @group, @group2 / IDs", id="pu_chat") + yield Input(placeholder="Output JSON (default: parsed_users.json)", id="pu_out") + yield Input(placeholder="Avatar dir (default: parsed_avatars)", id="pu_avatars") with Widget(classes="prow"): yield Button("Parse", id="btn_parse", variant="success") + yield Button("Parse + Save JSON", id="btn_parse_save", variant="primary") yield Static("", id="pu_status") yield Label("[dim]Save parsed users to a group:[/dim]") with Widget(classes="prow"): @@ -159,9 +179,18 @@ def _build_chats_pane(self) -> None: pane.mount(Label("[bold]Chat List[/bold]")) pane.mount(Select(choices, id="chats_sess", prompt="Select session")) pane.mount(Select(_KIND_LABELS, value="all", id="chats_kind")) + pane.mount( + Input( + placeholder="Selected chats: @chat, -100123 (blank = fetched)", id="chats_selected" + ) + ) + pane.mount(Input(placeholder="Output dir (default: exported_chats)", id="chats_out")) + pane.mount(Input(placeholder="History limit per chat (blank = all)", id="chats_limit")) pane.mount( Widget( Button("Fetch Chats", id="btn_fetch_chats", variant="primary"), + Button("Export Selected JSON", id="btn_export_chats", variant="success"), + Button("Parse Users", id="btn_parse_chats", variant="success"), classes="prow", ) ) @@ -208,6 +237,80 @@ async def _do_fetch_chats(self) -> None: finally: self.query_one("#btn_fetch_chats", Button).disabled = False + def _selected_chat_refs(self) -> list[str]: + entered = _split_refs(self.query_one("#chats_selected", Input).value) + if entered: + return entered + return [ + f"@{d['username']}" if d.get("username") else str(d["id"]) + for d in self._fetched_dialogs + ] + + async def _do_export_chats(self) -> None: + session = _get_session(self, "#chats_sess") + chats = self._selected_chat_refs() + if not session or not chats: + self.app.notify("Select a session and fetch or enter chats", severity="warning") + return + + limit_raw = self.query_one("#chats_limit", Input).value.strip() + limit = int(limit_raw) if limit_raw.isdigit() else 0 + out_raw = self.query_one("#chats_out", Input).value.strip() + dest_dir = Path(out_raw or "exported_chats") + status = self.query_one("#chats_status", Static) + button = self.query_one("#btn_export_chats", Button) + button.disabled = True + + def _prog(chat: str, count: int) -> None: + status.update(f"[dim]{chat}: exported {count} messages…[/dim]") + + try: + exported = await tg_parsing.save_chats_history( + session, + chats, + dest_dir, + fmt="json", + limit=limit, + on_progress=_prog, + ) + status.update(f"✅ Exported {len(exported)} chats → {dest_dir}") + except Exception as e: + status.update(f"❌ {e}") + log.error("bulk chat export error: %s", e) + finally: + button.disabled = False + + async def _do_parse_chats_from_list(self) -> None: + session = _get_session(self, "#chats_sess") + chats = self._selected_chat_refs() + if not session or not chats: + self.app.notify("Select a session and fetch or enter chats", severity="warning") + return + + dest = Path("parsed_users.json") + avatar_dir = Path("parsed_avatars") + status = self.query_one("#chats_status", Static) + button = self.query_one("#btn_parse_chats", Button) + button.disabled = True + + def _prog(chat: str, count: int) -> None: + status.update(f"[dim]{chat}: parsed {count} users…[/dim]") + + try: + count = await tg_parsing.save_chats_members( + session, + chats, + dest, + avatar_dir=avatar_dir, + on_progress=_prog, + ) + status.update(f"✅ Parsed {count} users → {dest}; avatars → {avatar_dir}") + except Exception as e: + status.update(f"❌ {e}") + log.error("bulk users parse error: %s", e) + finally: + button.disabled = False + def _build_export_pane(self) -> None: pane = self.query_one("#export_pane") choices = _session_select_choices() @@ -231,10 +334,13 @@ def _build_parse_pane(self) -> None: choices = _session_select_choices() pane.mount(Label("[bold]Parse Group Members[/bold]")) pane.mount(Select(choices, id="pu_sess", prompt="Select session")) - pane.mount(Input(placeholder="Group: @group / username / ID", id="pu_chat")) + pane.mount(Input(placeholder="Groups: @group, @group2 / IDs", id="pu_chat")) + pane.mount(Input(placeholder="Output JSON (default: parsed_users.json)", id="pu_out")) + pane.mount(Input(placeholder="Avatar dir (default: parsed_avatars)", id="pu_avatars")) pane.mount( Widget( Button("Parse", id="btn_parse", variant="success"), + Button("Parse + Save JSON", id="btn_parse_save", variant="primary"), classes="prow", ) ) @@ -294,12 +400,18 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: bid = event.button.id if bid == "btn_fetch_chats": await self._do_fetch_chats() + elif bid == "btn_export_chats": + await self._do_export_chats() + elif bid == "btn_parse_chats": + await self._do_parse_chats_from_list() elif bid == "btn_exp_json": await self._do_export("json") elif bid == "btn_exp_txt": await self._do_export("txt") elif bid == "btn_parse": - await self._do_parse() + await self._do_parse(save=False) + elif bid == "btn_parse_save": + await self._do_parse(save=True) elif bid == "btn_save_grp": self._save_group() elif bid == "btn_grp_refresh": @@ -349,22 +461,30 @@ def _prog(n: int) -> None: for bid in ("btn_exp_json", "btn_exp_txt"): self.query_one(f"#{bid}", Button).disabled = False - async def _do_parse(self) -> None: + async def _do_parse(self, *, save: bool = False) -> None: session = _get_session(self, "#pu_sess") - chat = self.query_one("#pu_chat", Input).value.strip() - if not session or not chat: - self.app.notify("Select a session and enter a group", severity="warning") + chats = _split_refs(self.query_one("#pu_chat", Input).value) + if not session or not chats: + self.app.notify("Select a session and enter one or more groups", severity="warning") return status = self.query_one("#pu_status", Static) log_view = self.query_one("#parse_log", RichLog) - self.query_one("#btn_parse", Button).disabled = True + for bid in ("btn_parse", "btn_parse_save"): + self.query_one(f"#{bid}", Button).disabled = True - def _prog(n: int) -> None: - status.update(f"[dim]Parsed {n} users…[/dim]") + def _prog(chat: str, count: int) -> None: + status.update(f"[dim]{chat}: parsed {count} users…[/dim]") try: - users = await tg_parsing.parse_chat_members(session, chat, on_progress=_prog) + avatar_dir_raw = self.query_one("#pu_avatars", Input).value.strip() + avatar_dir = Path(avatar_dir_raw or "parsed_avatars") + users = await tg_parsing.parse_chats_members( + session, + chats, + avatar_dir=avatar_dir, + on_progress=_prog, + ) self._parsed_users = [ { "id": u.id, @@ -372,19 +492,39 @@ def _prog(n: int) -> None: "first_name": u.first_name, "last_name": u.last_name, "phone": u.phone, + "avatar_path": u.avatar_path, + "bio": u.bio, + "song": u.song, + "birthday": u.birthday, + "gifts": u.gifts, + "source_chat_id": u.source_chat_id, + "source_chat_title": u.source_chat_title, + "source_chat_username": u.source_chat_username, } for u in users ] - status.update(f"✅ Parsed {len(users)} users") - log_view.write(f"✅ Parsed {len(users)} users from {chat!r}") - log.info("parsed %d users from %s", len(users), chat) + if save: + out_raw = self.query_one("#pu_out", Input).value.strip() + dest = Path(out_raw or "parsed_users.json") + dest.parent.mkdir(parents=True, exist_ok=True) + dest.write_text( + json.dumps(self._parsed_users, indent=2, ensure_ascii=False), + encoding="utf-8", + ) + status.update(f"✅ Parsed {len(users)} users → {dest}; avatars → {avatar_dir}") + log_view.write(f"✅ Parsed users saved: {dest} ({len(users)} users)") + else: + status.update(f"✅ Parsed {len(users)} users") + log_view.write(f"✅ Parsed {len(users)} users from {len(chats)} chats") + log.info("parsed %d users from %d chats", len(users), len(chats)) self.query_one("#btn_save_grp", Button).disabled = False except Exception as e: status.update(f"❌ {e}") log_view.write(f"❌ Parse failed: {e}") log.error("parse error: %s", e) finally: - self.query_one("#btn_parse", Button).disabled = False + for bid in ("btn_parse", "btn_parse_save"): + self.query_one(f"#{bid}", Button).disabled = False def _save_group(self) -> None: gname = self.query_one("#pu_grp_name", Input).value.strip() From ee39740161017cf788b710284f800e87c15b16f3 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:11:51 +0000 Subject: [PATCH 03/17] fix(ui): render parsing tab content --- src/accxus/ui/tg/parsing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index b3d8b18..f5db7ef 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -90,6 +90,8 @@ class ParsingTab(Widget): DEFAULT_CSS = """ ParsingTab { height: 100%; width: 100%; } ParsingTab TabbedContent { height: 1fr; } + ParsingTab ContentSwitcher { height: 1fr; } + ParsingTab TabPane { height: 1fr; } .pform { padding: 1 2; height: 100%; overflow-y: auto; } .pform Label { margin-bottom: 1; } .pform Input { margin-bottom: 1; width: 44; } From 4c7f3281df645baf383db1975903c4791ab67168 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:19:35 +0000 Subject: [PATCH 04/17] feat(parsing): enrich telegram exports --- src/accxus/platforms/telegram/parsing.py | 318 +++++++++++++++++++++-- src/accxus/types/telegram.py | 3 +- 2 files changed, 304 insertions(+), 17 deletions(-) diff --git a/src/accxus/platforms/telegram/parsing.py b/src/accxus/platforms/telegram/parsing.py index fb22ea6..b8bdee2 100644 --- a/src/accxus/platforms/telegram/parsing.py +++ b/src/accxus/platforms/telegram/parsing.py @@ -35,6 +35,18 @@ def _format_optional(value: Any) -> str: return str(value) +def _enum_value(value: Any) -> str: + if value is None: + return "" + enum_value = getattr(value, "value", None) + if isinstance(enum_value, str): + return enum_value + enum_name = getattr(value, "name", None) + if isinstance(enum_name, str): + return enum_name.lower() + return str(value) + + def _stringify_list(values: Any) -> list[str]: if not values: return [] @@ -43,25 +55,265 @@ def _stringify_list(values: Any) -> list[str]: return [_format_optional(v) for v in values if v is not None] +def _serializable_value(value: Any, depth: int = 0) -> Any: + if value is None or isinstance(value, str | int | float | bool): + return value + if depth > 2: + return _format_optional(value) + if isinstance(value, list | tuple | set): + return [_serializable_value(item, depth + 1) for item in value] + if isinstance(value, dict): + return { + str(key): _serializable_value(item, depth + 1) + for key, item in value.items() + if not str(key).startswith("_") + } + if hasattr(value, "isoformat"): + return value.isoformat() + if hasattr(value, "value"): + return _enum_value(value) + data = getattr(value, "__dict__", None) + if isinstance(data, dict): + return { + key: _serializable_value(item, depth + 1) + for key, item in data.items() + if not key.startswith("_") and key != "_client" + } + return _format_optional(value) + + +def _normalize_gift(gift: Any) -> dict[str, Any]: + data = _serializable_value(gift) + if not isinstance(data, dict): + data = {"value": data} + from_id = ( + data.get("from") + or data.get("from_id") + or data.get("sender_id") + or data.get("user_id") + or data.get("peer_id") + or "" + ) + gift_type = data.get("type") or data.get("_") or data.get("title") or type(gift).__name__ + date = data.get("date") or data.get("timestamp") or "" + normalized = {"from": from_id, "type": gift_type, "date": date} + for key, value in data.items(): + if key not in normalized and key not in {"from_id", "sender_id", "user_id", "peer_id"}: + normalized[key] = value + return normalized + + +def _normalize_gifts(values: Any) -> list[dict[str, Any]]: + if not values: + return [] + if not isinstance(values, list | tuple): + values = [values] + return [_normalize_gift(value) for value in values if value is not None] + + +def _message_sender(msg: Any) -> str: + if getattr(msg, "from_user", None): + user = msg.from_user + return user.username or str(user.id) + if getattr(msg, "sender_chat", None): + chat = msg.sender_chat + return chat.username or chat.title or str(chat.id) + return "" + + +def _message_type(msg: Any) -> str: + if getattr(msg, "service", None): + return "service" + if getattr(msg, "media", None): + return _enum_value(msg.media) + if getattr(msg, "text", None): + return "text" + return "empty" + + +def _user_label(user: Any) -> str: + if user is None: + return "" + username = getattr(user, "username", "") or "" + if username: + return f"@{username}" + name = " ".join( + part for part in [getattr(user, "first_name", ""), getattr(user, "last_name", "")] if part + ) + return name or str(getattr(user, "id", "")) + + +def _service_text(msg: Any) -> str: + service = _enum_value(getattr(msg, "service", None)) + actor = _message_sender(msg) or "system" + if service == "new_chat_members": + members = ", ".join( + _user_label(user) for user in getattr(msg, "new_chat_members", []) or [] + ) + return f"{actor} added {members}".strip() + if service == "left_chat_members": + return f"{_user_label(getattr(msg, 'left_chat_member', None))} left the chat".strip() + if service == "new_chat_title": + return f"{actor} changed chat title to {getattr(msg, 'new_chat_title', '')}" + if service == "new_chat_photo": + return f"{actor} changed chat photo" + if service == "delete_chat_photo": + return f"{actor} deleted chat photo" + if service == "pinned_message": + pinned = getattr(getattr(msg, "pinned_message", None), "id", "") + return f"{actor} pinned message {pinned}".strip() + if service == "video_chat_started": + return f"{actor} started video chat" + if service == "video_chat_ended": + ended = getattr(msg, "video_chat_ended", None) + duration = getattr(ended, "duration", "") + return f"{actor} ended video chat {duration}".strip() + if service == "video_chat_scheduled": + scheduled = getattr(msg, "video_chat_scheduled", None) + start_date = _format_optional(getattr(scheduled, "start_date", "")) + return f"{actor} scheduled video chat {start_date}".strip() + if service == "video_chat_members_invited": + invited = getattr(msg, "video_chat_members_invited", None) + users = ", ".join(_user_label(user) for user in getattr(invited, "users", []) or []) + return f"{actor} invited {users} to video chat".strip() + ttl_period = getattr(msg, "ttl_period", None) or getattr(msg, "message_auto_delete_timer", None) + if ttl_period: + return f"{actor} changed auto-delete timer to {ttl_period}" + return service + + +def _service_details(msg: Any) -> dict[str, Any]: + fields = [ + "new_chat_members", + "left_chat_member", + "new_chat_title", + "delete_chat_photo", + "group_chat_created", + "supergroup_chat_created", + "channel_chat_created", + "migrate_to_chat_id", + "migrate_from_chat_id", + "pinned_message", + "game_high_score", + "video_chat_scheduled", + "video_chat_started", + "video_chat_ended", + "video_chat_members_invited", + "web_app_data", + "ttl_period", + "message_auto_delete_timer", + "message_auto_delete_timer_changed", + ] + details: dict[str, Any] = {} + for field in fields: + value = getattr(msg, field, None) + if value: + details[field] = _serializable_value(value) + return details + + +def _media_suffix(msg: Any) -> str: + media_type = _enum_value(getattr(msg, "media", None)) + media = getattr(msg, media_type, None) if media_type else None + file_name = getattr(media, "file_name", "") or "" + if file_name and Path(file_name).suffix: + return Path(file_name).suffix + mime_type = getattr(media, "mime_type", "") or "" + if mime_type == "application/x-tgsticker": + return ".tgs" + if mime_type == "video/webm": + return ".webm" + if mime_type == "image/webp": + return ".webp" + if media_type == "photo": + return ".jpg" + if media_type == "sticker": + if getattr(media, "is_animated", False): + return ".tgs" + if getattr(media, "is_video", False): + return ".webm" + return ".webp" + if media_type == "animation": + return ".mp4" + return "" + + +async def _download_message_media(client: Any, msg: Any, media_dir: Path | None) -> str: + if media_dir is None or not getattr(msg, "media", None): + return "" + media_dir.mkdir(parents=True, exist_ok=True) + media_type = _enum_value(msg.media) + dest = media_dir / f"{media_type}{msg.id}{_media_suffix(msg)}" + try: + downloaded = await client.download_media(msg, file_name=str(dest)) + return Path(str(downloaded or dest)).name + except Exception as exc: + log.debug("[parse] media download failed for message %s: %s", msg.id, exc) + return "" + + +def _custom_emoji_ids(msg: Any) -> list[int]: + ids: list[int] = [] + for entity in list(getattr(msg, "entities", []) or []) + list( + getattr(msg, "caption_entities", []) or [] + ): + custom_emoji_id = getattr(entity, "custom_emoji_id", None) + if custom_emoji_id: + ids.append(int(custom_emoji_id)) + return ids + + +async def _download_custom_emojis(client: Any, msg: Any, media_dir: Path | None) -> list[str]: + ids = _custom_emoji_ids(msg) + if media_dir is None or not ids: + return [] + media_dir.mkdir(parents=True, exist_ok=True) + files: list[str] = [] + with contextlib.suppress(Exception): + stickers = await client.get_custom_emoji_stickers(ids) + for sticker in stickers: + suffix = ".tgs" if sticker.is_animated else ".webm" if sticker.is_video else ".webp" + dest = media_dir / f"emoji{sticker.file_unique_id}{suffix}" + try: + downloaded = await client.download_media(sticker.file_id, file_name=str(dest)) + files.append(Path(str(downloaded or dest)).name) + except Exception as exc: + log.debug("[parse] custom emoji download failed: %s", exc) + return files + + +async def _message_to_dict(client: Any, msg: Any, media_dir: Path | None) -> dict[str, Any]: + msg_type = _message_type(msg) + service = _enum_value(getattr(msg, "service", None)) + media = _enum_value(getattr(msg, "media", None)) + text = msg.text or msg.caption or "" + if service and not text: + text = _service_text(msg) + return { + "id": msg.id, + "date": str(msg.date), + "from": _message_sender(msg), + "type": msg_type, + "service": service, + "media_type": media, + "text": text, + "media_file": await _download_message_media(client, msg, media_dir), + "custom_emoji_files": await _download_custom_emojis(client, msg, media_dir), + "service_details": _service_details(msg) if service else {}, + } + + async def export_chat_history( session_name: str, chat: str, limit: int = 0, on_progress: Callable[[int], None] | None = None, + media_dir: Path | None = None, ) -> list[dict[str, Any]]: messages: list[dict[str, Any]] = [] async with connected(session_name) as client: async for msg in client.get_chat_history(chat, limit=limit or 0): # type: ignore[reportGeneralTypeIssues] - messages.append( - { - "id": msg.id, - "date": str(msg.date), - "from": ( - (msg.from_user.username or str(msg.from_user.id)) if msg.from_user else "" - ), - "text": msg.text or msg.caption or "", - } - ) + messages.append(await _message_to_dict(client, msg, media_dir)) if on_progress and len(messages) % 100 == 0: on_progress(len(messages)) return messages @@ -74,8 +326,9 @@ async def save_chat_history( fmt: str = "json", limit: int = 0, on_progress: Callable[[int], None] | None = None, + media_dir: Path | None = None, ) -> int: - messages = await export_chat_history(session_name, chat, limit, on_progress) + messages = await export_chat_history(session_name, chat, limit, on_progress, media_dir) dest.parent.mkdir(parents=True, exist_ok=True) if fmt == "txt": lines = [f"[{m['date']}] {m['from'] or 'unknown'}: {m['text']}" for m in messages] @@ -93,6 +346,7 @@ async def save_chats_history( fmt: str = "json", limit: int = 0, on_progress: Callable[[str, int], None] | None = None, + media_dir: Path | None = None, ) -> dict[str, int]: dest_dir.mkdir(parents=True, exist_ok=True) exported: dict[str, int] = {} @@ -105,6 +359,7 @@ def _progress(count: int, chat_ref: str = chat) -> None: on_progress(chat_ref, count) dest = dest_dir / f"{chat_key}.{fmt}" + chat_media_dir = media_dir / chat_key if media_dir else None exported[chat] = await save_chat_history( session_name, chat, @@ -112,6 +367,7 @@ def _progress(count: int, chat_ref: str = chat) -> None: fmt=fmt, limit=limit, on_progress=_progress, + media_dir=chat_media_dir, ) log.info("[parse] exported %d chat histories to %s", len(exported), dest_dir) @@ -127,6 +383,7 @@ async def save_all_dialog_histories( fmt: str = "json", limit: int = 0, on_progress: Callable[[str, int], None] | None = None, + media_dir: Path | None = None, ) -> dict[str, int]: chats = selected_chats or [ _chat_ref(chat) for chat in await list_dialogs(session_name, kind=kind, limit=0) @@ -138,6 +395,7 @@ async def save_all_dialog_histories( fmt=fmt, limit=limit, on_progress=on_progress, + media_dir=media_dir, ) @@ -171,7 +429,13 @@ async def _load_user_extras(client: Any, user_id: int) -> dict[str, Any]: extras["bio"] = getattr(chat, "bio", "") or getattr(chat, "description", "") or "" extras["song"] = _format_optional(getattr(chat, "profile_song", "")) extras["birthday"] = _format_optional(getattr(chat, "birthday", "")) - extras["gifts"] = _stringify_list(getattr(chat, "gifts", [])) + extras["birthday"] = extras["birthday"] or _format_optional(getattr(chat, "birthdate", "")) + extras["song"] = extras["song"] or _format_optional(getattr(chat, "profile_music", "")) + extras["gifts"] = _normalize_gifts( + getattr(chat, "gifts", None) + or getattr(chat, "received_gifts", None) + or getattr(chat, "premium_gifts", None) + ) with contextlib.suppress(Exception): from pyrogram.raw.functions.users import GetFullUser # type: ignore[import-untyped] @@ -182,11 +446,15 @@ async def _load_user_extras(client: Any, user_id: int) -> dict[str, Any]: extras["bio"] = extras["bio"] or getattr(full_user, "about", "") or "" extras["song"] = extras["song"] or _format_optional(getattr(full_user, "profile_song", "")) extras["birthday"] = extras["birthday"] or _format_optional( - getattr(full_user, "birthday", "") + getattr(full_user, "birthday", "") or getattr(full_user, "birthdate", "") ) - extras["gifts"] = extras["gifts"] or _stringify_list( - getattr(full_user, "gifts", None) or getattr(full_user, "premium_gifts", None) + extras["song"] = extras["song"] or _format_optional(getattr(full_user, "profile_music", "")) + extras["gifts"] = extras["gifts"] or _normalize_gifts( + getattr(full_user, "gifts", None) + or getattr(full_user, "received_gifts", None) + or getattr(full_user, "premium_gifts", None) ) + extras["raw_profile"] = _serializable_value(full_user) return extras @@ -371,11 +639,29 @@ async def list_dialogs( async def get_user_info(session_name: str, user_id: str) -> dict[str, Any]: async with connected(session_name) as client: u = await client.get_users(user_id) + extras = await _load_user_extras(client, u.id) return { "id": u.id, "username": u.username or "", "first_name": u.first_name or "", "last_name": u.last_name or "", "phone": u.phone_number or "", - "bio": getattr(u, "bio", "") or "", + "bio": extras.get("bio", "") or getattr(u, "bio", "") or "", + "birthday": extras.get("birthday", ""), + "song": extras.get("song", ""), + "gifts": extras.get("gifts", []), + "is_bot": bool(getattr(u, "is_bot", False)), + "is_contact": bool(getattr(u, "is_contact", False)), + "is_mutual_contact": bool(getattr(u, "is_mutual_contact", False)), + "is_premium": bool(getattr(u, "is_premium", False)), + "is_verified": bool(getattr(u, "is_verified", False)), + "is_scam": bool(getattr(u, "is_scam", False)), + "is_fake": bool(getattr(u, "is_fake", False)), + "language_code": getattr(u, "language_code", "") or "", + "dc_id": getattr(u, "dc_id", None), + "status": _enum_value(getattr(u, "status", None)), + "last_online_date": _format_optional(getattr(u, "last_online_date", "")), + "next_offline_date": _format_optional(getattr(u, "next_offline_date", "")), + "emoji_status": _serializable_value(getattr(u, "emoji_status", None)), + "raw_profile": extras.get("raw_profile", {}), } diff --git a/src/accxus/types/telegram.py b/src/accxus/types/telegram.py index e24a9bb..b75c34e 100644 --- a/src/accxus/types/telegram.py +++ b/src/accxus/types/telegram.py @@ -1,6 +1,7 @@ from __future__ import annotations from enum import Enum +from typing import Any from pydantic import BaseModel, Field, computed_field @@ -45,7 +46,7 @@ class ParsedUser(BaseModel): bio: str = "" song: str = "" birthday: str = "" - gifts: list[str] = Field(default_factory=list) + gifts: list[dict[str, Any]] = Field(default_factory=list) source_chat_id: int | None = None source_chat_title: str = "" source_chat_username: str = "" From 81ec97d12e61274d9759c02540ffd65690929b87 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:19:39 +0000 Subject: [PATCH 05/17] feat(ui): expose media export controls --- src/accxus/ui/tg/parsing.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index f5db7ef..27d44ce 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -121,6 +121,7 @@ def compose(self) -> ComposeResult: id="chats_selected", ) yield Input(placeholder="Output dir (default: exported_chats)", id="chats_out") + yield Input(placeholder="Media dir (default: media)", id="chats_media") yield Input(placeholder="History limit per chat (blank = all)", id="chats_limit") with Widget(classes="prow"): yield Button("Fetch Chats", id="btn_fetch_chats", variant="primary") @@ -133,6 +134,7 @@ def compose(self) -> ComposeResult: yield Select(choices, id="exp_sess", prompt="Select session") yield Input(placeholder="Chat: @group / username / ID", id="exp_chat") yield Input(placeholder="Output file (default: export_.json)", id="exp_out") + yield Input(placeholder="Media dir (blank = no media download)", id="exp_media") yield Input(placeholder="Limit (blank = all)", id="exp_limit") with Widget(classes="prow"): yield Button("Export JSON", id="btn_exp_json", variant="success") @@ -187,6 +189,7 @@ def _build_chats_pane(self) -> None: ) ) pane.mount(Input(placeholder="Output dir (default: exported_chats)", id="chats_out")) + pane.mount(Input(placeholder="Media dir (default: media)", id="chats_media")) pane.mount(Input(placeholder="History limit per chat (blank = all)", id="chats_limit")) pane.mount( Widget( @@ -259,6 +262,8 @@ async def _do_export_chats(self) -> None: limit = int(limit_raw) if limit_raw.isdigit() else 0 out_raw = self.query_one("#chats_out", Input).value.strip() dest_dir = Path(out_raw or "exported_chats") + media_raw = self.query_one("#chats_media", Input).value.strip() + media_dir = Path(media_raw or "media") status = self.query_one("#chats_status", Static) button = self.query_one("#btn_export_chats", Button) button.disabled = True @@ -274,8 +279,9 @@ def _prog(chat: str, count: int) -> None: fmt="json", limit=limit, on_progress=_prog, + media_dir=media_dir, ) - status.update(f"✅ Exported {len(exported)} chats → {dest_dir}") + status.update(f"✅ Exported {len(exported)} chats → {dest_dir}; media → {media_dir}") except Exception as e: status.update(f"❌ {e}") log.error("bulk chat export error: %s", e) @@ -320,6 +326,7 @@ def _build_export_pane(self) -> None: pane.mount(Select(choices, id="exp_sess", prompt="Select session")) pane.mount(Input(placeholder="Chat: @group / username / ID", id="exp_chat")) pane.mount(Input(placeholder="Output file (default: export_.json)", id="exp_out")) + pane.mount(Input(placeholder="Media dir (blank = no media download)", id="exp_media")) pane.mount(Input(placeholder="Limit (blank = all)", id="exp_limit")) pane.mount( Widget( @@ -436,8 +443,10 @@ async def _do_export(self, fmt: str) -> None: limit_raw = self.query_one("#exp_limit", Input).value.strip() out_raw = self.query_one("#exp_out", Input).value.strip() + media_raw = self.query_one("#exp_media", Input).value.strip() limit = int(limit_raw) if limit_raw.isdigit() else 0 dest = Path(out_raw or f"export_{chat.lstrip('@')}.{fmt}") + media_dir = Path(media_raw) if media_raw else None status = self.query_one("#exp_status", Static) log_view = self.query_one("#export_log", RichLog) @@ -450,10 +459,17 @@ def _prog(n: int) -> None: try: count = await tg_parsing.save_chat_history( - session, chat, dest, fmt=fmt, limit=limit, on_progress=_prog + session, + chat, + dest, + fmt=fmt, + limit=limit, + on_progress=_prog, + media_dir=media_dir, ) - status.update(f"✅ {count} messages → {dest}") - log_view.write(f"✅ Export complete: {dest} ({count} messages)") + media_note = f"; media → {media_dir}" if media_dir else "" + status.update(f"✅ {count} messages → {dest}{media_note}") + log_view.write(f"✅ Export complete: {dest} ({count} messages){media_note}") log.info("export done: %s messages from %s -> %s", count, chat, dest) except Exception as e: status.update(f"❌ {e}") @@ -599,6 +615,9 @@ async def _do_snapshot(self) -> None: f"✅ Snapshot: " f"{info['first_name']} {info['last_name']} " f"@{info['username'] or '—'} " + f"birthday:{info.get('birthday') or '—'} " + f"song:{info.get('song') or '—'} " + f"gifts:{len(info.get('gifts') or [])} " f"[dim]{info['timestamp']}[/dim]" ) log.info("snapshot saved for %s", user_id) @@ -622,5 +641,8 @@ def _show_profile_history(self) -> None: f" [dim]{s['timestamp']}[/dim] " f"{s.get('first_name','')} {s.get('last_name','')} " f"@{s.get('username') or '—'} " + f"[dim]birthday:[/dim] {s.get('birthday') or '—'} " + f"[dim]song:[/dim] {s.get('song') or '—'} " + f"[dim]gifts:[/dim] {len(s.get('gifts') or [])} " f"[dim]bio:[/dim] {s.get('bio') or '—'}" ) From 550b7256b4d3be47576f39605260f62fc6dd310f Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:23:21 +0000 Subject: [PATCH 06/17] feat(ui): select fetched parsing chats --- src/accxus/ui/tg/parsing.py | 80 ++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index 27d44ce..acc7695 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -86,6 +86,13 @@ def _split_refs(value: str) -> list[str]: return refs +def _dialog_ref(dialog: dict[str, Any]) -> str: + username = str(dialog.get("username") or "") + if username: + return f"@{username}" + return str(dialog["id"]) + + class ParsingTab(Widget): DEFAULT_CSS = """ ParsingTab { height: 100%; width: 100%; } @@ -108,16 +115,19 @@ def __init__(self) -> None: super().__init__() self._parsed_users: list[Any] = [] self._fetched_dialogs: list[dict[str, Any]] = [] + self._selected_chats: set[str] = set() def compose(self) -> ComposeResult: choices = _session_select_choices() with TabbedContent(): with TabPane("Chats", id="tp_chats"), Widget(classes="pform", id="chats_pane"): - yield Label("[bold]Chat List[/bold]") + yield Label( + "[bold]Selected Chats[/bold]\n[dim]Fetch, then click rows to toggle[/dim]" + ) yield Select(choices, id="chats_sess", prompt="Select session") yield Select(_KIND_LABELS, value="all", id="chats_kind") yield Input( - placeholder="Selected chats: @chat, -100123 (blank = fetched)", + placeholder="Manual chats fallback: @chat, @chat2", id="chats_selected", ) yield Input(placeholder="Output dir (default: exported_chats)", id="chats_out") @@ -125,6 +135,8 @@ def compose(self) -> ComposeResult: yield Input(placeholder="History limit per chat (blank = all)", id="chats_limit") with Widget(classes="prow"): yield Button("Fetch Chats", id="btn_fetch_chats", variant="primary") + yield Button("All", id="btn_select_all_chats") + yield Button("Clear", id="btn_clear_chats") yield Button("Export Selected JSON", id="btn_export_chats", variant="success") yield Button("Parse Users", id="btn_parse_chats", variant="success") yield Static("", id="chats_status") @@ -180,20 +192,20 @@ def on_mount(self) -> None: def _build_chats_pane(self) -> None: pane = self.query_one("#chats_pane") choices = _session_select_choices() - pane.mount(Label("[bold]Chat List[/bold]")) - pane.mount(Select(choices, id="chats_sess", prompt="Select session")) - pane.mount(Select(_KIND_LABELS, value="all", id="chats_kind")) pane.mount( - Input( - placeholder="Selected chats: @chat, -100123 (blank = fetched)", id="chats_selected" - ) + Label("[bold]Selected Chats[/bold]\n[dim]Fetch, then click rows to toggle[/dim]") ) + pane.mount(Select(choices, id="chats_sess", prompt="Select session")) + pane.mount(Select(_KIND_LABELS, value="all", id="chats_kind")) + pane.mount(Input(placeholder="Manual chats fallback: @chat, @chat2", id="chats_selected")) pane.mount(Input(placeholder="Output dir (default: exported_chats)", id="chats_out")) pane.mount(Input(placeholder="Media dir (default: media)", id="chats_media")) pane.mount(Input(placeholder="History limit per chat (blank = all)", id="chats_limit")) pane.mount( Widget( Button("Fetch Chats", id="btn_fetch_chats", variant="primary"), + Button("All", id="btn_select_all_chats"), + Button("Clear", id="btn_clear_chats"), Button("Export Selected JSON", id="btn_export_chats", variant="success"), Button("Parse Users", id="btn_parse_chats", variant="success"), classes="prow", @@ -205,12 +217,34 @@ def _build_chats_pane(self) -> None: def _init_chats_table(self) -> None: tbl = self.query_one("#chats_table", DataTable) tbl.clear(columns=True) + tbl.add_column("", key="sel") tbl.add_column("", key="kind") tbl.add_column("Title", key="title") tbl.add_column("@Username", key="uname") tbl.add_column("ID", key="chat_id") tbl.add_column("Unread", key="unread") + def _sync_selected_chats(self) -> None: + tbl = self.query_one("#chats_table", DataTable) + available = {_dialog_ref(dialog) for dialog in self._fetched_dialogs} + self._selected_chats.intersection_update(available) + for dialog in self._fetched_dialogs: + ref = _dialog_ref(dialog) + with contextlib.suppress(Exception): + tbl.update_cell(ref, "sel", "●" if ref in self._selected_chats else "○") + with contextlib.suppress(Exception): + self.query_one("#chats_status", Static).update( + f"[dim]Selected: {len(self._selected_chats)} / {len(self._fetched_dialogs)}[/dim]" + ) + + def _select_all_chats(self) -> None: + self._selected_chats = {_dialog_ref(dialog) for dialog in self._fetched_dialogs} + self._sync_selected_chats() + + def _clear_selected_chats(self) -> None: + self._selected_chats.clear() + self._sync_selected_chats() + async def _do_fetch_chats(self) -> None: session = _get_session(self, "#chats_sess") if not session: @@ -224,17 +258,20 @@ async def _do_fetch_chats(self) -> None: self.query_one("#btn_fetch_chats", Button).disabled = True status.update("[dim]Fetching chats…[/dim]") self._init_chats_table() + self._selected_chats.clear() try: dialogs = await tg_parsing.list_dialogs(session, kind=kind) self._fetched_dialogs = dialogs tbl = self.query_one("#chats_table", DataTable) for d in dialogs: + ref = _dialog_ref(d) icon = _KIND_ICONS.get(d["kind"], "❓") uname = f"@{d['username']}" if d["username"] else "—" unread = str(d["unread"]) if d["unread"] else "·" - tbl.add_row(icon, d["title"], uname, str(d["id"]), unread) - status.update(f"✅ {len(dialogs)} chats fetched") + tbl.add_row("○", icon, d["title"], uname, str(d["id"]), unread, key=ref) + self._select_all_chats() + status.update(f"✅ {len(dialogs)} chats fetched; selected all") log.info("fetched %d dialogs from session %s (filter=%s)", len(dialogs), session, kind) except Exception as e: status.update(f"❌ {e}") @@ -243,13 +280,12 @@ async def _do_fetch_chats(self) -> None: self.query_one("#btn_fetch_chats", Button).disabled = False def _selected_chat_refs(self) -> list[str]: + if self._selected_chats: + return list(self._selected_chats) entered = _split_refs(self.query_one("#chats_selected", Input).value) if entered: return entered - return [ - f"@{d['username']}" if d.get("username") else str(d["id"]) - for d in self._fetched_dialogs - ] + return [] async def _do_export_chats(self) -> None: session = _get_session(self, "#chats_sess") @@ -409,6 +445,10 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: bid = event.button.id if bid == "btn_fetch_chats": await self._do_fetch_chats() + elif bid == "btn_select_all_chats": + self._select_all_chats() + elif bid == "btn_clear_chats": + self._clear_selected_chats() elif bid == "btn_export_chats": await self._do_export_chats() elif bid == "btn_parse_chats": @@ -434,6 +474,18 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: elif bid == "btn_prof_history": self._show_profile_history() + def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None: + if event.data_table.id != "chats_table": + return + key = str(event.row_key.value) if event.row_key.value is not None else "" + if not key: + return + if key in self._selected_chats: + self._selected_chats.discard(key) + else: + self._selected_chats.add(key) + self._sync_selected_chats() + async def _do_export(self, fmt: str) -> None: session = _get_session(self, "#exp_sess") chat = self.query_one("#exp_chat", Input).value.strip() From 2237232dc75e69a620a584b14b98d78ee7899231 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:26:18 +0000 Subject: [PATCH 07/17] fix(ui): show parsing chat table --- src/accxus/ui/tg/parsing.py | 94 ++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index acc7695..823f083 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -105,10 +105,14 @@ class ParsingTab(Widget): .pform Select { margin-bottom: 1; width: 44; } .prow { layout: horizontal; height: auto; margin-bottom: 1; } .prow Input { width: 28; margin-right: 1; } + .prow Select { width: 28; margin-right: 1; } + .prow Static { width: 28; margin-right: 1; height: 3; content-align: left middle; } .prow Button { margin-right: 1; } .plog { height: 12; margin-top: 1; } #groups_table { height: 10; margin-bottom: 1; } - #chats_table { height: 1fr; margin-bottom: 1; } + #chats_pane { overflow: hidden; } + #chats_controls { height: auto; } + #chats_table { height: 1fr; min-height: 10; margin-bottom: 1; } """ def __init__(self) -> None: @@ -121,25 +125,22 @@ def compose(self) -> ComposeResult: choices = _session_select_choices() with TabbedContent(): with TabPane("Chats", id="tp_chats"), Widget(classes="pform", id="chats_pane"): - yield Label( - "[bold]Selected Chats[/bold]\n[dim]Fetch, then click rows to toggle[/dim]" - ) - yield Select(choices, id="chats_sess", prompt="Select session") - yield Select(_KIND_LABELS, value="all", id="chats_kind") - yield Input( - placeholder="Manual chats fallback: @chat, @chat2", - id="chats_selected", - ) - yield Input(placeholder="Output dir (default: exported_chats)", id="chats_out") - yield Input(placeholder="Media dir (default: media)", id="chats_media") - yield Input(placeholder="History limit per chat (blank = all)", id="chats_limit") - with Widget(classes="prow"): - yield Button("Fetch Chats", id="btn_fetch_chats", variant="primary") - yield Button("All", id="btn_select_all_chats") - yield Button("Clear", id="btn_clear_chats") - yield Button("Export Selected JSON", id="btn_export_chats", variant="success") - yield Button("Parse Users", id="btn_parse_chats", variant="success") - yield Static("", id="chats_status") + with Widget(id="chats_controls"): + with Widget(classes="prow"): + yield Select(choices, id="chats_sess", prompt="Session") + yield Select(_KIND_LABELS, value="all", id="chats_kind") + yield Static("Fetched chats: 0", id="chats_status") + with Widget(classes="prow"): + yield Input(placeholder="Output (default: exported_chats)", id="chats_out") + yield Input(placeholder="History limit (blank = all)", id="chats_limit") + with Widget(classes="prow"): + yield Button("Fetch Chats", id="btn_fetch_chats", variant="primary") + yield Button("All", id="btn_select_all_chats") + yield Button("Clear", id="btn_clear_chats") + yield Button( + "Export Selected JSON", id="btn_export_chats", variant="success" + ) + yield Button("Parse Users", id="btn_parse_chats", variant="success") yield DataTable(id="chats_table", cursor_type="row", zebra_stripes=True) with TabPane("Export Chat", id="tp_export"), Widget(classes="pform", id="export_pane"): yield Label("[bold]Export Chat History[/bold]") @@ -192,26 +193,30 @@ def on_mount(self) -> None: def _build_chats_pane(self) -> None: pane = self.query_one("#chats_pane") choices = _session_select_choices() - pane.mount( - Label("[bold]Selected Chats[/bold]\n[dim]Fetch, then click rows to toggle[/dim]") - ) - pane.mount(Select(choices, id="chats_sess", prompt="Select session")) - pane.mount(Select(_KIND_LABELS, value="all", id="chats_kind")) - pane.mount(Input(placeholder="Manual chats fallback: @chat, @chat2", id="chats_selected")) - pane.mount(Input(placeholder="Output dir (default: exported_chats)", id="chats_out")) - pane.mount(Input(placeholder="Media dir (default: media)", id="chats_media")) - pane.mount(Input(placeholder="History limit per chat (blank = all)", id="chats_limit")) pane.mount( Widget( - Button("Fetch Chats", id="btn_fetch_chats", variant="primary"), - Button("All", id="btn_select_all_chats"), - Button("Clear", id="btn_clear_chats"), - Button("Export Selected JSON", id="btn_export_chats", variant="success"), - Button("Parse Users", id="btn_parse_chats", variant="success"), - classes="prow", + Widget( + Select(choices, id="chats_sess", prompt="Session"), + Select(_KIND_LABELS, value="all", id="chats_kind"), + Static("Fetched chats: 0", id="chats_status"), + classes="prow", + ), + Widget( + Input(placeholder="Output (default: exported_chats)", id="chats_out"), + Input(placeholder="History limit (blank = all)", id="chats_limit"), + classes="prow", + ), + Widget( + Button("Fetch Chats", id="btn_fetch_chats", variant="primary"), + Button("All", id="btn_select_all_chats"), + Button("Clear", id="btn_clear_chats"), + Button("Export Selected JSON", id="btn_export_chats", variant="success"), + Button("Parse Users", id="btn_parse_chats", variant="success"), + classes="prow", + ), + id="chats_controls", ) ) - pane.mount(Static("", id="chats_status")) pane.mount(DataTable(id="chats_table", cursor_type="row", zebra_stripes=True)) def _init_chats_table(self) -> None: @@ -234,7 +239,8 @@ def _sync_selected_chats(self) -> None: tbl.update_cell(ref, "sel", "●" if ref in self._selected_chats else "○") with contextlib.suppress(Exception): self.query_one("#chats_status", Static).update( - f"[dim]Selected: {len(self._selected_chats)} / {len(self._fetched_dialogs)}[/dim]" + f"Fetched chats: {len(self._fetched_dialogs)} " + f"Selected: {len(self._selected_chats)}" ) def _select_all_chats(self) -> None: @@ -280,26 +286,20 @@ async def _do_fetch_chats(self) -> None: self.query_one("#btn_fetch_chats", Button).disabled = False def _selected_chat_refs(self) -> list[str]: - if self._selected_chats: - return list(self._selected_chats) - entered = _split_refs(self.query_one("#chats_selected", Input).value) - if entered: - return entered - return [] + return list(self._selected_chats) async def _do_export_chats(self) -> None: session = _get_session(self, "#chats_sess") chats = self._selected_chat_refs() if not session or not chats: - self.app.notify("Select a session and fetch or enter chats", severity="warning") + self.app.notify("Select a session and choose chats from the table", severity="warning") return limit_raw = self.query_one("#chats_limit", Input).value.strip() limit = int(limit_raw) if limit_raw.isdigit() else 0 out_raw = self.query_one("#chats_out", Input).value.strip() dest_dir = Path(out_raw or "exported_chats") - media_raw = self.query_one("#chats_media", Input).value.strip() - media_dir = Path(media_raw or "media") + media_dir = dest_dir / "media" status = self.query_one("#chats_status", Static) button = self.query_one("#btn_export_chats", Button) button.disabled = True @@ -328,7 +328,7 @@ async def _do_parse_chats_from_list(self) -> None: session = _get_session(self, "#chats_sess") chats = self._selected_chat_refs() if not session or not chats: - self.app.notify("Select a session and fetch or enter chats", severity="warning") + self.app.notify("Select a session and choose chats from the table", severity="warning") return dest = Path("parsed_users.json") From ee000c223f4b0424459d27ef312ae04139773686 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:30:58 +0000 Subject: [PATCH 08/17] fix(ui): stack parsing chat controls --- src/accxus/platforms/telegram/parsing.py | 2 +- src/accxus/ui/tg/parsing.py | 43 +++++++++++++++++++----- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/accxus/platforms/telegram/parsing.py b/src/accxus/platforms/telegram/parsing.py index b8bdee2..21e9bc1 100644 --- a/src/accxus/platforms/telegram/parsing.py +++ b/src/accxus/platforms/telegram/parsing.py @@ -597,7 +597,7 @@ async def save_chats_members( async def list_dialogs( session_name: str, kind: str = "all", - limit: int = 200, + limit: int = 0, ) -> list[dict[str, Any]]: from pyrogram.enums import ChatType # type: ignore[import-untyped] diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index 823f083..b0c1be7 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -108,6 +108,10 @@ class ParsingTab(Widget): .prow Select { width: 28; margin-right: 1; } .prow Static { width: 28; margin-right: 1; height: 3; content-align: left middle; } .prow Button { margin-right: 1; } + .cfield { height: auto; margin-bottom: 1; } + .cfield Label { height: 1; margin-bottom: 0; } + .cfield Input { width: 52; margin-bottom: 0; } + .cfield Select { width: 52; margin-bottom: 0; } .plog { height: 12; margin-top: 1; } #groups_table { height: 10; margin-bottom: 1; } #chats_pane { overflow: hidden; } @@ -126,12 +130,20 @@ def compose(self) -> ComposeResult: with TabbedContent(): with TabPane("Chats", id="tp_chats"), Widget(classes="pform", id="chats_pane"): with Widget(id="chats_controls"): - with Widget(classes="prow"): + with Widget(classes="cfield"): + yield Label("Session") yield Select(choices, id="chats_sess", prompt="Session") + with Widget(classes="cfield"): + yield Label("Type") yield Select(_KIND_LABELS, value="all", id="chats_kind") - yield Static("Fetched chats: 0", id="chats_status") - with Widget(classes="prow"): + with Widget(classes="cfield"): + yield Label("Fetched chats") + yield Static("0 selected: 0", id="chats_status") + with Widget(classes="cfield"): + yield Label("Output") yield Input(placeholder="Output (default: exported_chats)", id="chats_out") + with Widget(classes="cfield"): + yield Label("History limit") yield Input(placeholder="History limit (blank = all)", id="chats_limit") with Widget(classes="prow"): yield Button("Fetch Chats", id="btn_fetch_chats", variant="primary") @@ -196,15 +208,29 @@ def _build_chats_pane(self) -> None: pane.mount( Widget( Widget( + Label("Session"), Select(choices, id="chats_sess", prompt="Session"), + classes="cfield", + ), + Widget( + Label("Type"), Select(_KIND_LABELS, value="all", id="chats_kind"), - Static("Fetched chats: 0", id="chats_status"), - classes="prow", + classes="cfield", ), Widget( + Label("Fetched chats"), + Static("0 selected: 0", id="chats_status"), + classes="cfield", + ), + Widget( + Label("Output"), Input(placeholder="Output (default: exported_chats)", id="chats_out"), + classes="cfield", + ), + Widget( + Label("History limit"), Input(placeholder="History limit (blank = all)", id="chats_limit"), - classes="prow", + classes="cfield", ), Widget( Button("Fetch Chats", id="btn_fetch_chats", variant="primary"), @@ -239,8 +265,7 @@ def _sync_selected_chats(self) -> None: tbl.update_cell(ref, "sel", "●" if ref in self._selected_chats else "○") with contextlib.suppress(Exception): self.query_one("#chats_status", Static).update( - f"Fetched chats: {len(self._fetched_dialogs)} " - f"Selected: {len(self._selected_chats)}" + f"{len(self._fetched_dialogs)} selected: {len(self._selected_chats)}" ) def _select_all_chats(self) -> None: @@ -267,7 +292,7 @@ async def _do_fetch_chats(self) -> None: self._selected_chats.clear() try: - dialogs = await tg_parsing.list_dialogs(session, kind=kind) + dialogs = await tg_parsing.list_dialogs(session, kind=kind, limit=0) self._fetched_dialogs = dialogs tbl = self.query_one("#chats_table", DataTable) for d in dialogs: From a92cdfaa9622e3a6ece60ecf33de98fbc6645819 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:32:22 +0000 Subject: [PATCH 09/17] fix(parsing): resolve unread telegram chats --- src/accxus/platforms/telegram/parsing.py | 70 ++++++++++++++++++------ 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/src/accxus/platforms/telegram/parsing.py b/src/accxus/platforms/telegram/parsing.py index 21e9bc1..36635e8 100644 --- a/src/accxus/platforms/telegram/parsing.py +++ b/src/accxus/platforms/telegram/parsing.py @@ -12,6 +12,8 @@ log = logging.getLogger(__name__) +ChatRef = int | str + def _clean_filename(value: str) -> str: cleaned = "".join(ch if ch.isalnum() or ch in ("-", "_", ".") else "_" for ch in value) @@ -25,6 +27,35 @@ def _chat_ref(chat: dict[str, Any]) -> str: return str(chat["id"]) +def _normalize_chat_ref(chat: ChatRef) -> ChatRef: + if isinstance(chat, int): + return chat + value = chat.strip() + if value.lstrip("-").isdigit(): + return int(value) + return value + + +async def _resolve_chat_ref(client: Any, chat: ChatRef) -> ChatRef: + ref = _normalize_chat_ref(chat) + with contextlib.suppress(Exception): + resolved = await client.get_chat(ref) + return resolved.id + + wanted_id = ref if isinstance(ref, int) else None + wanted_text = str(ref).lstrip("@").lower() if isinstance(ref, str) else "" + async for dialog in client.get_dialogs(limit=0): # type: ignore[reportGeneralTypeIssues] + dialog_chat = dialog.chat + if wanted_id is not None and dialog_chat.id == wanted_id: + return dialog_chat.id + username = (getattr(dialog_chat, "username", "") or "").lower() + title = (getattr(dialog_chat, "title", "") or "").lower() + if wanted_text and wanted_text in {username, title}: + return dialog_chat.id + + return ref + + def _format_optional(value: Any) -> str: if value is None: return "" @@ -305,14 +336,15 @@ async def _message_to_dict(client: Any, msg: Any, media_dir: Path | None) -> dic async def export_chat_history( session_name: str, - chat: str, + chat: ChatRef, limit: int = 0, on_progress: Callable[[int], None] | None = None, media_dir: Path | None = None, ) -> list[dict[str, Any]]: messages: list[dict[str, Any]] = [] async with connected(session_name) as client: - async for msg in client.get_chat_history(chat, limit=limit or 0): # type: ignore[reportGeneralTypeIssues] + resolved_chat = await _resolve_chat_ref(client, chat) + async for msg in client.get_chat_history(resolved_chat, limit=limit or 0): # type: ignore[reportGeneralTypeIssues] messages.append(await _message_to_dict(client, msg, media_dir)) if on_progress and len(messages) % 100 == 0: on_progress(len(messages)) @@ -321,7 +353,7 @@ async def export_chat_history( async def save_chat_history( session_name: str, - chat: str, + chat: ChatRef, dest: Path, fmt: str = "json", limit: int = 0, @@ -341,7 +373,7 @@ async def save_chat_history( async def save_chats_history( session_name: str, - chats: list[str], + chats: list[ChatRef], dest_dir: Path, fmt: str = "json", limit: int = 0, @@ -352,15 +384,15 @@ async def save_chats_history( exported: dict[str, int] = {} for chat in chats: - chat_key = _clean_filename(chat.lstrip("@")) + chat_key = _clean_filename(str(chat).lstrip("@")) - def _progress(count: int, chat_ref: str = chat) -> None: + def _progress(count: int, chat_ref: ChatRef = chat) -> None: if on_progress: - on_progress(chat_ref, count) + on_progress(str(chat_ref), count) dest = dest_dir / f"{chat_key}.{fmt}" chat_media_dir = media_dir / chat_key if media_dir else None - exported[chat] = await save_chat_history( + exported[str(chat)] = await save_chat_history( session_name, chat, dest, @@ -379,7 +411,7 @@ async def save_all_dialog_histories( dest_dir: Path, *, kind: str = "all", - selected_chats: list[str] | None = None, + selected_chats: list[ChatRef] | None = None, fmt: str = "json", limit: int = 0, on_progress: Callable[[str, int], None] | None = None, @@ -487,13 +519,14 @@ async def _parsed_user_from_member( async def parse_chat_members( session_name: str, - chat: str, + chat: ChatRef, on_progress: Callable[[int], None] | None = None, avatar_dir: Path | None = None, ) -> list[ParsedUser]: users: list[ParsedUser] = [] async with connected(session_name) as client: - chat_obj = await client.get_chat(chat) + resolved_chat = await _resolve_chat_ref(client, chat) + chat_obj = await client.get_chat(resolved_chat) chat_info = { "id": chat_obj.id, "title": ( @@ -510,7 +543,7 @@ async def parse_chat_members( ), "username": getattr(chat_obj, "username", "") or "", } - async for member in client.get_chat_members(chat): # type: ignore[reportGeneralTypeIssues] + async for member in client.get_chat_members(resolved_chat): # type: ignore[reportGeneralTypeIssues] users.append( await _parsed_user_from_member( client, @@ -527,7 +560,7 @@ async def parse_chat_members( async def parse_chats_members( session_name: str, - chats: list[str], + chats: list[ChatRef], *, avatar_dir: Path | None = None, on_progress: Callable[[str, int], None] | None = None, @@ -535,7 +568,8 @@ async def parse_chats_members( users_by_id: dict[int, ParsedUser] = {} async with connected(session_name) as client: for chat in chats: - chat_obj = await client.get_chat(chat) + resolved_chat = await _resolve_chat_ref(client, chat) + chat_obj = await client.get_chat(resolved_chat) chat_info = { "id": chat_obj.id, "title": ( @@ -553,7 +587,7 @@ async def parse_chats_members( "username": getattr(chat_obj, "username", "") or "", } count = 0 - async for member in client.get_chat_members(chat): # type: ignore[reportGeneralTypeIssues] + async for member in client.get_chat_members(resolved_chat): # type: ignore[reportGeneralTypeIssues] parsed = await _parsed_user_from_member( client, member, @@ -564,9 +598,9 @@ async def parse_chats_members( users_by_id[parsed.id] = parsed count += 1 if on_progress and count % 50 == 0: - on_progress(chat, count) + on_progress(str(chat), count) if on_progress: - on_progress(chat, count) + on_progress(str(chat), count) users = list(users_by_id.values()) log.info("[parse] parsed %d unique members from %d chats", len(users), len(chats)) @@ -575,7 +609,7 @@ async def parse_chats_members( async def save_chats_members( session_name: str, - chats: list[str], + chats: list[ChatRef], dest: Path, *, avatar_dir: Path | None = None, From f61e7f1bd36e3722459b1dc2b063a1dd190bba7e Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:38:25 +0000 Subject: [PATCH 10/17] feat(sessions): cache telegram dc metadata --- src/accxus/platforms/telegram/client.py | 12 ++- src/accxus/platforms/telegram/sessions.py | 90 ++++++++++++++++++++--- src/accxus/types/telegram.py | 1 + 3 files changed, 92 insertions(+), 11 deletions(-) diff --git a/src/accxus/platforms/telegram/client.py b/src/accxus/platforms/telegram/client.py index 0f99e17..edffebd 100644 --- a/src/accxus/platforms/telegram/client.py +++ b/src/accxus/platforms/telegram/client.py @@ -10,6 +10,7 @@ from pyrogram import Client # type: ignore[import-untyped] import accxus.config as cfg +from accxus.platforms.telegram import sessions as tg_sessions from accxus.types.core import ProxyConfig from accxus.types.telegram import SessionInfo, SessionStatus from accxus.utils.session_convert import detect_kind @@ -27,6 +28,10 @@ def make_client( ) -> Client: from pyrogram import Client as _Client # type: ignore[import-untyped] + dc_id = tg_sessions.hydrate_session_dc_metadata(session_name) + if dc_id is not None: + log.debug("[tg] session %s uses dc_id=%s", session_name, dc_id) + _proxy = proxy or cfg.config.telegram_proxy return _Client( # type: ignore[reportCallIssue] name=session_name, @@ -68,6 +73,7 @@ async def fetch_info( ) -> SessionInfo: async with connected(session_name, proxy=proxy) as client: me = await client.get_me() + dc_id = await client.storage.dc_id() try: chat = await client.get_chat(me.id) bio: str = getattr(chat, "bio", "") or "" @@ -82,6 +88,7 @@ async def fetch_info( last_name=me.last_name or "", username=me.username or "", bio=bio, + dc_id=dc_id, kind=kind, status=SessionStatus.VALID, ) @@ -100,7 +107,10 @@ async def check_validity( try: async with connected(session_name, proxy=proxy) as client: me = await client.get_me() - return SessionStatus.VALID if me else SessionStatus.INVALID + if me: + tg_sessions.update_metadata_dc_id(session_name, await client.storage.dc_id()) + return SessionStatus.VALID + return SessionStatus.INVALID except (AuthKeyUnregistered, UserDeactivated, UserDeactivatedBan): return SessionStatus.INVALID except Exception: diff --git a/src/accxus/platforms/telegram/sessions.py b/src/accxus/platforms/telegram/sessions.py index bbefd3a..78f91c1 100644 --- a/src/accxus/platforms/telegram/sessions.py +++ b/src/accxus/platforms/telegram/sessions.py @@ -2,6 +2,7 @@ import json import logging +import sqlite3 from pathlib import Path from typing import Any @@ -27,22 +28,90 @@ def save_metadata(meta: dict[str, dict[str, Any]]) -> None: _META_FILE.write_text(json.dumps(meta, indent=2, ensure_ascii=False), encoding="utf-8") +def read_session_dc_id(session_name: str) -> int | None: + path = session_path(session_name) + if not path.exists(): + return None + try: + with sqlite3.connect(path) as conn: + row = conn.execute("SELECT dc_id FROM sessions LIMIT 1").fetchone() + except sqlite3.Error: + return None + if not row or row[0] is None: + return None + try: + return int(row[0]) + except (TypeError, ValueError): + return None + + +def update_metadata_dc_id(session_name: str, dc_id: int | None) -> None: + if dc_id is None: + return + meta = load_metadata() + item = meta.setdefault(session_name, {}) + if item.get("dc_id") == dc_id: + return + item["dc_id"] = dc_id + save_metadata(meta) + + def update_metadata(session_name: str, info: SessionInfo) -> None: meta = load_metadata() - meta.setdefault(session_name, {}).update( - { - "phone": info.phone, - "first_name": info.first_name, - "last_name": info.last_name, - "username": info.username, - "kind": info.kind.name, - "status": info.status.value, - } - ) + data = { + "phone": info.phone, + "first_name": info.first_name, + "last_name": info.last_name, + "username": info.username, + "kind": info.kind.name, + "status": info.status.value, + } + if info.dc_id is not None: + data["dc_id"] = info.dc_id + meta.setdefault(session_name, {}).update(data) save_metadata(meta) +def hydrate_session_dc_metadata(session_name: str) -> int | None: + dc_id = read_session_dc_id(session_name) + update_metadata_dc_id(session_name, dc_id) + return dc_id + + +def hydrate_all_dc_metadata() -> None: + meta = load_metadata() + changed = False + for f in sorted(cfg.SESSIONS_DIR.glob("*.session")): + dc_id = read_session_dc_id(f.stem) + if dc_id is not None and meta.setdefault(f.stem, {}).get("dc_id") != dc_id: + meta[f.stem]["dc_id"] = dc_id + changed = True + if changed: + save_metadata(meta) + + +def update_metadata_statuses(statuses: dict[str, SessionStatus]) -> None: + meta = load_metadata() + for name, status in statuses.items(): + item = meta.setdefault(name, {}) + item["status"] = status.value + dc_id = read_session_dc_id(name) + if dc_id is not None: + item["dc_id"] = dc_id + save_metadata(meta) + + +def _coerce_dc_id(value: Any) -> int | None: + if value is None or value == "": + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + def list_sessions() -> list[SessionInfo]: + hydrate_all_dc_metadata() meta = load_metadata() result: list[SessionInfo] = [] for f in sorted(cfg.SESSIONS_DIR.glob("*.session")): @@ -67,6 +136,7 @@ def list_sessions() -> list[SessionInfo]: last_name=m.get("last_name", ""), username=m.get("username", ""), bio=m.get("bio", ""), + dc_id=_coerce_dc_id(m.get("dc_id")) or read_session_dc_id(name), kind=kind, status=status, ) diff --git a/src/accxus/types/telegram.py b/src/accxus/types/telegram.py index b75c34e..897b957 100644 --- a/src/accxus/types/telegram.py +++ b/src/accxus/types/telegram.py @@ -26,6 +26,7 @@ class SessionInfo(BaseModel): last_name: str = "" username: str = "" bio: str = "" + dc_id: int | None = None kind: SessionKind = SessionKind.PYROGRAM status: SessionStatus = SessionStatus.UNKNOWN From a586ca35000de87d46abf1fa107c94b9994ef71d Mon Sep 17 00:00:00 2001 From: xelthorV Date: Mon, 11 May 2026 18:38:30 +0000 Subject: [PATCH 11/17] feat(ui): show telegram session dc --- src/accxus/ui/app.py | 4 +++- src/accxus/ui/tg/add_session.py | 1 + src/accxus/ui/tg/sessions.py | 14 ++++++++++---- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/accxus/ui/app.py b/src/accxus/ui/app.py index e4e26f7..e195dcb 100644 --- a/src/accxus/ui/app.py +++ b/src/accxus/ui/app.py @@ -205,7 +205,9 @@ async def _cmd_session( # pyright: ignore[reportUnusedFunction] for s in sessions: color = "green" if s.status.value == "valid" else "red" name_part = f"[cyan]{s.name}[/cyan]" - phone_part = f"[dim]{s.phone or '?'} · @{s.username or '—'}[/dim]" + phone_part = ( + f"[dim]{s.phone or '?'} · @{s.username or '—'} · DC {s.dc_id or '—'}[/dim]" + ) _write(app, f" [{color}]●[/{color}] {name_part} {phone_part}") elif action == "check": diff --git a/src/accxus/ui/tg/add_session.py b/src/accxus/ui/tg/add_session.py index aae62f2..7826fdf 100644 --- a/src/accxus/ui/tg/add_session.py +++ b/src/accxus/ui/tg/add_session.py @@ -265,6 +265,7 @@ async def _finish(self) -> None: first_name=me.first_name or "", last_name=me.last_name or "", username=me.username or "", + dc_id=await self._client.storage.dc_id(), ) tg_sessions.update_metadata(self._name, info) await self._client.disconnect() diff --git a/src/accxus/ui/tg/sessions.py b/src/accxus/ui/tg/sessions.py index 1c34777..e775396 100644 --- a/src/accxus/ui/tg/sessions.py +++ b/src/accxus/ui/tg/sessions.py @@ -187,6 +187,7 @@ async def _finish(self) -> None: first_name=me.first_name or "", last_name=me.last_name or "", username=me.username or "", + dc_id=await self._client.storage.dc_id(), ) tg_sessions.update_metadata(self._name, info) await self._client.disconnect() @@ -497,10 +498,13 @@ def _reload_table(self) -> None: tbl.clear(columns=True) tbl.add_column("Session", key="name") tbl.add_column("Phone", key="phone") + tbl.add_column("DC", key="dc") tbl.add_column("Status", key="status") for info in tg_sessions.list_sessions(): status_str = self._status_markup(info.status) - tbl.add_row(info.name, info.phone or "—", status_str, key=info.name) + tbl.add_row( + info.name, info.phone or "—", str(info.dc_id or "—"), status_str, key=info.name + ) @staticmethod def _status_markup(s: SessionStatus) -> str: @@ -598,6 +602,7 @@ async def _do_access(self, name: str) -> None: f"[bold]{info.first_name} {info.last_name}[/bold] " f"{'@' + info.username if info.username else ''}\n" f"[dim]Phone:[/dim] {info.phone or '—'}\n" + f"[dim]DC:[/dim] {info.dc_id or '—'}\n" f"[dim]Bio:[/dim] {info.bio or '—'}\n" f"[dim]Session:[/dim] {name}.session {kind_label}" ) @@ -619,11 +624,12 @@ async def _check_all(self) -> None: names = [s.name for s in sessions] results = await tg_client.check_all_validity(names) - meta = tg_sessions.load_metadata() + tg_sessions.update_metadata_statuses(results) + sessions_by_name = {info.name: info for info in tg_sessions.list_sessions()} for name, status in results.items(): - meta.setdefault(name, {})["status"] = status.value tbl.update_cell(name, "status", self._status_markup(status)) - tg_sessions.save_metadata(meta) + if name in sessions_by_name: + tbl.update_cell(name, "dc", str(sessions_by_name[name].dc_id or "—")) valid = sum(1 for s in results.values() if s == SessionStatus.VALID) self.app.notify(f"✓ {valid}/{len(results)} valid", title=" Sessions") From 9e28a3046ef455c998897264fd54decd20c7478c Mon Sep 17 00:00:00 2001 From: xelthorV Date: Tue, 12 May 2026 12:57:27 +0000 Subject: [PATCH 12/17] feat(parsing): enhance gift parsing and add message export cooldown --- src/accxus/platforms/telegram/parsing.py | 82 ++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/src/accxus/platforms/telegram/parsing.py b/src/accxus/platforms/telegram/parsing.py index 36635e8..e0e0784 100644 --- a/src/accxus/platforms/telegram/parsing.py +++ b/src/accxus/platforms/telegram/parsing.py @@ -1,5 +1,6 @@ from __future__ import annotations +import asyncio import contextlib import json import logging @@ -114,9 +115,13 @@ def _serializable_value(value: Any, depth: int = 0) -> Any: def _normalize_gift(gift: Any) -> dict[str, Any]: + if gift is None: + return {} + data = _serializable_value(gift) if not isinstance(data, dict): data = {"value": data} + from_id = ( data.get("from") or data.get("from_id") @@ -127,11 +132,40 @@ def _normalize_gift(gift: Any) -> dict[str, Any]: ) gift_type = data.get("type") or data.get("_") or data.get("title") or type(gift).__name__ date = data.get("date") or data.get("timestamp") or "" - normalized = {"from": from_id, "type": gift_type, "date": date} + + res = { + "from": from_id, + "type": gift_type, + "date": date, + "price": 0, + "currency": "stars", + "status": "common", + "message": data.get("message") or "", + } + + # Enhanced parsing for Star Gifts + if hasattr(gift, "gift"): # UserStarGift + g = gift.gift + res["price"] = getattr(g, "stars", 0) + if getattr(g, "limited_count", 0) > 0: + res["status"] = "rare" + if getattr(gift, "upgraded", False): + res["status"] = "upgraded" + if getattr(gift, "upgrade_tag", None): + res["status"] += f" ({gift.upgrade_tag})" + + # Premium Gift parsing + if res["type"] == "PremiumGiftOption": + res["price"] = data.get("amount", 0) + res["currency"] = data.get("currency", "USD") + res["status"] = "premium" + + # Merge remaining fields for key, value in data.items(): - if key not in normalized and key not in {"from_id", "sender_id", "user_id", "peer_id"}: - normalized[key] = value - return normalized + if key not in res and key not in {"from_id", "sender_id", "user_id", "peer_id"}: + res[key] = value + + return res def _normalize_gifts(values: Any) -> list[dict[str, Any]]: @@ -334,20 +368,57 @@ async def _message_to_dict(client: Any, msg: Any, media_dir: Path | None) -> dic } +async def get_chat_senders( + session_name: str, + chat: ChatRef, + limit: int = 500, +) -> list[dict[str, Any]]: + """Fetch unique senders from chat history to allow filtering.""" + senders: dict[int, dict[str, Any]] = {} + async with connected(session_name) as client: + resolved_chat = await _resolve_chat_ref(client, chat) + async for msg in client.get_chat_history(resolved_chat, limit=limit): + u = msg.from_user + if u: + if u.id not in senders: + name = f"{u.first_name or ''} {u.last_name or ''}".strip() or str(u.id) + label = f"{name} ({u.id}/@{u.username})" if u.username else f"{name} ({u.id})" + senders[u.id] = {"id": u.id, "label": label, "username": u.username} + elif msg.sender_chat: + c = msg.sender_chat + if c.id not in senders: + name = c.title or str(c.id) + label = f"{name} ({c.id}/@{c.username})" if c.username else f"{name} ({c.id})" + senders[c.id] = {"id": c.id, "label": label, "username": c.username} + await asyncio.sleep(0.02) + return sorted(senders.values(), key=lambda x: x["label"]) + + async def export_chat_history( session_name: str, chat: ChatRef, limit: int = 0, on_progress: Callable[[int], None] | None = None, media_dir: Path | None = None, + sender_ids: list[int] | None = None, ) -> list[dict[str, Any]]: messages: list[dict[str, Any]] = [] async with connected(session_name) as client: resolved_chat = await _resolve_chat_ref(client, chat) async for msg in client.get_chat_history(resolved_chat, limit=limit or 0): # type: ignore[reportGeneralTypeIssues] + if sender_ids: + sid = (msg.from_user.id if msg.from_user else None) or ( + msg.sender_chat.id if msg.sender_chat else None + ) + if sid not in sender_ids: + continue + messages.append(await _message_to_dict(client, msg, media_dir)) if on_progress and len(messages) % 100 == 0: on_progress(len(messages)) + + # Cooldown to avoid bans + await asyncio.sleep(0.05) return messages @@ -359,8 +430,9 @@ async def save_chat_history( limit: int = 0, on_progress: Callable[[int], None] | None = None, media_dir: Path | None = None, + sender_ids: list[int] | None = None, ) -> int: - messages = await export_chat_history(session_name, chat, limit, on_progress, media_dir) + messages = await export_chat_history(session_name, chat, limit, on_progress, media_dir, sender_ids) dest.parent.mkdir(parents=True, exist_ok=True) if fmt == "txt": lines = [f"[{m['date']}] {m['from'] or 'unknown'}: {m['text']}" for m in messages] From a50b57a58637629006d74c604ddc30b3405b34ca Mon Sep 17 00:00:00 2001 From: xelthorV Date: Tue, 12 May 2026 12:57:27 +0000 Subject: [PATCH 13/17] feat(ui): add sender filtering to chat export --- src/accxus/ui/tg/parsing.py | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index b0c1be7..a16ee10 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -158,6 +158,14 @@ def compose(self) -> ComposeResult: yield Label("[bold]Export Chat History[/bold]") yield Select(choices, id="exp_sess", prompt="Select session") yield Input(placeholder="Chat: @group / username / ID", id="exp_chat") + with Widget(classes="prow"): + yield Button("Fetch Senders", id="btn_fetch_senders") + yield Select( + [("All Senders", "all")], + id="exp_sender", + prompt="Filter by sender", + value="all", + ) yield Input(placeholder="Output file (default: export_.json)", id="exp_out") yield Input(placeholder="Media dir (blank = no media download)", id="exp_media") yield Input(placeholder="Limit (blank = all)", id="exp_limit") @@ -498,6 +506,8 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: await self._do_snapshot() elif bid == "btn_prof_history": self._show_profile_history() + elif bid == "btn_fetch_senders": + await self._do_fetch_senders() def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None: if event.data_table.id != "chats_table": @@ -511,6 +521,29 @@ def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None: self._selected_chats.add(key) self._sync_selected_chats() + async def _do_fetch_senders(self) -> None: + session = _get_session(self, "#exp_sess") + chat = self.query_one("#exp_chat", Input).value.strip() + if not session or not chat: + self.app.notify("Select a session and enter a chat to fetch senders", severity="warning") + return + + status = self.query_one("#exp_status", Static) + status.update("[dim]Fetching unique senders from history…[/dim]") + try: + senders = await tg_parsing.get_chat_senders(session, chat, limit=500) + sel = self.query_one("#exp_sender", Select) + choices = [("All Senders", "all")] + [(s["label"], str(s["id"])) for s in senders] + if hasattr(sel, "set_options"): + sel.set_options(choices) # type: ignore[attr-defined] + else: + sel.options = choices # type: ignore[attr-defined] + sel.value = "all" + status.update(f"✅ Found {len(senders)} senders") + except Exception as e: + status.update(f"❌ {e}") + log.error("fetch senders error: %s", e) + async def _do_export(self, fmt: str) -> None: session = _get_session(self, "#exp_sess") chat = self.query_one("#exp_chat", Input).value.strip() @@ -524,6 +557,12 @@ async def _do_export(self, fmt: str) -> None: limit = int(limit_raw) if limit_raw.isdigit() else 0 dest = Path(out_raw or f"export_{chat.lstrip('@')}.{fmt}") media_dir = Path(media_raw) if media_raw else None + + sender_id_raw = self.query_one("#exp_sender", Select).value + sender_ids = None + if sender_id_raw and sender_id_raw != "all": + sender_ids = [int(sender_id_raw)] + status = self.query_one("#exp_status", Static) log_view = self.query_one("#export_log", RichLog) @@ -543,6 +582,7 @@ def _prog(n: int) -> None: limit=limit, on_progress=_prog, media_dir=media_dir, + sender_ids=sender_ids, ) media_note = f"; media → {media_dir}" if media_dir else "" status.update(f"✅ {count} messages → {dest}{media_note}") From eac1efaca1198a5de7375bf2f52ba72018180bd3 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Tue, 12 May 2026 13:04:07 +0000 Subject: [PATCH 14/17] fix(parsing): use absolute paths for downloads and fix type issues --- src/accxus/platforms/telegram/parsing.py | 22 +++++++-------- src/accxus/ui/tg/parsing.py | 35 +++++++++++++----------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/accxus/platforms/telegram/parsing.py b/src/accxus/platforms/telegram/parsing.py index e0e0784..bad25f8 100644 --- a/src/accxus/platforms/telegram/parsing.py +++ b/src/accxus/platforms/telegram/parsing.py @@ -79,14 +79,6 @@ def _enum_value(value: Any) -> str: return str(value) -def _stringify_list(values: Any) -> list[str]: - if not values: - return [] - if not isinstance(values, list | tuple): - values = [values] - return [_format_optional(v) for v in values if v is not None] - - def _serializable_value(value: Any, depth: int = 0) -> Any: if value is None or isinstance(value, str | int | float | bool): return value @@ -306,12 +298,13 @@ def _media_suffix(msg: Any) -> str: async def _download_message_media(client: Any, msg: Any, media_dir: Path | None) -> str: if media_dir is None or not getattr(msg, "media", None): return "" + media_dir = media_dir.absolute() media_dir.mkdir(parents=True, exist_ok=True) media_type = _enum_value(msg.media) dest = media_dir / f"{media_type}{msg.id}{_media_suffix(msg)}" try: downloaded = await client.download_media(msg, file_name=str(dest)) - return Path(str(downloaded or dest)).name + return str(downloaded or dest) except Exception as exc: log.debug("[parse] media download failed for message %s: %s", msg.id, exc) return "" @@ -332,6 +325,7 @@ async def _download_custom_emojis(client: Any, msg: Any, media_dir: Path | None) ids = _custom_emoji_ids(msg) if media_dir is None or not ids: return [] + media_dir = media_dir.absolute() media_dir.mkdir(parents=True, exist_ok=True) files: list[str] = [] with contextlib.suppress(Exception): @@ -341,7 +335,7 @@ async def _download_custom_emojis(client: Any, msg: Any, media_dir: Path | None) dest = media_dir / f"emoji{sticker.file_unique_id}{suffix}" try: downloaded = await client.download_media(sticker.file_id, file_name=str(dest)) - files.append(Path(str(downloaded or dest)).name) + files.append(str(downloaded or dest)) except Exception as exc: log.debug("[parse] custom emoji download failed: %s", exc) return files @@ -377,7 +371,8 @@ async def get_chat_senders( senders: dict[int, dict[str, Any]] = {} async with connected(session_name) as client: resolved_chat = await _resolve_chat_ref(client, chat) - async for msg in client.get_chat_history(resolved_chat, limit=limit): + # Using type: ignore[attr-defined] if pyright complains about get_chat_history not being a method on Client + async for msg in client.get_chat_history(resolved_chat, limit=limit): # type: ignore[attr-defined] u = msg.from_user if u: if u.id not in senders: @@ -432,7 +427,9 @@ async def save_chat_history( media_dir: Path | None = None, sender_ids: list[int] | None = None, ) -> int: - messages = await export_chat_history(session_name, chat, limit, on_progress, media_dir, sender_ids) + messages = await export_chat_history( + session_name, chat, limit, on_progress, media_dir, sender_ids + ) dest.parent.mkdir(parents=True, exist_ok=True) if fmt == "txt": lines = [f"[{m['date']}] {m['from'] or 'unknown'}: {m['text']}" for m in messages] @@ -511,6 +508,7 @@ async def _download_user_avatar(client: Any, user: Any, avatar_dir: Path | None) if not file_id: return "" + avatar_dir = avatar_dir.absolute() avatar_dir.mkdir(parents=True, exist_ok=True) dest = avatar_dir / f"{user.id}.jpg" try: diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index a16ee10..c4d63be 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -331,7 +331,7 @@ async def _do_export_chats(self) -> None: limit_raw = self.query_one("#chats_limit", Input).value.strip() limit = int(limit_raw) if limit_raw.isdigit() else 0 out_raw = self.query_one("#chats_out", Input).value.strip() - dest_dir = Path(out_raw or "exported_chats") + dest_dir = Path(out_raw or "exported_chats").absolute() media_dir = dest_dir / "media" status = self.query_one("#chats_status", Static) button = self.query_one("#btn_export_chats", Button) @@ -343,7 +343,7 @@ def _prog(chat: str, count: int) -> None: try: exported = await tg_parsing.save_chats_history( session, - chats, + list(chats), # type: ignore[arg-type] dest_dir, fmt="json", limit=limit, @@ -364,8 +364,8 @@ async def _do_parse_chats_from_list(self) -> None: self.app.notify("Select a session and choose chats from the table", severity="warning") return - dest = Path("parsed_users.json") - avatar_dir = Path("parsed_avatars") + dest = Path("parsed_users.json").absolute() + avatar_dir = Path("parsed_avatars").absolute() status = self.query_one("#chats_status", Static) button = self.query_one("#btn_parse_chats", Button) button.disabled = True @@ -376,7 +376,7 @@ def _prog(chat: str, count: int) -> None: try: count = await tg_parsing.save_chats_members( session, - chats, + list(chats), # type: ignore[arg-type] dest, avatar_dir=avatar_dir, on_progress=_prog, @@ -525,7 +525,9 @@ async def _do_fetch_senders(self) -> None: session = _get_session(self, "#exp_sess") chat = self.query_one("#exp_chat", Input).value.strip() if not session or not chat: - self.app.notify("Select a session and enter a chat to fetch senders", severity="warning") + self.app.notify( + "Select a session and enter a chat to fetch senders", severity="warning" + ) return status = self.query_one("#exp_status", Static) @@ -535,9 +537,9 @@ async def _do_fetch_senders(self) -> None: sel = self.query_one("#exp_sender", Select) choices = [("All Senders", "all")] + [(s["label"], str(s["id"])) for s in senders] if hasattr(sel, "set_options"): - sel.set_options(choices) # type: ignore[attr-defined] + sel.set_options(choices) # type: ignore[attr-defined,reportGeneralTypeIssues] else: - sel.options = choices # type: ignore[attr-defined] + sel.options = choices # type: ignore[attr-defined,reportGeneralTypeIssues] sel.value = "all" status.update(f"✅ Found {len(senders)} senders") except Exception as e: @@ -555,13 +557,14 @@ async def _do_export(self, fmt: str) -> None: out_raw = self.query_one("#exp_out", Input).value.strip() media_raw = self.query_one("#exp_media", Input).value.strip() limit = int(limit_raw) if limit_raw.isdigit() else 0 - dest = Path(out_raw or f"export_{chat.lstrip('@')}.{fmt}") - media_dir = Path(media_raw) if media_raw else None + dest = Path(out_raw or f"export_{chat.lstrip('@')}.{fmt}").absolute() + media_dir = Path(media_raw).absolute() if media_raw else None - sender_id_raw = self.query_one("#exp_sender", Select).value + sender_val = self.query_one("#exp_sender", Select).value sender_ids = None - if sender_id_raw and sender_id_raw != "all": - sender_ids = [int(sender_id_raw)] + if sender_val and str(sender_val) != "all" and str(sender_val) != "Select.BLANK": + with contextlib.suppress(ValueError): + sender_ids = [int(str(sender_val))] status = self.query_one("#exp_status", Static) log_view = self.query_one("#export_log", RichLog) @@ -613,10 +616,10 @@ def _prog(chat: str, count: int) -> None: try: avatar_dir_raw = self.query_one("#pu_avatars", Input).value.strip() - avatar_dir = Path(avatar_dir_raw or "parsed_avatars") + avatar_dir = Path(avatar_dir_raw or "parsed_avatars").absolute() users = await tg_parsing.parse_chats_members( session, - chats, + list(chats), # type: ignore[arg-type] avatar_dir=avatar_dir, on_progress=_prog, ) @@ -640,7 +643,7 @@ def _prog(chat: str, count: int) -> None: ] if save: out_raw = self.query_one("#pu_out", Input).value.strip() - dest = Path(out_raw or "parsed_users.json") + dest = Path(out_raw or "parsed_users.json").absolute() dest.parent.mkdir(parents=True, exist_ok=True) dest.write_text( json.dumps(self._parsed_users, indent=2, ensure_ascii=False), From f752c48ee06b0076c43415bc097625217511117d Mon Sep 17 00:00:00 2001 From: xelthorV Date: Tue, 12 May 2026 13:16:20 +0000 Subject: [PATCH 15/17] ci: add pr summary --- .github/workflows/pr-summary.yml | 420 +++++++++++++++++++++++++++++++ 1 file changed, 420 insertions(+) create mode 100644 .github/workflows/pr-summary.yml diff --git a/.github/workflows/pr-summary.yml b/.github/workflows/pr-summary.yml new file mode 100644 index 0000000..795db39 --- /dev/null +++ b/.github/workflows/pr-summary.yml @@ -0,0 +1,420 @@ +name: PR Summary + +on: + pull_request: + types: + - opened + - synchronize + - reopened + +permissions: + pull-requests: write + contents: read + +jobs: + pr-summary: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Generate rich PR summary + id: summary + uses: actions/github-script@v7 + with: + script: | + const pr = context.payload.pull_request; + + const commits = await github.paginate( + github.rest.pulls.listCommits, + { + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: pr.number, + per_page: 100 + } + ); + + const files = await github.paginate( + github.rest.pulls.listFiles, + { + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: pr.number, + per_page: 100 + } + ); + + const groups = { + feat: [], + fix: [], + refactor: [], + perf: [], + docs: [], + test: [], + chore: [], + ci: [], + build: [], + style: [], + revert: [], + other: [] + }; + + const titles = { + feat: "✨ Features", + fix: "🐛 Fixes", + refactor: "♻️ Refactoring", + perf: "⚡ Performance", + docs: "📝 Documentation", + test: "🧪 Tests", + chore: "🔧 Chores", + ci: "🚀 CI", + build: "📦 Build", + style: "🎨 Style", + revert: "⏪ Reverts", + other: "📌 Other" + }; + + let totalAdditions = 0; + let totalDeletions = 0; + + const contributors = new Map(); + + for (const file of files) { + totalAdditions += file.additions; + totalDeletions += file.deletions; + } + + for (const commit of commits) { + const sha = commit.sha.substring(0, 7); + const url = commit.html_url; + + const author = + commit.author?.login || + commit.commit.author.name; + + contributors.set( + author, + (contributors.get(author) || 0) + 1 + ); + + const message = + commit.commit.message.split("\n")[0]; + + const match = message.match( + /^(\w+)(\((.*?)\))?:\s(.+)$/ + ); + + let type = "other"; + let scope = ""; + let description = message; + + if (match) { + type = match[1]; + scope = match[3] || ""; + description = match[4]; + } + + if (!groups[type]) { + type = "other"; + } + + groups[type].push({ + sha, + scope, + description, + url, + author + }); + } + + const changedFiles = files + .sort((a, b) => b.changes - a.changes) + .slice(0, 15); + + const filesByExtension = {}; + + for (const file of files) { + const ext = + file.filename.includes(".") + ? file.filename.split(".").pop() + : "other"; + + filesByExtension[ext] = + (filesByExtension[ext] || 0) + 1; + } + + const sortedExtensions = Object.entries(filesByExtension) + .sort((a, b) => b[1] - a[1]); + + let body = ""; + + body += ` + + +

📋 Pull Request Summary

+ + + + + + + + + + + + + + + + + + + + + +
Repository${context.repo.owner}/${context.repo.repo}
PR + + #${pr.number} — ${pr.title} + +
Author@${pr.user.login}
Branch + ${pr.head.ref} + → + ${pr.base.ref} +
+ +
+ + + + + + + + + + + +
+

${commits.length}

+ commits +
+

${files.length}

+ changed files +
+

+${totalAdditions}

+ additions +
+

-${totalDeletions}

+ deletions +
+ +
+ `; + + for (const [type, items] of Object.entries(groups)) { + if (!items.length) continue; + + body += ` +
+ + ${titles[type]} (${items.length}) + + +
+ + + + + + + + `; + + for (const item of items) { + const scope = item.scope + ? `${item.scope} ` + : ""; + + body += ` + + + + + + + + `; + } + + body += ` +
CommitDescriptionAuthor
+ + ${item.sha} + + + ${scope}${item.description} + @${item.author}
+
+ +
+ `; + } + + body += ` +

📂 Most Changed Files

+ + + + + + + + `; + + for (const file of changedFiles) { + const statusEmoji = { + added: "🟢", + modified: "🟡", + removed: "🔴", + renamed: "🔵" + }[file.status] || "⚪"; + + body += ` + + + + + + + + `; + } + + body += ` +
FileStatusChanges
+ ${file.filename} + + ${statusEmoji} ${file.status} + + +${file.additions} + / -${file.deletions} +
+ +
+ +

🧩 File Types

+ + + + + + + `; + + for (const [ext, count] of sortedExtensions) { + body += ` + + + + + `; + } + + body += ` +
ExtensionFiles
.${ext}${count}
+ +
+ +

👥 Contributors

+ + + + + + + `; + + for (const [user, count] of contributors.entries()) { + body += ` + + + + + `; + } + + body += ` +
UserCommits
@${user}${count}
+ +
+ +
+ + 🔎 View raw commit messages + + +
+ +
+            `;
+
+            for (const commit of commits) {
+              body += `${commit.commit.message}\n\n`;
+            }
+
+            body += `
+              
+
+ +
+ +
+ + + Generated automatically from conventional commits and pull request metadata. + + `; + + core.setOutput("body", body); + + - name: Create or update PR comment + uses: actions/github-script@v7 + env: + BODY: ${{ steps.summary.outputs.body }} + with: + script: | + const marker = ''; + + const comments = await github.paginate( + github.rest.issues.listComments, + { + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number + } + ); + + const existing = comments.find(comment => + comment.body.includes(marker) + ); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body: process.env.BODY + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + body: process.env.BODY + }); + } \ No newline at end of file From 389e9f723355498108f67e1d98801cd50d720cab Mon Sep 17 00:00:00 2001 From: xelthorV Date: Tue, 12 May 2026 13:17:02 +0000 Subject: [PATCH 16/17] chore: bump version to 0.3.0 --- pyproject.toml | 2 +- src/accxus/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e1b346a..93c60e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "accxus" -version = "0.1.0" +version = "0.3.0" description = "accxus is a program where you can create, manage, and modify accounts on various social networks. It uses SMS activation services for registration." readme = "README.md" requires-python = ">=3.10" diff --git a/src/accxus/__init__.py b/src/accxus/__init__.py index d3ec452..493f741 100644 --- a/src/accxus/__init__.py +++ b/src/accxus/__init__.py @@ -1 +1 @@ -__version__ = "0.2.0" +__version__ = "0.3.0" From de9b37c56b242d37b2f2e476de8c50e3b858a213 Mon Sep 17 00:00:00 2001 From: xelthorV Date: Tue, 12 May 2026 13:24:27 +0000 Subject: [PATCH 17/17] ci: add markdown in PR Summary --- .github/workflows/pr-summary.yml | 346 ++++++++++++------------------- 1 file changed, 136 insertions(+), 210 deletions(-) diff --git a/.github/workflows/pr-summary.yml b/.github/workflows/pr-summary.yml index 795db39..6644a8d 100644 --- a/.github/workflows/pr-summary.yml +++ b/.github/workflows/pr-summary.yml @@ -16,12 +16,12 @@ jobs: runs-on: ubuntu-latest steps: - - name: Checkout repository + - name: Checkout uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Generate rich PR summary + - name: Generate PR summary id: summary uses: actions/github-script@v7 with: @@ -57,8 +57,8 @@ jobs: test: [], chore: [], ci: [], - build: [], style: [], + build: [], revert: [], other: [] }; @@ -72,20 +72,32 @@ jobs: test: "🧪 Tests", chore: "🔧 Chores", ci: "🚀 CI", - build: "📦 Build", style: "🎨 Style", + build: "📦 Build", revert: "⏪ Reverts", other: "📌 Other" }; - let totalAdditions = 0; - let totalDeletions = 0; + let additions = 0; + let deletions = 0; const contributors = new Map(); + const scopeStats = new Map(); + const dirStats = new Map(); for (const file of files) { - totalAdditions += file.additions; - totalDeletions += file.deletions; + additions += file.additions; + deletions += file.deletions; + + const dir = + file.filename.includes("/") + ? file.filename.split("/")[0] + : "root"; + + dirStats.set( + dir, + (dirStats.get(dir) || 0) + 1 + ); } for (const commit of commits) { @@ -122,273 +134,187 @@ jobs: type = "other"; } + if (scope) { + scopeStats.set( + scope, + (scopeStats.get(scope) || 0) + 1 + ); + } + groups[type].push({ sha, + url, scope, description, - url, author }); } - const changedFiles = files + const topFiles = [...files] .sort((a, b) => b.changes - a.changes) - .slice(0, 15); + .slice(0, 10); - const filesByExtension = {}; + const topScopes = [...scopeStats.entries()] + .sort((a, b) => b[1] - a[1]); - for (const file of files) { - const ext = - file.filename.includes(".") - ? file.filename.split(".").pop() - : "other"; + const topDirs = [...dirStats.entries()] + .sort((a, b) => b[1] - a[1]); + + function progress(value, total) { + const width = 20; + const filled = Math.round((value / total) * width); - filesByExtension[ext] = - (filesByExtension[ext] || 0) + 1; + return ( + "█".repeat(filled) + + "░".repeat(width - filled) + ); } - const sortedExtensions = Object.entries(filesByExtension) - .sort((a, b) => b[1] - a[1]); + const totalTypedCommits = Object.values(groups) + .reduce((acc, arr) => acc + arr.length, 0); let body = ""; - body += ` - - -

📋 Pull Request Summary

- - - - - - - - - - - - - - - - - - - - - -
Repository${context.repo.owner}/${context.repo.repo}
PR - - #${pr.number} — ${pr.title} - -
Author@${pr.user.login}
Branch - ${pr.head.ref} - → - ${pr.base.ref} -
- -
- - - - - - - - - - - -
-

${commits.length}

- commits -
-

${files.length}

- changed files -
-

+${totalAdditions}

- additions -
-

-${totalDeletions}

- deletions -
- -
- `; + body += `\n`; + + body += `# 📋 PR Summary\n\n`; + + body += `### ${pr.title}\n\n`; + + body += `> ${pr.user.login} opened a pull request from \`${pr.head.ref}\` → \`${pr.base.ref}\`\n\n`; + + body += `---\n\n`; + + body += `## 📊 Overview\n\n`; + + body += `| Metric | Value |\n`; + body += `|---|---|\n`; + body += `| Commits | \`${commits.length}\` |\n`; + body += `| Changed Files | \`${files.length}\` |\n`; + body += `| Additions | \`+${additions}\` |\n`; + body += `| Deletions | \`-${deletions}\` |\n`; + body += `| Contributors | \`${contributors.size}\` |\n\n`; + + body += `---\n\n`; + + body += `## 📈 Change Distribution\n\n`; for (const [type, items] of Object.entries(groups)) { if (!items.length) continue; - body += ` -
- - ${titles[type]} (${items.length}) - + const bar = progress( + items.length, + totalTypedCommits + ); + + body += `- ${titles[type]} \`${bar}\` ${items.length}\n`; + } + + body += `\n---\n\n`; -
+ for (const [type, items] of Object.entries(groups)) { + if (!items.length) continue; + + body += `## ${titles[type]}\n\n`; - - - - - - - `; + body += `
\n`; + body += `${items.length} commits\n\n`; for (const item of items) { const scope = item.scope - ? `${item.scope} ` + ? `\`${item.scope}\` ` : ""; - body += ` -
- - - - - - - `; + body += `- [\`${item.sha}\`](${item.url}) ${scope}${item.description} — @${item.author}\n`; } - body += ` -
CommitDescriptionAuthor
- - ${item.sha} - - - ${scope}${item.description} - @${item.author}
-
- -
- `; + body += `\n\n\n`; } - body += ` -

📂 Most Changed Files

- - - - - - - - `; - - for (const file of changedFiles) { - const statusEmoji = { - added: "🟢", - modified: "🟡", - removed: "🔴", - renamed: "🔵" - }[file.status] || "⚪"; - - body += ` - - - - - - - - `; + body += `---\n\n`; + + body += `## 🎯 Main Impact Areas\n\n`; + + for (const [scope, count] of topScopes.slice(0, 8)) { + body += `- \`${scope}\` — ${count} commits\n`; } - body += ` -
FileStatusChanges
- ${file.filename} - - ${statusEmoji} ${file.status} - - +${file.additions} - / -${file.deletions} -
+ body += `\n---\n\n`; -
+ body += `## 📂 Most Changed Files\n\n`; -

🧩 File Types

+ body += `\`\`\`diff\n`; - - - - - - `; + for (const file of topFiles) { + body += `+ ${String(file.additions).padEnd(4)} `; + body += `- ${String(file.deletions).padEnd(4)} `; + body += `${file.filename}\n`; + } + + body += `\`\`\`\n\n`; + + body += `---\n\n`; - for (const [ext, count] of sortedExtensions) { - body += ` - - - - - `; + body += `## 🧩 Changed Directories\n\n`; + + for (const [dir, count] of topDirs.slice(0, 10)) { + body += `- \`${dir}/\` — ${count} files\n`; } - body += ` -
ExtensionFiles
.${ext}${count}
+ body += `\n---\n\n`; -
+ body += `## ⚠️ High Impact Files\n\n`; -

👥 Contributors

+ const risky = files + .filter(f => f.changes > 200) + .sort((a, b) => b.changes - a.changes); - - - - - - `; + if (risky.length) { + for (const file of risky) { + body += `- \`${file.filename}\` `; + body += `(+${file.additions} / -${file.deletions})\n`; + } + } else { + body += `No high impact files detected.\n`; + } + + body += `\n---\n\n`; + + body += `## 👥 Contributors\n\n`; for (const [user, count] of contributors.entries()) { - body += ` - - - - - `; + body += `- @${user} — ${count} commits\n`; } - body += ` -
UserCommits
@${user}${count}
+ body += `\n---\n\n`; -
+ body += `## 🔎 Raw Commit Messages\n\n`; -
- - 🔎 View raw commit messages - + body += `
\n`; + body += `Show raw commits\n\n`; -
- -
-            `;
+            body += `\`\`\`text\n`;
 
             for (const commit of commits) {
               body += `${commit.commit.message}\n\n`;
             }
 
-            body += `
-              
-
- -
+ body += `\`\`\`\n`; + body += `
\n\n`; -
+ body += `---\n\n`; - - Generated automatically from conventional commits and pull request metadata. - - `; + body += `Generated automatically from conventional commits and PR metadata.`; core.setOutput("body", body); - - name: Create or update PR comment + - name: Create or update comment uses: actions/github-script@v7 env: BODY: ${{ steps.summary.outputs.body }} with: script: | - const marker = ''; + const marker = ''; const comments = await github.paginate( github.rest.issues.listComments,