diff --git a/.github/workflows/pr-summary.yml b/.github/workflows/pr-summary.yml new file mode 100644 index 0000000..795db39 --- /dev/null +++ b/.github/workflows/pr-summary.yml @@ -0,0 +1,420 @@ +name: PR Summary + +on: + pull_request: + types: + - opened + - synchronize + - reopened + +permissions: + pull-requests: write + contents: read + +jobs: + pr-summary: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Generate rich PR summary + id: summary + uses: actions/github-script@v7 + with: + script: | + const pr = context.payload.pull_request; + + const commits = await github.paginate( + github.rest.pulls.listCommits, + { + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: pr.number, + per_page: 100 + } + ); + + const files = await github.paginate( + github.rest.pulls.listFiles, + { + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: pr.number, + per_page: 100 + } + ); + + const groups = { + feat: [], + fix: [], + refactor: [], + perf: [], + docs: [], + test: [], + chore: [], + ci: [], + build: [], + style: [], + revert: [], + other: [] + }; + + const titles = { + feat: "✨ Features", + fix: "πŸ› Fixes", + refactor: "♻️ Refactoring", + perf: "⚑ Performance", + docs: "πŸ“ Documentation", + test: "πŸ§ͺ Tests", + chore: "πŸ”§ Chores", + ci: "πŸš€ CI", + build: "πŸ“¦ Build", + style: "🎨 Style", + revert: "βͺ Reverts", + other: "πŸ“Œ Other" + }; + + let totalAdditions = 0; + let totalDeletions = 0; + + const contributors = new Map(); + + for (const file of files) { + totalAdditions += file.additions; + totalDeletions += file.deletions; + } + + for (const commit of commits) { + const sha = commit.sha.substring(0, 7); + const url = commit.html_url; + + const author = + commit.author?.login || + commit.commit.author.name; + + contributors.set( + author, + (contributors.get(author) || 0) + 1 + ); + + const message = + commit.commit.message.split("\n")[0]; 
+ + const match = message.match( + /^(\w+)(\((.*?)\))?:\s(.+)$/ + ); + + let type = "other"; + let scope = ""; + let description = message; + + if (match) { + type = match[1]; + scope = match[3] || ""; + description = match[4]; + } + + if (!groups[type]) { + type = "other"; + } + + groups[type].push({ + sha, + scope, + description, + url, + author + }); + } + + const changedFiles = files + .sort((a, b) => b.changes - a.changes) + .slice(0, 15); + + const filesByExtension = {}; + + for (const file of files) { + const ext = + file.filename.includes(".") + ? file.filename.split(".").pop() + : "other"; + + filesByExtension[ext] = + (filesByExtension[ext] || 0) + 1; + } + + const sortedExtensions = Object.entries(filesByExtension) + .sort((a, b) => b[1] - a[1]); + + let body = ""; + + body += ` + + +

📋 Pull Request Summary

+ + + + + + + + + + + + + + + + + + + + + +
Repository${context.repo.owner}/${context.repo.repo}
PR + + #${pr.number} — ${pr.title} + +
Author@${pr.user.login}
Branch + ${pr.head.ref} + → + ${pr.base.ref} +
+ +
+ + + + + + + + + + + +
+

${commits.length}

+ commits +
+

${files.length}

+ changed files +
+

+${totalAdditions}

+ additions +
+

-${totalDeletions}

+ deletions +
+ +
+ `; + + for (const [type, items] of Object.entries(groups)) { + if (!items.length) continue; + + body += ` +
+ + ${titles[type]} (${items.length}) + + +
+ + + + + + + + `; + + for (const item of items) { + const scope = item.scope + ? `${item.scope} ` + : ""; + + body += ` + + + + + + + + `; + } + + body += ` +
CommitDescriptionAuthor
+ + ${item.sha} + + + ${scope}${item.description} + @${item.author}
+
+ +
+ `; + } + + body += ` +

📂 Most Changed Files

+ + + + + + + + `; + + for (const file of changedFiles) { + const statusEmoji = { + added: "🟒", + modified: "🟑", + removed: "πŸ”΄", + renamed: "πŸ”΅" + }[file.status] || "βšͺ"; + + body += ` + + + + + + + + `; + } + + body += ` +
FileStatusChanges
+ ${file.filename} + + ${statusEmoji} ${file.status} + + +${file.additions} + / -${file.deletions} +
+ +
+ +

🧩 File Types

+ + + + + + + `; + + for (const [ext, count] of sortedExtensions) { + body += ` + + + + + `; + } + + body += ` +
ExtensionFiles
.${ext}${count}
+ +
+ +

👥 Contributors

+ + + + + + + `; + + for (const [user, count] of contributors.entries()) { + body += ` + + + + + `; + } + + body += ` +
UserCommits
@${user}${count}
+ +
+ +
+ + 🔎 View raw commit messages + + +
+ +
+            `;
+
+            for (const commit of commits) {
+              body += `${commit.commit.message}\n\n`;
+            }
+
+            body += `
+              
+
+ +
+ +
+ + + Generated automatically from conventional commits and pull request metadata. + + `; + + core.setOutput("body", body); + + - name: Create or update PR comment + uses: actions/github-script@v7 + env: + BODY: ${{ steps.summary.outputs.body }} + with: + script: | + const marker = ''; + + const comments = await github.paginate( + github.rest.issues.listComments, + { + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number + } + ); + + const existing = comments.find(comment => + comment.body.includes(marker) + ); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body: process.env.BODY + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.payload.pull_request.number, + body: process.env.BODY + }); + } \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e1b346a..93c60e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "accxus" -version = "0.1.0" +version = "0.3.0" description = "accxus is a program where you can create, manage, and modify accounts on various social networks. It uses SMS activation services for registration." 
readme = "README.md" requires-python = ">=3.10" diff --git a/src/accxus/__init__.py b/src/accxus/__init__.py index d3ec452..493f741 100644 --- a/src/accxus/__init__.py +++ b/src/accxus/__init__.py @@ -1 +1 @@ -__version__ = "0.2.0" +__version__ = "0.3.0" diff --git a/src/accxus/platforms/telegram/client.py b/src/accxus/platforms/telegram/client.py index 0f99e17..edffebd 100644 --- a/src/accxus/platforms/telegram/client.py +++ b/src/accxus/platforms/telegram/client.py @@ -10,6 +10,7 @@ from pyrogram import Client # type: ignore[import-untyped] import accxus.config as cfg +from accxus.platforms.telegram import sessions as tg_sessions from accxus.types.core import ProxyConfig from accxus.types.telegram import SessionInfo, SessionStatus from accxus.utils.session_convert import detect_kind @@ -27,6 +28,10 @@ def make_client( ) -> Client: from pyrogram import Client as _Client # type: ignore[import-untyped] + dc_id = tg_sessions.hydrate_session_dc_metadata(session_name) + if dc_id is not None: + log.debug("[tg] session %s uses dc_id=%s", session_name, dc_id) + _proxy = proxy or cfg.config.telegram_proxy return _Client( # type: ignore[reportCallIssue] name=session_name, @@ -68,6 +73,7 @@ async def fetch_info( ) -> SessionInfo: async with connected(session_name, proxy=proxy) as client: me = await client.get_me() + dc_id = await client.storage.dc_id() try: chat = await client.get_chat(me.id) bio: str = getattr(chat, "bio", "") or "" @@ -82,6 +88,7 @@ async def fetch_info( last_name=me.last_name or "", username=me.username or "", bio=bio, + dc_id=dc_id, kind=kind, status=SessionStatus.VALID, ) @@ -100,7 +107,10 @@ async def check_validity( try: async with connected(session_name, proxy=proxy) as client: me = await client.get_me() - return SessionStatus.VALID if me else SessionStatus.INVALID + if me: + tg_sessions.update_metadata_dc_id(session_name, await client.storage.dc_id()) + return SessionStatus.VALID + return SessionStatus.INVALID except (AuthKeyUnregistered, 
UserDeactivated, UserDeactivatedBan): return SessionStatus.INVALID except Exception: diff --git a/src/accxus/platforms/telegram/parsing.py b/src/accxus/platforms/telegram/parsing.py index 1b8341e..bad25f8 100644 --- a/src/accxus/platforms/telegram/parsing.py +++ b/src/accxus/platforms/telegram/parsing.py @@ -1,5 +1,7 @@ from __future__ import annotations +import asyncio +import contextlib import json import logging from collections.abc import Callable @@ -11,40 +13,424 @@ log = logging.getLogger(__name__) +ChatRef = int | str + + +def _clean_filename(value: str) -> str: + cleaned = "".join(ch if ch.isalnum() or ch in ("-", "_", ".") else "_" for ch in value) + return cleaned.strip("._") or "chat" + + +def _chat_ref(chat: dict[str, Any]) -> str: + username = str(chat.get("username") or "").strip() + if username: + return f"@{username}" + return str(chat["id"]) + + +def _normalize_chat_ref(chat: ChatRef) -> ChatRef: + if isinstance(chat, int): + return chat + value = chat.strip() + if value.lstrip("-").isdigit(): + return int(value) + return value + + +async def _resolve_chat_ref(client: Any, chat: ChatRef) -> ChatRef: + ref = _normalize_chat_ref(chat) + with contextlib.suppress(Exception): + resolved = await client.get_chat(ref) + return resolved.id + + wanted_id = ref if isinstance(ref, int) else None + wanted_text = str(ref).lstrip("@").lower() if isinstance(ref, str) else "" + async for dialog in client.get_dialogs(limit=0): # type: ignore[reportGeneralTypeIssues] + dialog_chat = dialog.chat + if wanted_id is not None and dialog_chat.id == wanted_id: + return dialog_chat.id + username = (getattr(dialog_chat, "username", "") or "").lower() + title = (getattr(dialog_chat, "title", "") or "").lower() + if wanted_text and wanted_text in {username, title}: + return dialog_chat.id + + return ref + + +def _format_optional(value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return value + if hasattr(value, "isoformat"): + return 
str(value.isoformat()) + return str(value) + + +def _enum_value(value: Any) -> str: + if value is None: + return "" + enum_value = getattr(value, "value", None) + if isinstance(enum_value, str): + return enum_value + enum_name = getattr(value, "name", None) + if isinstance(enum_name, str): + return enum_name.lower() + return str(value) + + +def _serializable_value(value: Any, depth: int = 0) -> Any: + if value is None or isinstance(value, str | int | float | bool): + return value + if depth > 2: + return _format_optional(value) + if isinstance(value, list | tuple | set): + return [_serializable_value(item, depth + 1) for item in value] + if isinstance(value, dict): + return { + str(key): _serializable_value(item, depth + 1) + for key, item in value.items() + if not str(key).startswith("_") + } + if hasattr(value, "isoformat"): + return value.isoformat() + if hasattr(value, "value"): + return _enum_value(value) + data = getattr(value, "__dict__", None) + if isinstance(data, dict): + return { + key: _serializable_value(item, depth + 1) + for key, item in data.items() + if not key.startswith("_") and key != "_client" + } + return _format_optional(value) + + +def _normalize_gift(gift: Any) -> dict[str, Any]: + if gift is None: + return {} + + data = _serializable_value(gift) + if not isinstance(data, dict): + data = {"value": data} + + from_id = ( + data.get("from") + or data.get("from_id") + or data.get("sender_id") + or data.get("user_id") + or data.get("peer_id") + or "" + ) + gift_type = data.get("type") or data.get("_") or data.get("title") or type(gift).__name__ + date = data.get("date") or data.get("timestamp") or "" + + res = { + "from": from_id, + "type": gift_type, + "date": date, + "price": 0, + "currency": "stars", + "status": "common", + "message": data.get("message") or "", + } + + # Enhanced parsing for Star Gifts + if hasattr(gift, "gift"): # UserStarGift + g = gift.gift + res["price"] = getattr(g, "stars", 0) + if getattr(g, "limited_count", 0) > 0: + 
res["status"] = "rare" + if getattr(gift, "upgraded", False): + res["status"] = "upgraded" + if getattr(gift, "upgrade_tag", None): + res["status"] += f" ({gift.upgrade_tag})" + + # Premium Gift parsing + if res["type"] == "PremiumGiftOption": + res["price"] = data.get("amount", 0) + res["currency"] = data.get("currency", "USD") + res["status"] = "premium" + + # Merge remaining fields + for key, value in data.items(): + if key not in res and key not in {"from_id", "sender_id", "user_id", "peer_id"}: + res[key] = value + + return res + + +def _normalize_gifts(values: Any) -> list[dict[str, Any]]: + if not values: + return [] + if not isinstance(values, list | tuple): + values = [values] + return [_normalize_gift(value) for value in values if value is not None] + + +def _message_sender(msg: Any) -> str: + if getattr(msg, "from_user", None): + user = msg.from_user + return user.username or str(user.id) + if getattr(msg, "sender_chat", None): + chat = msg.sender_chat + return chat.username or chat.title or str(chat.id) + return "" + + +def _message_type(msg: Any) -> str: + if getattr(msg, "service", None): + return "service" + if getattr(msg, "media", None): + return _enum_value(msg.media) + if getattr(msg, "text", None): + return "text" + return "empty" + + +def _user_label(user: Any) -> str: + if user is None: + return "" + username = getattr(user, "username", "") or "" + if username: + return f"@{username}" + name = " ".join( + part for part in [getattr(user, "first_name", ""), getattr(user, "last_name", "")] if part + ) + return name or str(getattr(user, "id", "")) + + +def _service_text(msg: Any) -> str: + service = _enum_value(getattr(msg, "service", None)) + actor = _message_sender(msg) or "system" + if service == "new_chat_members": + members = ", ".join( + _user_label(user) for user in getattr(msg, "new_chat_members", []) or [] + ) + return f"{actor} added {members}".strip() + if service == "left_chat_members": + return f"{_user_label(getattr(msg, 
'left_chat_member', None))} left the chat".strip() + if service == "new_chat_title": + return f"{actor} changed chat title to {getattr(msg, 'new_chat_title', '')}" + if service == "new_chat_photo": + return f"{actor} changed chat photo" + if service == "delete_chat_photo": + return f"{actor} deleted chat photo" + if service == "pinned_message": + pinned = getattr(getattr(msg, "pinned_message", None), "id", "") + return f"{actor} pinned message {pinned}".strip() + if service == "video_chat_started": + return f"{actor} started video chat" + if service == "video_chat_ended": + ended = getattr(msg, "video_chat_ended", None) + duration = getattr(ended, "duration", "") + return f"{actor} ended video chat {duration}".strip() + if service == "video_chat_scheduled": + scheduled = getattr(msg, "video_chat_scheduled", None) + start_date = _format_optional(getattr(scheduled, "start_date", "")) + return f"{actor} scheduled video chat {start_date}".strip() + if service == "video_chat_members_invited": + invited = getattr(msg, "video_chat_members_invited", None) + users = ", ".join(_user_label(user) for user in getattr(invited, "users", []) or []) + return f"{actor} invited {users} to video chat".strip() + ttl_period = getattr(msg, "ttl_period", None) or getattr(msg, "message_auto_delete_timer", None) + if ttl_period: + return f"{actor} changed auto-delete timer to {ttl_period}" + return service + + +def _service_details(msg: Any) -> dict[str, Any]: + fields = [ + "new_chat_members", + "left_chat_member", + "new_chat_title", + "delete_chat_photo", + "group_chat_created", + "supergroup_chat_created", + "channel_chat_created", + "migrate_to_chat_id", + "migrate_from_chat_id", + "pinned_message", + "game_high_score", + "video_chat_scheduled", + "video_chat_started", + "video_chat_ended", + "video_chat_members_invited", + "web_app_data", + "ttl_period", + "message_auto_delete_timer", + "message_auto_delete_timer_changed", + ] + details: dict[str, Any] = {} + for field in fields: + 
value = getattr(msg, field, None) + if value: + details[field] = _serializable_value(value) + return details + + +def _media_suffix(msg: Any) -> str: + media_type = _enum_value(getattr(msg, "media", None)) + media = getattr(msg, media_type, None) if media_type else None + file_name = getattr(media, "file_name", "") or "" + if file_name and Path(file_name).suffix: + return Path(file_name).suffix + mime_type = getattr(media, "mime_type", "") or "" + if mime_type == "application/x-tgsticker": + return ".tgs" + if mime_type == "video/webm": + return ".webm" + if mime_type == "image/webp": + return ".webp" + if media_type == "photo": + return ".jpg" + if media_type == "sticker": + if getattr(media, "is_animated", False): + return ".tgs" + if getattr(media, "is_video", False): + return ".webm" + return ".webp" + if media_type == "animation": + return ".mp4" + return "" + + +async def _download_message_media(client: Any, msg: Any, media_dir: Path | None) -> str: + if media_dir is None or not getattr(msg, "media", None): + return "" + media_dir = media_dir.absolute() + media_dir.mkdir(parents=True, exist_ok=True) + media_type = _enum_value(msg.media) + dest = media_dir / f"{media_type}{msg.id}{_media_suffix(msg)}" + try: + downloaded = await client.download_media(msg, file_name=str(dest)) + return str(downloaded or dest) + except Exception as exc: + log.debug("[parse] media download failed for message %s: %s", msg.id, exc) + return "" + + +def _custom_emoji_ids(msg: Any) -> list[int]: + ids: list[int] = [] + for entity in list(getattr(msg, "entities", []) or []) + list( + getattr(msg, "caption_entities", []) or [] + ): + custom_emoji_id = getattr(entity, "custom_emoji_id", None) + if custom_emoji_id: + ids.append(int(custom_emoji_id)) + return ids + + +async def _download_custom_emojis(client: Any, msg: Any, media_dir: Path | None) -> list[str]: + ids = _custom_emoji_ids(msg) + if media_dir is None or not ids: + return [] + media_dir = media_dir.absolute() + 
media_dir.mkdir(parents=True, exist_ok=True) + files: list[str] = [] + with contextlib.suppress(Exception): + stickers = await client.get_custom_emoji_stickers(ids) + for sticker in stickers: + suffix = ".tgs" if sticker.is_animated else ".webm" if sticker.is_video else ".webp" + dest = media_dir / f"emoji{sticker.file_unique_id}{suffix}" + try: + downloaded = await client.download_media(sticker.file_id, file_name=str(dest)) + files.append(str(downloaded or dest)) + except Exception as exc: + log.debug("[parse] custom emoji download failed: %s", exc) + return files + + +async def _message_to_dict(client: Any, msg: Any, media_dir: Path | None) -> dict[str, Any]: + msg_type = _message_type(msg) + service = _enum_value(getattr(msg, "service", None)) + media = _enum_value(getattr(msg, "media", None)) + text = msg.text or msg.caption or "" + if service and not text: + text = _service_text(msg) + return { + "id": msg.id, + "date": str(msg.date), + "from": _message_sender(msg), + "type": msg_type, + "service": service, + "media_type": media, + "text": text, + "media_file": await _download_message_media(client, msg, media_dir), + "custom_emoji_files": await _download_custom_emojis(client, msg, media_dir), + "service_details": _service_details(msg) if service else {}, + } + + +async def get_chat_senders( + session_name: str, + chat: ChatRef, + limit: int = 500, +) -> list[dict[str, Any]]: + """Fetch unique senders from chat history to allow filtering.""" + senders: dict[int, dict[str, Any]] = {} + async with connected(session_name) as client: + resolved_chat = await _resolve_chat_ref(client, chat) + # Using type: ignore[attr-defined] if pyright complains about get_chat_history not being a method on Client + async for msg in client.get_chat_history(resolved_chat, limit=limit): # type: ignore[attr-defined] + u = msg.from_user + if u: + if u.id not in senders: + name = f"{u.first_name or ''} {u.last_name or ''}".strip() or str(u.id) + label = f"{name} ({u.id}/@{u.username})" 
if u.username else f"{name} ({u.id})" + senders[u.id] = {"id": u.id, "label": label, "username": u.username} + elif msg.sender_chat: + c = msg.sender_chat + if c.id not in senders: + name = c.title or str(c.id) + label = f"{name} ({c.id}/@{c.username})" if c.username else f"{name} ({c.id})" + senders[c.id] = {"id": c.id, "label": label, "username": c.username} + await asyncio.sleep(0.02) + return sorted(senders.values(), key=lambda x: x["label"]) + async def export_chat_history( session_name: str, - chat: str, + chat: ChatRef, limit: int = 0, on_progress: Callable[[int], None] | None = None, + media_dir: Path | None = None, + sender_ids: list[int] | None = None, ) -> list[dict[str, Any]]: messages: list[dict[str, Any]] = [] async with connected(session_name) as client: - async for msg in client.get_chat_history(chat, limit=limit or 0): # type: ignore[reportGeneralTypeIssues] - messages.append( - { - "id": msg.id, - "date": str(msg.date), - "from": ( - (msg.from_user.username or str(msg.from_user.id)) if msg.from_user else "" - ), - "text": msg.text or msg.caption or "", - } - ) + resolved_chat = await _resolve_chat_ref(client, chat) + async for msg in client.get_chat_history(resolved_chat, limit=limit or 0): # type: ignore[reportGeneralTypeIssues] + if sender_ids: + sid = (msg.from_user.id if msg.from_user else None) or ( + msg.sender_chat.id if msg.sender_chat else None + ) + if sid not in sender_ids: + continue + + messages.append(await _message_to_dict(client, msg, media_dir)) if on_progress and len(messages) % 100 == 0: on_progress(len(messages)) + + # Cooldown to avoid bans + await asyncio.sleep(0.05) return messages async def save_chat_history( session_name: str, - chat: str, + chat: ChatRef, dest: Path, fmt: str = "json", limit: int = 0, on_progress: Callable[[int], None] | None = None, + media_dir: Path | None = None, + sender_ids: list[int] | None = None, ) -> int: - messages = await export_chat_history(session_name, chat, limit, on_progress) + messages = 
await export_chat_history( + session_name, chat, limit, on_progress, media_dir, sender_ids + ) + dest.parent.mkdir(parents=True, exist_ok=True) if fmt == "txt": lines = [f"[{m['date']}] {m['from'] or 'unknown'}: {m['text']}" for m in messages] dest.write_text("\n".join(lines), encoding="utf-8") @@ -54,22 +440,186 @@ async def save_chat_history( return len(messages) +async def save_chats_history( + session_name: str, + chats: list[ChatRef], + dest_dir: Path, + fmt: str = "json", + limit: int = 0, + on_progress: Callable[[str, int], None] | None = None, + media_dir: Path | None = None, +) -> dict[str, int]: + dest_dir.mkdir(parents=True, exist_ok=True) + exported: dict[str, int] = {} + + for chat in chats: + chat_key = _clean_filename(str(chat).lstrip("@")) + + def _progress(count: int, chat_ref: ChatRef = chat) -> None: + if on_progress: + on_progress(str(chat_ref), count) + + dest = dest_dir / f"{chat_key}.{fmt}" + chat_media_dir = media_dir / chat_key if media_dir else None + exported[str(chat)] = await save_chat_history( + session_name, + chat, + dest, + fmt=fmt, + limit=limit, + on_progress=_progress, + media_dir=chat_media_dir, + ) + + log.info("[parse] exported %d chat histories to %s", len(exported), dest_dir) + return exported + + +async def save_all_dialog_histories( + session_name: str, + dest_dir: Path, + *, + kind: str = "all", + selected_chats: list[ChatRef] | None = None, + fmt: str = "json", + limit: int = 0, + on_progress: Callable[[str, int], None] | None = None, + media_dir: Path | None = None, +) -> dict[str, int]: + chats = selected_chats or [ + _chat_ref(chat) for chat in await list_dialogs(session_name, kind=kind, limit=0) + ] + return await save_chats_history( + session_name, + chats, + dest_dir, + fmt=fmt, + limit=limit, + on_progress=on_progress, + media_dir=media_dir, + ) + + +async def _download_user_avatar(client: Any, user: Any, avatar_dir: Path | None) -> str: + if avatar_dir is None: + return "" + photo = getattr(user, "photo", None) + 
file_id = getattr(photo, "big_file_id", "") or getattr(photo, "small_file_id", "") + if not file_id: + return "" + + avatar_dir = avatar_dir.absolute() + avatar_dir.mkdir(parents=True, exist_ok=True) + dest = avatar_dir / f"{user.id}.jpg" + try: + downloaded = await client.download_media(file_id, file_name=str(dest)) + return str(downloaded or dest) + except Exception as exc: + log.debug("[parse] avatar download failed for %s: %s", user.id, exc) + return "" + + +async def _load_user_extras(client: Any, user_id: int) -> dict[str, Any]: + extras: dict[str, Any] = { + "bio": "", + "song": "", + "birthday": "", + "gifts": [], + } + with contextlib.suppress(Exception): + chat = await client.get_chat(user_id) + extras["bio"] = getattr(chat, "bio", "") or getattr(chat, "description", "") or "" + extras["song"] = _format_optional(getattr(chat, "profile_song", "")) + extras["birthday"] = _format_optional(getattr(chat, "birthday", "")) + extras["birthday"] = extras["birthday"] or _format_optional(getattr(chat, "birthdate", "")) + extras["song"] = extras["song"] or _format_optional(getattr(chat, "profile_music", "")) + extras["gifts"] = _normalize_gifts( + getattr(chat, "gifts", None) + or getattr(chat, "received_gifts", None) + or getattr(chat, "premium_gifts", None) + ) + + with contextlib.suppress(Exception): + from pyrogram.raw.functions.users import GetFullUser # type: ignore[import-untyped] + + peer = await client.resolve_peer(user_id) + full = await client.invoke(GetFullUser(id=peer)) + full_user = getattr(full, "full_user", full) + extras["bio"] = extras["bio"] or getattr(full_user, "about", "") or "" + extras["song"] = extras["song"] or _format_optional(getattr(full_user, "profile_song", "")) + extras["birthday"] = extras["birthday"] or _format_optional( + getattr(full_user, "birthday", "") or getattr(full_user, "birthdate", "") + ) + extras["song"] = extras["song"] or _format_optional(getattr(full_user, "profile_music", "")) + extras["gifts"] = extras["gifts"] or 
_normalize_gifts( + getattr(full_user, "gifts", None) + or getattr(full_user, "received_gifts", None) + or getattr(full_user, "premium_gifts", None) + ) + extras["raw_profile"] = _serializable_value(full_user) + + return extras + + +async def _parsed_user_from_member( + client: Any, + member: Any, + *, + chat_info: dict[str, Any], + avatar_dir: Path | None, +) -> ParsedUser: + u = member.user + extras = await _load_user_extras(client, u.id) + return ParsedUser( + id=u.id, + username=u.username or "", + first_name=u.first_name or "", + last_name=u.last_name or "", + phone=u.phone_number or "", + avatar_path=await _download_user_avatar(client, u, avatar_dir), + bio=extras["bio"], + song=extras["song"], + birthday=extras["birthday"], + gifts=extras["gifts"], + source_chat_id=chat_info.get("id"), + source_chat_title=chat_info.get("title", ""), + source_chat_username=chat_info.get("username", ""), + ) + + async def parse_chat_members( session_name: str, - chat: str, + chat: ChatRef, on_progress: Callable[[int], None] | None = None, + avatar_dir: Path | None = None, ) -> list[ParsedUser]: users: list[ParsedUser] = [] async with connected(session_name) as client: - async for member in client.get_chat_members(chat): # type: ignore[reportGeneralTypeIssues] - u = member.user + resolved_chat = await _resolve_chat_ref(client, chat) + chat_obj = await client.get_chat(resolved_chat) + chat_info = { + "id": chat_obj.id, + "title": ( + getattr(chat_obj, "title", None) + or " ".join( + p + for p in [ + getattr(chat_obj, "first_name", ""), + getattr(chat_obj, "last_name", ""), + ] + if p + ) + or str(chat_obj.id) + ), + "username": getattr(chat_obj, "username", "") or "", + } + async for member in client.get_chat_members(resolved_chat): # type: ignore[reportGeneralTypeIssues] users.append( - ParsedUser( - id=u.id, - username=u.username or "", - first_name=u.first_name or "", - last_name=u.last_name or "", - phone=u.phone_number or "", + await _parsed_user_from_member( + client, + 
member, + chat_info=chat_info, + avatar_dir=avatar_dir, ) ) if on_progress and len(users) % 50 == 0: @@ -78,10 +628,80 @@ async def parse_chat_members( return users +async def parse_chats_members( + session_name: str, + chats: list[ChatRef], + *, + avatar_dir: Path | None = None, + on_progress: Callable[[str, int], None] | None = None, +) -> list[ParsedUser]: + users_by_id: dict[int, ParsedUser] = {} + async with connected(session_name) as client: + for chat in chats: + resolved_chat = await _resolve_chat_ref(client, chat) + chat_obj = await client.get_chat(resolved_chat) + chat_info = { + "id": chat_obj.id, + "title": ( + getattr(chat_obj, "title", None) + or " ".join( + p + for p in [ + getattr(chat_obj, "first_name", ""), + getattr(chat_obj, "last_name", ""), + ] + if p + ) + or str(chat_obj.id) + ), + "username": getattr(chat_obj, "username", "") or "", + } + count = 0 + async for member in client.get_chat_members(resolved_chat): # type: ignore[reportGeneralTypeIssues] + parsed = await _parsed_user_from_member( + client, + member, + chat_info=chat_info, + avatar_dir=avatar_dir, + ) + if parsed.id not in users_by_id: + users_by_id[parsed.id] = parsed + count += 1 + if on_progress and count % 50 == 0: + on_progress(str(chat), count) + if on_progress: + on_progress(str(chat), count) + + users = list(users_by_id.values()) + log.info("[parse] parsed %d unique members from %d chats", len(users), len(chats)) + return users + + +async def save_chats_members( + session_name: str, + chats: list[ChatRef], + dest: Path, + *, + avatar_dir: Path | None = None, + on_progress: Callable[[str, int], None] | None = None, +) -> int: + users = await parse_chats_members( + session_name, + chats, + avatar_dir=avatar_dir, + on_progress=on_progress, + ) + payload = [u.model_dump() for u in users] + dest.parent.mkdir(parents=True, exist_ok=True) + dest.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8") + log.info("[parse] saved %d parsed members to %s", 
len(users), dest) + return len(users) + + async def list_dialogs( session_name: str, kind: str = "all", - limit: int = 200, + limit: int = 0, ) -> list[dict[str, Any]]: from pyrogram.enums import ChatType # type: ignore[import-untyped] @@ -123,11 +743,29 @@ async def list_dialogs( async def get_user_info(session_name: str, user_id: str) -> dict[str, Any]: async with connected(session_name) as client: u = await client.get_users(user_id) + extras = await _load_user_extras(client, u.id) return { "id": u.id, "username": u.username or "", "first_name": u.first_name or "", "last_name": u.last_name or "", "phone": u.phone_number or "", - "bio": getattr(u, "bio", "") or "", + "bio": extras.get("bio", "") or getattr(u, "bio", "") or "", + "birthday": extras.get("birthday", ""), + "song": extras.get("song", ""), + "gifts": extras.get("gifts", []), + "is_bot": bool(getattr(u, "is_bot", False)), + "is_contact": bool(getattr(u, "is_contact", False)), + "is_mutual_contact": bool(getattr(u, "is_mutual_contact", False)), + "is_premium": bool(getattr(u, "is_premium", False)), + "is_verified": bool(getattr(u, "is_verified", False)), + "is_scam": bool(getattr(u, "is_scam", False)), + "is_fake": bool(getattr(u, "is_fake", False)), + "language_code": getattr(u, "language_code", "") or "", + "dc_id": getattr(u, "dc_id", None), + "status": _enum_value(getattr(u, "status", None)), + "last_online_date": _format_optional(getattr(u, "last_online_date", "")), + "next_offline_date": _format_optional(getattr(u, "next_offline_date", "")), + "emoji_status": _serializable_value(getattr(u, "emoji_status", None)), + "raw_profile": extras.get("raw_profile", {}), } diff --git a/src/accxus/platforms/telegram/sessions.py b/src/accxus/platforms/telegram/sessions.py index bbefd3a..78f91c1 100644 --- a/src/accxus/platforms/telegram/sessions.py +++ b/src/accxus/platforms/telegram/sessions.py @@ -2,6 +2,7 @@ import json import logging +import sqlite3 from pathlib import Path from typing import Any @@ -27,22 
+28,90 @@ def save_metadata(meta: dict[str, dict[str, Any]]) -> None: _META_FILE.write_text(json.dumps(meta, indent=2, ensure_ascii=False), encoding="utf-8") +def read_session_dc_id(session_name: str) -> int | None: + path = session_path(session_name) + if not path.exists(): + return None + try: + with sqlite3.connect(path) as conn: + row = conn.execute("SELECT dc_id FROM sessions LIMIT 1").fetchone() + except sqlite3.Error: + return None + if not row or row[0] is None: + return None + try: + return int(row[0]) + except (TypeError, ValueError): + return None + + +def update_metadata_dc_id(session_name: str, dc_id: int | None) -> None: + if dc_id is None: + return + meta = load_metadata() + item = meta.setdefault(session_name, {}) + if item.get("dc_id") == dc_id: + return + item["dc_id"] = dc_id + save_metadata(meta) + + def update_metadata(session_name: str, info: SessionInfo) -> None: meta = load_metadata() - meta.setdefault(session_name, {}).update( - { - "phone": info.phone, - "first_name": info.first_name, - "last_name": info.last_name, - "username": info.username, - "kind": info.kind.name, - "status": info.status.value, - } - ) + data = { + "phone": info.phone, + "first_name": info.first_name, + "last_name": info.last_name, + "username": info.username, + "kind": info.kind.name, + "status": info.status.value, + } + if info.dc_id is not None: + data["dc_id"] = info.dc_id + meta.setdefault(session_name, {}).update(data) save_metadata(meta) +def hydrate_session_dc_metadata(session_name: str) -> int | None: + dc_id = read_session_dc_id(session_name) + update_metadata_dc_id(session_name, dc_id) + return dc_id + + +def hydrate_all_dc_metadata() -> None: + meta = load_metadata() + changed = False + for f in sorted(cfg.SESSIONS_DIR.glob("*.session")): + dc_id = read_session_dc_id(f.stem) + if dc_id is not None and meta.setdefault(f.stem, {}).get("dc_id") != dc_id: + meta[f.stem]["dc_id"] = dc_id + changed = True + if changed: + save_metadata(meta) + + +def 
update_metadata_statuses(statuses: dict[str, SessionStatus]) -> None: + meta = load_metadata() + for name, status in statuses.items(): + item = meta.setdefault(name, {}) + item["status"] = status.value + dc_id = read_session_dc_id(name) + if dc_id is not None: + item["dc_id"] = dc_id + save_metadata(meta) + + +def _coerce_dc_id(value: Any) -> int | None: + if value is None or value == "": + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + def list_sessions() -> list[SessionInfo]: + hydrate_all_dc_metadata() meta = load_metadata() result: list[SessionInfo] = [] for f in sorted(cfg.SESSIONS_DIR.glob("*.session")): @@ -67,6 +136,7 @@ def list_sessions() -> list[SessionInfo]: last_name=m.get("last_name", ""), username=m.get("username", ""), bio=m.get("bio", ""), + dc_id=_coerce_dc_id(m.get("dc_id")) or read_session_dc_id(name), kind=kind, status=status, ) diff --git a/src/accxus/types/telegram.py b/src/accxus/types/telegram.py index fbe741a..897b957 100644 --- a/src/accxus/types/telegram.py +++ b/src/accxus/types/telegram.py @@ -1,8 +1,9 @@ from __future__ import annotations from enum import Enum +from typing import Any -from pydantic import BaseModel, computed_field +from pydantic import BaseModel, Field, computed_field class SessionKind(str, Enum): @@ -25,6 +26,7 @@ class SessionInfo(BaseModel): last_name: str = "" username: str = "" bio: str = "" + dc_id: int | None = None kind: SessionKind = SessionKind.PYROGRAM status: SessionStatus = SessionStatus.UNKNOWN @@ -41,6 +43,14 @@ class ParsedUser(BaseModel): first_name: str = "" last_name: str = "" phone: str = "" + avatar_path: str = "" + bio: str = "" + song: str = "" + birthday: str = "" + gifts: list[dict[str, Any]] = Field(default_factory=list) + source_chat_id: int | None = None + source_chat_title: str = "" + source_chat_username: str = "" @computed_field # type: ignore[prop-decorator] @property diff --git a/src/accxus/ui/app.py b/src/accxus/ui/app.py index 
e4e26f7..e195dcb 100644 --- a/src/accxus/ui/app.py +++ b/src/accxus/ui/app.py @@ -205,7 +205,9 @@ async def _cmd_session( # pyright: ignore[reportUnusedFunction] for s in sessions: color = "green" if s.status.value == "valid" else "red" name_part = f"[cyan]{s.name}[/cyan]" - phone_part = f"[dim]{s.phone or '?'} Β· @{s.username or 'β€”'}[/dim]" + phone_part = ( + f"[dim]{s.phone or '?'} Β· @{s.username or 'β€”'} Β· DC {s.dc_id or 'β€”'}[/dim]" + ) _write(app, f" [{color}]●[/{color}] {name_part} {phone_part}") elif action == "check": diff --git a/src/accxus/ui/tg/add_session.py b/src/accxus/ui/tg/add_session.py index aae62f2..7826fdf 100644 --- a/src/accxus/ui/tg/add_session.py +++ b/src/accxus/ui/tg/add_session.py @@ -265,6 +265,7 @@ async def _finish(self) -> None: first_name=me.first_name or "", last_name=me.last_name or "", username=me.username or "", + dc_id=await self._client.storage.dc_id(), ) tg_sessions.update_metadata(self._name, info) await self._client.disconnect() diff --git a/src/accxus/ui/tg/parsing.py b/src/accxus/ui/tg/parsing.py index f0ed4b4..c4d63be 100644 --- a/src/accxus/ui/tg/parsing.py +++ b/src/accxus/ui/tg/parsing.py @@ -77,43 +77,97 @@ def _get_session(widget: Widget, sel_id: str) -> str | None: return val +def _split_refs(value: str) -> list[str]: + refs: list[str] = [] + for chunk in value.replace("\n", ",").split(","): + ref = chunk.strip() + if ref: + refs.append(ref) + return refs + + +def _dialog_ref(dialog: dict[str, Any]) -> str: + username = str(dialog.get("username") or "") + if username: + return f"@{username}" + return str(dialog["id"]) + + class ParsingTab(Widget): DEFAULT_CSS = """ ParsingTab { height: 100%; width: 100%; } ParsingTab TabbedContent { height: 1fr; } + ParsingTab ContentSwitcher { height: 1fr; } + ParsingTab TabPane { height: 1fr; } .pform { padding: 1 2; height: 100%; overflow-y: auto; } .pform Label { margin-bottom: 1; } .pform Input { margin-bottom: 1; width: 44; } .pform Select { margin-bottom: 1; width: 44; 
} .prow { layout: horizontal; height: auto; margin-bottom: 1; } .prow Input { width: 28; margin-right: 1; } + .prow Select { width: 28; margin-right: 1; } + .prow Static { width: 28; margin-right: 1; height: 3; content-align: left middle; } .prow Button { margin-right: 1; } + .cfield { height: auto; margin-bottom: 1; } + .cfield Label { height: 1; margin-bottom: 0; } + .cfield Input { width: 52; margin-bottom: 0; } + .cfield Select { width: 52; margin-bottom: 0; } .plog { height: 12; margin-top: 1; } #groups_table { height: 10; margin-bottom: 1; } - #chats_table { height: 1fr; margin-bottom: 1; } + #chats_pane { overflow: hidden; } + #chats_controls { height: auto; } + #chats_table { height: 1fr; min-height: 10; margin-bottom: 1; } """ def __init__(self) -> None: super().__init__() self._parsed_users: list[Any] = [] self._fetched_dialogs: list[dict[str, Any]] = [] + self._selected_chats: set[str] = set() def compose(self) -> ComposeResult: choices = _session_select_choices() with TabbedContent(): with TabPane("Chats", id="tp_chats"), Widget(classes="pform", id="chats_pane"): - yield Label("[bold]Chat List[/bold]") - yield Select(choices, id="chats_sess", prompt="Select session") - yield Select(_KIND_LABELS, value="all", id="chats_kind") - with Widget(classes="prow"): - yield Button("Fetch Chats", id="btn_fetch_chats", variant="primary") - yield Static("", id="chats_status") + with Widget(id="chats_controls"): + with Widget(classes="cfield"): + yield Label("Session") + yield Select(choices, id="chats_sess", prompt="Session") + with Widget(classes="cfield"): + yield Label("Type") + yield Select(_KIND_LABELS, value="all", id="chats_kind") + with Widget(classes="cfield"): + yield Label("Fetched chats") + yield Static("0 selected: 0", id="chats_status") + with Widget(classes="cfield"): + yield Label("Output") + yield Input(placeholder="Output (default: exported_chats)", id="chats_out") + with Widget(classes="cfield"): + yield Label("History limit") + yield 
Input(placeholder="History limit (blank = all)", id="chats_limit") + with Widget(classes="prow"): + yield Button("Fetch Chats", id="btn_fetch_chats", variant="primary") + yield Button("All", id="btn_select_all_chats") + yield Button("Clear", id="btn_clear_chats") + yield Button( + "Export Selected JSON", id="btn_export_chats", variant="success" + ) + yield Button("Parse Users", id="btn_parse_chats", variant="success") yield DataTable(id="chats_table", cursor_type="row", zebra_stripes=True) with TabPane("Export Chat", id="tp_export"), Widget(classes="pform", id="export_pane"): yield Label("[bold]Export Chat History[/bold]") yield Select(choices, id="exp_sess", prompt="Select session") yield Input(placeholder="Chat: @group / username / ID", id="exp_chat") + with Widget(classes="prow"): + yield Button("Fetch Senders", id="btn_fetch_senders") + yield Select( + [("All Senders", "all")], + id="exp_sender", + prompt="Filter by sender", + value="all", + ) yield Input(placeholder="Output file (default: export_.json)", id="exp_out") + yield Input(placeholder="Media dir (blank = no media download)", id="exp_media") yield Input(placeholder="Limit (blank = all)", id="exp_limit") with Widget(classes="prow"): yield Button("Export JSON", id="btn_exp_json", variant="success") @@ -123,9 +177,12 @@ def compose(self) -> ComposeResult: with TabPane("Parse Users", id="tp_parse"), Widget(classes="pform", id="parse_pane"): yield Label("[bold]Parse Group Members[/bold]") yield Select(choices, id="pu_sess", prompt="Select session") - yield Input(placeholder="Group: @group / username / ID", id="pu_chat") + yield Input(placeholder="Groups: @group, @group2 / IDs", id="pu_chat") + yield Input(placeholder="Output JSON (default: parsed_users.json)", id="pu_out") + yield Input(placeholder="Avatar dir (default: parsed_avatars)", id="pu_avatars") with Widget(classes="prow"): yield Button("Parse", id="btn_parse", variant="success") + yield Button("Parse + Save JSON", id="btn_parse_save", 
variant="primary") yield Static("", id="pu_status") yield Label("[dim]Save parsed users to a group:[/dim]") with Widget(classes="prow"): @@ -156,27 +213,77 @@ def on_mount(self) -> None: def _build_chats_pane(self) -> None: pane = self.query_one("#chats_pane") choices = _session_select_choices() - pane.mount(Label("[bold]Chat List[/bold]")) - pane.mount(Select(choices, id="chats_sess", prompt="Select session")) - pane.mount(Select(_KIND_LABELS, value="all", id="chats_kind")) pane.mount( Widget( - Button("Fetch Chats", id="btn_fetch_chats", variant="primary"), - classes="prow", + Widget( + Label("Session"), + Select(choices, id="chats_sess", prompt="Session"), + classes="cfield", + ), + Widget( + Label("Type"), + Select(_KIND_LABELS, value="all", id="chats_kind"), + classes="cfield", + ), + Widget( + Label("Fetched chats"), + Static("0 selected: 0", id="chats_status"), + classes="cfield", + ), + Widget( + Label("Output"), + Input(placeholder="Output (default: exported_chats)", id="chats_out"), + classes="cfield", + ), + Widget( + Label("History limit"), + Input(placeholder="History limit (blank = all)", id="chats_limit"), + classes="cfield", + ), + Widget( + Button("Fetch Chats", id="btn_fetch_chats", variant="primary"), + Button("All", id="btn_select_all_chats"), + Button("Clear", id="btn_clear_chats"), + Button("Export Selected JSON", id="btn_export_chats", variant="success"), + Button("Parse Users", id="btn_parse_chats", variant="success"), + classes="prow", + ), + id="chats_controls", ) ) - pane.mount(Static("", id="chats_status")) pane.mount(DataTable(id="chats_table", cursor_type="row", zebra_stripes=True)) def _init_chats_table(self) -> None: tbl = self.query_one("#chats_table", DataTable) tbl.clear(columns=True) + tbl.add_column("", key="sel") tbl.add_column("", key="kind") tbl.add_column("Title", key="title") tbl.add_column("@Username", key="uname") tbl.add_column("ID", key="chat_id") tbl.add_column("Unread", key="unread") + def _sync_selected_chats(self) 
-> None: + tbl = self.query_one("#chats_table", DataTable) + available = {_dialog_ref(dialog) for dialog in self._fetched_dialogs} + self._selected_chats.intersection_update(available) + for dialog in self._fetched_dialogs: + ref = _dialog_ref(dialog) + with contextlib.suppress(Exception): + tbl.update_cell(ref, "sel", "●" if ref in self._selected_chats else "β—‹") + with contextlib.suppress(Exception): + self.query_one("#chats_status", Static).update( + f"{len(self._fetched_dialogs)} selected: {len(self._selected_chats)}" + ) + + def _select_all_chats(self) -> None: + self._selected_chats = {_dialog_ref(dialog) for dialog in self._fetched_dialogs} + self._sync_selected_chats() + + def _clear_selected_chats(self) -> None: + self._selected_chats.clear() + self._sync_selected_chats() + async def _do_fetch_chats(self) -> None: session = _get_session(self, "#chats_sess") if not session: @@ -190,17 +297,20 @@ async def _do_fetch_chats(self) -> None: self.query_one("#btn_fetch_chats", Button).disabled = True status.update("[dim]Fetching chats…[/dim]") self._init_chats_table() + self._selected_chats.clear() try: - dialogs = await tg_parsing.list_dialogs(session, kind=kind) + dialogs = await tg_parsing.list_dialogs(session, kind=kind, limit=0) self._fetched_dialogs = dialogs tbl = self.query_one("#chats_table", DataTable) for d in dialogs: + ref = _dialog_ref(d) icon = _KIND_ICONS.get(d["kind"], "❓") uname = f"@{d['username']}" if d["username"] else "β€”" unread = str(d["unread"]) if d["unread"] else "Β·" - tbl.add_row(icon, d["title"], uname, str(d["id"]), unread) - status.update(f"βœ… {len(dialogs)} chats fetched") + tbl.add_row("β—‹", icon, d["title"], uname, str(d["id"]), unread, key=ref) + self._select_all_chats() + status.update(f"βœ… {len(dialogs)} chats fetched; selected all") log.info("fetched %d dialogs from session %s (filter=%s)", len(dialogs), session, kind) except Exception as e: status.update(f"❌ {e}") @@ -208,6 +318,76 @@ async def _do_fetch_chats(self) -> 
None: finally: self.query_one("#btn_fetch_chats", Button).disabled = False + def _selected_chat_refs(self) -> list[str]: + return list(self._selected_chats) + + async def _do_export_chats(self) -> None: + session = _get_session(self, "#chats_sess") + chats = self._selected_chat_refs() + if not session or not chats: + self.app.notify("Select a session and choose chats from the table", severity="warning") + return + + limit_raw = self.query_one("#chats_limit", Input).value.strip() + limit = int(limit_raw) if limit_raw.isdigit() else 0 + out_raw = self.query_one("#chats_out", Input).value.strip() + dest_dir = Path(out_raw or "exported_chats").absolute() + media_dir = dest_dir / "media" + status = self.query_one("#chats_status", Static) + button = self.query_one("#btn_export_chats", Button) + button.disabled = True + + def _prog(chat: str, count: int) -> None: + status.update(f"[dim]{chat}: exported {count} messages…[/dim]") + + try: + exported = await tg_parsing.save_chats_history( + session, + list(chats), # type: ignore[arg-type] + dest_dir, + fmt="json", + limit=limit, + on_progress=_prog, + media_dir=media_dir, + ) + status.update(f"βœ… Exported {len(exported)} chats β†’ {dest_dir}; media β†’ {media_dir}") + except Exception as e: + status.update(f"❌ {e}") + log.error("bulk chat export error: %s", e) + finally: + button.disabled = False + + async def _do_parse_chats_from_list(self) -> None: + session = _get_session(self, "#chats_sess") + chats = self._selected_chat_refs() + if not session or not chats: + self.app.notify("Select a session and choose chats from the table", severity="warning") + return + + dest = Path("parsed_users.json").absolute() + avatar_dir = Path("parsed_avatars").absolute() + status = self.query_one("#chats_status", Static) + button = self.query_one("#btn_parse_chats", Button) + button.disabled = True + + def _prog(chat: str, count: int) -> None: + status.update(f"[dim]{chat}: parsed {count} users…[/dim]") + + try: + count = await 
tg_parsing.save_chats_members( + session, + list(chats), # type: ignore[arg-type] + dest, + avatar_dir=avatar_dir, + on_progress=_prog, + ) + status.update(f"βœ… Parsed {count} users β†’ {dest}; avatars β†’ {avatar_dir}") + except Exception as e: + status.update(f"❌ {e}") + log.error("bulk users parse error: %s", e) + finally: + button.disabled = False + def _build_export_pane(self) -> None: pane = self.query_one("#export_pane") choices = _session_select_choices() @@ -215,6 +395,7 @@ def _build_export_pane(self) -> None: pane.mount(Select(choices, id="exp_sess", prompt="Select session")) pane.mount(Input(placeholder="Chat: @group / username / ID", id="exp_chat")) pane.mount(Input(placeholder="Output file (default: export_.json)", id="exp_out")) + pane.mount(Input(placeholder="Media dir (blank = no media download)", id="exp_media")) pane.mount(Input(placeholder="Limit (blank = all)", id="exp_limit")) pane.mount( Widget( @@ -231,10 +412,13 @@ def _build_parse_pane(self) -> None: choices = _session_select_choices() pane.mount(Label("[bold]Parse Group Members[/bold]")) pane.mount(Select(choices, id="pu_sess", prompt="Select session")) - pane.mount(Input(placeholder="Group: @group / username / ID", id="pu_chat")) + pane.mount(Input(placeholder="Groups: @group, @group2 / IDs", id="pu_chat")) + pane.mount(Input(placeholder="Output JSON (default: parsed_users.json)", id="pu_out")) + pane.mount(Input(placeholder="Avatar dir (default: parsed_avatars)", id="pu_avatars")) pane.mount( Widget( Button("Parse", id="btn_parse", variant="success"), + Button("Parse + Save JSON", id="btn_parse_save", variant="primary"), classes="prow", ) ) @@ -294,12 +478,22 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: bid = event.button.id if bid == "btn_fetch_chats": await self._do_fetch_chats() + elif bid == "btn_select_all_chats": + self._select_all_chats() + elif bid == "btn_clear_chats": + self._clear_selected_chats() + elif bid == "btn_export_chats": + await 
self._do_export_chats() + elif bid == "btn_parse_chats": + await self._do_parse_chats_from_list() elif bid == "btn_exp_json": await self._do_export("json") elif bid == "btn_exp_txt": await self._do_export("txt") elif bid == "btn_parse": - await self._do_parse() + await self._do_parse(save=False) + elif bid == "btn_parse_save": + await self._do_parse(save=True) elif bid == "btn_save_grp": self._save_group() elif bid == "btn_grp_refresh": @@ -312,6 +506,45 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: await self._do_snapshot() elif bid == "btn_prof_history": self._show_profile_history() + elif bid == "btn_fetch_senders": + await self._do_fetch_senders() + + def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None: + if event.data_table.id != "chats_table": + return + key = str(event.row_key.value) if event.row_key.value is not None else "" + if not key: + return + if key in self._selected_chats: + self._selected_chats.discard(key) + else: + self._selected_chats.add(key) + self._sync_selected_chats() + + async def _do_fetch_senders(self) -> None: + session = _get_session(self, "#exp_sess") + chat = self.query_one("#exp_chat", Input).value.strip() + if not session or not chat: + self.app.notify( + "Select a session and enter a chat to fetch senders", severity="warning" + ) + return + + status = self.query_one("#exp_status", Static) + status.update("[dim]Fetching unique senders from history…[/dim]") + try: + senders = await tg_parsing.get_chat_senders(session, chat, limit=500) + sel = self.query_one("#exp_sender", Select) + choices = [("All Senders", "all")] + [(s["label"], str(s["id"])) for s in senders] + if hasattr(sel, "set_options"): + sel.set_options(choices) # type: ignore[attr-defined,reportGeneralTypeIssues] + else: + sel.options = choices # type: ignore[attr-defined,reportGeneralTypeIssues] + sel.value = "all" + status.update(f"βœ… Found {len(senders)} senders") + except Exception as e: + status.update(f"❌ {e}") + 
log.error("fetch senders error: %s", e) async def _do_export(self, fmt: str) -> None: session = _get_session(self, "#exp_sess") @@ -322,8 +555,17 @@ async def _do_export(self, fmt: str) -> None: limit_raw = self.query_one("#exp_limit", Input).value.strip() out_raw = self.query_one("#exp_out", Input).value.strip() + media_raw = self.query_one("#exp_media", Input).value.strip() limit = int(limit_raw) if limit_raw.isdigit() else 0 - dest = Path(out_raw or f"export_{chat.lstrip('@')}.{fmt}") + dest = Path(out_raw or f"export_{chat.lstrip('@')}.{fmt}").absolute() + media_dir = Path(media_raw).absolute() if media_raw else None + + sender_val = self.query_one("#exp_sender", Select).value + sender_ids = None + if sender_val and str(sender_val) != "all" and str(sender_val) != "Select.BLANK": + with contextlib.suppress(ValueError): + sender_ids = [int(str(sender_val))] + status = self.query_one("#exp_status", Static) log_view = self.query_one("#export_log", RichLog) @@ -336,10 +578,18 @@ def _prog(n: int) -> None: try: count = await tg_parsing.save_chat_history( - session, chat, dest, fmt=fmt, limit=limit, on_progress=_prog + session, + chat, + dest, + fmt=fmt, + limit=limit, + on_progress=_prog, + media_dir=media_dir, + sender_ids=sender_ids, ) - status.update(f"βœ… {count} messages β†’ {dest}") - log_view.write(f"βœ… Export complete: {dest} ({count} messages)") + media_note = f"; media β†’ {media_dir}" if media_dir else "" + status.update(f"βœ… {count} messages β†’ {dest}{media_note}") + log_view.write(f"βœ… Export complete: {dest} ({count} messages){media_note}") log.info("export done: %s messages from %s -> %s", count, chat, dest) except Exception as e: status.update(f"❌ {e}") @@ -349,22 +599,30 @@ def _prog(n: int) -> None: for bid in ("btn_exp_json", "btn_exp_txt"): self.query_one(f"#{bid}", Button).disabled = False - async def _do_parse(self) -> None: + async def _do_parse(self, *, save: bool = False) -> None: session = _get_session(self, "#pu_sess") - chat = 
self.query_one("#pu_chat", Input).value.strip() - if not session or not chat: - self.app.notify("Select a session and enter a group", severity="warning") + chats = _split_refs(self.query_one("#pu_chat", Input).value) + if not session or not chats: + self.app.notify("Select a session and enter one or more groups", severity="warning") return status = self.query_one("#pu_status", Static) log_view = self.query_one("#parse_log", RichLog) - self.query_one("#btn_parse", Button).disabled = True + for bid in ("btn_parse", "btn_parse_save"): + self.query_one(f"#{bid}", Button).disabled = True - def _prog(n: int) -> None: - status.update(f"[dim]Parsed {n} users…[/dim]") + def _prog(chat: str, count: int) -> None: + status.update(f"[dim]{chat}: parsed {count} users…[/dim]") try: - users = await tg_parsing.parse_chat_members(session, chat, on_progress=_prog) + avatar_dir_raw = self.query_one("#pu_avatars", Input).value.strip() + avatar_dir = Path(avatar_dir_raw or "parsed_avatars").absolute() + users = await tg_parsing.parse_chats_members( + session, + list(chats), # type: ignore[arg-type] + avatar_dir=avatar_dir, + on_progress=_prog, + ) self._parsed_users = [ { "id": u.id, @@ -372,19 +630,39 @@ def _prog(n: int) -> None: "first_name": u.first_name, "last_name": u.last_name, "phone": u.phone, + "avatar_path": u.avatar_path, + "bio": u.bio, + "song": u.song, + "birthday": u.birthday, + "gifts": u.gifts, + "source_chat_id": u.source_chat_id, + "source_chat_title": u.source_chat_title, + "source_chat_username": u.source_chat_username, } for u in users ] - status.update(f"βœ… Parsed {len(users)} users") - log_view.write(f"βœ… Parsed {len(users)} users from {chat!r}") - log.info("parsed %d users from %s", len(users), chat) + if save: + out_raw = self.query_one("#pu_out", Input).value.strip() + dest = Path(out_raw or "parsed_users.json").absolute() + dest.parent.mkdir(parents=True, exist_ok=True) + dest.write_text( + json.dumps(self._parsed_users, indent=2, ensure_ascii=False), + 
encoding="utf-8", + ) + status.update(f"βœ… Parsed {len(users)} users β†’ {dest}; avatars β†’ {avatar_dir}") + log_view.write(f"βœ… Parsed users saved: {dest} ({len(users)} users)") + else: + status.update(f"βœ… Parsed {len(users)} users") + log_view.write(f"βœ… Parsed {len(users)} users from {len(chats)} chats") + log.info("parsed %d users from %d chats", len(users), len(chats)) self.query_one("#btn_save_grp", Button).disabled = False except Exception as e: status.update(f"❌ {e}") log_view.write(f"❌ Parse failed: {e}") log.error("parse error: %s", e) finally: - self.query_one("#btn_parse", Button).disabled = False + for bid in ("btn_parse", "btn_parse_save"): + self.query_one(f"#{bid}", Button).disabled = False def _save_group(self) -> None: gname = self.query_one("#pu_grp_name", Input).value.strip() @@ -457,6 +735,9 @@ async def _do_snapshot(self) -> None: f"βœ… Snapshot: " f"{info['first_name']} {info['last_name']} " f"@{info['username'] or 'β€”'} " + f"birthday:{info.get('birthday') or 'β€”'} " + f"song:{info.get('song') or 'β€”'} " + f"gifts:{len(info.get('gifts') or [])} " f"[dim]{info['timestamp']}[/dim]" ) log.info("snapshot saved for %s", user_id) @@ -480,5 +761,8 @@ def _show_profile_history(self) -> None: f" [dim]{s['timestamp']}[/dim] " f"{s.get('first_name','')} {s.get('last_name','')} " f"@{s.get('username') or 'β€”'} " + f"[dim]birthday:[/dim] {s.get('birthday') or 'β€”'} " + f"[dim]song:[/dim] {s.get('song') or 'β€”'} " + f"[dim]gifts:[/dim] {len(s.get('gifts') or [])} " f"[dim]bio:[/dim] {s.get('bio') or 'β€”'}" ) diff --git a/src/accxus/ui/tg/sessions.py b/src/accxus/ui/tg/sessions.py index 1c34777..e775396 100644 --- a/src/accxus/ui/tg/sessions.py +++ b/src/accxus/ui/tg/sessions.py @@ -187,6 +187,7 @@ async def _finish(self) -> None: first_name=me.first_name or "", last_name=me.last_name or "", username=me.username or "", + dc_id=await self._client.storage.dc_id(), ) tg_sessions.update_metadata(self._name, info) await self._client.disconnect() @@ 
-497,10 +498,13 @@ def _reload_table(self) -> None: tbl.clear(columns=True) tbl.add_column("Session", key="name") tbl.add_column("Phone", key="phone") + tbl.add_column("DC", key="dc") tbl.add_column("Status", key="status") for info in tg_sessions.list_sessions(): status_str = self._status_markup(info.status) - tbl.add_row(info.name, info.phone or "β€”", status_str, key=info.name) + tbl.add_row( + info.name, info.phone or "β€”", str(info.dc_id or "β€”"), status_str, key=info.name + ) @staticmethod def _status_markup(s: SessionStatus) -> str: @@ -598,6 +602,7 @@ async def _do_access(self, name: str) -> None: f"[bold]{info.first_name} {info.last_name}[/bold] " f"{'@' + info.username if info.username else ''}\n" f"[dim]Phone:[/dim] {info.phone or 'β€”'}\n" + f"[dim]DC:[/dim] {info.dc_id or 'β€”'}\n" f"[dim]Bio:[/dim] {info.bio or 'β€”'}\n" f"[dim]Session:[/dim] {name}.session {kind_label}" ) @@ -619,11 +624,12 @@ async def _check_all(self) -> None: names = [s.name for s in sessions] results = await tg_client.check_all_validity(names) - meta = tg_sessions.load_metadata() + tg_sessions.update_metadata_statuses(results) + sessions_by_name = {info.name: info for info in tg_sessions.list_sessions()} for name, status in results.items(): - meta.setdefault(name, {})["status"] = status.value tbl.update_cell(name, "status", self._status_markup(status)) - tg_sessions.save_metadata(meta) + if name in sessions_by_name: + tbl.update_cell(name, "dc", str(sessions_by_name[name].dc_id or "β€”")) valid = sum(1 for s in results.values() if s == SessionStatus.VALID) self.app.notify(f"βœ“ {valid}/{len(results)} valid", title=" Sessions")