From 2aa6a91c777b81b4298df65ab1b2a3e125509c5f Mon Sep 17 00:00:00 2001 From: przemyslawbialon Date: Sat, 25 Apr 2026 17:31:55 +0200 Subject: [PATCH 1/5] feat: Add IBI Capital broker plugin (stock, PDF input) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First PDF-based broker plugin. Parses IBI Capital "Sale Of Stock Activity Statement" reports into the standardized stock CSV. Emits a synthetic BUY dated at Grant Date with cost basis = shares × Price For Tax, a SELL at Execution Date with Total Amount Due to Order as gross proceeds, and a SERVICE_FEE from Total Fees (when non-zero). Cost basis model follows the conservative KIS line: RSU (Price For Tax = 0) → BUY fiat_value = 0; ESPP (Price For Tax = discounted purchase price) → BUY fiat_value = shares × price. Documented in docs/BROKERS.md. pdfplumber ships as an optional extra ([ibi]) so users not using IBI keep a minimal install. The packaged companies.json (90 entries, seeded from Wikipedia's list of Israeli companies on NASDAQ) maps company→ticker; --ticker overrides for companies not yet in the seed. 
--- CONTRIBUTING.md | 7 +- README.md | 16 ++- README.pl.md | 16 ++- docs/BROKERS.md | 106 +++++++++++++++ pit38/cli.py | 54 ++++++++ pit38/plugins/stock/ibi_capital/__init__.py | 0 pit38/plugins/stock/ibi_capital/__main__.py | 74 ++++++++++ .../plugins/stock/ibi_capital/companies.json | 92 +++++++++++++ .../stock/ibi_capital/company_ticker.py | 68 ++++++++++ .../plugins/stock/ibi_capital/order_parser.py | 128 ++++++++++++++++++ pit38/plugins/stock/ibi_capital/pdf_reader.py | 33 +++++ .../stock/ibi_capital/record_builder.py | 67 +++++++++ pyproject.toml | 6 + tests/e2e/fixtures/ibi_order_fake_espp.txt | 41 ++++++ tests/e2e/fixtures/ibi_order_fake_rsu.txt | 41 ++++++ tests/e2e/test_ibi_e2e.py | 66 +++++++++ tests/test_ibi_company_ticker.py | 59 ++++++++ tests/test_ibi_order_parser.py | 89 ++++++++++++ tests/test_ibi_record_builder.py | 89 ++++++++++++ 19 files changed, 1038 insertions(+), 14 deletions(-) create mode 100644 docs/BROKERS.md create mode 100644 pit38/plugins/stock/ibi_capital/__init__.py create mode 100644 pit38/plugins/stock/ibi_capital/__main__.py create mode 100644 pit38/plugins/stock/ibi_capital/companies.json create mode 100644 pit38/plugins/stock/ibi_capital/company_ticker.py create mode 100644 pit38/plugins/stock/ibi_capital/order_parser.py create mode 100644 pit38/plugins/stock/ibi_capital/pdf_reader.py create mode 100644 pit38/plugins/stock/ibi_capital/record_builder.py create mode 100644 tests/e2e/fixtures/ibi_order_fake_espp.txt create mode 100644 tests/e2e/fixtures/ibi_order_fake_rsu.txt create mode 100644 tests/e2e/test_ibi_e2e.py create mode 100644 tests/test_ibi_company_ticker.py create mode 100644 tests/test_ibi_order_parser.py create mode 100644 tests/test_ibi_record_builder.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 116d9ac..f171060 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -169,8 +169,11 @@ architecture overview (PL/EN). 
**Reference implementations** (read these first): -- **Stocks** — `pit38/plugins/stock/revolut/` (most complete, handles - BOM, unknown operations, dividends, fees, stock splits) +- **Stocks, CSV input** — `pit38/plugins/stock/revolut/` (most complete, + handles BOM, unknown operations, dividends, fees, stock splits) +- **Stocks, PDF input** — `pit38/plugins/stock/ibi_capital/` (regex-based + PDF text parsing via pdfplumber as an optional extra, synthetic BUY + emitted from order confirmation) - **Crypto** — `pit38/plugins/crypto/binance/` **High-level recipe:** diff --git a/README.md b/README.md index 9474547..a7ef317 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,15 @@ A command-line tool for calculating Polish income tax on **stocks** and **crypto ## Supported Brokers -| Broker | Stocks | Crypto | -|----------|--------|--------| -| Revolut | Yes | Yes | -| E*Trade | Yes | — | -| Binance | — | Yes | -| Manual CSV | Yes | Yes | +| Broker | Stocks | Crypto | +|--------------|-----------------|--------| +| Revolut | Yes | Yes | +| E*Trade | Yes | — | +| IBI Capital | Yes (SELL-side, PDF input) | — | +| Binance | — | Yes | +| Manual CSV | Yes | Yes | + +For broker-specific quirks see [`docs/BROKERS.md`](docs/BROKERS.md). 
## Quick Start @@ -54,6 +57,7 @@ pit38 import revolut-stock -i revolut_export.csv -o transactions.csv pit38 import revolut-crypto -i revolut_export.csv -o transactions.csv pit38 import etrade -i etrade_export.csv -o transactions.csv pit38 import binance -i binance_export.csv -o transactions.csv +pit38 import ibi-capital -i ~/ibi_orders/ -o transactions.csv # PDF input; install with: pipx install 'pit-38[ibi]' ``` You can combine multiple files from different brokers: diff --git a/README.pl.md b/README.pl.md index 8774ebc..9c6ef07 100644 --- a/README.pl.md +++ b/README.pl.md @@ -12,12 +12,15 @@ Narzędzie wiersza poleceń do obliczania polskiego podatku dochodowego od **akc ## Obsługiwani brokerzy -| Broker | Akcje | Krypto | -|----------|--------|--------| -| Revolut | Tak | Tak | -| E*Trade | Tak | — | -| Binance | — | Tak | -| Ręczny CSV | Tak | Tak | +| Broker | Akcje | Krypto | +|--------------|-------------------------------|--------| +| Revolut | Tak | Tak | +| E*Trade | Tak | — | +| IBI Capital | Tak (tylko sprzedaże, input PDF) | — | +| Binance | — | Tak | +| Ręczny CSV | Tak | Tak | + +Specyfika poszczególnych brokerów — zob. [`docs/BROKERS.md`](docs/BROKERS.md). ## Szybki start @@ -54,6 +57,7 @@ pit38 import revolut-stock -i eksport_revolut.csv -o transakcje.csv pit38 import revolut-crypto -i eksport_revolut.csv -o transakcje.csv pit38 import etrade -i eksport_etrade.csv -o transakcje.csv pit38 import binance -i eksport_binance.csv -o transakcje.csv +pit38 import ibi-capital -i ~/ibi_orders/ -o transakcje.csv # input: PDF; instalacja: pipx install 'pit-38[ibi]' ``` Możesz łączyć pliki z różnych brokerów: diff --git a/docs/BROKERS.md b/docs/BROKERS.md new file mode 100644 index 0000000..26b3390 --- /dev/null +++ b/docs/BROKERS.md @@ -0,0 +1,106 @@ +# Broker-specific guides + +Per-broker notes that don't belong in the README but matter when you're +importing a specific broker's export for the first time. 
One section per +broker; see the top-level `README.md` for the supported-brokers table. + +## IBI Capital + +IBI Capital is an Israeli trustee broker that administers equity +compensation (RSU + ESPP) plans for employees of companies listed on +NASDAQ/NYSE. The `pit38 import ibi-capital` command parses IBI's **Sale +Of Stock Activity Statement** PDFs — one PDF per executed sale order — +and emits standardized transactions plus service fees. + +### Scope — what this plugin does and doesn't + +**Does**: +- Parses every `*.pdf` Sale Of Stock Activity Statement under the paths + you pass via `-i` (files or directories, repeatable). +- Emits one synthetic `BUY` dated at the grant day, one `SELL` dated at + the execution day, and one `SERVICE_FEE` at the execution day when + total fees are non-zero. +- Looks up the company's ticker in a packaged JSON seed + (`pit38/plugins/stock/ibi_capital/companies.json`); `--ticker` overrides. + +**Doesn't**: +- Read vesting confirmations, ESPP purchase reports, dividend statements, + or any non-sale IBI document. +- Automatically handle currencies other than USD (all sample PDFs were + USD — if you have a non-USD order, open an issue). + +### Installation + +Parsing PDFs requires the optional `pdfplumber` dependency: + +```bash +pipx install 'pit-38[ibi]' +# or for development: +pip install -e ".[ibi,dev]" +``` + +Users who don't need IBI support can skip the `[ibi]` extra — the core +tool works without `pdfplumber`. + +### Usage + +Download your order confirmations from the IBI Capital portal into a +folder, then: + +```bash +pit38 import ibi-capital -i ~/Downloads/ibi_orders/ -o ibi.csv +pit38 stock -f ibi.csv -y 2025 +``` + +You can mix IBI output with exports from other brokers: + +```bash +pit38 stock -f ibi.csv -f revolut.csv -y 2025 +``` + +Use `--ticker MYSYM` to override the company→ticker mapping (useful when +your company isn't in the shipped `companies.json` yet). 
+ +### Cost basis — RSU vs ESPP + +IBI's order PDFs expose a `Price For Tax` field that the plugin uses as +the per-share cost basis for the synthetic `BUY` transaction: + +- **RSU** (typical grant): `Price For Tax: USD 0.00`. The plugin emits + `BUY` with `fiat_value = 0`. This follows the conservative/KIS line of + Polish tax interpretation: for shares received via RSU the employee + did not bear an acquisition cost, and the value at vest was already + taxed as income from employment. The capital gain on sale is therefore + `proceeds − 0 − fees`. +- **ESPP** (employee stock purchase plan): `Price For Tax: USD N.NN` + reflects the discounted purchase price you actually paid. The plugin + uses that as the cost basis — `BUY` `fiat_value = shares × Price For Tax`. + +This is an interpretation of tax law, not a universal rule. If your own +advisor takes a different position (e.g. allowing the FMV-at-vest as RSU +cost basis), edit the resulting CSV directly — each `BUY` row's +`fiat_value` column is straightforward to adjust. + +### Adding a new company → ticker mapping + +If your company name isn't in the seed, you have two options: + +1. **Quick fix** — pass `--ticker MYSYM` on the CLI; the plugin uses it + for every PDF in that run. +2. **Permanent fix** — open a PR adding a line to + [`pit38/plugins/stock/ibi_capital/companies.json`](../pit38/plugins/stock/ibi_capital/companies.json). + Keys are the `Company:` value **exactly as it appears in your IBI + PDFs** (lowercase — the loader does case-insensitive lookup, but + keeping the file lowercase keeps diffs clean). Values are current + NASDAQ/NYSE tickers in uppercase. + +### Known limitations + +- The plugin assumes USD throughout. Non-USD orders would need a + `Currency` enum extension (ILS/EUR support isn't there yet). +- `Wire Fee` from the `Funds Proceed` section isn't surfaced separately. + In the sample PDFs it's always `0.00`; if you hit a non-zero wire fee + that matters for your tax calc, open an issue. 
+- Anonymized PDF fixtures aren't shipped in the repo (real IBI PDFs + contain PII). Parser unit tests use anonymized text snapshots under + `tests/e2e/fixtures/ibi_order_fake_*.txt`. diff --git a/pit38/cli.py b/pit38/cli.py index d88988d..1e852ed 100644 --- a/pit38/cli.py +++ b/pit38/cli.py @@ -99,6 +99,60 @@ def import_etrade(input_path, output_path, log_level): click.echo(f"Saved {len(transactions)} transactions to {output_path}") +@import_cmd.command("ibi-capital") +@click.option("-i", "--input", "input_paths", type=click.Path(exists=True), + multiple=True, required=True, + help="IBI order confirmation PDF or directory of PDFs (repeatable)") +@click.option("-o", "--output", "output_path", type=click.Path(), required=True, help="Output standardized CSV") +@click.option("--ticker", default=None, + help="Override ticker (skips companies.json lookup — useful for companies not yet in the seed)") +@click.option("-ll", "--log-level", default="INFO", type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])) +def import_ibi_capital(input_paths, output_path, ticker, log_level): + """Import stock sale confirmations from IBI Capital (Israeli broker, PDF input).""" + setup_logger(log_level) + + from pathlib import Path + from pit38.plugins.stock.generic_saver import GenericCsvSaver + from pit38.plugins.stock.ibi_capital.company_ticker import resolve_ticker + from pit38.plugins.stock.ibi_capital.order_parser import parse_order_report + from pit38.plugins.stock.ibi_capital.pdf_reader import extract_text + from pit38.plugins.stock.ibi_capital.record_builder import build_records + + pdfs: list[Path] = [] + for raw in input_paths: + p = Path(raw) + pdfs.extend(sorted(p.rglob("*.pdf")) if p.is_dir() else [p]) + + if not pdfs: + click.echo("No PDF files found under the provided --input paths.", err=True) + raise click.Abort() + + transactions = [] + fees = [] + saw_rsu = False + for pdf_path in pdfs: + parsed = parse_order_report(extract_text(pdf_path)) + resolved = 
resolve_ticker(parsed.company, override=ticker) + t, f = build_records(parsed, resolved) + transactions.extend(t) + fees.extend(f) + if parsed.price_for_tax == 0: + saw_rsu = True + + GenericCsvSaver.save(transactions, fees, output_path) + click.echo( + f"Saved {len(transactions)} transactions and {len(fees)} service fees " + f"from {len(pdfs)} IBI Capital PDF(s) to {output_path}" + ) + if saw_rsu: + click.echo( + "\nℹ For RSU orders (Price For Tax = 0), cost basis is recorded as 0 — " + "consistent with the standard Polish tax treatment of RSU share sales. " + "See docs/BROKERS.md#ibi-capital for details.", + err=True, + ) + + @import_cmd.command("binance") @click.option("-i", "--input", "input_path", type=click.Path(exists=True), required=True, help="Binance export CSV") @click.option("-o", "--output", "output_path", type=click.Path(), required=True, help="Output standardized CSV") diff --git a/pit38/plugins/stock/ibi_capital/__init__.py b/pit38/plugins/stock/ibi_capital/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pit38/plugins/stock/ibi_capital/__main__.py b/pit38/plugins/stock/ibi_capital/__main__.py new file mode 100644 index 0000000..de7c4fb --- /dev/null +++ b/pit38/plugins/stock/ibi_capital/__main__.py @@ -0,0 +1,74 @@ +"""Standalone CLI entry: ``python -m pit38.plugins.stock.ibi_capital``. + +Mirrors ``pit38/plugins/stock/revolut/__main__.py``. The user-facing CLI +lives in ``pit38/cli.py`` (``pit38 import ibi-capital``) and is the +supported path; this entry point is a convenience for plugin-level +debugging. 
+""" +from __future__ import annotations + +import sys +from pathlib import Path + +import click +from loguru import logger + +from pit38.plugins.stock.generic_saver import GenericCsvSaver +from pit38.plugins.stock.ibi_capital.company_ticker import resolve_ticker +from pit38.plugins.stock.ibi_capital.order_parser import parse_order_report +from pit38.plugins.stock.ibi_capital.pdf_reader import extract_text +from pit38.plugins.stock.ibi_capital.record_builder import build_records + + +def setup_logger(log_level: str) -> None: + logger.remove() + logger.add(sys.stderr, level=log_level) + + +def _collect_pdfs(paths: tuple[str, ...]) -> list[Path]: + """Expand file/directory inputs into a flat list of PDF paths.""" + resolved: list[Path] = [] + for raw in paths: + p = Path(raw) + if p.is_dir(): + resolved.extend(sorted(p.rglob("*.pdf"))) + else: + resolved.append(p) + return resolved + + +@click.command() +@click.option( + "-i", "--input", "input_paths", + type=click.Path(exists=True), multiple=True, required=True, + help="IBI order confirmation PDF or directory of PDFs (repeatable)", +) +@click.option("-o", "--output", "output_path", type=click.Path(), required=True) +@click.option("--ticker", default=None, help="Override ticker (skips companies.json lookup)") +@click.option( + "-ll", "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), + default="INFO", +) +def main(input_paths, output_path, ticker, log_level): + setup_logger(log_level) + + all_transactions = [] + all_fees = [] + for pdf_path in _collect_pdfs(input_paths): + logger.debug(f"Reading {pdf_path}") + parsed = parse_order_report(extract_text(pdf_path)) + resolved_ticker = resolve_ticker(parsed.company, override=ticker) + transactions, fees = build_records(parsed, resolved_ticker) + all_transactions.extend(transactions) + all_fees.extend(fees) + + GenericCsvSaver.save(all_transactions, all_fees, output_path) + click.echo( + f"Saved {len(all_transactions)} transactions and 
{len(all_fees)} " + f"service fees to {output_path}" + ) + + +if __name__ == "__main__": + main() diff --git a/pit38/plugins/stock/ibi_capital/companies.json b/pit38/plugins/stock/ibi_capital/companies.json new file mode 100644 index 0000000..24c0abf --- /dev/null +++ b/pit38/plugins/stock/ibi_capital/companies.json @@ -0,0 +1,92 @@ +{ + "allot": "ALLT", + "arbe robotics": "ARBE", + "arcturus therapeutics": "ARCT", + "audiocodes": "AUDC", + "b.o.s. better online solutions": "BOSC", + "biolinerx": "BLRX", + "biondvax pharmaceuticals": "BVXV", + "brainstorm cell therapeutics": "BCLI", + "brainsway": "BWAY", + "caesarstone": "CSTE", + "camtek": "CAMT", + "cellebrite": "CLBT", + "ceragon networks": "CRNT", + "ceva": "CEVA", + "check point software technologies": "CHKP", + "collplant biotechnologies": "CLGN", + "compugen": "CGEN", + "cyberark": "CYBR", + "dariohealth": "DRIO", + "elbit systems": "ESLT", + "eltek": "ELTK", + "enlight renewable energy": "ENLT", + "entera bio": "ENTX", + "evogene": "EVGN", + "fiverr": "FVRR", + "foresight autonomous holdings": "FRSX", + "formula systems": "FORTY", + "galmed pharmaceuticals": "GLMD", + "gilat satellite networks": "GILT", + "global-e online": "GLBE", + "hippo holdings": "HIPO", + "icl group": "ICL", + "inmode": "INMD", + "innoviz technologies": "INVZ", + "ituran location and control": "ITRN", + "jfrog": "FROG", + "kaltura": "KLTR", + "kamada": "KMDA", + "kornit digital": "KRNT", + "lemonade": "LMND", + "magic software enterprises": "MGIC", + "mediwound": "MDWD", + "mind cti": "MNDO", + "mobileye": "MBLY", + "monday.com": "MNDY", + "nano dimension": "NNDM", + "nano-x imaging": "NNOX", + "nayax": "NYAX", + "nice": "NICE", + "nova": "NVMI", + "oddity tech": "ODD", + "optibase": "OBAS", + "oramed pharmaceuticals": "ORMP", + "ormat technologies": "ORA", + "otonomo technologies": "OTMO", + "outbrain": "OB", + "pagaya technologies": "PGY", + "partner communications": "PTNR", + "payoneer": "PAYO", + "perion network": "PERI", + 
"playtika": "PLTK", + "polypid": "PYPD", + "radcom": "RDCM", + "radware": "RDWR", + "rani therapeutics": "RANI", + "redhill biopharma": "RDHL", + "ree automotive": "REE", + "rewalk robotics": "LFWD", + "riskified": "RSKD", + "sapiens international": "SPNS", + "satixfy communications": "SATX", + "scisparc": "SPRC", + "silicom": "SILC", + "sol-gel technologies": "SLGL", + "solaredge technologies": "SEDG", + "stratasys": "SSYS", + "supercom": "SPCB", + "taboola": "TBLA", + "taro pharmaceutical industries": "TARO", + "tat technologies": "TATT", + "teva pharmaceutical industries": "TEVA", + "tigo energy": "TYGO", + "tower semiconductor": "TSEM", + "urogen pharma": "URGN", + "valens semiconductor": "VLN", + "varonis systems": "VRNS", + "verint systems": "VRNT", + "wix.com": "WIX", + "xtl biopharmaceuticals": "XTLB", + "zim integrated shipping services": "ZIM" +} diff --git a/pit38/plugins/stock/ibi_capital/company_ticker.py b/pit38/plugins/stock/ibi_capital/company_ticker.py new file mode 100644 index 0000000..ccffe00 --- /dev/null +++ b/pit38/plugins/stock/ibi_capital/company_ticker.py @@ -0,0 +1,68 @@ +"""Resolve the ``Company`` field of an IBI order PDF to a stock ticker. + +IBI order reports don't include the NASDAQ/NYSE ticker — only a company +name such as ``monday.com``. The mapping to tickers lives in +``companies.json`` shipped alongside this module; seeding and extension +happen via PR. The ``--ticker`` CLI flag overrides the mapping so a user +whose company isn't in the JSON can still import without waiting for a +PR merge. + +The JSON is loaded once per process and cached at module level. Lookup +is case-insensitive and trims surrounding whitespace, because we can't +guarantee IBI's formatting stays 100% stable across account types. 
+""" +from __future__ import annotations + +import json +from functools import lru_cache +from importlib.resources import files +from typing import Mapping + + +class UnknownCompanyError(KeyError): + """Raised when ``Company`` from a PDF has no ticker mapping and no override.""" + + +_COMPANIES_JSON = "companies.json" +_PR_LINK = ( + "https://github.com/pbialon/pit-38/blob/main/" + "pit38/plugins/stock/ibi_capital/companies.json" +) + + +@lru_cache(maxsize=1) +def _load_mapping() -> Mapping[str, str]: + raw = files(__package__).joinpath(_COMPANIES_JSON).read_text(encoding="utf-8") + data = json.loads(raw) + # Normalize keys to lowercase once up front so lookup stays O(1) and + # doesn't allocate on each call. + return {k.strip().lower(): v for k, v in data.items()} + + +def resolve_ticker( + company: str, + override: str | None = None, + *, + mapping: Mapping[str, str] | None = None, +) -> str: + """Return the stock ticker for ``company``. + + ``override`` wins unconditionally — useful when the packaged JSON + doesn't yet include the user's company, or when the PDF has an + unusual company-name spelling. + + ``mapping`` is test-only dependency injection; production callers + pass nothing and get the packaged ``companies.json``. + """ + if override: + return override + + table = mapping if mapping is not None else _load_mapping() + key = company.strip().lower() + if key not in table: + raise UnknownCompanyError( + f"Company '{company}' has no ticker mapping. " + f"Either pass --ticker on the CLI, or open a PR " + f"adding '{company}' to {_PR_LINK}" + ) + return table[key] diff --git a/pit38/plugins/stock/ibi_capital/order_parser.py b/pit38/plugins/stock/ibi_capital/order_parser.py new file mode 100644 index 0000000..547c05d --- /dev/null +++ b/pit38/plugins/stock/ibi_capital/order_parser.py @@ -0,0 +1,128 @@ +"""Parse an IBI Capital 'Sale Of Stock Activity Statement' from extracted PDF text. + +IBI Capital ships one PDF per executed sale order. 
Every PDF has the same +fixed layout; the interesting fields are exposed as ``Label: value`` pairs +or inside one well-identified line (``Total Amount Due to Order N USD +P.PP USD T.TT``). This module uses regex rather than table-extraction +because pdfplumber's text flow scrambles the ``Fees`` block (values appear +before labels) — the labelled totals we care about are easier to read off +the flat text than off the partially-ordered table cells. + +This module must not import pdfplumber. That way unit tests can exercise +the parser against anonymized ``.txt`` fixtures without installing the +optional ``[ibi]`` extra. +""" +from __future__ import annotations + +import re +from dataclasses import dataclass + +import pendulum + +from pit38.plugins.normalization import parse_amount + + +class OrderParseError(ValueError): + """Raised when a required field is missing from an IBI order PDF text.""" + + +@dataclass(frozen=True) +class ParsedOrder: + order_number: str + company: str + plan: str + grant_date: pendulum.DateTime + execution_date: pendulum.DateTime + shares: int + sale_price: float + total_amount: float + total_fees: float + price_for_tax: float + + +# English month names as IBI writes them, e.g. "February 04, 2024". 
+_DATE_FORMAT = "MMMM DD, YYYY" + + +def parse_order_report(text: str) -> ParsedOrder: + """Parse the full text of one IBI order confirmation PDF.""" + return ParsedOrder( + order_number=_required(r"Order Number:\s+(\S+)", text, "Order Number"), + company=_required(r"Company:\s*(.+?)\s*$", text, "Company", flags=re.MULTILINE), + plan=_required(r"Plan:\s*(.+?)\s*$", text, "Plan", flags=re.MULTILINE), + grant_date=_date(r"Grant Date:\s+(\w+\s+\d{1,2},\s+\d{4})", text, "Grant Date"), + execution_date=_date(r"Execution Date:\s+(\w+\s+\d{1,2},\s+\d{4})", text, "Execution Date"), + shares=_shares(text), + sale_price=_sale_price(text), + total_amount=_total_amount(text), + total_fees=_total_fees(text), + price_for_tax=_price_for_tax(text), + ) + + +_ORDER_LINE = re.compile( + r"Total Amount Due to Order\s+" + r"(?P<shares>\d+)\s+" + r"USD\s+(?P<price>[\d,]+\.?\d*)\s+" + r"USD\s+(?P<total>[\d,]+\.?\d*)" +) + + +def _shares(text: str) -> int: + m = _ORDER_LINE.search(text) + if not m: + raise OrderParseError("Could not locate 'Total Amount Due to Order' line") + return int(m.group("shares")) + + +def _sale_price(text: str) -> float: + m = _ORDER_LINE.search(text) + if not m: + raise OrderParseError("Could not locate 'Total Amount Due to Order' line") + return parse_amount(m.group("price")) + + +def _total_amount(text: str) -> float: + m = _ORDER_LINE.search(text) + if not m: + raise OrderParseError("Could not locate 'Total Amount Due to Order' line") + return parse_amount(m.group("total")) + + +# The parenthetical note may drift; make it optional so a minor format +# change doesn't break the parser. 
+_TOTAL_FEES = re.compile( + r"Total Fees\s+(?:\([^)]*\)\s+)?USD\s+([\d,]+\.?\d*)" +) + + +def _total_fees(text: str) -> float: + m = _TOTAL_FEES.search(text) + if not m: + raise OrderParseError("Could not locate 'Total Fees' line") + return parse_amount(m.group(1)) + + +_PRICE_FOR_TAX = re.compile(r"Price For Tax:\s+USD\s+([\d,]+\.?\d*)") + + +def _price_for_tax(text: str) -> float: + m = _PRICE_FOR_TAX.search(text) + if not m: + raise OrderParseError("Could not locate 'Price For Tax' field") + return parse_amount(m.group(1)) + + +def _required(pattern: str, text: str, field: str, flags: int = 0) -> str: + m = re.search(pattern, text, flags) + if not m: + raise OrderParseError(f"Could not find '{field}' in PDF text") + return m.group(1).strip() + + +def _date(pattern: str, text: str, field: str) -> pendulum.DateTime: + raw = _required(pattern, text, field) + try: + return pendulum.from_format(raw, _DATE_FORMAT) + except Exception as e: + raise OrderParseError(f"Could not parse '{field}' date {raw!r}: {e}") from e diff --git a/pit38/plugins/stock/ibi_capital/pdf_reader.py b/pit38/plugins/stock/ibi_capital/pdf_reader.py new file mode 100644 index 0000000..3ed6cca --- /dev/null +++ b/pit38/plugins/stock/ibi_capital/pdf_reader.py @@ -0,0 +1,33 @@ +"""Extract plain text from an IBI Capital order confirmation PDF. + +Thin wrapper over pdfplumber so the optional ``[ibi]`` dependency stays +isolated. Plugin users who don't need IBI support don't install +pdfplumber and don't get import-time breakage. +""" +from __future__ import annotations + +from pathlib import Path + + +def extract_text(pdf_path: str | Path) -> str: + """Return the concatenated text of every page in ``pdf_path``. + + pdfplumber preserves layout order reasonably well for IBI's fixed + template, which is all the parser needs. Rare multi-page orders + still work because we join pages with newlines and the parser uses + global regex search. 
+ """ + try: + import pdfplumber + except ImportError as e: + raise ImportError( + "IBI Capital plugin requires pdfplumber. " + "Install with: pip install 'pit-38[ibi]'" + ) from e + + pages: list[str] = [] + with pdfplumber.open(str(pdf_path)) as pdf: + for page in pdf.pages: + text = page.extract_text() or "" + pages.append(text) + return "\n".join(pages) diff --git a/pit38/plugins/stock/ibi_capital/record_builder.py b/pit38/plugins/stock/ibi_capital/record_builder.py new file mode 100644 index 0000000..f2ba561 --- /dev/null +++ b/pit38/plugins/stock/ibi_capital/record_builder.py @@ -0,0 +1,67 @@ +"""Turn a ParsedOrder into domain records that the standardized loader consumes. + +Each IBI order yields: + +* ``BUY`` transaction dated at the grant day — synthetic, because IBI order + reports do not include the original basis. We encode the Polish tax + interpretation (KIS line) where RSU cost basis is 0 and ESPP cost basis + is the discounted purchase price, via ``Price For Tax``. +* ``SELL`` transaction dated at the execution day, using ``Total Amount + Due to Order`` as gross proceeds. +* ``ServiceFee`` dated at the execution day (if non-zero). + +BUY uses 09:00:00 and SELL uses 10:00:00 so that FIFO matching stays +deterministic when ``Grant Date == Execution Date`` (possible for same-day +equity events). +""" +from __future__ import annotations + +from pit38.domain.currency_exchange_service.currencies import Currency, FiatValue +from pit38.domain.stock.operations.service_fee import ServiceFee +from pit38.domain.transactions import Action, AssetValue, Transaction + +from .order_parser import ParsedOrder + +_BUY_TIME = {"hour": 9, "minute": 0, "second": 0} +_SELL_TIME = {"hour": 10, "minute": 0, "second": 0} + + +def build_records( + order: ParsedOrder, + ticker: str, +) -> tuple[list[Transaction], list[ServiceFee]]: + """Emit BUY + SELL transactions and a ServiceFee (if fees > 0). 
+ + The shares count and ticker are identical for BUY and SELL; the pair + represents one acquisition + one liquidation of the same position. + """ + shares = order.shares + asset = AssetValue(amount=float(shares), asset_name=ticker) + + buy = Transaction( + asset=asset, + fiat_value=FiatValue( + amount=round(shares * order.price_for_tax, 2), + currency=Currency.DOLLAR, + ), + action=Action.BUY, + date=order.grant_date.set(**_BUY_TIME), + ) + + sell = Transaction( + asset=asset, + fiat_value=FiatValue(amount=order.total_amount, currency=Currency.DOLLAR), + action=Action.SELL, + date=order.execution_date.set(**_SELL_TIME), + ) + + fees: list[ServiceFee] = [] + if order.total_fees > 0: + fees.append( + ServiceFee( + date=order.execution_date.set(**_SELL_TIME), + value=FiatValue(amount=order.total_fees, currency=Currency.DOLLAR), + ) + ) + + return [buy, sell], fees diff --git a/pyproject.toml b/pyproject.toml index b525526..84b44f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,9 +27,15 @@ dev = [ "pytest-cov~=5.0", "ipdb==0.13.13", ] +ibi = [ + "pdfplumber~=0.11.0", +] [tool.setuptools.packages.find] where = ["."] +[tool.setuptools.package-data] +"pit38.plugins.stock.ibi_capital" = ["companies.json"] + [project.scripts] pit38 = "pit38.cli:main" diff --git a/tests/e2e/fixtures/ibi_order_fake_espp.txt b/tests/e2e/fixtures/ibi_order_fake_espp.txt new file mode 100644 index 0000000..33b9150 --- /dev/null +++ b/tests/e2e/fixtures/ibi_order_fake_espp.txt @@ -0,0 +1,41 @@ +April 25, 2026 +Sale Of Stock Activity Statement Order Number: 9000002 + .00 +Test User ID / SS # Company: acme.com +General Details +Order Details +Grant Date: March 15, 2024 Grant No.: ESPP2222 Plan: Acme ESPP +Order Date: May 01, 2025 Execution Date: May 01, 2025 Price For Tax: USD 50.00 +Total Amount Due to Order 10 USD 120.0000 USD 1,200.00 +Total Amount Due to Order Shares X Sale Price = Value += Value +USD +USD 0.00 + 4.00 +USD 1.20 +USD 0.00 +Total Fees (THE ABOVE FEES DO NOT INCLUDE 
TRANSFER FEES) USD 5.20 +Other Fees +Service Fee +Foreign Fee +Trustee Fee +Fees +Capital Tax USD 0.00 +Tax Rate X = Value + - % +Total Cost USD 5.20 + 5.20 +USD +USD +Total Tax +Total Fees +Total Cost = Value + 0.00 +Net Gain Due to Order +Total Cost +Amount Due to Order USD +USD +USD 1,194.80 + 5.20 + 1,200.00 +Net Gain Due to Order = Value diff --git a/tests/e2e/fixtures/ibi_order_fake_rsu.txt b/tests/e2e/fixtures/ibi_order_fake_rsu.txt new file mode 100644 index 0000000..8650876 --- /dev/null +++ b/tests/e2e/fixtures/ibi_order_fake_rsu.txt @@ -0,0 +1,41 @@ +April 25, 2026 +Sale Of Stock Activity Statement Order Number: 9000001 + .00 +Test User ID / SS # Company: acme.com +General Details +Order Details +Grant Date: January 10, 2023 Grant No.: 1111 Plan: Acme 2022 RSU +Order Date: February 20, 2025 Execution Date: February 20, 2025 Price For Tax: USD 0.00 +Total Amount Due to Order 20 USD 100.0000 USD 2,000.00 +Total Amount Due to Order Shares X Sale Price = Value += Value +USD +USD 0.00 + 4.00 +USD 2.00 +USD 0.00 +Total Fees (THE ABOVE FEES DO NOT INCLUDE TRANSFER FEES) USD 6.00 +Other Fees +Service Fee +Foreign Fee +Trustee Fee +Fees +Capital Tax USD 0.00 +Tax Rate X = Value + - % +Total Cost USD 6.00 + 6.00 +USD +USD +Total Tax +Total Fees +Total Cost = Value + 0.00 +Net Gain Due to Order +Total Cost +Amount Due to Order USD +USD +USD 1,994.00 + 6.00 + 2,000.00 +Net Gain Due to Order = Value diff --git a/tests/e2e/test_ibi_e2e.py b/tests/e2e/test_ibi_e2e.py new file mode 100644 index 0000000..6cf870b --- /dev/null +++ b/tests/e2e/test_ibi_e2e.py @@ -0,0 +1,66 @@ +"""End-to-end test for the IBI Capital plugin. + +Exercises: text fixture → order_parser → record_builder → GenericCsvSaver +→ StockLoader round-trip. We bypass ``pdf_reader.extract_text`` because +pdfplumber PDF output is deterministic for a given template and testing +it adds no extra assurance; what matters is that the parser → builder → +saver → loader chain holds. 
+""" +import pathlib +import tempfile +from unittest import TestCase + +from pit38.data_sources.stock_loader.csv_loader import Loader as StockLoader +from pit38.domain.transactions import Action, Transaction +from pit38.plugins.stock.generic_saver import GenericCsvSaver +from pit38.plugins.stock.ibi_capital.order_parser import parse_order_report +from pit38.plugins.stock.ibi_capital.record_builder import build_records + +FIXTURES = pathlib.Path(__file__).parent / "fixtures" + + +class TestIbiPipeline(TestCase): + def test_rsu_and_espp_fixtures_roundtrip_through_loader(self): + rsu_text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + espp_text = (FIXTURES / "ibi_order_fake_espp.txt").read_text() + + transactions = [] + fees = [] + for text in (rsu_text, espp_text): + parsed = parse_order_report(text) + t, f = build_records(parsed, ticker="ACME") + transactions.extend(t) + fees.extend(f) + + with tempfile.NamedTemporaryFile(suffix=".csv", mode="w", delete=False) as tmp: + tmp_path = tmp.name + GenericCsvSaver.save(transactions, fees, tmp_path) + + loaded = StockLoader.load(tmp_path) + + # Loader returns a mixed list of Transaction + ServiceFee. Filter + # to transactions for the FIFO assertions. + txs = [item for item in loaded if isinstance(item, Transaction)] + + buy_sell = [t for t in txs if t.action in (Action.BUY, Action.SELL)] + self.assertEqual(len(buy_sell), 4) + + # RSU: BUY fiat_value is 0 (zero cost basis). + rsu_buys = [ + t for t in txs + if t.action == Action.BUY and t.fiat_value.amount == 0 + ] + self.assertEqual(len(rsu_buys), 1) + + # ESPP: BUY fiat_value = 10 shares × $50 Price For Tax = $500. + espp_buys = [ + t for t in txs + if t.action == Action.BUY and t.fiat_value.amount == 500.0 + ] + self.assertEqual(len(espp_buys), 1) + + # SELL proceeds match Total Amount Due to Order from fixtures. 
+ sell_amounts = sorted( + t.fiat_value.amount for t in txs if t.action == Action.SELL + ) + self.assertEqual(sell_amounts, [1200.0, 2000.0]) diff --git a/tests/test_ibi_company_ticker.py b/tests/test_ibi_company_ticker.py new file mode 100644 index 0000000..0818557 --- /dev/null +++ b/tests/test_ibi_company_ticker.py @@ -0,0 +1,59 @@ +from unittest import TestCase + +from pit38.plugins.stock.ibi_capital.company_ticker import ( + UnknownCompanyError, + _load_mapping, + resolve_ticker, +) + + +class TestResolveTicker(TestCase): + def test_packaged_json_resolves_monday_com(self): + # Smoke test that the shipped seed actually loads and contains + # the reference case we verified against real IBI PDFs. + self.assertEqual(resolve_ticker("monday.com"), "MNDY") + + def test_case_insensitive_lookup(self): + self.assertEqual( + resolve_ticker("MONDAY.COM", mapping={"monday.com": "MNDY"}), + "MNDY", + ) + self.assertEqual( + resolve_ticker("Monday.Com", mapping={"monday.com": "MNDY"}), + "MNDY", + ) + + def test_surrounding_whitespace_trimmed(self): + self.assertEqual( + resolve_ticker(" monday.com ", mapping={"monday.com": "MNDY"}), + "MNDY", + ) + + def test_override_wins_over_mapping(self): + self.assertEqual( + resolve_ticker("monday.com", override="XXX", mapping={"monday.com": "MNDY"}), + "XXX", + ) + + def test_override_wins_even_for_unknown_company(self): + # User escape hatch: company not in seed, pass --ticker and move on. 
+ self.assertEqual( + resolve_ticker("unknown llc", override="UNK", mapping={}), + "UNK", + ) + + def test_unknown_company_raises_with_helpful_message(self): + with self.assertRaises(UnknownCompanyError) as ctx: + resolve_ticker("unknown llc", mapping={"monday.com": "MNDY"}) + msg = str(ctx.exception) + self.assertIn("unknown llc", msg) + self.assertIn("--ticker", msg) + self.assertIn("companies.json", msg) + + def test_load_mapping_normalises_keys(self): + _load_mapping.cache_clear() + mapping = _load_mapping() + # All keys must already be lowercase so resolve_ticker's lookup + # stays hot-path simple. + for key in mapping: + self.assertEqual(key, key.lower()) diff --git a/tests/test_ibi_order_parser.py b/tests/test_ibi_order_parser.py new file mode 100644 index 0000000..cc9bb42 --- /dev/null +++ b/tests/test_ibi_order_parser.py @@ -0,0 +1,89 @@ +from pathlib import Path +from unittest import TestCase + +import pendulum + +from pit38.plugins.stock.ibi_capital.order_parser import ( + OrderParseError, + parse_order_report, +) + +FIXTURES = Path(__file__).parent / "e2e" / "fixtures" + + +class TestParseOrderReport(TestCase): + def test_parses_rsu_fixture(self): + text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + + parsed = parse_order_report(text) + + self.assertEqual(parsed.order_number, "9000001") + self.assertEqual(parsed.company, "acme.com") + self.assertEqual(parsed.plan, "Acme 2022 RSU") + self.assertEqual(parsed.grant_date, pendulum.datetime(2023, 1, 10)) + self.assertEqual(parsed.execution_date, pendulum.datetime(2025, 2, 20)) + self.assertEqual(parsed.shares, 20) + self.assertEqual(parsed.sale_price, 100.0) + self.assertEqual(parsed.total_amount, 2000.0) + self.assertEqual(parsed.total_fees, 6.0) + self.assertEqual(parsed.price_for_tax, 0.0) + + def test_parses_espp_fixture(self): + text = (FIXTURES / "ibi_order_fake_espp.txt").read_text() + + parsed = parse_order_report(text) + + self.assertEqual(parsed.order_number, "9000002") + 
self.assertEqual(parsed.plan, "Acme ESPP") + self.assertEqual(parsed.shares, 10) + self.assertEqual(parsed.total_amount, 1200.0) + self.assertEqual(parsed.total_fees, 5.2) + # ESPP encodes cost-basis-per-share in Price For Tax, unlike RSU. + self.assertEqual(parsed.price_for_tax, 50.0) + + def test_parses_thousands_separator_in_amount(self): + text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + # Swap in a larger amount to exercise the 1,234.56 formatting that + # real IBI exports use (e.g. USD 13,731.52). + text = text.replace( + "Total Amount Due to Order 20 USD 100.0000 USD 2,000.00", + "Total Amount Due to Order 47 USD 309.2001 USD 14,532.40", + ) + + parsed = parse_order_report(text) + + self.assertEqual(parsed.shares, 47) + self.assertEqual(parsed.sale_price, 309.2001) + self.assertEqual(parsed.total_amount, 14532.40) + + def test_raises_on_missing_execution_date(self): + text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + stripped = text.replace("Execution Date: February 20, 2025", "") + + with self.assertRaises(OrderParseError) as ctx: + parse_order_report(stripped) + self.assertIn("Execution Date", str(ctx.exception)) + + def test_raises_on_missing_total_fees(self): + text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + stripped = text.replace( + "Total Fees (THE ABOVE FEES DO NOT INCLUDE TRANSFER FEES) USD 6.00", + "", + ) + + with self.assertRaises(OrderParseError) as ctx: + parse_order_report(stripped) + self.assertIn("Total Fees", str(ctx.exception)) + + def test_total_fees_line_without_paren_note(self): + # Defensive: if IBI ever drops the parenthetical note, the parser + # still extracts the total fee value. 
+ text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + text = text.replace( + "Total Fees (THE ABOVE FEES DO NOT INCLUDE TRANSFER FEES) USD 6.00", + "Total Fees USD 6.00", + ) + + parsed = parse_order_report(text) + + self.assertEqual(parsed.total_fees, 6.0) diff --git a/tests/test_ibi_record_builder.py b/tests/test_ibi_record_builder.py new file mode 100644 index 0000000..ad8f678 --- /dev/null +++ b/tests/test_ibi_record_builder.py @@ -0,0 +1,89 @@ +from unittest import TestCase + +import pendulum + +from pit38.domain.currency_exchange_service.currencies import Currency, FiatValue +from pit38.domain.transactions import Action, AssetValue +from pit38.plugins.stock.ibi_capital.order_parser import ParsedOrder +from pit38.plugins.stock.ibi_capital.record_builder import build_records + + +def _order(**overrides) -> ParsedOrder: + defaults = { + "order_number": "9000001", + "company": "acme.com", + "plan": "Acme 2022 RSU", + "grant_date": pendulum.datetime(2023, 1, 10), + "execution_date": pendulum.datetime(2025, 2, 20), + "shares": 20, + "sale_price": 100.0, + "total_amount": 2000.0, + "total_fees": 6.0, + "price_for_tax": 0.0, + } + defaults.update(overrides) + return ParsedOrder(**defaults) + + +class TestBuildRecords(TestCase): + def test_emits_buy_sell_and_fee(self): + transactions, fees = build_records(_order(), ticker="ACME") + + self.assertEqual(len(transactions), 2) + self.assertEqual(len(fees), 1) + actions = [t.action for t in transactions] + self.assertIn(Action.BUY, actions) + self.assertIn(Action.SELL, actions) + + def test_buy_uses_grant_date_and_price_for_tax(self): + transactions, _ = build_records( + _order(shares=10, price_for_tax=50.0), + ticker="ACME", + ) + buy = next(t for t in transactions if t.action == Action.BUY) + + self.assertEqual(buy.date, pendulum.datetime(2023, 1, 10, 9, 0, 0)) + self.assertEqual(buy.asset, AssetValue(10.0, "ACME")) + self.assertEqual(buy.fiat_value, FiatValue(500.0, Currency.DOLLAR)) + + def 
test_sell_uses_execution_date_and_total_amount(self): + transactions, _ = build_records(_order(), ticker="ACME") + sell = next(t for t in transactions if t.action == Action.SELL) + + self.assertEqual(sell.date, pendulum.datetime(2025, 2, 20, 10, 0, 0)) + self.assertEqual(sell.asset, AssetValue(20.0, "ACME")) + self.assertEqual(sell.fiat_value, FiatValue(2000.0, Currency.DOLLAR)) + + def test_rsu_zero_cost_basis_still_emits_buy(self): + # price_for_tax = 0 for RSU; we still emit a zero-cost BUY so that + # the FIFO calculator has something to match the SELL against. + transactions, _ = build_records(_order(price_for_tax=0.0), ticker="ACME") + + buy = next(t for t in transactions if t.action == Action.BUY) + self.assertEqual(buy.asset.amount, 20.0) + self.assertEqual(buy.fiat_value, FiatValue(0.0, Currency.DOLLAR)) + + def test_zero_fees_skipped(self): + _, fees = build_records(_order(total_fees=0.0), ticker="ACME") + self.assertEqual(fees, []) + + def test_fee_dated_at_execution_time(self): + _, fees = build_records(_order(total_fees=6.0), ticker="ACME") + self.assertEqual(fees[0].date, pendulum.datetime(2025, 2, 20, 10, 0, 0)) + self.assertEqual(fees[0].value, FiatValue(6.0, Currency.DOLLAR)) + + def test_fifo_order_preserved_when_grant_and_execution_same_day(self): + # Defensive: when an order is executed on the grant day itself + # (e.g. same-day sale of newly-vested shares), BUY's 09:00 and + # SELL's 10:00 keep FIFO deterministic. 
+ transactions, _ = build_records( + _order( + grant_date=pendulum.datetime(2025, 2, 20), + execution_date=pendulum.datetime(2025, 2, 20), + ), + ticker="ACME", + ) + buy = next(t for t in transactions if t.action == Action.BUY) + sell = next(t for t in transactions if t.action == Action.SELL) + + self.assertLess(buy.date, sell.date) From e796e0ba4aae26fe9f03bca724746947eff5788d Mon Sep 17 00:00:00 2001 From: przemyslawbialon Date: Sat, 25 Apr 2026 18:35:30 +0200 Subject: [PATCH 2/5] test: Reach 100% coverage on IBI Capital plugin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - De-duplicate order-line parsing (_shares/_sale_price/_total_amount each re-ran the same regex with identical failure handling; now a single _order_line() returns the tuple). Drops three dead error branches and trims the module. - Add parser tests for the previously uncovered paths: missing Total Amount line, missing Price For Tax, missing Order Number, invalid date format (regex matches the shape, pendulum rejects the month). - Tighten Order Number regex to space/tab (was \s+, which would happily skip a newline and pick up the next token if the value were blank). - Add pdf_reader tests: ImportError guard for users missing the [ibi] extra, and happy path using a minimal hand-crafted PDF byte string so no binary fixture needs to be committed. - Delete __main__.py — pure duplicate of `pit38 import ibi-capital` with zero added value. Coverage on pit38.plugins.stock.ibi_capital: 93.2% → 100% (lines), 100% branches. Full suite: 172 → 179 tests, zero regressions. 
--- pit38/plugins/stock/ibi_capital/__main__.py | 74 ------------- .../plugins/stock/ibi_capital/order_parser.py | 32 +++--- tests/test_ibi_order_parser.py | 43 ++++++++ tests/test_ibi_pdf_reader.py | 102 ++++++++++++++++++ 4 files changed, 157 insertions(+), 94 deletions(-) delete mode 100644 pit38/plugins/stock/ibi_capital/__main__.py create mode 100644 tests/test_ibi_pdf_reader.py diff --git a/pit38/plugins/stock/ibi_capital/__main__.py b/pit38/plugins/stock/ibi_capital/__main__.py deleted file mode 100644 index de7c4fb..0000000 --- a/pit38/plugins/stock/ibi_capital/__main__.py +++ /dev/null @@ -1,74 +0,0 @@ -"""Standalone CLI entry: ``python -m pit38.plugins.stock.ibi_capital``. - -Mirrors ``pit38/plugins/stock/revolut/__main__.py``. The user-facing CLI -lives in ``pit38/cli.py`` (``pit38 import ibi-capital``) and is the -supported path; this entry point is a convenience for plugin-level -debugging. -""" -from __future__ import annotations - -import sys -from pathlib import Path - -import click -from loguru import logger - -from pit38.plugins.stock.generic_saver import GenericCsvSaver -from pit38.plugins.stock.ibi_capital.company_ticker import resolve_ticker -from pit38.plugins.stock.ibi_capital.order_parser import parse_order_report -from pit38.plugins.stock.ibi_capital.pdf_reader import extract_text -from pit38.plugins.stock.ibi_capital.record_builder import build_records - - -def setup_logger(log_level: str) -> None: - logger.remove() - logger.add(sys.stderr, level=log_level) - - -def _collect_pdfs(paths: tuple[str, ...]) -> list[Path]: - """Expand file/directory inputs into a flat list of PDF paths.""" - resolved: list[Path] = [] - for raw in paths: - p = Path(raw) - if p.is_dir(): - resolved.extend(sorted(p.rglob("*.pdf"))) - else: - resolved.append(p) - return resolved - - -@click.command() -@click.option( - "-i", "--input", "input_paths", - type=click.Path(exists=True), multiple=True, required=True, - help="IBI order confirmation PDF or directory of PDFs 
(repeatable)", -) -@click.option("-o", "--output", "output_path", type=click.Path(), required=True) -@click.option("--ticker", default=None, help="Override ticker (skips companies.json lookup)") -@click.option( - "-ll", "--log-level", - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), - default="INFO", -) -def main(input_paths, output_path, ticker, log_level): - setup_logger(log_level) - - all_transactions = [] - all_fees = [] - for pdf_path in _collect_pdfs(input_paths): - logger.debug(f"Reading {pdf_path}") - parsed = parse_order_report(extract_text(pdf_path)) - resolved_ticker = resolve_ticker(parsed.company, override=ticker) - transactions, fees = build_records(parsed, resolved_ticker) - all_transactions.extend(transactions) - all_fees.extend(fees) - - GenericCsvSaver.save(all_transactions, all_fees, output_path) - click.echo( - f"Saved {len(all_transactions)} transactions and {len(all_fees)} " - f"service fees to {output_path}" - ) - - -if __name__ == "__main__": - main() diff --git a/pit38/plugins/stock/ibi_capital/order_parser.py b/pit38/plugins/stock/ibi_capital/order_parser.py index 547c05d..22c42b9 100644 --- a/pit38/plugins/stock/ibi_capital/order_parser.py +++ b/pit38/plugins/stock/ibi_capital/order_parser.py @@ -46,15 +46,16 @@ class ParsedOrder: def parse_order_report(text: str) -> ParsedOrder: """Parse the full text of one IBI order confirmation PDF.""" + shares, sale_price, total_amount = _order_line(text) return ParsedOrder( - order_number=_required(r"Order Number:\s+(\S+)", text, "Order Number"), + order_number=_required(r"Order Number:[ \t]+(\S+)", text, "Order Number"), company=_required(r"Company:\s*(.+?)\s*$", text, "Company", flags=re.MULTILINE), plan=_required(r"Plan:\s*(.+?)\s*$", text, "Plan", flags=re.MULTILINE), grant_date=_date(r"Grant Date:\s+(\w+\s+\d{1,2},\s+\d{4})", text, "Grant Date"), execution_date=_date(r"Execution Date:\s+(\w+\s+\d{1,2},\s+\d{4})", text, "Execution Date"), - shares=_shares(text), - 
sale_price=_sale_price(text), - total_amount=_total_amount(text), + shares=shares, + sale_price=sale_price, + total_amount=total_amount, total_fees=_total_fees(text), price_for_tax=_price_for_tax(text), ) @@ -68,25 +69,16 @@ def parse_order_report(text: str) -> ParsedOrder: ) -def _shares(text: str) -> int: +def _order_line(text: str) -> tuple[int, float, float]: + """Return (shares, sale_price_per_share, total_amount) from the order line.""" m = _ORDER_LINE.search(text) if not m: raise OrderParseError("Could not locate 'Total Amount Due to Order' line") - return int(m.group("shares")) - - -def _sale_price(text: str) -> float: - m = _ORDER_LINE.search(text) - if not m: - raise OrderParseError("Could not locate 'Total Amount Due to Order' line") - return parse_amount(m.group("price")) - - -def _total_amount(text: str) -> float: - m = _ORDER_LINE.search(text) - if not m: - raise OrderParseError("Could not locate 'Total Amount Due to Order' line") - return parse_amount(m.group("total")) + return ( + int(m.group("shares")), + parse_amount(m.group("price")), + parse_amount(m.group("total")), + ) # The parenthetical note may drift; make it optional so a minor format diff --git a/tests/test_ibi_order_parser.py b/tests/test_ibi_order_parser.py index cc9bb42..91c7712 100644 --- a/tests/test_ibi_order_parser.py +++ b/tests/test_ibi_order_parser.py @@ -87,3 +87,46 @@ def test_total_fees_line_without_paren_note(self): parsed = parse_order_report(text) self.assertEqual(parsed.total_fees, 6.0) + + def test_raises_on_missing_total_amount_line(self): + text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + stripped = text.replace( + "Total Amount Due to Order 20 USD 100.0000 USD 2,000.00", + "", + ) + + with self.assertRaises(OrderParseError) as ctx: + parse_order_report(stripped) + self.assertIn("Total Amount Due to Order", str(ctx.exception)) + + def test_raises_on_missing_price_for_tax(self): + text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + stripped = 
text.replace("Price For Tax: USD 0.00", "") + + with self.assertRaises(OrderParseError) as ctx: + parse_order_report(stripped) + self.assertIn("Price For Tax", str(ctx.exception)) + + def test_raises_on_missing_order_number(self): + text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + stripped = text.replace("Order Number: 9000001", "Order Number:") + + with self.assertRaises(OrderParseError) as ctx: + parse_order_report(stripped) + self.assertIn("Order Number", str(ctx.exception)) + + def test_raises_on_invalid_date_format(self): + # Defensive: the regex only checks the date's shape, so a bogus month + # like "Februari 10, 2023" matches and pendulum then rejects it. We + # raise a clear parse error rather than leaking pendulum's internals. + text = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + text = text.replace( + "Grant Date: January 10, 2023", + "Grant Date: Februari 10, 2023", + ) + + with self.assertRaises(OrderParseError) as ctx: + parse_order_report(text) + msg = str(ctx.exception) + self.assertIn("Grant Date", msg) + self.assertIn("Februari 10, 2023", msg) diff --git a/tests/test_ibi_pdf_reader.py b/tests/test_ibi_pdf_reader.py new file mode 100644 index 0000000..c6fa210 --- /dev/null +++ b/tests/test_ibi_pdf_reader.py @@ -0,0 +1,102 @@ +"""Tests for the pdfplumber wrapper. + +The wrapper is three lines of real logic. The tests below cover both +branches: the ImportError guard (for users who install ``pit-38`` +without the ``[ibi]`` extra) and the happy path, exercised against a +minimal hand-crafted PDF so we don't need to commit a binary fixture +that would risk leaking PII. +""" +import builtins +import sys +import tempfile +from pathlib import Path +from unittest import TestCase +from unittest.mock import patch + +# A valid but minimal PDF containing a single page with one text run. +# Hand-crafted so we can test without committing a binary fixture — +# anonymization of real IBI PDFs would be involved, and pdfplumber +# itself doesn't ship sample PDFs. 
Offsets in the xref table are +# manually aligned with the object declarations above them. +_MINIMAL_PDF_BYTES = b"""%PDF-1.4 +1 0 obj<>endobj +2 0 obj<>endobj +3 0 obj<>>>>>endobj +4 0 obj<>stream +BT /F1 12 Tf 72 720 Td (IBI fixture text) Tj ET +endstream +endobj +5 0 obj<>endobj +xref +0 6 +0000000000 65535 f +0000000009 00000 n +0000000054 00000 n +0000000101 00000 n +0000000200 00000 n +0000000290 00000 n +trailer<> +startxref +352 +%%EOF +""" + + +class TestExtractTextImportGuard(TestCase): + def test_import_error_has_actionable_message(self): + # Simulate a user who ran `pipx install pit-38` without `[ibi]` + # by hiding any already-imported pdfplumber module and blocking + # re-import. + real_import = builtins.__import__ + + def _block_pdfplumber(name, *args, **kwargs): + if name == "pdfplumber": + raise ImportError("No module named 'pdfplumber'") + return real_import(name, *args, **kwargs) + + saved_pdfplumber = sys.modules.pop("pdfplumber", None) + try: + with patch.object(builtins, "__import__", _block_pdfplumber): + from pit38.plugins.stock.ibi_capital import pdf_reader + + with self.assertRaises(ImportError) as ctx: + pdf_reader.extract_text("/tmp/whatever.pdf") + + msg = str(ctx.exception) + self.assertIn("pdfplumber", msg) + # The whole point of the wrapped error is steering the user + # to the right install incantation. 
+ self.assertIn("pit-38[ibi]", msg) + finally: + if saved_pdfplumber is not None: + sys.modules["pdfplumber"] = saved_pdfplumber + + +class TestExtractTextHappyPath(TestCase): + def test_extracts_text_from_minimal_pdf(self): + from pit38.plugins.stock.ibi_capital.pdf_reader import extract_text + + with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f: + f.write(_MINIMAL_PDF_BYTES) + path = Path(f.name) + + try: + text = extract_text(path) + self.assertIn("IBI fixture text", text) + finally: + path.unlink() + + def test_accepts_string_path(self): + # pdfplumber.open accepts both str and Path; the wrapper signature + # is typed as str | Path so we test both. + from pit38.plugins.stock.ibi_capital.pdf_reader import extract_text + + with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f: + f.write(_MINIMAL_PDF_BYTES) + path_str = f.name + + try: + text = extract_text(path_str) + self.assertIn("IBI fixture text", text) + finally: + Path(path_str).unlink() From 86da10094615bb1e21bc8265cdf79f409825d79a Mon Sep 17 00:00:00 2001 From: przemyslawbialon Date: Sat, 25 Apr 2026 18:39:39 +0200 Subject: [PATCH 3/5] feat: Make pdfplumber a required dependency (was optional [ibi] extra) The optional extra was causing install-time foot-guns: - CI installed [dev] but not [ibi], so pdf_reader tests crashed with ImportError unless extras were chained manually. - Users who typed `pipx install pit-38` and then tried IBI import got a runtime ImportError instead of a clean install. - Tests needed a mocked-out ImportError path purely to cover the lazy-import guard that only existed because of the optional extra. Treating pdfplumber like pandas/openpyxl (already required) removes the whole class of problem. Install size grows ~50MB; acceptable for a tool run once a year. Drops the [ibi] extra, the ImportError-guard test, and the install-with-extras notes from the READMEs and broker docs; adds an unconditional `import pdfplumber` at the top of pdf_reader.py. 
Coverage stays at 100%. --- CONTRIBUTING.md | 4 +- README.md | 2 +- README.pl.md | 2 +- docs/BROKERS.md | 13 ------ .../plugins/stock/ibi_capital/order_parser.py | 5 ++- pit38/plugins/stock/ibi_capital/pdf_reader.py | 22 ++++------ pyproject.toml | 4 +- tests/test_ibi_pdf_reader.py | 41 ++----------------- 8 files changed, 19 insertions(+), 74 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f171060..0e45df2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -172,8 +172,8 @@ architecture overview (PL/EN). - **Stocks, CSV input** — `pit38/plugins/stock/revolut/` (most complete, handles BOM, unknown operations, dividends, fees, stock splits) - **Stocks, PDF input** — `pit38/plugins/stock/ibi_capital/` (regex-based - PDF text parsing via pdfplumber as an optional extra, synthetic BUY - emitted from order confirmation) + PDF text parsing via pdfplumber, synthetic BUY emitted from order + confirmation) - **Crypto** — `pit38/plugins/crypto/binance/` **High-level recipe:** diff --git a/README.md b/README.md index a7ef317..9332bae 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ pit38 import revolut-stock -i revolut_export.csv -o transactions.csv pit38 import revolut-crypto -i revolut_export.csv -o transactions.csv pit38 import etrade -i etrade_export.csv -o transactions.csv pit38 import binance -i binance_export.csv -o transactions.csv -pit38 import ibi-capital -i ~/ibi_orders/ -o transactions.csv # PDF input; install with: pipx install 'pit-38[ibi]' +pit38 import ibi-capital -i ~/ibi_orders/ -o transactions.csv # PDF input (single file or directory) ``` You can combine multiple files from different brokers: diff --git a/README.pl.md b/README.pl.md index 9c6ef07..4d4b966 100644 --- a/README.pl.md +++ b/README.pl.md @@ -57,7 +57,7 @@ pit38 import revolut-stock -i eksport_revolut.csv -o transakcje.csv pit38 import revolut-crypto -i eksport_revolut.csv -o transakcje.csv pit38 import etrade -i eksport_etrade.csv -o transakcje.csv pit38 import 
binance -i eksport_binance.csv -o transakcje.csv -pit38 import ibi-capital -i ~/ibi_orders/ -o transakcje.csv # input: PDF; instalacja: pipx install 'pit-38[ibi]' +pit38 import ibi-capital -i ~/ibi_orders/ -o transakcje.csv # input: PDF (plik lub katalog) ``` Możesz łączyć pliki z różnych brokerów: diff --git a/docs/BROKERS.md b/docs/BROKERS.md index 26b3390..6876f46 100644 --- a/docs/BROKERS.md +++ b/docs/BROKERS.md @@ -29,19 +29,6 @@ and emits standardized transactions plus service fees. - Automatically handle currencies other than USD (all sample PDFs were USD — if you have a non-USD order, open an issue). -### Installation - -Parsing PDFs requires the optional `pdfplumber` dependency: - -```bash -pipx install 'pit-38[ibi]' -# or for development: -pip install -e ".[ibi,dev]" -``` - -Users who don't need IBI support can skip the `[ibi]` extra — the core -tool works without `pdfplumber`. - ### Usage Download your order confirmations from the IBI Capital portal into a diff --git a/pit38/plugins/stock/ibi_capital/order_parser.py b/pit38/plugins/stock/ibi_capital/order_parser.py index 22c42b9..cf12853 100644 --- a/pit38/plugins/stock/ibi_capital/order_parser.py +++ b/pit38/plugins/stock/ibi_capital/order_parser.py @@ -9,8 +9,9 @@ the flat text than off the partially-ordered table cells. This module must not import pdfplumber. That way unit tests can exercise -the parser against anonymized ``.txt`` fixtures without installing the -optional ``[ibi]`` extra. +the parser against anonymized ``.txt`` fixtures without pulling in the +PDF-parsing stack, and the parser stays usable for upstream callers that +feed text from a different source. 
""" from __future__ import annotations diff --git a/pit38/plugins/stock/ibi_capital/pdf_reader.py b/pit38/plugins/stock/ibi_capital/pdf_reader.py index 3ed6cca..2c33d41 100644 --- a/pit38/plugins/stock/ibi_capital/pdf_reader.py +++ b/pit38/plugins/stock/ibi_capital/pdf_reader.py @@ -1,30 +1,24 @@ """Extract plain text from an IBI Capital order confirmation PDF. -Thin wrapper over pdfplumber so the optional ``[ibi]`` dependency stays -isolated. Plugin users who don't need IBI support don't install -pdfplumber and don't get import-time breakage. +Thin wrapper over pdfplumber so callers don't need to know about the +``with`` protocol or per-page iteration. The parser module sits directly +on top of this and treats the full-document text as a flat string. """ from __future__ import annotations from pathlib import Path +import pdfplumber + def extract_text(pdf_path: str | Path) -> str: """Return the concatenated text of every page in ``pdf_path``. pdfplumber preserves layout order reasonably well for IBI's fixed - template, which is all the parser needs. Rare multi-page orders - still work because we join pages with newlines and the parser uses - global regex search. + template, which is all the parser needs. Multi-page orders still + work because pages join with newlines and the parser uses global + regex search. """ - try: - import pdfplumber - except ImportError as e: - raise ImportError( - "IBI Capital plugin requires pdfplumber. 
" - "Install with: pip install 'pit-38[ibi]'" - ) from e - pages: list[str] = [] with pdfplumber.open(str(pdf_path)) as pdf: for page in pdf.pages: diff --git a/pyproject.toml b/pyproject.toml index 84b44f3..09e3707 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ "pandas~=2.2.2", "openpyxl~=3.1.2", "babel~=2.14", + "pdfplumber~=0.11.0", ] [project.optional-dependencies] @@ -27,9 +28,6 @@ dev = [ "pytest-cov~=5.0", "ipdb==0.13.13", ] -ibi = [ - "pdfplumber~=0.11.0", -] [tool.setuptools.packages.find] where = ["."] diff --git a/tests/test_ibi_pdf_reader.py b/tests/test_ibi_pdf_reader.py index c6fa210..51244da 100644 --- a/tests/test_ibi_pdf_reader.py +++ b/tests/test_ibi_pdf_reader.py @@ -1,17 +1,12 @@ """Tests for the pdfplumber wrapper. -The wrapper is three lines of real logic. The tests below cover both -branches: the ImportError guard (for users who install ``pit-38`` -without the ``[ibi]`` extra) and the happy path, exercised against a +The wrapper is three lines of real logic. We exercise it against a minimal hand-crafted PDF so we don't need to commit a binary fixture -that would risk leaking PII. +that would risk leaking PII from real IBI statements. """ -import builtins -import sys import tempfile from pathlib import Path from unittest import TestCase -from unittest.mock import patch # A valid but minimal PDF containing a single page with one text run. # Hand-crafted so we can test without committing a binary fixture — @@ -42,37 +37,7 @@ """ -class TestExtractTextImportGuard(TestCase): - def test_import_error_has_actionable_message(self): - # Simulate a user who ran `pipx install pit-38` without `[ibi]` - # by hiding any already-imported pdfplumber module and blocking - # re-import. 
- real_import = builtins.__import__ - - def _block_pdfplumber(name, *args, **kwargs): - if name == "pdfplumber": - raise ImportError("No module named 'pdfplumber'") - return real_import(name, *args, **kwargs) - - saved_pdfplumber = sys.modules.pop("pdfplumber", None) - try: - with patch.object(builtins, "__import__", _block_pdfplumber): - from pit38.plugins.stock.ibi_capital import pdf_reader - - with self.assertRaises(ImportError) as ctx: - pdf_reader.extract_text("/tmp/whatever.pdf") - - msg = str(ctx.exception) - self.assertIn("pdfplumber", msg) - # The whole point of the wrapped error is steering the user - # to the right install incantation. - self.assertIn("pit-38[ibi]", msg) - finally: - if saved_pdfplumber is not None: - sys.modules["pdfplumber"] = saved_pdfplumber - - -class TestExtractTextHappyPath(TestCase): +class TestExtractText(TestCase): def test_extracts_text_from_minimal_pdf(self): from pit38.plugins.stock.ibi_capital.pdf_reader import extract_text From 400d9282e9ba5dc8edf4314f991d445e9930b5dc Mon Sep 17 00:00:00 2001 From: przemyslawbialon Date: Sat, 25 Apr 2026 18:43:41 +0200 Subject: [PATCH 4/5] test: Add CliRunner e2e tests for ibi-capital import command Parity with the existing revolut-stock CliRunner test, plus five additional paths that matter specifically for IBI: - Single PDF + --ticker override (happy path) - RSU informational note appears only when Price For Tax = 0 - Directory input recurses for *.pdf and ignores non-PDF files - Empty directory aborts with "No PDF files" message - Unknown company without --ticker surfaces the helpful error pointing at --ticker and the companies.json PR link extract_text is mocked with the existing text fixtures so the CLI drives the full function end-to-end without needing IBI-shaped binary PDFs in the repo. cli.py coverage on the import_ibi_capital function: 100% (the three remaining uncovered lines in cli.py belong to import_binance, which was uncovered before this PR). 
--- tests/e2e/test_cli_e2e.py | 152 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/tests/e2e/test_cli_e2e.py b/tests/e2e/test_cli_e2e.py index b36c4a4..e6edcd8 100644 --- a/tests/e2e/test_cli_e2e.py +++ b/tests/e2e/test_cli_e2e.py @@ -68,3 +68,155 @@ def test_import_revolut_stock(self): self.assertEqual(result.exit_code, 0, msg=result.output) self.assertIn("Saved", result.output) self.assertTrue(pathlib.Path("output.csv").exists()) + + +class TestImportIbiCapitalCLI(TestCase): + """CLI integration for ``pit38 import ibi-capital``. + + pdfplumber's PDF-extraction path is covered directly in + test_ibi_pdf_reader.py. Here we mock it out so tests stay text- + fixture-based (no binary fixtures, no PII risk) while still driving + the full CLI function end-to-end. + """ + + RSU_TEXT = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() + ESPP_TEXT = (FIXTURES / "ibi_order_fake_espp.txt").read_text() + + def _invoke(self, args, extract_text_return): + """Invoke the CLI with extract_text mocked to a canned string.""" + with patch( + "pit38.plugins.stock.ibi_capital.pdf_reader.extract_text", + return_value=extract_text_return, + ): + return CliRunner().invoke(main, args) + + def test_import_single_pdf_with_ticker_override(self): + with CliRunner().isolated_filesystem(): + # The file's content doesn't matter — extract_text is mocked. + # click.Path(exists=True) just needs the file to be present. 
+ pathlib.Path("order.pdf").write_bytes(b"%PDF-1.4\n%%EOF\n") + + result = self._invoke( + [ + "import", "ibi-capital", + "-i", "order.pdf", + "-o", "out.csv", + "--ticker", "ACME", + "-ll", "ERROR", + ], + extract_text_return=self.RSU_TEXT, + ) + + self.assertEqual(result.exit_code, 0, msg=result.output) + self.assertIn("Saved", result.output) + self.assertIn("IBI Capital PDF", result.output) + csv_text = pathlib.Path("out.csv").read_text() + self.assertIn("BUY", csv_text) + self.assertIn("SELL", csv_text) + self.assertIn("ACME", csv_text) + + def test_rsu_info_message_shown(self): + with CliRunner().isolated_filesystem(): + pathlib.Path("order.pdf").write_bytes(b"%PDF-1.4\n%%EOF\n") + + result = self._invoke( + [ + "import", "ibi-capital", + "-i", "order.pdf", + "-o", "out.csv", + "--ticker", "ACME", + "-ll", "ERROR", + ], + extract_text_return=self.RSU_TEXT, + ) + + self.assertEqual(result.exit_code, 0, msg=result.output) + # Informational RSU note appears only when any order has + # Price For Tax = 0 (the RSU fixture does). + self.assertIn("RSU", result.output) + + def test_espp_does_not_trigger_rsu_message(self): + with CliRunner().isolated_filesystem(): + pathlib.Path("order.pdf").write_bytes(b"%PDF-1.4\n%%EOF\n") + + result = self._invoke( + [ + "import", "ibi-capital", + "-i", "order.pdf", + "-o", "out.csv", + "--ticker", "ACME", + "-ll", "ERROR", + ], + extract_text_return=self.ESPP_TEXT, + ) + + self.assertEqual(result.exit_code, 0, msg=result.output) + self.assertNotIn("RSU", result.output) + + def test_directory_input_scans_pdfs(self): + with CliRunner().isolated_filesystem(): + pathlib.Path("orders").mkdir() + pathlib.Path("orders/a.pdf").write_bytes(b"%PDF-1.4\n%%EOF\n") + pathlib.Path("orders/b.pdf").write_bytes(b"%PDF-1.4\n%%EOF\n") + # Non-PDF files in the same directory are ignored. 
+ pathlib.Path("orders/readme.txt").write_text("not a pdf") + + # Both PDFs resolve through the same mocked text — this is + # fine for asserting "everything got processed", just not for + # per-file content validation. + result = self._invoke( + [ + "import", "ibi-capital", + "-i", "orders", + "-o", "out.csv", + "--ticker", "ACME", + "-ll", "ERROR", + ], + extract_text_return=self.RSU_TEXT, + ) + + self.assertEqual(result.exit_code, 0, msg=result.output) + self.assertIn("2 IBI Capital PDF", result.output) + + def test_no_pdfs_found_aborts_cleanly(self): + with CliRunner().isolated_filesystem(): + pathlib.Path("orders").mkdir() + # Empty directory — click.Path(exists=True) accepts the dir, + # but the rglob finds zero PDFs. + + result = self._invoke( + [ + "import", "ibi-capital", + "-i", "orders", + "-o", "out.csv", + "-ll", "ERROR", + ], + extract_text_return="", + ) + + self.assertNotEqual(result.exit_code, 0) + self.assertIn("No PDF files", result.output) + + def test_unknown_company_without_ticker_errors(self): + with CliRunner().isolated_filesystem(): + pathlib.Path("order.pdf").write_bytes(b"%PDF-1.4\n%%EOF\n") + + # RSU fixture has Company: acme.com which is NOT in + # companies.json — without --ticker, the resolver must fail + # with a helpful message rather than silently defaulting. + result = self._invoke( + [ + "import", "ibi-capital", + "-i", "order.pdf", + "-o", "out.csv", + "-ll", "ERROR", + ], + extract_text_return=self.RSU_TEXT, + ) + + self.assertNotEqual(result.exit_code, 0) + # Click wraps uncaught exceptions; the inner message surfaces + # in `result.exception` or `result.output` depending on mode. 
+ error_text = str(result.exception) if result.exception else result.output + self.assertIn("acme.com", error_text) + self.assertIn("--ticker", error_text) From 278b31631124a150954a5c7e01e96421e6b22c1d Mon Sep 17 00:00:00 2001 From: przemyslawbialon Date: Sat, 25 Apr 2026 19:07:35 +0200 Subject: [PATCH 5/5] refactor: Drop RSU informational message from ibi-capital CLI The conservative cost-basis-is-0 treatment for RSU sales is the standard KIS line, not a risky default that needs user review on every run. Printing it after every import was noise. Also drops the two tests that asserted on that message. Remaining tests still cover happy path, directory scan, empty directory, and unknown-company error. --- pit38/cli.py | 10 ---------- tests/e2e/test_cli_e2e.py | 39 --------------------------------------- 2 files changed, 49 deletions(-) diff --git a/pit38/cli.py b/pit38/cli.py index 1e852ed..9310b14 100644 --- a/pit38/cli.py +++ b/pit38/cli.py @@ -129,28 +129,18 @@ def import_ibi_capital(input_paths, output_path, ticker, log_level): transactions = [] fees = [] - saw_rsu = False for pdf_path in pdfs: parsed = parse_order_report(extract_text(pdf_path)) resolved = resolve_ticker(parsed.company, override=ticker) t, f = build_records(parsed, resolved) transactions.extend(t) fees.extend(f) - if parsed.price_for_tax == 0: - saw_rsu = True GenericCsvSaver.save(transactions, fees, output_path) click.echo( f"Saved {len(transactions)} transactions and {len(fees)} service fees " f"from {len(pdfs)} IBI Capital PDF(s) to {output_path}" ) - if saw_rsu: - click.echo( - "\nℹ For RSU orders (Price For Tax = 0), cost basis is recorded as 0 — " - "consistent with the standard Polish tax treatment of RSU share sales. 
" - "See docs/BROKERS.md#ibi-capital for details.", - err=True, - ) @import_cmd.command("binance") diff --git a/tests/e2e/test_cli_e2e.py b/tests/e2e/test_cli_e2e.py index e6edcd8..6c9aa61 100644 --- a/tests/e2e/test_cli_e2e.py +++ b/tests/e2e/test_cli_e2e.py @@ -80,7 +80,6 @@ class TestImportIbiCapitalCLI(TestCase): """ RSU_TEXT = (FIXTURES / "ibi_order_fake_rsu.txt").read_text() - ESPP_TEXT = (FIXTURES / "ibi_order_fake_espp.txt").read_text() def _invoke(self, args, extract_text_return): """Invoke the CLI with extract_text mocked to a canned string.""" @@ -115,44 +114,6 @@ def test_import_single_pdf_with_ticker_override(self): self.assertIn("SELL", csv_text) self.assertIn("ACME", csv_text) - def test_rsu_info_message_shown(self): - with CliRunner().isolated_filesystem(): - pathlib.Path("order.pdf").write_bytes(b"%PDF-1.4\n%%EOF\n") - - result = self._invoke( - [ - "import", "ibi-capital", - "-i", "order.pdf", - "-o", "out.csv", - "--ticker", "ACME", - "-ll", "ERROR", - ], - extract_text_return=self.RSU_TEXT, - ) - - self.assertEqual(result.exit_code, 0, msg=result.output) - # Informational RSU note appears only when any order has - # Price For Tax = 0 (the RSU fixture does). - self.assertIn("RSU", result.output) - - def test_espp_does_not_trigger_rsu_message(self): - with CliRunner().isolated_filesystem(): - pathlib.Path("order.pdf").write_bytes(b"%PDF-1.4\n%%EOF\n") - - result = self._invoke( - [ - "import", "ibi-capital", - "-i", "order.pdf", - "-o", "out.csv", - "--ticker", "ACME", - "-ll", "ERROR", - ], - extract_text_return=self.ESPP_TEXT, - ) - - self.assertEqual(result.exit_code, 0, msg=result.output) - self.assertNotIn("RSU", result.output) - def test_directory_input_scans_pdfs(self): with CliRunner().isolated_filesystem(): pathlib.Path("orders").mkdir()