Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 30 additions & 4 deletions pit38/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
from collections import Counter

import click
from loguru import logger
Expand All @@ -24,6 +25,24 @@ def import_cmd():
pass


def _print_skipped_summary(skipped_by_type: Counter) -> None:
"""Pretty-print a skip summary to stderr for the user to review."""
if not skipped_by_type:
return
total = sum(skipped_by_type.values())
click.echo(
f"\nSkipped {total} rows (operation types not recognized as tax-relevant):",
err=True,
)
for op_type, count in skipped_by_type.most_common():
click.echo(f" • {op_type}: {count} rows", err=True)
click.echo(
"\nIf you believe any of these should be taxed, please check with your\n"
"tax advisor and open an issue: https://github.com/pbialon/pit-38/issues",
err=True,
)


@import_cmd.command("revolut-stock")
@click.option("-i", "--input", "input_path", type=click.Path(exists=True), required=True, help="Revolut stock export CSV")
@click.option("-o", "--output", "output_path", type=click.Path(), required=True, help="Output standardized CSV")
Expand All @@ -36,10 +55,17 @@ def import_revolut_stock(input_path, output_path, log_level):
from pit38.plugins.stock.revolut.operation_row_parser import OperationRowParser
from pit38.plugins.stock.generic_saver import GenericCsvSaver

transactions = CsvService(input_path, TransactionRowParser).read()
operations = CsvService(input_path, OperationRowParser).read()
GenericCsvSaver.save(transactions, operations, output_path)
click.echo(f"Saved {len(transactions)} transactions and {len(operations)} operations to {output_path}")
# Both parsers scan every row; they skip the same unknown-type rows, so
# the skip counters are identical. Report just once (from the first pass).
transaction_result = CsvService(input_path, TransactionRowParser).read_with_summary()
operation_result = CsvService(input_path, OperationRowParser).read_with_summary()

GenericCsvSaver.save(transaction_result.records, operation_result.records, output_path)
click.echo(
f"Saved {len(transaction_result.records)} transactions and "
f"{len(operation_result.records)} operations to {output_path}"
)
_print_skipped_summary(transaction_result.skipped_by_type)


@import_cmd.command("revolut-crypto")
Expand Down
7 changes: 3 additions & 4 deletions pit38/data_sources/crypto_loader/csv_loader.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import csv
from typing import List, Optional
import pendulum
from loguru import logger

from pit38.data_sources.csv_utils import open_csv_reader
from pit38.domain.transactions import Transaction, AssetValue, Action
from pit38.domain.currency_exchange_service.currencies import parse_currency, FiatValue, Currency, InvalidCurrencyException

Expand All @@ -11,9 +11,8 @@ class Loader:
def load(cls, file_path: str) -> List[Transaction]:
transactions = []
logger.info(f"Loading transactions from {file_path}...")

with open(file_path, "r") as csvfile:
reader = csv.DictReader(csvfile)

with open_csv_reader(file_path) as reader:
for row in reader:
transaction = cls._parse_row(row)
if transaction:
Expand Down
35 changes: 35 additions & 0 deletions pit38/data_sources/csv_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Shared CSV reading utilities.

Centralizes two quirks that broke real-world broker exports (#33):

1. UTF-8 BOM — Revolut (and many Excel-generated CSVs) prefix files with
`\\xef\\xbb\\xbf`. Opening with plain `"r"` leaves the BOM in the first
column name (e.g. `'\\ufeffdate'` instead of `'date'`). `utf-8-sig`
strips it cleanly and is a no-op for files without BOM.

2. Header case drift — brokers change column capitalization between
exports (`Date` → `date`). We normalize headers to `lower().strip()`
once at read time so downstream code uses a stable form.

Write side stays as plain `utf-8` (no BOM) — Postel's law: liberal in
what we accept, strict in what we emit.
"""
import csv
from contextlib import contextmanager
from typing import Iterator


@contextmanager
def open_csv_reader(path: str, delimiter: str = ",") -> Iterator[csv.DictReader]:
    """Yield a ``csv.DictReader`` whose header names are BOM-free and lowercase.

    The file is decoded as ``utf-8-sig`` so a leading UTF-8 BOM never ends up
    in the first column name, and every header is passed through
    ``strip().lower()`` before the reader is handed to the caller. The file
    stays open for the duration of the ``with`` body and is closed afterwards.

    Example:
        with open_csv_reader("export.csv") as reader:
            for row in reader:
                value = row["date"]  # keys are always lowercase
    """
    with open(path, "r", encoding="utf-8-sig", newline="") as handle:
        dict_reader = csv.DictReader(handle, delimiter=delimiter)
        # Accessing fieldnames reads the header row; it is None/empty for a
        # file without any header, in which case there is nothing to rewrite.
        raw_headers = dict_reader.fieldnames
        if raw_headers:
            normalized = []
            for header in raw_headers:
                normalized.append(header.strip().lower())
            dict_reader.fieldnames = normalized
        yield dict_reader
7 changes: 3 additions & 4 deletions pit38/data_sources/stock_loader/csv_loader.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import csv
from typing import List, Optional
import pendulum
from loguru import logger

from pit38.data_sources.csv_utils import open_csv_reader
from pit38.domain.stock.operations.operation import Operation, OperationType
from pit38.data_sources.stock_loader.factory import OperationFactory
from pit38.domain.currency_exchange_service.currencies import parse_currency, FiatValue, InvalidCurrencyException
Expand All @@ -14,9 +14,8 @@ class Loader:
def load(cls, file_path: str) -> List[Operation | Transaction]:
operations = []
logger.info(f"Loading stock operations from {file_path}...")

with open(file_path, "r") as csvfile:
reader = csv.DictReader(csvfile)

with open_csv_reader(file_path) as reader:
for row in reader:
operation = cls._parse_row(row)
if operation:
Expand Down
13 changes: 6 additions & 7 deletions pit38/plugins/crypto/binance/csv.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import csv
from datetime import datetime
from enum import Enum
from typing import List

import pendulum
from loguru import logger
from pit38.data_sources.csv_utils import open_csv_reader
from pit38.domain.currency_exchange_service.currencies import CURRENCY_MAP, FiatValue
from pit38.domain.transactions.action import Action
from pit38.domain.transactions.asset import AssetValue
Expand All @@ -19,10 +19,10 @@ class BinanceOperationType(Enum):

class BinanceTransaction:
def __init__(self, row: dict):
self.utc_time = datetime.strptime(row["UTC_Time"], "%Y-%m-%d %H:%M:%S")
self.operation = row["Operation"]
self.coin = row["Coin"]
self.change = float(row["Change"])
self.utc_time = datetime.strptime(row["utc_time"], "%Y-%m-%d %H:%M:%S")
self.operation = row["operation"]
self.coin = row["coin"]
self.change = float(row["change"])

def operation_type(self) -> BinanceOperationType:
if self.operation == BinanceOperationType.CONVERT.value:
Expand Down Expand Up @@ -75,8 +75,7 @@ def _process_transaction_operations(self, binance_transactions: List[BinanceTran
def read(self, file_path: str) -> List[Transaction]:
convert_operations = []
transaction_operations = []
with open(file_path, 'r') as file:
reader = csv.DictReader(file)
with open_csv_reader(file_path) as reader:
for row in reader:
binance_transaction = BinanceTransaction(row)
if binance_transaction.operation_type() == BinanceOperationType.DEPOSIT:
Expand Down
6 changes: 3 additions & 3 deletions pit38/plugins/crypto/revolut/csv.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import List

from loguru import logger
import csv

from pit38.data_sources.csv_utils import open_csv_reader
from pit38.domain.transactions.transaction import Transaction
from pit38.plugins.crypto.revolut.row_parser import RowParser

Expand All @@ -13,8 +14,7 @@ def __init__(self, row_parser: RowParser):
def read(self, input_path: str) -> List[Transaction]:
transactions = []
logger.info(f"Reading transactions from {input_path}...")
with open(input_path, 'r') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
with open_csv_reader(input_path) as reader:
for row in reader:
transaction = self.row_parser.parse(row)
if not transaction:
Expand Down
14 changes: 7 additions & 7 deletions pit38/plugins/crypto/revolut/row_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@ def parse(cls, row: Dict) -> Transaction:

@classmethod
def _crypto_value(cls, row: dict) -> AssetValue:
currency = row["Symbol"]
amount = row["Quantity"].replace(",", "")
currency = row["symbol"]
amount = row["quantity"].replace(",", "")
return AssetValue(float(amount), currency)

@classmethod
def _fiat_value(cls, row: dict) -> FiatValue:
if row["Value"] == "":
if row["value"] == "":
return FiatValue(0, Currency.ZLOTY)

value = row["Value"].replace(",", "")
value = row["value"].replace(",", "")

amount_match = re.search(r'\d+\.?\d{2}', value)
if not amount_match:
Expand All @@ -53,16 +53,16 @@ def _fiat_value(cls, row: dict) -> FiatValue:

@classmethod
def _action(cls, row: dict) -> Action:
if row["Type"] in BUY_OPERATION_TYPES:
if row["type"] in BUY_OPERATION_TYPES:
return Action.BUY
if row["Type"] in SELL_OPERATION_TYPES:
if row["type"] in SELL_OPERATION_TYPES:
return Action.SELL
# Staking and unstaking are not important for tax purposes
return None

@classmethod
def _datetime(cls, row: dict) -> pendulum.DateTime:
date_str = row["Date"].replace("Sept", "Sep") # revolut uses Sept instead of Sep
date_str = row["date"].replace("Sept", "Sep") # revolut uses Sept instead of Sep
supported_formats = (
"DD MMM YYYY, HH:mm:ss",
"MMM DD, YYYY, hh:mm:ss A",
Expand Down
95 changes: 95 additions & 0 deletions pit38/plugins/normalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Shared CSV value normalization helpers for broker plugins.

Brokers emit various layouts for the same logical value (amounts,
currencies). This module centralizes rewrite rules so plugins can focus
on broker-specific shape differences rather than re-inventing number
parsing or locale detection.

Used by:
- pit38.plugins.stock.revolut.row_parser
- pit38.plugins.stock.etrade.row_parser
"""
import re

from babel.numbers import parse_decimal, NumberFormatError


# Locales tried in order. Order matters: parse_amount() returns the result
# of the first locale that accepts the input, so a string that is valid in
# more than one locale resolves to the earliest entry here.
# en_US covers Revolut's historical exports (1,234.56); de_DE covers
# E*Trade's European output (1 234,56 / 1.234,56) and forward-compatible
# Revolut EU exports. If a new broker needs something else (e.g. fr_FR
# with nbsp thousand separator), add it here.
_NUMBER_LOCALES = ("en_US", "de_DE")


def parse_amount(s: str) -> float:
    """Parse a number string, trying the configured locales in order.

    Handles thousand/decimal separator combinations:
        "1,234.56"  (en_US)                        → 1234.56
        "1.234,56"  (de_DE)                        → 1234.56
        "1 234,56"  (whitespace stripped, de_DE)   → 1234.56
        "-0.07"     (en_US)                        → -0.07
        "-0,07"     (de_DE)                        → -0.07

    All Unicode whitespace is removed before parsing: regular space,
    non-breaking space (U+00A0), narrow no-break space (U+202F), thin
    space, tabs and stray newlines. Some brokers use these as thousand
    separators, but CLDR locales don't uniformly recognize them —
    stripping first keeps the code simple.

    Uses babel's ``strict=True`` so ambiguous inputs (e.g. "1,317.06"
    under the de_DE locale, which would otherwise be misinterpreted as
    1.31706) fail fast instead of being silently mis-parsed. The locale
    chain then tries the next locale.

    Args:
        s: Raw amount text as found in a broker export.

    Returns:
        The parsed value as a float, for compatibility with the existing
        FiatValue.amount. When #61 migrates to Decimal, this can return
        babel's Decimal directly (it already does internally).

    Raises:
        ValueError: If no configured locale can parse the input.
    """
    # str.split() without arguments splits on every Unicode whitespace
    # character, so re-joining the pieces drops all of them in one pass
    # (not only U+00A0 and the ASCII space).
    cleaned = "".join(s.split())
    for locale in _NUMBER_LOCALES:
        try:
            return float(parse_decimal(cleaned, locale=locale, strict=True))
        except NumberFormatError:
            # Not valid under this locale's separators; try the next one.
            continue
    raise ValueError(
        f"Cannot parse amount in any supported locale "
        f"({', '.join(_NUMBER_LOCALES)}): {s!r}"
    )


# A "currency symbol" is any single character that is neither a word
# character nor whitespace, EXCLUDING the characters a bare number may start
# with (sign, decimal point, grouping comma). Without that exclusion a plain
# "-5.00" would be rewritten to "- 5.00" and ".5" to ". 5".
_SIGN_BEFORE_CODE = re.compile(r"^-([A-Z]{3}\s+)")
_SIGN_BEFORE_SYMBOL = re.compile(r"^-([^\w\s.,+-])")
_SYMBOL_GLUED_TO_AMOUNT = re.compile(r"^([^\w\s.,+-])(?=[-\d])")


def normalize_currency_layout(raw: str) -> str:
    """Rewrite a currency+amount string to canonical layout.

    Canonical form: ``<currency><single space><signed_amount>``

    Input variants collected from real Revolut and E*Trade exports:

    ==============  ==============================================
    input           canonical
    ==============  ==============================================
    "USD 529.68"    "USD 529.68"   (already canonical)
    "-USD 529.68"   "USD -529.68"  (move sign past currency code)
    "USD -0.07"     "USD -0.07"    (already; #33 @inobrevi case)
    "$500"          "$ 500"        (insert space)
    "-$529.68"      "$ -529.68"    (move sign + insert space)
    "$-0.07"        "$ -0.07"      (insert space)
    "€250.00"       "€ 250.00"     (insert space)
    "$25 001,75"    "$ 25 001,75"  (only insert space; amount preserved)
    ==============  ==============================================

    Sign stays attached to the amount side. parse_amount() handles the
    signed number per-locale.

    Format-only: does NOT strip the sign and does NOT touch the inside
    of the amount (digits/separators). That's parse_amount's job. Strings
    that carry no leading currency symbol or code (e.g. a bare "-5.00")
    are returned unchanged.

    Args:
        raw: Currency+amount text as found in a broker export.

    Returns:
        The same text with the currency first, one space, then the signed
        amount; or ``raw`` unchanged when none of the rewrite rules apply.
    """
    # Move leading minus past currency code: "-USD X" → "USD -X"
    raw = _SIGN_BEFORE_CODE.sub(r"\1-", raw)
    # Move leading minus past currency symbol + insert space: "-$X" → "$ -X"
    raw = _SIGN_BEFORE_SYMBOL.sub(r"\1 -", raw)
    # Insert space between currency symbol and amount: "$X" → "$ X".
    # Currency codes already have a space after them (3-letter codes are
    # word characters, so the pattern can only match a leading symbol).
    raw = _SYMBOL_GLUED_TO_AMOUNT.sub(r"\1 ", raw)
    return raw
23 changes: 11 additions & 12 deletions pit38/plugins/stock/etrade/csv.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import csv
from typing import Dict, List, Tuple
from loguru import logger
import pendulum

from pit38.data_sources.csv_utils import open_csv_reader
from pit38.domain.currency_exchange_service.currencies import FiatValue
from pit38.domain.transactions.action import Action
from pit38.domain.transactions.asset import AssetValue
Expand All @@ -15,10 +15,9 @@ class EtradeCsvReader:
def read(cls, file_path: str) -> List[Transaction]:
transactions = []
logger.info(f"Reading transactions from {file_path}...")
with open(file_path, "r") as csvfile:
reader = csv.DictReader(csvfile, delimiter=",")
with open_csv_reader(file_path) as reader:
for row in reader:
if row["Record Type"] == "Summary":
if row["record type"] == "Summary":
continue
buy, sell = cls.parse_row(row)
transactions.extend((buy, sell))
Expand All @@ -42,28 +41,28 @@ def _buy_transaction(cls, row: Dict) -> Transaction:
asset=cls._asset(row),
fiat_value=cls._buy_cost(row),
action=Action.BUY,
date=pendulum.parse(str(row["Date Acquired"]), strict=False),
date=pendulum.parse(str(row["date acquired"]), strict=False),
)

@classmethod
def _sell_transaction(cls, row: Dict) -> Transaction:
return Transaction(
asset=cls._asset(row),
fiat_value=cls._sell_cost(row),
action=Action.SELL,
date=pendulum.parse(str(row["Date Sold"]), strict=False),
date=pendulum.parse(str(row["date sold"]), strict=False),
)

@classmethod
def _asset(cls, row: Dict) -> AssetValue:
quantity = float(row['Qty.'])
stock_name = row['Symbol']
quantity = float(row['qty.'])
stock_name = row['symbol']
return AssetValue(quantity, stock_name)

@classmethod
def _buy_cost(cls, row: Dict) -> FiatValue:
return FiatValueParser.parse(row["Acquisition Cost"])
return FiatValueParser.parse(row["acquisition cost"])

@classmethod
def _sell_cost(cls, row: Dict) -> FiatValue:
return FiatValueParser.parse(row["Total Proceeds"])
return FiatValueParser.parse(row["total proceeds"])
Loading
Loading