Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions .claude/settings.local.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,23 @@
"permissions": {
"allow": [
"Bash(wc *)",
"Bash(poetry show *)",
"WebFetch(domain:pypi.org)",
"WebFetch(domain:commons.wikimedia.org)",
"WebFetch(domain:doc.wikimedia.org)",
"WebFetch(domain:pypi.org)",
"Skill(code-review:code-review)",
"mcp__plugin_context7_context7__resolve-library-id",
"mcp__plugin_context7_context7__query-docs",
"Bash(poetry run ty check *)",
"Bash(poetry run ruff check *)",
"Bash(but status *)",
"Bash(poetry run ty check *)",
"Bash(poetry show *)",
"Bash(gh pr view *)",
"Bash(but branch --help)",
"Bash(but commit *)",
"Bash(but pr new *)",
"WebFetch(domain:commons.wikimedia.org)",
"Bash(but commit *)"
"Bash(but status *)",
"Bash(but --help)",
"Bash(poetry run pytest *)",
"Bash(poetry run isort *)"
]
},
"spinnerTipsEnabled": false
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ jobs:
run: poetry run isort --check .

- name: Run type checker
run: poetry run ty check
run: poetry run ty check src/
29 changes: 29 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# CI workflow: run the pytest suite on every pull request.
name: Tests

on:
  pull_request:

# Restrict the default GITHUB_TOKEN to read-only repository access.
permissions:
  contents: read

jobs:
  tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v6

      # Poetry must be installed before setup-python so that
      # the 'poetry' cache backend below can find it.
      - name: Install poetry
        run: pipx install poetry

      - name: Set up Python 3.13
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'
          cache: 'poetry'

      # Install the project plus the dev dependency group (pytest etc.).
      - name: Install dependencies
        run: poetry install --with dev

      - name: Run tests
        run: poetry run pytest
7 changes: 4 additions & 3 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ poetry run ruff format
poetry run isort .

# Type check
poetry run ty check
```
poetry run ty check src/

Tests are not yet set up in this project.
# Run tests
poetry run pytest
```

## Architecture

Expand Down
125 changes: 124 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ exclude = [
dev = [
"ruff (>=0.15.9,<0.16.0)",
"ty (>=0.0.28,<0.0.29)",
"isort (>=8.0.1,<9.0.0)"
"isort (>=8.0.1,<9.0.0)",
"pytest (>=9.0.3,<10.0.0)",
"pytest-mock (>=3.15.1,<4.0.0)",
"pytest-timeout (>=2.4.0,<3.0.0)"
]


[tool.isort]
profile = "black"
6 changes: 6 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Pytest configuration.
[pytest]
# Collect tests only from the tests/ directory.
testpaths = tests
# Discovery patterns for test modules and test functions.
python_files = test_*.py
python_functions = test_*
# Make the src/ layout importable without installing the package.
pythonpath = src
# Fail any single test that runs longer than 0.25 s
# (requires the pytest-timeout plugin).
timeout = 0.25
43 changes: 30 additions & 13 deletions src/wikibots/flickr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,21 @@
from datetime import datetime
from time import perf_counter

import httpx
from flickr_api import FlickrApi
from flickr_api.exceptions import PermissionDenied, ResourceNotFound
from flickr_api.models import SinglePhotoInfo
from flickr_api.models.photo import Location
from flickr_url_parser import parse_flickr_url

from wikibots.lib.bot import BaseBot
from wikibots.lib.bot import BaseBot, RateLimitExhausted
from wikibots.lib.claim import WBTIME_PRECISION, Claim
from wikibots.lib.wikidata import WikidataEntity, WikidataProperty

logger = logging.getLogger(__name__)

RATE_LIMIT_DELAYS = (60, 180, 300)


class FlickrBot(BaseBot):
redis_prefix = "xQ6cz5J84Viw/K6FIcOH1kxJjfiS8jO56AoSmhBgO/A="
Expand Down Expand Up @@ -137,18 +140,32 @@ def get_flickr_photo(self, flickr_photo_id: str) -> None:
logger.warning("Flickr photo skipped due to Redis cache")
return

try:
start = perf_counter()
self.photo = self.flickr_api.get_single_photo_info(photo_id=flickr_photo_id)
logger.info(
f"Retrieved Flickr photo in {(perf_counter() - start) * 1000:.0f} ms"
)
except (ResourceNotFound, PermissionDenied) as e:
logger.warning(f"[{flickr_photo_id}] {e}")
self.redis.set(redis_key_photo, 1)
except Exception as e:
logger.error(f"[{flickr_photo_id}] {e}")
time.sleep(60)
delays = iter(RATE_LIMIT_DELAYS)
while True:
try:
start = perf_counter()
self.photo = self.flickr_api.get_single_photo_info(photo_id=flickr_photo_id)
logger.info(
f"Retrieved Flickr photo in {(perf_counter() - start) * 1000:.0f} ms"
)
return
except (ResourceNotFound, PermissionDenied) as e:
logger.warning(f"[{flickr_photo_id}] {e}")
self.redis.set(redis_key_photo, 1)
return
except httpx.HTTPStatusError as e:
if e.response.status_code != 429:
logger.error(f"[{flickr_photo_id}] {e}")
return
delay = next(delays, None)
if delay is None:
logger.critical(f"[{flickr_photo_id}] Rate limit exhausted after all retries")
raise RateLimitExhausted
logger.warning(f"[{flickr_photo_id}] Rate limited, retrying in {delay}s")
time.sleep(delay)
Comment on lines +156 to +165
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The retry logic for HTTP 429 (Too Many Requests) uses hardcoded delays. While this is a good start, the Flickr API typically returns a Retry-After header indicating how long the client should wait. It would be more efficient to respect this header if present, falling back to the hardcoded delays only if the header is missing.

except Exception as e:
logger.error(f"[{flickr_photo_id}] {e}")
return
Comment on lines +166 to +168
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This catch-all Exception block silently skips the photo on any error that isn't a 429, 404, or 403. While logging the error is good, consider if certain other exceptions (like connection timeouts or 5xx server errors) should also trigger a retry with backoff, as they are often transient.


def hook_creator_claim(self, claim: Claim) -> None:
if not self.photo:
Expand Down
12 changes: 8 additions & 4 deletions src/wikibots/lib/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@
WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"


class _DryRunStop(Exception):
class DryRunStop(Exception):
    """Raised in dry-run mode to abort the run before a save is performed."""


class RateLimitExhausted(Exception):
    """Raised when every configured rate-limit retry delay has been used up."""


Expand Down Expand Up @@ -139,7 +143,7 @@ def run(self) -> None:

self.treat_page()
time.sleep(self.throttle)
except _DryRunStop:
except (DryRunStop, RateLimitExhausted):
pass

def skip_page(self, page: dict[str, Any]) -> bool:
Expand Down Expand Up @@ -269,7 +273,7 @@ def save(self) -> None:

if self.dry_run:
logger.info("Dry run mode: skipping save operation")
raise _DryRunStop()
raise DryRunStop()

if self.always_null_edit:
logger.info("Performing null edit to flush any tracker categories")
Expand All @@ -283,7 +287,7 @@ def save(self) -> None:

if self.dry_run:
logger.info("Dry run mode: skipping save operation")
raise _DryRunStop()
raise DryRunStop()

try:
start = perf_counter()
Expand Down
Empty file added tests/__init__.py
Empty file.
Loading
Loading