Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,6 @@ jobs:

- name: Run isort
run: poetry run isort --check .

- name: Run type checker
run: poetry run ty check
10 changes: 3 additions & 7 deletions src/wikibots/flickr.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,6 @@ def treat_page(self) -> None:
)
self.create_source_claim(self.photo["url"], WikidataEntity.Flickr)
self.create_location_claim(self.photo["location"])
# self.create_published_in_claim(
# published_in=WikidataEntity.Flickr, date_posted=self.photo["date_posted"]
# )
self._create_inception_claim()

self.save()
Expand Down Expand Up @@ -137,7 +134,8 @@ def get_flickr_photo(self, flickr_photo_id: str) -> None:
time.sleep(60)

def hook_creator_claim(self, claim: Claim) -> None:
assert self.photo
if not self.photo:
return

flickr_user_id_qualifier = Claim(self.commons, WikidataProperty.FlickrUserId)
flickr_user_id_qualifier.setTarget(self.photo["owner"]["id"])
Expand Down Expand Up @@ -296,7 +294,7 @@ def create_location_claim(self, location: Location | None) -> None:
"""

error(f"Unrecognised location accuracy: {location['accuracy']}")
return None
return

claim = Claim(self.commons, WikidataProperty.CoordinatesOfThePointOfView)
claim.setTarget(
Expand All @@ -309,8 +307,6 @@ def create_location_claim(self, location: Location | None) -> None:

self.wiki_properties.new_claims.append(claim)

return None


def main() -> None:
FlickrBot().run()
Expand Down
35 changes: 15 additions & 20 deletions src/wikibots/inaturalist.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,10 @@ class PhotoData:


def _extract_orcid_id(orcid_url: str | None) -> str | None:
"""Extract ORCID ID from URL.

Args:
orcid_url: ORCID URL or None

Returns:
ORCID ID or None
"""
"""Extract ORCID ID from a URL like https://orcid.org/0000-0000-0000-0000."""
if not orcid_url:
return None

# Extract ORCID ID from URL (e.g., https://orcid.org/0000-0000-0000-0000)
# Using a simpler regex pattern that matches the ORCID format
# WikidataProperty.ORCID is P496
matches = re.search(r"(\d{4}-\d{4}-\d{4}-\d{3}[\dX])", orcid_url)

return matches.group(1) if matches else None
Expand All @@ -69,12 +59,13 @@ def _extract_orcid_id(orcid_url: str | None) -> str | None:
class INaturalistBot(BaseBot):
redis_prefix = "ZHXgxFHT4ZBJjR+fLxCH9quuLYl7ky4N6fNV/oC4fbs="
summary = "add [[Commons:Structured data|SDC]] based on metadata from iNaturalist"
# Cache for taxa-Wikidata item mappings
taxa_wikidata_map = {}

def __init__(self, **kwargs: Any):
super().__init__(**kwargs)

# Cache for taxa-Wikidata item mappings
self.taxa_wikidata_map: dict[int, ItemPage] = {}

self.generator = SearchPageGenerator(
f"file: hastemplate:iNaturalist hastemplate:iNaturalistReview -haswbstatement:{WikidataProperty.INaturalistPhotoId}",
site=self.commons,
Expand Down Expand Up @@ -205,11 +196,11 @@ def determine_taxa(self, observation: dict) -> None:

for taxa_id in reversed(taxa["ancestor_ids"]):
# Check if taxa_id is already in the cache
if taxa_id in self.__class__.taxa_wikidata_map:
if taxa_id in self.taxa_wikidata_map:
info(
f"Using cached Wikidata item for taxa https://www.inaturalist.org/taxa/{taxa_id} - {self.__class__.taxa_wikidata_map[taxa_id].getID()}"
f"Using cached Wikidata item for taxa https://www.inaturalist.org/taxa/{taxa_id} - {self.taxa_wikidata_map[taxa_id].getID()}"
)
self.photo.depicts = self.__class__.taxa_wikidata_map[taxa_id]
self.photo.depicts = self.taxa_wikidata_map[taxa_id]
break

info(f"Searching Wikidata for taxon with ID {taxa_id}")
Expand All @@ -236,12 +227,13 @@ def determine_taxa(self, observation: dict) -> None:
)

# Store the mapping in the cache
self.__class__.taxa_wikidata_map[taxa_id] = item
self.taxa_wikidata_map[taxa_id] = item
self.photo.depicts = item
break

def hook_creator_claim(self, claim: Claim) -> None:
assert self.photo and self.photo.creator
if not self.photo or not self.photo.creator:
return

inaturalist_user_id_qualifier = Claim(
self.commons, WikidataProperty.INaturalistUserId
Expand All @@ -255,7 +247,9 @@ def hook_creator_claim(self, claim: Claim) -> None:
claim.addQualifier(orcid_qualifier)

def hook_creator_target(self, claim: Claim) -> None:
assert self.photo and self.photo.creator
if not self.photo or not self.photo.creator:
claim.setSnakType("somevalue")
return

creator_item = self.find_creator_wikidata_item()

Expand All @@ -265,7 +259,8 @@ def hook_creator_target(self, claim: Claim) -> None:
claim.setSnakType("somevalue")

def hook_depicts_claim(self, claim: Claim) -> None:
assert self.photo and self.photo.depicts
if not self.photo or not self.photo.depicts:
return

stated_in_ref = Claim(self.commons, WikidataProperty.StatedIn)
stated_in_ref.setTarget(self.inaturalist_wd)
Expand Down
13 changes: 4 additions & 9 deletions src/wikibots/lib/bot.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import os
import sys
from contextlib import suppress
from dataclasses import dataclass, field
from datetime import datetime
from fractions import Fraction
Expand Down Expand Up @@ -232,8 +231,7 @@ def create_creator_claim(

claim = Claim(self.commons, WikidataProperty.Creator)

with suppress(AssertionError):
self.hook_creator_target(claim)
self.hook_creator_target(claim)

if author_name_string:
author_qualifier = Claim(self.commons, WikidataProperty.AuthorNameString)
Expand All @@ -245,8 +243,7 @@ def create_creator_claim(
url_qualifier.setTarget(url)
claim.addQualifier(url_qualifier)

with suppress(AssertionError):
self.hook_creator_claim(claim)
self.hook_creator_claim(claim)

self.wiki_properties.new_claims.append(claim)

Expand Down Expand Up @@ -282,8 +279,7 @@ def create_depicts_claim(self, depicts: ItemPage | None) -> None:
claim = Claim(self.commons, WikidataProperty.Depicts)
claim.setTarget(depicts)

with suppress(AssertionError):
self.hook_depicts_claim(claim)
self.hook_depicts_claim(claim)

self.wiki_properties.new_claims.append(claim)

Expand Down Expand Up @@ -479,8 +475,7 @@ def create_source_claim(self, source: str, operator: str | None = None) -> None:
operator_qualifier.setTarget(ItemPage(self.wikidata, operator))
claim.addQualifier(operator_qualifier)

with suppress(AssertionError):
self.hook_source_claim(claim)
self.hook_source_claim(claim)

self.wiki_properties.new_claims.append(claim)

Expand Down
50 changes: 7 additions & 43 deletions src/wikibots/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,6 @@ class YouTubeBot(BaseBot):
throttle = 30

def __init__(self, **kwargs: Any):
"""
Initializes the YouTubeBot instance.

Passes any keyword arguments to the base class initializer and configures key components:
- A search generator to locate Commons files that lack a YouTube video ID.
- A YouTube API client built with the API key from the environment.
- A language detector constructed from all available languages.
"""
super().__init__(**kwargs)

self.generator = SearchPageGenerator(
Expand All @@ -57,21 +49,9 @@ def __init__(self, **kwargs: Any):
)
self.video: YouTubeVideo | None = None

self.items = {
"youtube": ItemPage(self.wikidata, WikidataEntity.YouTube),
}
self.youtube_item = ItemPage(self.wikidata, WikidataEntity.YouTube)

def treat_page(self) -> None:
"""
Processes the page to extract YouTube metadata and update Wikidata claims.

Parses the current page's wikitext to retrieve the YouTube video ID from a
"YouTube CC-BY" template, then uses the YouTube API to fetch video details such as
the title, publication date, and channel information. If valid video data is found,
it creates or updates claims for the video ID, publication date, creator details,
source URL, and copyright license. If the video is not found, the method exits
without making changes. Finally, it saves the updates with a descriptive edit summary.
"""
# Reset
self.video = None

Expand All @@ -89,21 +69,12 @@ def treat_page(self) -> None:
self.save()
return

assert self.video

# self.create_published_in_claim(WikidataEntity.YouTube, self.video.published_at)
self.create_creator_claim(self.video.channel.title)
self.create_source_claim(f"https://www.youtube.com/watch?v={youtube_id}")

self.save()

def _fetch_youtube_data(self, youtube_id: str) -> None:
"""
Fetches video details from the YouTube API

:param str youtube_id: The ID of the YouTube video to fetch
:return: None
"""
try:
start = perf_counter()
video = self.youtube.videos().list(part="snippet", id=youtube_id).execute()
Expand Down Expand Up @@ -153,7 +124,8 @@ def _fetch_youtube_data(self, youtube_id: str) -> None:
)

def hook_creator_claim(self, claim: Claim) -> None:
assert self.video
if not self.video:
return

if self.video.channel and self.video.channel.handle:
youtube_handle_qualifier = Claim(
Expand All @@ -169,25 +141,17 @@ def hook_creator_claim(self, claim: Claim) -> None:
claim.addQualifier(youtube_channel_id_qualifier)

def hook_source_claim(self, claim: Claim) -> None:
    """Attach a content-deliverer qualifier targeting the YouTube item.

    Does nothing when no video metadata is loaded (``self.video`` is falsy).

    :param claim: the source claim to qualify; it is modified in place
    """
    # An explicit guard rather than ``assert``: asserts are stripped under
    # ``python -O`` and the caller no longer suppresses AssertionError.
    if not self.video:
        return

    content_deliverer_qualifier = Claim(
        self.commons, WikidataProperty.ContentDeliverer
    )
    # ``self.youtube_item`` is the ItemPage created once in ``__init__``;
    # the former ``self.items["youtube"]`` dict entry no longer exists.
    content_deliverer_qualifier.setTarget(self.youtube_item)
    claim.addQualifier(content_deliverer_qualifier)


def main() -> None:
    """Entry point: instantiate ``YouTubeBot`` and run it."""
    YouTubeBot().run()


Expand Down
Loading