Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# CI workflow: installs and tests the Python package, and publishes it for
# version tags.
name: Python build
# Triggered by both pull requests and pushes.
on:
[pull_request, push]

jobs:
# Install the package and run the test suite.
build:
runs-on: ubuntu-latest
# One job run per listed Python version.
strategy:
matrix:
python-version: ["3.11", "3.12", "3.13"]
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install package
run: pip install .

# Runs pytest on tests/python, then imports the package as a smoke test.
- name: Run tests
run: |
pip install pytest
pytest tests/python
python -c "import languagedata; print('languagedata package loaded successfully')"

# Build the distribution and upload it to PyPI.
publish:
runs-on: ubuntu-latest
# Runs only after the build job has succeeded.
needs: build
# Restrict publishing to refs that are tags starting with "v".
if: startsWith(github.ref, 'refs/tags/v')
environment: release
# NOTE(review): no token/password is passed to the publish step, so
# id-token: write is presumably there for PyPI trusted publishing - confirm.
permissions:
id-token: write
contents: read
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Set up Python 3.13
uses: actions/setup-python@v5
with:
python-version: "3.13"

- name: Build package
run: |
pip install uv
uv build

- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
44 changes: 44 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Packaging metadata and tool configuration for the "languagedata" distribution.
# The package is built with the hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "languagedata"
description = "Language data and utilities"
version = "2.0.3"
requires-python = ">=3.10"
license = "GPL-2.0-or-later"
readme = "README.md"
authors = [{name= "Santhosh Thottingal", email= "santhosh.thottingal@gmail.com"}]
classifiers = [
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
]

# The wheel packages the directory python/languagedata.
[tool.hatch.build.targets.wheel]
packages = ["python/languagedata"]

# Paths explicitly included in, and excluded from, the source distribution.
[tool.hatch.build.targets.sdist]
include = [
"python/languagedata/",
"data/language-data.json",
]
exclude = [
"src*",
"docs*",
".venv*",
".ruff_cache*",
".pytest_cache*",
]

# pytest collects tests from tests/python; "python" is added to the import
# path so the in-tree package can be imported by the tests.
[tool.pytest.ini_options]
testpaths = ["tests/python"]
pythonpath = ["python"]


# Dependency group "dev": test runner and linter.
[dependency-groups]
dev = [
"pytest>=7.0",
"ruff>=0.1.0",
]

153 changes: 153 additions & 0 deletions python/languagedata/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
from typing import Union, List
from importlib.resources import files
import functools
import json

# Load the bundled language database once, at import time.
# The encoding is passed explicitly: without it the file would be decoded with
# the platform's locale encoding, which is not UTF-8 everywhere, while JSON
# interchange files are UTF-8.
with files('languagedata').joinpath('language-data.json').open('r', encoding='utf-8') as f:
    languageData = json.load(f)


def isKnown(languageCode):
    """Return True when ``languageCode`` is a key of the loaded language table."""
    knownLanguages = languageData['languages']
    return languageCode in knownLanguages


def isRedirect(language):
    """Return the redirect target of ``language``, or ``False`` if there is none.

    A known language whose entry holds exactly one element is treated as a
    redirect, and that single element is returned as the target.
    """
    if not isKnown(language):
        return False
    entry = languageData['languages'][language]
    if len(entry) != 1:
        return False
    return entry[0]


def getLanguages():
    """Return the ``'languages'`` mapping of the loaded data (not a copy)."""
    allLanguages = languageData['languages']
    return allLanguages


def getScript(language):
    """Return the script stored for ``language``.

    Redirects are followed first. For a language that is not in the table
    the placeholder ``'Zyyy'`` is returned; otherwise the first element of
    the language's entry is returned.
    """
    redirectTarget = isRedirect(language)
    if redirectTarget:
        return getScript(redirectTarget)
    if isKnown(language):
        return languageData['languages'][language][0]
    return 'Zyyy'


def getRegions(language: str) -> Union[str, List[str]]:
    """Return the regions stored for ``language``.

    Redirects are followed first. The second element of the language's entry
    is returned when the language is known and that element is truthy;
    in every other case the string ``'UNKNOWN'`` is returned.
    """
    redirectTarget = isRedirect(language)
    if redirectTarget:
        return getRegions(redirectTarget)
    if not isKnown(language):
        return 'UNKNOWN'
    regions = languageData['languages'][language][1]
    return regions if regions else 'UNKNOWN'


def getAutonym(language: str) -> str:
    """Return the autonym stored for ``language``, or the code itself.

    Redirects are followed first. The autonym is the third element of the
    language's entry. The ``language`` argument is returned unchanged when
    the language is unknown, when its entry has no third element, or when
    that element is empty.

    Args:
        language: Language code to look up.

    Returns:
        The autonym, or ``language`` as a fallback; never ``None``.
    """
    target = isRedirect(language)
    if target:
        return getAutonym(target)
    if not isKnown(language):
        return language
    entry = languageData['languages'][language]
    autonym = entry[2] if len(entry) > 2 else None
    # addLanguage() always writes three slots and leaves the autonym slot as
    # None when no autonym was supplied, so an empty slot must also fall back
    # to the code to honour the ``-> str`` contract.
    return autonym or language


def getAutonyms():
    """Return a dict mapping every non-redirect language code to its autonym."""
    return {
        code: getAutonym(code)
        for code in languageData['languages']
        if not isRedirect(code)
    }


def getLanguagesInScripts(scripts):
    """Return the non-redirect language codes whose script equals any of ``scripts``.

    Codes are returned in the iteration order of the language table, each at
    most once.
    """
    matches = []
    for code in languageData['languages']:
        if isRedirect(code):
            continue
        # any() stops at the first matching script, so a code is added once.
        if any(candidate == getScript(code) for candidate in scripts):
            matches.append(code)
    return matches


def getLanguagesInScript(script):
    """Return the language codes written in the single given ``script``."""
    singleScript = [script]
    return getLanguagesInScripts(singleScript)


def getGroupOfScript(script):
    """Return the name of the first script group containing ``script``.

    Groups are checked in the iteration order of the ``'scriptgroups'``
    table; ``'Other'`` is returned when no group lists the script.
    """
    groups = languageData['scriptgroups']
    return next(
        (groupName for groupName, members in groups.items() if script in members),
        'Other',
    )


def getScriptGroupOfLanguage(language):
    """Return the script group of the script used by ``language``."""
    script = getScript(language)
    return getGroupOfScript(script)


def getLanguagesByScriptGroup(languages):
    """Group the given language codes by script group.

    Returns a dict mapping script-group name to the list of codes from
    ``languages`` in that group, in input order. A redirect is classified by
    its target, but the code stored in the result is the one that was passed in.
    """
    grouped = {}
    for code in languages:
        redirectTarget = isRedirect(code)
        lookupCode = redirectTarget if redirectTarget else code
        groupName = getScriptGroupOfLanguage(lookupCode)
        grouped.setdefault(groupName, []).append(code)
    return grouped


def getLanguagesByScriptGroupInRegions(regions):
    """Group the languages used in any of ``regions`` by script group.

    Redirect entries are skipped. A language is included once if at least
    one of the requested regions is among its regions.

    Args:
        regions: Iterable of region codes to match.

    Returns:
        A dict mapping script-group name to the list of matching language
        codes, in the iteration order of the language table.
    """
    languagesByScriptGroupInRegions = {}
    for language in languageData['languages']:
        if isRedirect(language):
            continue
        languageRegions = getRegions(language)
        if isinstance(languageRegions, str):
            # getRegions() returns the bare string 'UNKNOWN' when no regions
            # are recorded. Testing ``region in 'UNKNOWN'`` is a substring
            # check, so e.g. 'NO' or 'KN' would match every such language.
            # Wrapping the string makes the membership test an exact match.
            languageRegions = [languageRegions]
        if any(region in languageRegions for region in regions):
            scriptGroup = getScriptGroupOfLanguage(language)
            languagesByScriptGroupInRegions.setdefault(scriptGroup, []).append(language)
    return languagesByScriptGroupInRegions


def getLanguagesByScriptGroupInRegion(region):
    """Group the languages used in the single given ``region`` by script group."""
    singleRegion = [region]
    return getLanguagesByScriptGroupInRegions(singleRegion)


def sortByScriptGroup(languages):
    """Return ``languages`` reordered so codes of the same script group are adjacent.

    Script groups appear in sorted order of their names; within a group the
    codes keep their input order.
    """
    byGroup = getLanguagesByScriptGroup(languages)
    return [code for groupName in sorted(byGroup) for code in byGroup[groupName]]


def _cmpByAutonym(a, b):
    """Three-way compare two language codes by lower-cased autonym.

    A code with no autonym is compared by the code itself.

    Returns:
        -1 if ``a`` sorts before ``b``, 1 if it sorts after, and 0 when both
        keys are equal, so that ``cmp(x, x) == 0`` holds as a comparator
        requires.
    """
    keyA = (getAutonym(a) or a).lower()
    keyB = (getAutonym(b) or b).lower()
    if keyA < keyB:
        return -1
    if keyA > keyB:
        return 1
    return 0


def sortByAutonym(languages):
    """Return a new list of ``languages`` ordered by ``_cmpByAutonym``."""
    ordered = list(languages)
    ordered.sort(key=functools.cmp_to_key(_cmpByAutonym))
    return ordered


def isRtl(language):
    """Return True when the script of ``language`` is listed in ``'rtlscripts'``."""
    script = getScript(language)
    return script in languageData['rtlscripts']


def getDir(language):
    """Return ``'rtl'`` if ``language`` is right-to-left, otherwise ``'ltr'``."""
    if isRtl(language):
        return 'rtl'
    return 'ltr'


def getLanguagesInTerritory(territory):
    """Return the languages listed for ``territory``.

    An empty list is returned when the data has no ``'territories'`` table
    or the table has no entry for ``territory``.
    """
    territories = languageData.get('territories', {})
    return territories.get(territory, [])


def addLanguage(code, options):
    """Add or overwrite the entry for ``code`` in the loaded language table.

    When ``options`` has a truthy ``'target'``, a one-element redirect entry
    pointing at that target is stored. Otherwise a three-element entry is
    stored from the ``'script'``, ``'regions'`` and ``'autonym'`` options,
    with ``None`` for any that are missing.
    """
    redirectTarget = options.get('target')
    if redirectTarget:
        languageData['languages'][code] = [redirectTarget]
        return
    languageData['languages'][code] = [
        options.get(field) for field in ('script', 'regions', 'autonym')
    ]
1 change: 1 addition & 0 deletions python/languagedata/language-data.json
Loading
Loading