Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions .github/workflows/deploy.yaml
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
name: Deployment
name: Publish to PyPI

on:
push:
branches-ignore: ["*"]
tags: ["*"]
release:
types: [created]

jobs:
deploy:
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') && github.repository == 'gmr/email-normalize'
environment: release

environment:
name: pypi
url: https://pypi.org/project/email-normalize/

permissions:
contents: read
id-token: write

steps:
- name: Checkout repository
uses: actions/checkout@v5
- uses: actions/checkout@v5
Comment thread
gmr marked this conversation as resolved.

- name: Install uv
uses: astral-sh/setup-uv@v6

- name: Build package
run: uv build
- name: Build and check package
run: |
uv build
uvx twine check dist/*

- name: Publish package
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
7 changes: 1 addition & 6 deletions .github/workflows/testing.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
name: Testing
on:
pull_request:
paths-ignore:
- 'docs/**'
- '*.md'
- '*.rst'
push:
branches: ["*"]
paths-ignore:
- 'docs/**'
- '*.md'
- '*.rst'
tags-ignore: ["*"]

jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 3
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -47,7 +43,6 @@ jobs:

- name: Upload Coverage
uses: codecov/codecov-action@v5
if: github.event_name == 'push' && github.repository == 'gmr/email-normalize'
with:
files: ./build/coverage.xml
flags: unittests
37 changes: 31 additions & 6 deletions email_normalize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@

# Type alias for resolved MX data: a list of (int, str) pairs
# (presumably MX priority and exchange host name -- confirm against
# Normalizer.mx_records).
MXRecords = list[tuple[int, str]]

# Shared public-suffix extractor configured to work fully offline:
# an empty ``suffix_list_urls`` means no suffix list is downloaded,
# ``cache_dir=None`` turns off tldextract's on-disk cache, and
# ``fallback_to_snapshot=True`` makes it use the suffix-list snapshot that
# ships with the tldextract package instead.
_tld_extract = tldextract.TLDExtract(
    suffix_list_urls=(), cache_dir=None, fallback_to_snapshot=True
)

# Module-level lookup cache keyed by string (presumably the domain part --
# confirm against the cache helpers) holding ``CachedItem`` entries.
cache: dict[str, 'CachedItem'] = {}


Expand Down Expand Up @@ -99,8 +103,11 @@ def __init__(
cache_limit: int = 1024,
cache_failures: bool = True,
failure_ttl: int = 300,
skip_dns: bool = False,
) -> None:
self._resolver = aiodns.DNSResolver(name_servers)
self._skip_dns = skip_dns
if not skip_dns:
self._resolver = aiodns.DNSResolver(name_servers)
self.cache_failures = cache_failures
self.cache_limit = cache_limit
self.failure_ttl = failure_ttl
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Expand All @@ -114,6 +121,8 @@ async def mx_records(self, domain_part: str) -> MXRecords:
domain_part: The domain to resolve MX records for.

"""
if self._skip_dns:
return []
if self._skip_cache(domain_part):
try:
records = await self._resolver.query(domain_part, 'MX')
Expand Down Expand Up @@ -158,8 +167,12 @@ async def normalize(self, email_address: str) -> Result:
"""
address = utils.parseaddr(email_address)
local_part, domain_part = address[1].lower().split('@')
mx_records = await self.mx_records(domain_part)
provider = self._lookup_provider(mx_records)
if self._skip_dns:
mx_records = []
provider = self._lookup_provider_by_domain(domain_part)
else:
mx_records = await self.mx_records(domain_part)
provider = self._lookup_provider(mx_records)
Comment thread
gmr marked this conversation as resolved.
if provider:
if provider.Flags & providers.Rules.LOCAL_PART_AS_HOSTNAME:
local_part, domain_part = self._local_part_as_hostname(
Expand All @@ -181,7 +194,7 @@ def _local_part_as_hostname(
local_part: str,
domain_part: str,
) -> tuple[str, str]:
extracted = tldextract.extract(domain_part)
extracted = _tld_extract(domain_part)
if extracted.subdomain:
subdomain_parts = extracted.subdomain.split('.')
local_part = subdomain_parts[0]
Expand All @@ -200,6 +213,12 @@ def _local_part_as_hostname(
domain_part = '.'.join(components)
return local_part, domain_part

@staticmethod
def _lookup_provider_by_domain(
    domain_part: str,
) -> type[providers.MailboxProvider] | None:
    """Return the mailbox provider for a domain without using DNS.

    The domain is first looked up verbatim in ``providers.DomainMap``.
    If that misses and the domain has a subdomain, the registrable
    parent domain (``<domain>.<suffix>``) is tried as well, but only a
    provider that sets ``Rules.LOCAL_PART_AS_HOSTNAME`` is accepted.
    Without this fallback the local-part-as-hostname rule could never
    apply when DNS is skipped, because that rule only acts on addresses
    that *have* a subdomain, and such domains never match exactly.

    Args:
        domain_part: The lower-cased domain part of the address.

    Returns:
        The matching provider class, or ``None`` when the domain is
        not a known mailbox provider domain.

    """
    provider = providers.DomainMap.get(domain_part)
    if provider is not None:
        return provider

    extracted = _tld_extract(domain_part)
    # No subdomain (or no recognised public suffix) means there is no
    # parent domain worth trying.
    if not (extracted.subdomain and extracted.suffix):
        return None

    parent = providers.DomainMap.get(
        f'{extracted.domain}.{extracted.suffix}'
    )
    # Restrict the fallback to providers that use the subdomain as the
    # mailbox name; for every other provider an arbitrary subdomain is
    # not known to belong to that provider's mail service.
    if parent is not None and (
        parent.Flags & providers.Rules.LOCAL_PART_AS_HOSTNAME
    ):
        return parent
    return None

@staticmethod
def _lookup_provider(
mx_records: list[tuple[int, str]],
Expand All @@ -221,7 +240,10 @@ def _skip_cache(self, domain: str) -> bool:
return False


def normalize(email_address: str) -> Result:
def normalize(
email_address: str,
skip_dns: bool = False,
) -> Result:
"""Normalize an email address.

This function abstracts the asyncio base for this library and
Expand All @@ -231,10 +253,13 @@ def normalize(email_address: str) -> Result:

Args:
email_address: The address to normalize.
skip_dns: Skip DNS MX record lookups and use a static
domain map to detect well-known mailbox providers.
Defaults to ``False``.

"""

async def _normalize():
return await Normalizer().normalize(email_address)
return await Normalizer(skip_dns=skip_dns).normalize(email_address)

return asyncio.run(_normalize())
27 changes: 27 additions & 0 deletions email_normalize/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,30 @@ class Zoho(MailboxProvider):
Yandex,
Zoho,
]

# Static domain -> provider table for well-known mailbox domains,
# consulted when DNS MX lookups are skipped. Domains are grouped per
# provider and flattened into one mapping; insertion order follows the
# grouping below.
DomainMap: dict[str, type[MailboxProvider]] = {
    domain: provider
    for provider, domains in (
        (Apple, ('icloud.com', 'me.com', 'mac.com')),
        (Fastmail, ('fastmail.com', 'fastmail.fm')),
        (Google, ('gmail.com', 'googlemail.com')),
        (
            Microsoft,
            ('outlook.com', 'hotmail.com', 'live.com', 'msn.com'),
        ),
        (ProtonMail, ('proton.me', 'protonmail.com', 'pm.me')),
        (
            Yahoo,
            (
                'yahoo.com',
                'yahoo.co.uk',
                'yahoo.co.jp',
                'ymail.com',
                'aol.com',
            ),
        ),
        (Yandex, ('yandex.com', 'yandex.ru', 'ya.ru')),
        (Zoho, ('zoho.com', 'zohomail.com')),
    )
    for domain in domains
}
116 changes: 116 additions & 0 deletions tests/test_normalize.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import unittest
import uuid
import warnings
Expand Down Expand Up @@ -185,3 +186,118 @@ def test_zoho(self):
self._perform_test(
address, f'{local_part}@{domain_part}', mx_records, 'Zoho'
)


class SkipDNSTestCase(TestCase):
    # Exercises normalization with ``skip_dns=True``: providers are
    # detected from the static domain map and no MX records are returned.

    def _normalize(self, address):
        # Run the synchronous entry point with DNS lookups disabled.
        return email_normalize.normalize(address, skip_dns=True)

    def _assert_provider(self, domain, expected_provider):
        # Normalize a random mailbox at ``domain`` and verify both the
        # detected provider and the empty MX record list.
        mailbox = str(uuid.uuid4())
        outcome = self._normalize(f'{mailbox}@{domain}')
        self.assertEqual(outcome.mailbox_provider, expected_provider)
        self.assertListEqual(outcome.mx_records, [])
        return outcome

    # -- Google ---------------------------------------------------------

    def test_gmail(self):
        self._assert_provider('gmail.com', 'Google')

    def test_googlemail(self):
        self._assert_provider('googlemail.com', 'Google')

    # -- Microsoft ------------------------------------------------------

    def test_outlook(self):
        self._assert_provider('outlook.com', 'Microsoft')

    def test_hotmail(self):
        self._assert_provider('hotmail.com', 'Microsoft')

    def test_live(self):
        self._assert_provider('live.com', 'Microsoft')

    def test_msn(self):
        self._assert_provider('msn.com', 'Microsoft')

    # -- Apple ----------------------------------------------------------

    def test_icloud(self):
        self._assert_provider('icloud.com', 'Apple')

    def test_me(self):
        self._assert_provider('me.com', 'Apple')

    def test_mac(self):
        self._assert_provider('mac.com', 'Apple')

    # -- Fastmail -------------------------------------------------------

    def test_fastmail(self):
        self._assert_provider('fastmail.com', 'Fastmail')

    def test_fastmail_fm(self):
        self._assert_provider('fastmail.fm', 'Fastmail')

    # -- ProtonMail -----------------------------------------------------

    def test_protonmail(self):
        self._assert_provider('protonmail.com', 'ProtonMail')

    def test_proton_me(self):
        self._assert_provider('proton.me', 'ProtonMail')

    def test_pm_me(self):
        self._assert_provider('pm.me', 'ProtonMail')

    # -- Yahoo ----------------------------------------------------------

    def test_yahoo(self):
        self._assert_provider('yahoo.com', 'Yahoo')

    def test_yahoo_co_uk(self):
        self._assert_provider('yahoo.co.uk', 'Yahoo')

    def test_yahoo_co_jp(self):
        self._assert_provider('yahoo.co.jp', 'Yahoo')

    def test_ymail(self):
        self._assert_provider('ymail.com', 'Yahoo')

    def test_aol(self):
        self._assert_provider('aol.com', 'Yahoo')

    # -- Yandex ---------------------------------------------------------

    def test_yandex_com(self):
        self._assert_provider('yandex.com', 'Yandex')

    def test_yandex_ru(self):
        self._assert_provider('yandex.ru', 'Yandex')

    def test_ya_ru(self):
        self._assert_provider('ya.ru', 'Yandex')

    # -- Zoho -----------------------------------------------------------

    def test_zoho(self):
        self._assert_provider('zoho.com', 'Zoho')

    def test_zohomail(self):
        self._assert_provider('zohomail.com', 'Zoho')

    # -- Rule application and edge cases --------------------------------

    def test_google_rules_applied(self):
        outcome = self._normalize('u.s.e.r+tag@gmail.com')
        self.assertEqual(outcome.normalized_address, 'user@gmail.com')
        self.assertEqual(outcome.mailbox_provider, 'Google')

    def test_microsoft_plus_addressing(self):
        outcome = self._normalize('user+tag@outlook.com')
        self.assertEqual(outcome.normalized_address, 'user@outlook.com')

    def test_unknown_domain(self):
        # A random subdomain of example.com is not in the domain map.
        unknown = f'{uuid.uuid4()}.example.com'
        outcome = self._normalize(f'user@{unknown}')
        self.assertIsNone(outcome.mailbox_provider)
        self.assertListEqual(outcome.mx_records, [])

    def test_dns_not_called(self):
        # Any call into mx_records would raise and fail the test.
        dns_guard = mock.patch(
            'email_normalize.Normalizer.mx_records',
            side_effect=AssertionError('DNS should not be called'),
        )
        with dns_guard:
            outcome = self._normalize('test@gmail.com')
        self.assertEqual(outcome.mailbox_provider, 'Google')

    def test_async_normalizer(self):
        # Same behavior when the async Normalizer is driven directly.
        instance = email_normalize.Normalizer(skip_dns=True)
        outcome = asyncio.run(instance.normalize('u.s.e.r+tag@gmail.com'))
        self.assertEqual(outcome.normalized_address, 'user@gmail.com')
        self.assertEqual(outcome.mailbox_provider, 'Google')
        self.assertListEqual(outcome.mx_records, [])
Loading