From 1616af0dc2de66b79a3c50d675d5be83da187e4d Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Fri, 13 Mar 2026 13:20:11 -0400 Subject: [PATCH 1/6] Add skip_dns option for normalization without MX lookups Add a static DomainMap of well-known email domains to providers, a skip_dns parameter to Normalizer and the sync wrapper, and domain-based provider lookup for offline/fast-path use cases. Closes #9 Co-Authored-By: Claude Opus 4.6 --- email_normalize/__init__.py | 30 +++++++-- email_normalize/providers.py | 27 ++++++++ tests/test_normalize.py | 119 +++++++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 5 deletions(-) diff --git a/email_normalize/__init__.py b/email_normalize/__init__.py index 7785ffa..1a8a536 100644 --- a/email_normalize/__init__.py +++ b/email_normalize/__init__.py @@ -99,8 +99,11 @@ def __init__( cache_limit: int = 1024, cache_failures: bool = True, failure_ttl: int = 300, + skip_dns: bool = False, ) -> None: - self._resolver = aiodns.DNSResolver(name_servers) + self._skip_dns = skip_dns + if not skip_dns: + self._resolver = aiodns.DNSResolver(name_servers) self.cache_failures = cache_failures self.cache_limit = cache_limit self.failure_ttl = failure_ttl @@ -158,8 +161,12 @@ async def normalize(self, email_address: str) -> Result: """ address = utils.parseaddr(email_address) local_part, domain_part = address[1].lower().split('@') - mx_records = await self.mx_records(domain_part) - provider = self._lookup_provider(mx_records) + if self._skip_dns: + mx_records = [] + provider = self._lookup_provider_by_domain(domain_part) + else: + mx_records = await self.mx_records(domain_part) + provider = self._lookup_provider(mx_records) if provider: if provider.Flags & providers.Rules.LOCAL_PART_AS_HOSTNAME: local_part, domain_part = self._local_part_as_hostname( @@ -200,6 +207,12 @@ def _local_part_as_hostname( domain_part = '.'.join(components) return local_part, domain_part + @staticmethod + def _lookup_provider_by_domain( + domain_part: str, + ) -> type[providers.MailboxProvider] | None: + return providers.DomainMap.get(domain_part) + @staticmethod def _lookup_provider( mx_records: list[tuple[int, str]], @@ -221,7 +234,10 @@ def _skip_cache(self, domain: str) -> bool: return False -def normalize(email_address: str) -> Result: +def normalize( + email_address: str, + skip_dns: bool = False, +) -> Result: """Normalize an email address. This function abstracts the asyncio base for this library and @@ -231,10 +247,14 @@ def normalize(email_address: str) -> Result: Args: email_address: The address to normalize. + skip_dns: Skip DNS MX record lookups and use a static + domain map to detect well-known mailbox providers. + Defaults to ``False``. """ async def _normalize(): - return await Normalizer().normalize(email_address) + return await Normalizer( + skip_dns=skip_dns).normalize(email_address) return asyncio.run(_normalize()) diff --git a/email_normalize/providers.py b/email_normalize/providers.py index a014f60..03c8833 100644 --- a/email_normalize/providers.py +++ b/email_normalize/providers.py @@ -84,3 +84,30 @@ class Zoho(MailboxProvider): Yandex, Zoho, ] + +DomainMap: dict[str, type[MailboxProvider]] = { + 'icloud.com': Apple, + 'me.com': Apple, + 'mac.com': Apple, + 'fastmail.com': Fastmail, + 'fastmail.fm': Fastmail, + 'gmail.com': Google, + 'googlemail.com': Google, + 'outlook.com': Microsoft, + 'hotmail.com': Microsoft, + 'live.com': Microsoft, + 'msn.com': Microsoft, + 'proton.me': ProtonMail, + 'protonmail.com': ProtonMail, + 'pm.me': ProtonMail, + 'yahoo.com': Yahoo, + 'yahoo.co.uk': Yahoo, + 'yahoo.co.jp': Yahoo, + 'ymail.com': Yahoo, + 'aol.com': Yahoo, + 'yandex.com': Yandex, + 'yandex.ru': Yandex, + 'ya.ru': Yandex, + 'zoho.com': Zoho, + 'zohomail.com': Zoho, +} diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 719e9ce..78a2754 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -1,9 +1,11 @@ +import asyncio import unittest import uuid import warnings from unittest import mock import email_normalize +from email_normalize import providers class TestCase(unittest.TestCase): @@ -185,3 +187,120 @@ def test_zoho(self): self._perform_test( address, f'{local_part}@{domain_part}', mx_records, 'Zoho' ) + + +class SkipDNSTestCase(TestCase): + + def _normalize(self, address): + return email_normalize.normalize(address, skip_dns=True) + + def _assert_provider(self, domain, expected_provider): + local_part = str(uuid.uuid4()) + address = f'{local_part}@{domain}' + result = self._normalize(address) + self.assertEqual(result.mailbox_provider, expected_provider) + self.assertListEqual(result.mx_records, []) + return result + + def test_gmail(self): + self._assert_provider('gmail.com', 'Google') + + def test_googlemail(self): + self._assert_provider('googlemail.com', 'Google') + + def test_outlook(self): + self._assert_provider('outlook.com', 'Microsoft') + + def test_hotmail(self): + self._assert_provider('hotmail.com', 'Microsoft') + + def test_live(self): + self._assert_provider('live.com', 'Microsoft') + + def test_msn(self): + self._assert_provider('msn.com', 'Microsoft') + + def test_icloud(self): + self._assert_provider('icloud.com', 'Apple') + + def test_me(self): + self._assert_provider('me.com', 'Apple') + + def test_mac(self): + self._assert_provider('mac.com', 'Apple') + + def test_fastmail(self): + self._assert_provider('fastmail.com', 'Fastmail') + + def test_fastmail_fm(self): + self._assert_provider('fastmail.fm', 'Fastmail') + + def test_protonmail(self): + self._assert_provider('protonmail.com', 'ProtonMail') + + def test_proton_me(self): + self._assert_provider('proton.me', 'ProtonMail') + + def test_pm_me(self): + self._assert_provider('pm.me', 'ProtonMail') + + def test_yahoo(self): + self._assert_provider('yahoo.com', 'Yahoo') + + def test_yahoo_co_uk(self): + self._assert_provider('yahoo.co.uk', 'Yahoo') + + def test_yahoo_co_jp(self): + self._assert_provider('yahoo.co.jp', 'Yahoo') + + def test_ymail(self): + self._assert_provider('ymail.com', 'Yahoo') + + def test_aol(self): + self._assert_provider('aol.com', 'Yahoo') + + def test_yandex_com(self): + self._assert_provider('yandex.com', 'Yandex') + + def test_yandex_ru(self): + self._assert_provider('yandex.ru', 'Yandex') + + def test_ya_ru(self): + self._assert_provider('ya.ru', 'Yandex') + + def test_zoho(self): + self._assert_provider('zoho.com', 'Zoho') + + def test_zohomail(self): + self._assert_provider('zohomail.com', 'Zoho') + + def test_google_rules_applied(self): + result = self._normalize('u.s.e.r+tag@gmail.com') + self.assertEqual(result.normalized_address, 'user@gmail.com') + self.assertEqual(result.mailbox_provider, 'Google') + + def test_microsoft_plus_addressing(self): + result = self._normalize('user+tag@outlook.com') + self.assertEqual(result.normalized_address, 'user@outlook.com') + + def test_unknown_domain(self): + domain = f'{uuid.uuid4()}.example.com' + result = self._normalize(f'user@{domain}') + self.assertIsNone(result.mailbox_provider) + self.assertListEqual(result.mx_records, []) + + def test_dns_not_called(self): + with mock.patch( + 'email_normalize.Normalizer.mx_records', + side_effect=AssertionError('DNS should not be called'), + ): + result = self._normalize('test@gmail.com') + self.assertEqual(result.mailbox_provider, 'Google') + + def test_async_normalizer(self): + normalizer = email_normalize.Normalizer(skip_dns=True) + result = asyncio.run( + normalizer.normalize('u.s.e.r+tag@gmail.com')) + self.assertEqual(result.normalized_address, 'user@gmail.com') + self.assertEqual(result.mailbox_provider, 'Google') + self.assertListEqual(result.mx_records, []) From 77adec7043ce87fca75160220e001b5ed2000666 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Fri, 13 Mar 2026 15:40:26 -0400 Subject: [PATCH 2/6] Fix ruff formatting and remove unused import - Reformat email_normalize/__init__.py and tests/test_normalize.py - Remove unused `providers` import in tests Co-Authored-By: Claude Opus 4.6 (1M context) --- email_normalize/__init__.py | 3 +-- tests/test_normalize.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/email_normalize/__init__.py b/email_normalize/__init__.py index 1a8a536..925ff23 100644 --- a/email_normalize/__init__.py +++ b/email_normalize/__init__.py @@ -254,7 +254,6 @@ def normalize( """ async def _normalize(): - return await Normalizer( - skip_dns=skip_dns).normalize(email_address) + return await Normalizer(skip_dns=skip_dns).normalize(email_address) return asyncio.run(_normalize()) diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 78a2754..6e64a83 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -5,7 +5,6 @@ from unittest import mock import email_normalize -from email_normalize import providers class TestCase(unittest.TestCase): @@ -190,7 +189,6 @@ def test_zoho(self): class SkipDNSTestCase(TestCase): - def _normalize(self, address): return email_normalize.normalize(address, skip_dns=True) @@ -299,8 +297,7 @@ def test_dns_not_called(self): def test_async_normalizer(self): normalizer = email_normalize.Normalizer(skip_dns=True) - result = asyncio.run( - normalizer.normalize('u.s.e.r+tag@gmail.com')) + result = asyncio.run(normalizer.normalize('u.s.e.r+tag@gmail.com')) self.assertEqual(result.normalized_address, 'user@gmail.com') self.assertEqual(result.mailbox_provider, 'Google') self.assertListEqual(result.mx_records, []) From 15a204c39a861cd56a4ca01ea74fc0d39248c550 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Fri, 13 Mar 2026 16:41:59 -0400 Subject: [PATCH 3/6] Guard mx_records() against use when skip_dns is enabled Return an empty list early if skip_dns is True, preventing an AttributeError from accessing the unset _resolver attribute. Co-Authored-By: Claude Opus 4.6 (1M context) --- email_normalize/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/email_normalize/__init__.py b/email_normalize/__init__.py index 925ff23..5b44a16 100644 --- a/email_normalize/__init__.py +++ b/email_normalize/__init__.py @@ -117,6 +117,8 @@ async def mx_records(self, domain_part: str) -> MXRecords: domain_part: The domain to resolve MX records for. """ + if self._skip_dns: + return [] if self._skip_cache(domain_part): try: records = await self._resolver.query(domain_part, 'MX') From 6cc928bec9cc785285947b01cae36772bf3ec871 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Fri, 13 Mar 2026 16:49:02 -0400 Subject: [PATCH 4/6] Align CI workflows with pgdumplib patterns - deploy: trigger on release creation, add twine check, set pypi environment with URL - testing: remove paths-ignore from pull_request, remove timeout, remove conditional on codecov upload Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/deploy.yaml | 29 +++++++++++++++++------------ .github/workflows/testing.yaml | 7 +------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 072f8e2..20abaa3 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -1,25 +1,30 @@ -name: Deployment +name: Publish to PyPI + on: - push: - branches-ignore: ["*"] - tags: ["*"] + release: + types: [created] + jobs: deploy: runs-on: ubuntu-latest - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') && github.repository == 'gmr/email-normalize' - environment: release + + environment: + name: pypi + url: https://pypi.org/p/email-normalize + permissions: - contents: read id-token: write + steps: - - name: Checkout repository - uses: actions/checkout@v5 + - uses: actions/checkout@v5 - name: Install uv uses: astral-sh/setup-uv@v6 - - name: Build package - run: uv build + - name: Build and check package + run: | + uv build + uvx twine check dist/* - - name: Publish package + - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index f7fb3fc..314027f 100644 --- a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -1,10 +1,6 @@ name: Testing on: pull_request: - paths-ignore: - - 'docs/**' - - '*.md' - - '*.rst' push: branches: ["*"] paths-ignore: @@ -12,10 +8,10 @@ on: - '*.md' - '*.rst' tags-ignore: ["*"] + jobs: test: runs-on: ubuntu-latest - timeout-minutes: 3 strategy: fail-fast: false matrix: @@ -47,7 +43,6 @@ jobs: - name: Upload Coverage uses: codecov/codecov-action@v5 - if: github.event_name == 'push' && github.repository == 'gmr/email-normalize' with: files: ./build/coverage.xml flags: unittests From 348213f46522a7f0bf8b237415f99b8462f24f07 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Fri, 13 Mar 2026 16:51:49 -0400 Subject: [PATCH 5/6] Use offline tldextract to prevent network fetches Replace tldextract.extract() with a module-level TLDExtract instance configured with no suffix list URLs and no cache dir, using only the bundled PSL snapshot. This prevents surprise HTTP requests during TLD extraction, particularly in the skip_dns code path. Co-Authored-By: Claude Opus 4.6 (1M context) --- email_normalize/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/email_normalize/__init__.py b/email_normalize/__init__.py index 5b44a16..87afb39 100644 --- a/email_normalize/__init__.py +++ b/email_normalize/__init__.py @@ -24,6 +24,10 @@ MXRecords = list[tuple[int, str]] +_tld_extract = tldextract.TLDExtract( + suffix_list_urls=(), cache_dir=None, fallback_to_snapshot=True +) + cache: dict[str, 'CachedItem'] = {} @@ -190,7 +194,7 @@ def _local_part_as_hostname( local_part: str, domain_part: str, ) -> tuple[str, str]: - extracted = tldextract.extract(domain_part) + extracted = _tld_extract(domain_part) if extracted.subdomain: subdomain_parts = extracted.subdomain.split('.') local_part = subdomain_parts[0] From 6532e2db546988294c3edd0db1907eca80dbb650 Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Fri, 13 Mar 2026 16:56:20 -0400 Subject: [PATCH 6/6] Fix deploy workflow: correct PyPI URL and add contents:read permission - Use canonical PyPI project URL format (pypi.org/project/...) - Add contents: read permission required by actions/checkout@v5 Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/deploy.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 20abaa3..55a40c8 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -10,9 +10,10 @@ jobs: environment: name: pypi - url: https://pypi.org/p/email-normalize + url: https://pypi.org/project/email-normalize/ permissions: + contents: read id-token: write steps: