Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.0.0
3.0.0
27 changes: 27 additions & 0 deletions docs/history.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Release History
===============

3.0.0
-----
- FIXED: Multi-segment TLD normalization using tldextract library
- FIXED: Fastmail subdomain parsing for domains like .co.uk, .com.au
- FIXED: Cache entries expiring immediately if aiodns returned invalid TTL

2.0.0
-----
- FIXED: Remove period stripping from Yahoo domain normalization
- ADDED: googlemail.com support and consistent host lowercasing
- CHANGED: Make Normalizer no longer a singleton
- FIXED: Handle domains with no MX records

1.0.2
-----
- FIXED: Documentation and distribution improvements
- CHANGED: Modernized test infrastructure and Python version support
- REMOVED: Python 3.5 and 3.6 support

1.0.0
-----
- Initial stable release with modernized codebase
- ADDED: Optional DNS resolution and timeout handling
- FIXED: Python 2/3 compatibility issues
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Documentation
normalizer
mxrecords
result
history

Currently Supported Mailbox Providers
-------------------------------------
Expand Down
32 changes: 26 additions & 6 deletions email_normalize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import operator
import time
import typing
import tldextract
from email import utils

import aiodns
Expand Down Expand Up @@ -154,8 +155,7 @@ async def mx_records(self, domain_part: str) -> MXRecords:
mx_records, ttl = [], self.failure_ttl
else:
mx_records = [(r.priority, r.host) for r in records]
ttl = min(r.ttl for r in records) \
if records else self.failure_ttl
ttl = min((r.ttl for r in records if r.ttl >= 0), default=self.failure_ttl)

# Prune the cache if over the limit, finding least used, oldest
if len(cache.keys()) >= self.cache_limit:
Expand Down Expand Up @@ -205,10 +205,30 @@ async def normalize(self, email_address: str) -> Result:
@staticmethod
def _local_part_as_hostname(local_part: str,
domain_part: str) -> typing.Tuple[str, str]:
# Handles addresses whose mailbox name is carried as the leftmost label of
# the host: returns a (local_part, domain_part) tuple in which that label
# has become the local part and has been dropped from the domain. When no
# label is moved, both arguments are returned unchanged.
#
# NOTE(review): this span is diff output with the +/- markers stripped.
# Going by the hunk counts (-205,10 +205,30), the next four lines look like
# the removed implementation. It split on '.' only, so a bare domain under
# a two-label suffix (three labels in total) was treated as if it had a
# subdomain -- TODO confirm against the repository history.
domain_segments = domain_part.split('.')
if len(domain_segments) > 2:
local_part = domain_segments[0]
domain_part = '.'.join(domain_segments[1:])
# NOTE(review): everything from here down to the final return looks like
# the added implementation that replaces the four lines above.
# Use tldextract to properly parse the domain
# The result's subdomain, domain and suffix fields are read below;
# presumably suffix is the public suffix and domain the label directly
# in front of it -- verify against the tldextract documentation.
extracted = tldextract.extract(domain_part)

# If there's a subdomain, use the first part of the subdomain as the local part
# and the rest (domain + suffix) as the domain part
if extracted.subdomain:
subdomain_parts = extracted.subdomain.split('.')
local_part = subdomain_parts[0]

# Reconstruct domain_part: remaining subdomain parts + domain + suffix
# A single-label subdomain leaves nothing behind, hence the '' fallback.
remaining_subdomain = '.'.join(subdomain_parts[1:]) if len(subdomain_parts) > 1 else ''
domain_name = extracted.domain
suffix = extracted.suffix

# Build the new domain part
# Empty components are skipped so the join below never produces a leading,
# trailing or doubled dot.
domain_part_components = []
if remaining_subdomain:
domain_part_components.append(remaining_subdomain)
if domain_name:
domain_part_components.append(domain_name)
if suffix:
domain_part_components.append(suffix)

domain_part = '.'.join(domain_part_components)
return local_part, domain_part

@staticmethod
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ keywords =
include_package_data = True
install_requires =
aiodns
tldextract
packages =
email_normalize
zip_safe = true
Expand Down
43 changes: 43 additions & 0 deletions tests/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,49 @@ def test_fastmail_local_part_as_hostname(self):
address, '{}@{}'.format(local_part, domain_part),
mx_records, 'Fastmail')

def test_fastmail_multi_segment_tld_no_subdomain(self):
    """A bare ``<name>.co.uk`` domain must pass through untouched.

    There is no label in front of the registered name, so the address
    handed in and the address expected back are the same string.
    """
    mailbox = str(uuid.uuid4())
    host = '{}.co.uk'.format(uuid.uuid4())
    unchanged = '{}@{}'.format(mailbox, host)
    fastmail_mx = [(10, 'in1-smtp.messagingengine.com')]
    self._perform_test(unchanged, unchanged, fastmail_mx, 'Fastmail')

def test_fastmail_multi_segment_tld_with_subdomain(self):
    """A label in front of ``<name>.com.au`` becomes the local part.

    The input is ``testing@<label>.<name>.com.au``; the expected result is
    ``<label>@<name>.com.au``.
    """
    host_label = str(uuid.uuid4())
    registered = '{}.com.au'.format(uuid.uuid4())
    given = 'testing@{}.{}'.format(host_label, registered)
    wanted = '{}@{}'.format(host_label, registered)
    fastmail_mx = [(10, 'in1-smtp.messagingengine.com')]
    self._perform_test(given, wanted, fastmail_mx, 'Fastmail')

def test_fastmail_complex_multi_segment_tld(self):
    """Only the leftmost of two labels in front of ``<name>.org.uk`` moves.

    The input is ``testing@<first>.<second>.<name>.org.uk``; the expected
    result is ``<first>@<second>.<name>.org.uk``.
    """
    first_label = str(uuid.uuid4())
    second_label = str(uuid.uuid4())
    registered = '{}.org.uk'.format(uuid.uuid4())
    labels = (first_label, second_label, registered)
    given = 'testing@{}.{}.{}'.format(*labels)
    wanted = '{}@{}.{}'.format(*labels)
    fastmail_mx = [(10, 'in1-smtp.messagingengine.com')]
    self._perform_test(given, wanted, fastmail_mx, 'Fastmail')

def test_fastmail_deep_subdomain_single_tld(self):
    """Only the leftmost of three labels in front of ``<name>.com`` moves.

    The input is ``testing@<first>.<sub1>.<sub2>.<name>.com``; the expected
    result is ``<first>@<sub1>.<sub2>.<name>.com``.
    """
    first_label = str(uuid.uuid4())
    inner_one = str(uuid.uuid4())
    inner_two = str(uuid.uuid4())
    registered = '{}.com'.format(uuid.uuid4())
    labels = (first_label, inner_one, inner_two, registered)
    given = 'testing@{}.{}.{}.{}'.format(*labels)
    wanted = '{}@{}.{}.{}'.format(*labels)
    fastmail_mx = [(10, 'in1-smtp.messagingengine.com')]
    self._perform_test(given, wanted, fastmail_mx, 'Fastmail')

def test_google(self):
local_part = str(uuid.uuid4()).replace('-', '.')
domain_part = str(uuid.uuid4())
Expand Down