diff --git a/VERSION b/VERSION index 227cea2..4a36342 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.0 +3.0.0 diff --git a/docs/history.rst b/docs/history.rst new file mode 100644 index 0000000..7d06d81 --- /dev/null +++ b/docs/history.rst @@ -0,0 +1,27 @@ +Release History +=============== + +3.0.0 +----- +- FIXED: Multi-segment TLD normalization using tldextract library +- FIXED: Fastmail subdomain parsing for domains like .co.uk, .com.au +- FIXED: Cache entries expiring immediately if aiodns returned invalid TTL + +2.0.0 +----- +- FIXED: Remove period stripping from Yahoo domain normalization +- ADDED: googlemail.com support and consistent host lowercasing +- CHANGED: Make Normalizer no longer a singleton +- FIXED: Handle domains with no MX records + +1.0.2 +----- +- FIXED: Documentation and distribution improvements +- CHANGED: Modernized test infrastructure and Python version support +- REMOVED: Python 3.5 and 3.6 support + +1.0.0 +----- +- Initial stable release with modernized codebase +- ADDED: Optional DNS resolution and timeout handling +- FIXED: Python 2/3 compatibility issues diff --git a/docs/index.rst b/docs/index.rst index 18a45cc..ef5edee 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,6 +22,7 @@ Documentation normalizer mxrecords result + history Currently Supported Mailbox Providers ------------------------------------- diff --git a/email_normalize/__init__.py b/email_normalize/__init__.py index 825ca1c..461a891 100644 --- a/email_normalize/__init__.py +++ b/email_normalize/__init__.py @@ -13,6 +13,7 @@ import operator import time import typing +import tldextract from email import utils import aiodns @@ -154,8 +155,7 @@ async def mx_records(self, domain_part: str) -> MXRecords: mx_records, ttl = [], self.failure_ttl else: mx_records = [(r.priority, r.host) for r in records] - ttl = min(r.ttl for r in records) \ - if records else self.failure_ttl + ttl = min((r.ttl for r in records if r.ttl >= 0), default=self.failure_ttl) # Prune the 
cache if over the limit, finding least used, oldest if len(cache.keys()) >= self.cache_limit: @@ -205,10 +205,30 @@ async def normalize(self, email_address: str) -> Result: @staticmethod def _local_part_as_hostname(local_part: str, domain_part: str) -> typing.Tuple[str, str]: - domain_segments = domain_part.split('.') - if len(domain_segments) > 2: - local_part = domain_segments[0] - domain_part = '.'.join(domain_segments[1:]) + # Use tldextract to properly parse the domain + extracted = tldextract.extract(domain_part) + + # If there's a subdomain, use the first part of the subdomain as the local part + # and the rest (domain + suffix) as the domain part + if extracted.subdomain: + subdomain_parts = extracted.subdomain.split('.') + local_part = subdomain_parts[0] + + # Reconstruct domain_part: remaining subdomain parts + domain + suffix + remaining_subdomain = '.'.join(subdomain_parts[1:]) if len(subdomain_parts) > 1 else '' + domain_name = extracted.domain + suffix = extracted.suffix + + # Build the new domain part + domain_part_components = [] + if remaining_subdomain: + domain_part_components.append(remaining_subdomain) + if domain_name: + domain_part_components.append(domain_name) + if suffix: + domain_part_components.append(suffix) + + domain_part = '.'.join(domain_part_components) return local_part, domain_part @staticmethod diff --git a/setup.cfg b/setup.cfg index c329e2a..64f6dc5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,7 @@ keywords = include_package_data = True install_requires = aiodns + tldextract packages = email_normalize zip_safe = true diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 3c73b5e..0ba5377 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -70,6 +70,49 @@ def test_fastmail_local_part_as_hostname(self): address, '{}@{}'.format(local_part, domain_part), mx_records, 'Fastmail') + def test_fastmail_multi_segment_tld_no_subdomain(self): + """Test that domains with multi-segment TLDs but no 
subdomain are not modified.""" + local_part = str(uuid.uuid4()) + domain_part = '{}.co.uk'.format(uuid.uuid4()) + address = '{}@{}'.format(local_part, domain_part) + mx_records = [(10, 'in1-smtp.messagingengine.com')] + self._perform_test( + address, '{}@{}'.format(local_part, domain_part), + mx_records, 'Fastmail') + + def test_fastmail_multi_segment_tld_with_subdomain(self): + """Test that domains with multi-segment TLDs and subdomains are correctly normalized.""" + local_part = str(uuid.uuid4()) + domain_part = '{}.com.au'.format(uuid.uuid4()) + address = 'testing@{}.{}'.format(local_part, domain_part) + mx_records = [(10, 'in1-smtp.messagingengine.com')] + self._perform_test( + address, '{}@{}'.format(local_part, domain_part), + mx_records, 'Fastmail') + + def test_fastmail_complex_multi_segment_tld(self): + """Test complex case with multiple subdomains and multi-segment TLD.""" + local_part = str(uuid.uuid4()) + subdomain_part = str(uuid.uuid4()) + domain_part = '{}.org.uk'.format(uuid.uuid4()) + address = 'testing@{}.{}.{}'.format(local_part, subdomain_part, domain_part) + mx_records = [(10, 'in1-smtp.messagingengine.com')] + self._perform_test( + address, '{}@{}.{}'.format(local_part, subdomain_part, domain_part), + mx_records, 'Fastmail') + + def test_fastmail_deep_subdomain_single_tld(self): + """Test deep subdomain structure with single TLD.""" + local_part = str(uuid.uuid4()) + subdomain1 = str(uuid.uuid4()) + subdomain2 = str(uuid.uuid4()) + domain_part = '{}.com'.format(uuid.uuid4()) + address = 'testing@{}.{}.{}.{}'.format(local_part, subdomain1, subdomain2, domain_part) + mx_records = [(10, 'in1-smtp.messagingengine.com')] + self._perform_test( + address, '{}@{}.{}.{}'.format(local_part, subdomain1, subdomain2, domain_part), + mx_records, 'Fastmail') + def test_google(self): local_part = str(uuid.uuid4()).replace('-', '.') domain_part = str(uuid.uuid4())