From ebd8e67fe2e4e6f1ca9a12c251a480d5633856c3 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 10:31:19 -0400 Subject: [PATCH 01/13] Format README --- README.md | 177 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 97 insertions(+), 80 deletions(-) diff --git a/README.md b/README.md index e0cb306..acdca9b 100644 --- a/README.md +++ b/README.md @@ -17,92 +17,101 @@ cg.address('1600 Pennsylvania Avenue', city='Washington', state='DC', zip='20006 cg.addressbatch('data/addresses.csv') ``` -Use the returntype keyword to specify 'locations' or 'geographies'. 'Locations' yields structured information about the address, and 'geographies' yields information about the Census geographies. Geographies is the default. +Use the returntype keyword to specify 'locations' or 'geographies'. 'Locations' yields structured information about the address, and 'geographies' yields information about the Census geographies. Geographies is the default: + ```python +import censusgeocode as cg + cg.onelineaddress('1600 Pennsylvania Avenue, Washington, DC', returntype='locations') ``` -Queries return a CensusResult object, which is basically a Python list with an extra 'input' property, which the Census returns to tell you how they interpreted your request. +Queries return a CensusResult object, which is basically a Python list with an extra 'input' property, which the Census returns to tell you how they interpreted your request: ```python ->>> result = cg.coordinates(x=-76, y=41) ->>> result.input -{ - u'vintage': { - u'vintageName': u'Current_Current', - u'id': u'4', - u'vintageDescription': u'Current Vintage - Current Benchmark', - u'isDefault': True - }, - u'benchmark': { - u'benchmarkName': u'Public_AR_Current', - u'id': u'4', - u'isDefault': False, - u'benchmarkDescription': u'Public Address Ranges - Current Benchmark' - }, - u'location': { - u'y': 41.0, - u'x': -76.0 - } -} ->>> result -[{ - '2010 Census Blocks': [{ - 'AREALAND': 1409023, - 'AREAWATER': 0, - 'BASENAME': '1045', - 'BLKGRP': '1', - 'BLOCK': '1045', - 'CENTLAT': '+40.9957436', - 'CENTLON': '-076.0089338', - 'COUNTY': '079', - 'FUNCSTAT': 'S', - 'GEOID': '420792166001045', - 'INTPTLAT': '+40.9957436', - 'INTPTLON': '-076.0089338', - 'LSADC': 'BK', - 'LWBLKTYP': 'L', - 'MTFCC': 'G5040', - 'NAME': 'Block 1045', - 'OBJECTID': 9940449, - 'OID': 210404020212114, - 'STATE': '42', - 'SUFFIX': '', - 'TRACT': '216600' - }], - 'Census Tracts': [{ - # snip - 'NAME': 'Census Tract 2166', - 'OBJECTID': 61245, - 'OID': 20790277158250, - 'STATE': '42', - 'TRACT': '216600' - }], - 'Counties': [{ - # snip - 'NAME': 'Luzerne County', - 'OBJECTID': 866, - 'OID': 27590277115518, - 'STATE': '42' - }], - 'States': [{ - # snip - 'NAME': 'Pennsylvania', - 'REGION': '1', - 'STATE': '42', - 'STATENS': '01779798', - 'STUSAB': 'PA' - }] -}] +import censusgeocode as cg + +result = cg.coordinates(x=-76, y=41) + +print(result.input) +# { +# 'vintage': { +# 'vintageName': 'Current_Current', +# 'id': '4', +# 'vintageDescription': 'Current Vintage - Current Benchmark', +# 'isDefault': True +# }, +# 'benchmark': { +# 'benchmarkName': 'Public_AR_Current', +# 'id': '4', +# 'isDefault': False, +# 'benchmarkDescription': 'Public Address Ranges - Current Benchmark' +# }, +# 'location': { +# 'y': 41.0, +# 'x': -76.0 +# } +# } + +print(result) +# [{ +# '2010 Census Blocks': [{ +# 'AREALAND': 1409023, +# 'AREAWATER': 0, +# 'BASENAME': '1045', +# 'BLKGRP': '1', +# 'BLOCK': '1045', +# 'CENTLAT': '+40.9957436', +# 'CENTLON': '-076.0089338', +# 'COUNTY': '079', +# 'FUNCSTAT': 'S', +# 'GEOID': '420792166001045', +# 'INTPTLAT': '+40.9957436', +# 'INTPTLON': '-076.0089338', +# 'LSADC': 'BK', +# 'LWBLKTYP': 'L', +# 'MTFCC': 'G5040', +# 'NAME': 'Block 1045', +# 'OBJECTID': 9940449, +# 'OID': 210404020212114, +# 'STATE': '42', +# 'SUFFIX': '', +# 'TRACT': '216600' +# }], +# 'Census Tracts': [{ +# # snip +# 'NAME': 'Census Tract 2166', +# 'OBJECTID': 61245, +# 'OID': 20790277158250, +# 'STATE': '42', +# 'TRACT': '216600' +# }], +# 'Counties': [{ +# # snip +# 'NAME': 'Luzerne County', +# 'OBJECTID': 866, +# 'OID': 27590277115518, +# 'STATE': '42' +# }], +# 'States': [{ +# # snip +# 'NAME': 'Pennsylvania', +# 'REGION': '1', +# 'STATE': '42', +# 'STATENS': '01779798', +# 'STUSAB': 'PA' +# }] +# }] ``` ## Advanced By default, the geocoder uses the "Current" vintage and benchmarks. To use another vintage or benchmark, use the `CensusGeocode` class: + ````python from censusgeocode import CensusGeocode + cg = CensusGeocode(benchmark='Public_AR_Current', vintage='Census2020_Current') -cg.onelineaddress(foobar) +cg.onelineaddress("foobar") ```` The Census may update the available benchmarks and vintages. Review the Census Geocoder docs for the currently available [benchmarks](https://geocoding.geo.census.gov/geocoder/benchmarks) and [vintages](https://geocoding.geo.census.gov/geocoder/vintages?form). @@ -111,7 +120,8 @@ The Census may update the available benchmarks and vintages. Review the Census G The `censusgeocode` tool has two settings. -At the simplest, it takes one argument, an address, and returns a comma-delimited longitude, latitude pair. +At the simplest, it takes one argument, an address, and returns a comma-delimited longitude, latitude pair: + ````bash censusgeocode '100 Fifth Avenue, New York, NY' -73.992195,40.73797 @@ -120,31 +130,37 @@ censusgeocode '1600 Pennsylvania Avenue, Washington DC' -77.03535,38.898754 ```` -The Census geocoder is reasonably good at recognizing non-standard addresses. +The Census geocoder is reasonably good at recognizing non-standard addresses: + ````bash censusgeocode 'Hollywood & Vine, LA, CA' -118.32668,34.101624 ```` It can also use the Census Geocoder's batch function to process an entire file. The file must be comma-delimited, have no header, and include the following columns: -```` + +````csv unique id, street address, state, city, zip code ```` The geocoder can read from a file: -```` + +```bash censusgeocode --csv tests/fixtures/batch.csv -```` +``` + ([example file](https://github.com/fitnr/censusgeocode/blob/master/tests/fixtures/batch.csv)) Or from stdin, using `-` as the filename: -```` + +```bash head tests/fixtures/batch.csv | censusgeocode --csv - -```` +``` According to the Census docs, the batch geocoder is limited to 10,000 rows. The output will be a CSV file (with a header) and the columns: + * id * address * match @@ -156,7 +172,8 @@ The output will be a CSV file (with a header) and the columns: * lon If your data doesn't have a unique id, try adding line numbers with the Unix command line utility `nl`: -``` + +```bash nl -s , input.csv | censusgeocode --csv - > output.csv ``` From 39b66bebccd06bb98f53f49c6d4a0dd1f3bc6c06 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 10:33:55 -0400 Subject: [PATCH 02/13] update copyright dates --- .github/workflows/publish.yml | 2 +- .github/workflows/test.yml | 2 +- .gitignore | 2 +- Makefile | 2 +- setup.py | 2 +- src/censusgeocode/__init__.py | 6 ++--- src/censusgeocode/__main__.py | 36 +++++++++++++----------------- src/censusgeocode/censusgeocode.py | 26 +++++++++------------ tests/__init__.py | 10 --------- tests/test_censusgeocode.py | 4 ++-- 10 files changed, 34 insertions(+), 58 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index ccdf8df..2e0d07b 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -3,7 +3,7 @@ # Licensed under the General Public License (version 3) # http://opensource.org/licenses/LGPL-3.0 -# Copyright (c) 2015-7, Neil Freeman +# Copyright (c) 2015-2026, Neil Freeman name: Publish to PyPi on: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 31a3f57..b7b141c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,7 +3,7 @@ # Licensed under the General Public License (version 3) # http://opensource.org/licenses/LGPL-3.0 -# Copyright (c) 2015-7, Neil Freeman +# Copyright (c) 2015-2026, Neil Freeman name: Test package diff --git a/.gitignore b/.gitignore index 495684d..bdd9204 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ # Licensed under the General Public License (version 3) # http://opensource.org/licenses/LGPL-3.0 -# Copyright (c) 2015-7, Neil Freeman +# Copyright (c) 2015-2026, Neil Freeman # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/Makefile b/Makefile index 45364b5..186ca22 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ # Licensed under the General Public License (version 3) # http://opensource.org/licenses/LGPL-3.0 -# Copyright (c) 2015-9, Neil Freeman +# Copyright (c) 2015-2026, Neil Freeman .PHONY: install build upload clean deploy test diff --git a/setup.py b/setup.py index 211a2af..2f00cd0 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ # Licensed under the General Public License (version 3) # http://opensource.org/licenses/LGPL-3.0 -# Copyright (c) 2015-9, Neil Freeman +# Copyright (c) 2015-2026, Neil Freeman from setuptools import setup diff --git a/src/censusgeocode/__init__.py b/src/censusgeocode/__init__.py index d30a972..855065c 100644 --- a/src/censusgeocode/__init__.py +++ b/src/censusgeocode/__init__.py @@ -1,15 +1,13 @@ -# -*- coding: utf-8 -*- - # This file is part of censusgeocode. # https://github.com/fitnr/censusgeocode # Licensed under the General Public License (version 3) # http://opensource.org/licenses/LGPL-3.0 -# Copyright (c) 2015-9, Neil Freeman +# Copyright (c) 2015-2026, Neil Freeman from .censusgeocode import CensusGeocode -__version__ = '0.5.3' +__version__ = "0.5.3" cg = CensusGeocode() diff --git a/src/censusgeocode/__main__.py b/src/censusgeocode/__main__.py index 6db5700..28667d7 100644 --- a/src/censusgeocode/__main__.py +++ b/src/censusgeocode/__main__.py @@ -1,28 +1,22 @@ -# Copyright (C) 2015-7 Neil Freeman +"""Command-line interface for censusgeocode""" -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# This file is part of censusgeocode. +# https://github.com/fitnr/censusgeocode -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# Licensed under the General Public License (version 3) +# http://opensource.org/licenses/LGPL-3.0 +# Copyright (c) 2015-2026, Neil Freeman -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -"""Command-line interface for censusgeocode""" import argparse import csv import io import sys from . import __version__ -from .censusgeocode import DEFAULT_BENCHMARK, DEFAULT_VINTAGE, CensusGeocode +from .censusgeocode import DEFAULT_BENCHMARK, DEFAULT_VINTAGE, CensusGeocode, DEFAULT_TIMEOUT -def main(): +def main() -> None: """Command-line interface for censusgeocode""" parser = argparse.ArgumentParser("censusgeocode", description="Command-line interface for the Census Geocoding API") @@ -59,21 +53,21 @@ def main(): "--timeout", metavar="SECONDS", type=int, - default=12, - help="Request timeout [default: 12]", + default=DEFAULT_TIMEOUT, + help=f"Request timeout [default: {DEFAULT_TIMEOUT}]", ) args = parser.parse_args() cg = CensusGeocode(benchmark=args.benchmark, vintage=args.vintage) if args.address: - result = cg.onelineaddress(args.address, returntype=args.rettype, timeout=args.timeout) + search_result = cg.onelineaddress(args.address, returntype=args.rettype, timeout=args.timeout) try: - print("{},{}".format(result[0]["coordinates"]["x"], result[0]["coordinates"]["y"])) + print("{},{}".format(search_result[0]["coordinates"]["x"], search_result[0]["coordinates"]["y"])) except IndexError: - print("Address not found: {}".format(args.address), file=sys.stderr) + print(f"Address not found: {args.address}", file=sys.stderr) sys.exit(1) elif args.csv: @@ -86,13 +80,13 @@ def main(): else: infile = args.csv - result = cg.addressbatch(infile, returntype=args.rettype, timeout=args.timeout) + csv_result = cg.addressbatch(infile, returntype=args.rettype, timeout=args.timeout) fieldnames = cg.batchfields[args.rettype] + ["lat", "lon"] fieldnames.pop(fieldnames.index("coordinate")) writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames) writer.writeheader() - writer.writerows(result) + writer.writerows(csv_result) else: print("Address or csv file required", file=sys.stderr) diff --git a/src/censusgeocode/censusgeocode.py b/src/censusgeocode/censusgeocode.py index 3db60a3..dc60eb7 100644 --- a/src/censusgeocode/censusgeocode.py +++ b/src/censusgeocode/censusgeocode.py @@ -1,22 +1,16 @@ -# Copyright (C) 2015-9 Neil Freeman - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . """ -Census Geocoder wrapper +Census Geocoder wrapper. For details on the API, see: -http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf +https://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf """ + +# This file is part of censusgeocode. +# https://github.com/fitnr/censusgeocode + +# Licensed under the General Public License (version 3) +# http://opensource.org/licenses/LGPL-3.0 +# Copyright (c) 2015-2026, Neil Freeman + import csv import io import warnings diff --git a/tests/__init__.py b/tests/__init__.py index 4e2409f..e69de29 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- - -# This file is part of censusgeocode. -# https://github.com/fitnr/censusgeocode - -# Licensed under the General Public License (version 3) -# http://opensource.org/licenses/LGPL-3.0 -# Copyright (c) 2015-7, Neil Freeman - -from . import test_censusgeocode diff --git a/tests/test_censusgeocode.py b/tests/test_censusgeocode.py index 51cee20..1e2824c 100644 --- a/tests/test_censusgeocode.py +++ b/tests/test_censusgeocode.py @@ -1,11 +1,11 @@ -# -*- coding: utf-8 -*- """Tests for censusgeocode""" + # This file is part of censusgeocode. # https://github.com/fitnr/censusgeocode # Licensed under the General Public License (version 3) # http://opensource.org/licenses/LGPL-3.0 -# Copyright (c) 2015-7, Neil Freeman +# Copyright (c) 2015-2026, Neil Freeman import unittest import vcr From 94f94c47e3edc80e2ae5c1140324647462ced6f5 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 10:36:06 -0400 Subject: [PATCH 03/13] update test suite (formatting, pytest style, exception chaining) --- tests/conftest.py | 23 ++++ tests/test_censusgeocode.py | 207 +++++++++++++++++++----------------- 2 files changed, 132 insertions(+), 98 deletions(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..b5ec517 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,23 @@ +"""Fixtures for censusgeocode.""" + +# This file is part of censusgeocode. +# https://github.com/fitnr/censusgeocode + +# Licensed under the General Public License (version 3) +# http://opensource.org/licenses/LGPL-3.0 +# Copyright (c) 2015-2026, Neil Freeman + +import pytest +from censusgeocode import CensusGeocode + + +@pytest.fixture +def cg(): + """Provides an initialized CensusGeocode instance.""" + return CensusGeocode() + + +@pytest.fixture +def batch_path(request): + """Provides either a string or a Path object for batch testing.""" + return request.param diff --git a/tests/test_censusgeocode.py b/tests/test_censusgeocode.py index 1e2824c..247305e 100644 --- a/tests/test_censusgeocode.py +++ b/tests/test_censusgeocode.py @@ -1,5 +1,7 @@ """Tests for censusgeocode""" +import warnings + # This file is part of censusgeocode. # https://github.com/fitnr/censusgeocode @@ -7,103 +9,112 @@ # http://opensource.org/licenses/LGPL-3.0 # Copyright (c) 2015-2026, Neil Freeman -import unittest +import pytest import vcr -import warnings - +from pathlib import Path from censusgeocode import CensusGeocode -from censusgeocode.censusgeocode import AddressResult, GeographyResult - - -class CensusGeoCodeTestCase(unittest.TestCase): - - cg = None - - def setUp(self): - self.cg = CensusGeocode() - - @vcr.use_cassette('tests/fixtures/coordinates.yaml') - def test_returns_geo(self): - results = self.cg.coordinates(-74, 43, returntype='geographies') - assert isinstance(results, GeographyResult) - assert results.input - - @vcr.use_cassette('tests/fixtures/coordinates.yaml') - def test_coords(self): - results = self.cg.coordinates(-74, 43) - assert results['Counties'][0]['BASENAME'] == 'Saratoga' - assert results['Counties'][0]['GEOID'] == '36091' - assert results['Census Tracts'][0]['BASENAME'] == "615" - - def test_url(self): - r = self.cg._geturl('coordinates', 'geographies') - assert r == 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates' - - @vcr.use_cassette('tests/fixtures/address-geographies.yaml') - def test_address_zipcode(self): - results = self.cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500') - assert results[0] - assert results[0]['geographies']['Counties'][0]['BASENAME'] == 'District of Columbia' - - @vcr.use_cassette('tests/fixtures/address-geographies.yaml') - def test_address_zip(self): - results = self.cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zip='20500') - assert results[0] - assert results[0]['geographies']['Counties'][0]['BASENAME'] == 'District of Columbia' - - @vcr.use_cassette('tests/fixtures/onelineaddress.yaml') - def test_onelineaddress(self): - results = self.cg.onelineaddress('1600 Pennsylvania Avenue NW, Washington, DC, 20500', layers='all') - assert results[0] - try: - assert results[0]['geographies']['Counties'][0]['BASENAME'] == 'District of Columbia' - except AssertionError: - print(results[0]['geographies']['Counties'][0]) - raise - - assert 'Metropolitan Divisions' in results[0]['geographies'].keys() - assert 'Alaska Native Village Statistical Areas' in results[0]['geographies'].keys() - - @vcr.use_cassette('tests/fixtures/address-locations.yaml') - def test_address_return_type(self): - results = self.cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500', returntype='locations') - - assert results[0]['matchedAddress'].upper() == '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502' - assert results[0]['addressComponents']['streetName'] == 'PENNSYLVANIA' - - @vcr.use_cassette('tests/fixtures/test_benchmark_vintage.yaml') - def test_benchmark_vintage(self): - """Initializing CensuGeocode with benchmark and vintage keywords works""" - bmark, vint = 'Public_AR_Census2020', 'Census2020_Current' - - cg = CensusGeocode(benchmark=bmark, vintage=vint) - result = cg.address('1600 Pennsylvania Avenue NW', city='Washington', state='DC', zipcode='20500', returntype='geographies') - - self.assertEqual(result.input['benchmark']['benchmarkName'], bmark) - self.assertEqual(result.input['vintage']['vintageName'], vint) - self.assertEqual(result[0]['geographies']['Census Tracts'][0]['GEOID'], '11001006202') - - @vcr.use_cassette('tests/fixtures/address-batch.yaml') - def test_addressbatch(self): - """batch() function works""" - result = self.cg.addressbatch('tests/fixtures/batch.csv', returntype='locations') - assert isinstance(result, list) - resultdict = {int(r['id']): r for r in result} - assert resultdict[3]['parsed'] == '3 GRAMERCY PARK W, NEW YORK, NY, 10003' - assert resultdict[2]['match'] is False - - result = self.cg.addressbatch('tests/fixtures/batch.csv', returntype='geographies') - assert isinstance(result, list) - resultdict = {int(r['id']): r for r in result} - assert resultdict[3]['tigerlineid'] == '59653655' - assert resultdict[3]['statefp'] == '36' - assert resultdict[2]['match'] is False - - def test_warning10k(self): - """Sending more than 10,000 records to batch raises a warning""" - warnings.simplefilter("error") - data = ({} for _ in range(10001)) - result = [] - with self.assertRaises(UserWarning, msg="Get a warning when sending more than 10k rows to batch()"): - result = self.cg.addressbatch(data) - self.assertEqual(result, [], "Result is empty") +from censusgeocode.censusgeocode import GeographyResult + + +@vcr.use_cassette("tests/fixtures/coordinates.yaml") +def test_returns_geo(cg): + results = cg.coordinates(-74, 43, returntype="geographies") + assert isinstance(results, GeographyResult) + assert results.input + + +@vcr.use_cassette("tests/fixtures/coordinates.yaml") +def test_coords(cg): + results = cg.coordinates(-74, 43) + assert results["Counties"][0]["BASENAME"] == "Saratoga" + assert results["Counties"][0]["GEOID"] == "36091" + assert results["Census Tracts"][0]["BASENAME"] == "615" + + +def test_url(cg): + r = cg._geturl("coordinates", "geographies") + assert r == "https://geocoding.geo.census.gov/geocoder/geographies/coordinates" + + +@vcr.use_cassette("tests/fixtures/address-geographies.yaml") +def test_address_zipcode(cg): + results = cg.address("1600 Pennsylvania Avenue NW", city="Washington", state="DC", zipcode="20500") + assert results[0] + assert results[0]["geographies"]["Counties"][0]["BASENAME"] == "District of Columbia" + + +@vcr.use_cassette("tests/fixtures/address-geographies.yaml") +def test_address_zip(cg): + results = cg.address("1600 Pennsylvania Avenue NW", city="Washington", state="DC", zip="20500") + assert results[0] + assert results[0]["geographies"]["Counties"][0]["BASENAME"] == "District of Columbia" + + +@vcr.use_cassette("tests/fixtures/onelineaddress.yaml") +def test_onelineaddress(cg): + results = cg.onelineaddress("1600 Pennsylvania Avenue NW, Washington, DC, 20500", layers="all") + assert results[0] + + assert results[0]["geographies"]["Counties"][0]["BASENAME"] == "District of Columbia" + assert "Metropolitan Divisions" in results[0]["geographies"].keys() + assert "Alaska Native Village Statistical Areas" in results[0]["geographies"].keys() + + +@vcr.use_cassette("tests/fixtures/address-locations.yaml") +def test_address_return_type(cg): + results = cg.address( + "1600 Pennsylvania Avenue NW", + city="Washington", + state="DC", + zipcode="20500", + returntype="locations", + ) + assert results[0]["matchedAddress"].upper() == "1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502" + assert results[0]["addressComponents"]["streetName"] == "PENNSYLVANIA" + + +@vcr.use_cassette("tests/fixtures/test_benchmark_vintage.yaml") +def test_benchmark_vintage(): + """Tests custom initialization logic independently of the default fixture.""" + bmark, vint = "Public_AR_Census2020", "Census2020_Current" + cg_custom = CensusGeocode(benchmark=bmark, vintage=vint) + result = cg_custom.address( + "1600 Pennsylvania Avenue NW", + city="Washington", + state="DC", + zipcode="20500", + returntype="geographies", + ) + assert result.input["benchmark"]["benchmarkName"] == bmark + assert result.input["vintage"]["vintageName"] == vint + assert result[0]["geographies"]["Census Tracts"][0]["GEOID"] == "11001006202" + + +@vcr.use_cassette("tests/fixtures/address-batch.yaml") +@pytest.mark.parametrize( + "batch_input", + ["tests/fixtures/batch.csv", Path("tests/fixtures/batch.csv")], + ids=["string", "pathlib.Path"], +) +def test_addressbatch(cg, batch_input): + """batch() function works with varied input types.""" + result = cg.addressbatch(batch_input, returntype="locations") + assert isinstance(result, list) + resultdict = {int(r["id"]): r for r in result} + assert resultdict[3]["parsed"] == "3 GRAMERCY PARK W, NEW YORK, NY, 10003" + assert resultdict[2]["match"] is False + + result_geo = cg.addressbatch(batch_input, returntype="geographies") + assert isinstance(result_geo, list) + resultdict_geo = {int(r["id"]): r for r in result_geo} + assert resultdict_geo[3]["tigerlineid"] == "59653655" + assert resultdict_geo[3]["statefp"] == "36" + + +def test_warning10k(cg): + """Sending more than 10,000 records to batch raises a warning.""" + warnings.simplefilter("error") + result = [] + with pytest.raises(UserWarning, match="Sending more than 10,000 records"): + result = cg.addressbatch({} for _ in range(10001)) + assert result == [] From 8817d83a1d11c2ca11ad82e5da7fee253ed5ab27 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 11:09:38 -0400 Subject: [PATCH 04/13] modernize codebase (typing, default arg values, explicit keyword args, improved error formatting, don't close open file) --- src/censusgeocode/__main__.py | 19 ++- src/censusgeocode/censusgeocode.py | 194 ++++++++++++++++++----------- 2 files changed, 136 insertions(+), 77 deletions(-) diff --git a/src/censusgeocode/__main__.py b/src/censusgeocode/__main__.py index 28667d7..8288d46 100644 --- a/src/censusgeocode/__main__.py +++ b/src/censusgeocode/__main__.py @@ -13,12 +13,20 @@ import sys from . import __version__ -from .censusgeocode import DEFAULT_BENCHMARK, DEFAULT_VINTAGE, CensusGeocode, DEFAULT_TIMEOUT +from .censusgeocode import ( + DEFAULT_BENCHMARK, + DEFAULT_VINTAGE, + CensusGeocode, + DEFAULT_TIMEOUT, +) def main() -> None: """Command-line interface for censusgeocode""" - parser = argparse.ArgumentParser("censusgeocode", description="Command-line interface for the Census Geocoding API") + parser = argparse.ArgumentParser( + "censusgeocode", + description="Command-line interface for the Census Geocoding API", + ) parser.add_argument("-v", "--version", action="version", version="%(prog)s v" + __version__) parser.add_argument("address", type=str, nargs="?", default=None) @@ -64,7 +72,12 @@ def main() -> None: search_result = cg.onelineaddress(args.address, returntype=args.rettype, timeout=args.timeout) try: - print("{},{}".format(search_result[0]["coordinates"]["x"], search_result[0]["coordinates"]["y"])) + print( + "{},{}".format( + search_result[0]["coordinates"]["x"], + search_result[0]["coordinates"]["y"], + ) + ) except IndexError: print(f"Address not found: {args.address}", file=sys.stderr) diff --git a/src/censusgeocode/censusgeocode.py b/src/censusgeocode/censusgeocode.py index dc60eb7..4aa05f3 100644 --- a/src/censusgeocode/censusgeocode.py +++ b/src/censusgeocode/censusgeocode.py @@ -14,21 +14,25 @@ import csv import io import warnings +from pathlib import Path +from typing import Dict, List, Literal, Optional, TextIO, Union, Any, Iterable import requests -from requests.exceptions import RequestException from requests_toolbelt.multipart.encoder import MultipartEncoder +SearchType = Literal["onelineaddress", "address", "addressPR", "addressbatch", "coordinates"] +ReturnType = Literal["geographies", "locations"] +ResultType = Dict[str, Union[str, int, float, list]] DEFAULT_BENCHMARK = "Public_AR_Current" DEFAULT_VINTAGE = "Current_Current" +DEFAULT_TIMEOUT = 12 class CensusGeocode: """Fetch results from the Census Geocoder""" _url = "https://geocoding.geo.census.gov/geocoder/{returntype}/{searchtype}" - returntypes = ["geographies", "locations"] batchfields = { "locations": [ @@ -57,7 +61,7 @@ class CensusGeocode: ], } - def __init__(self, benchmark=None, vintage=None): + def __init__(self, benchmark: str = DEFAULT_BENCHMARK, vintage: str = DEFAULT_VINTAGE): """ Arguments: benchmark (str): A name that references the version of the locator to use. @@ -65,17 +69,27 @@ def __init__(self, benchmark=None, vintage=None): vintage (str): The geography part of the desired vintage. See: https://geocoding.geo.census.gov/geocoder/vintages?form - >>> CensusGeocode(benchmark='Public_AR_Current', vintage='Current_Current') + >>> CensusGeocode(benchmark="Public_AR_Current", vintage="Current_Current") """ - self._benchmark = benchmark or DEFAULT_BENCHMARK - self._vintage = vintage or DEFAULT_VINTAGE + self._benchmark = benchmark + self._vintage = vintage - def _geturl(self, searchtype, returntype=None): + def _geturl(self, searchtype: SearchType, returntype: Optional[ReturnType] = "geographies") -> str: """Construct an URL for the geocoder.""" - returntype = returntype or self.returntypes[0] return self._url.format(returntype=returntype, searchtype=searchtype) - def _fetch(self, searchtype, fields, **kwargs): + def _fetch( + self, + searchtype: SearchType, + fields: Dict[ + Literal["vintage", "benchmark", "layers", "format", "x", "y", "address", "street", "city", "state", "zip"], + Optional[Union[str, float]], + ], + *, + returntype: Optional[ReturnType] = "geographies", + timeout: Optional[int] = DEFAULT_TIMEOUT, + **kwargs, + ) -> Union["AddressResult", "GeographyResult"]: """Fetch a response from the Geocoding API.""" fields["vintage"] = self.vintage fields["benchmark"] = self.benchmark @@ -85,11 +99,10 @@ def _fetch(self, searchtype, fields, **kwargs): if "layers" in kwargs: fields["layers"] = kwargs["layers"] - returntype = kwargs.get("returntype", "geographies") - url = self._geturl(searchtype, returntype) + url = self._geturl(searchtype=searchtype, returntype=returntype) try: - with requests.get(url, params=fields, timeout=kwargs.get("timeout")) as r: + with requests.get(url, params=fields, timeout=timeout) as r: content = r.json() if "addressMatches" in content.get("result", {}): return AddressResult(content) @@ -97,70 +110,89 @@ def _fetch(self, searchtype, fields, **kwargs): if "geographies" in content.get("result", {}): return GeographyResult(content) - raise ValueError() + raise ValueError - except (ValueError, KeyError): - raise ValueError("Unable to parse response from Census") + except (ValueError, KeyError) as e: + err_msg = "Unable to parse response from Census" + raise ValueError(err_msg) from e - except RequestException as err: - raise err - - def coordinates(self, x, y, **kwargs): + def coordinates( + self, x: float, y: float, *, returntype: Optional[ReturnType] = "geographies", **kwargs + ) -> Union["AddressResult", "GeographyResult"]: """Geocode a (lon, lat) coordinate.""" - kwargs["returntype"] = "geographies" - fields = {"x": x, "y": y} - - return self._fetch("coordinates", fields, **kwargs) - - def address(self, street, city=None, state=None, **kwargs): + fields: Dict[ + Literal["vintage", "benchmark", "layers", "format", "x", "y", "address", "street", "city", "state", "zip"], + Optional[Union[str, float]], + ] = {"x": x, "y": y} + + return self._fetch("coordinates", fields=fields, returntype=returntype, **kwargs) + + def address( + self, + street: str, + city: Optional[str] = None, + state: Optional[str] = None, + *, + zip: Optional[str] = None, + zipcode: Optional[str] = None, + timeout: Optional[int] = DEFAULT_TIMEOUT, + **kwargs, + ) -> Union["AddressResult", "GeographyResult"]: """Geocode an address.""" - fields = { + fields: Dict[ + Literal["vintage", "benchmark", "layers", "format", "x", "y", "address", "street", "city", "state", "zip"], + Optional[Union[str, float]], + ] = { "street": street, "city": city, "state": state, - "zip": kwargs.get('zip') or kwargs.get('zipcode'), + "zip": zip or zipcode, } - return self._fetch("address", fields, **kwargs) + return self._fetch(searchtype="address", fields=fields, timeout=timeout, **kwargs) - def onelineaddress(self, address, **kwargs): + def onelineaddress(self, address: str, **kwargs) -> Union["AddressResult", "GeographyResult"]: """Geocode an an address passed as one string. e.g. "4600 Silver Hill Rd, Suitland, MD 20746" """ - fields = { + fields: Dict[ + Literal["vintage", "benchmark", "layers", "format", "x", "y", "address", "street", "city", "state", "zip"], + Optional[Union[str, float]], + ] = { "address": address, } - return self._fetch("onelineaddress", fields, **kwargs) + return self._fetch(searchtype="onelineaddress", fields=fields, **kwargs) - def set_benchmark(self, benchmark): + def set_benchmark(self, benchmark: str) -> None: """Set the Census Geocoding API benchmark the class will use. See: https://geocoding.geo.census.gov/geocoder/vintages?form""" self._benchmark = benchmark @property - def benchmark(self): + def benchmark(self) -> str: """Give the Census Geocoding API benchmark the class is using. See: https://geocoding.geo.census.gov/geocoder/benchmarks""" - return getattr(self, "_benchmark") + return self._benchmark - def set_vintage(self, vintage): + def set_vintage(self, vintage: str) -> None: """Set the Census Geocoding API vintage the class will use. See: https://geocoding.geo.census.gov/geocoder/vintages?form""" self._vintage = vintage @property - def vintage(self): + def vintage(self) -> str: """Give the Census Geocoding API vintage the class is using. See: https://geocoding.geo.census.gov/geocoder/vintages?form""" - return getattr(self, "_vintage") + return self._vintage - def _parse_batch_result(self, data, returntype): + def _parse_batch_result(self, data: str, returntype: ReturnType) -> List[ResultType]: """Parse the batch address results returned from the Census Geocoding API""" try: fieldnames = self.batchfields[returntype] - except KeyError as err: - raise ValueError("unknown returntype: {}".format(returntype)) from err + except KeyError as e: + err_msg = f"unknown returntype: {returntype}" + raise ValueError(err_msg) from e def parse(row): row["lat"], row["lon"] = None, None @@ -168,7 +200,7 @@ def parse(row): if row["coordinate"]: try: row["lon"], row["lat"] = tuple(float(a) for a in row["coordinate"].split(",")) - except: + except ValueError: pass del row["coordinate"] @@ -180,26 +212,37 @@ def parse(row): reader = csv.DictReader(f, fieldnames=fieldnames) return [parse(row) for row in reader] - def _post_batch(self, data=None, f=None, **kwargs): + def _post_batch( + self, + data: Optional[Iterable[Dict[str, Any]]] = None, + f: Optional[Union[io.IOBase, TextIO]] = None, + *, + leave_open: bool = False, + returntype: ReturnType = "geographies", + timeout: Optional[int] = DEFAULT_TIMEOUT, + **kwargs, + ) -> List[ResultType]: """Send batch address file to the Census Geocoding API""" - returntype = kwargs.get("returntype", "geographies") - url = self._geturl("addressbatch", returntype) + url = self._geturl(searchtype="addressbatch", returntype=returntype) + + if data is None and f is None: + err_msg = "Need either data or a file for CensusGeocode.addressbatch" + raise ValueError(err_msg) if data: - # For Python 3, compile data into a StringIO f = io.StringIO() writer = csv.DictWriter(f, fieldnames=["id", "street", "city", "state", "zip"]) for i, row in enumerate(data, 1): row.setdefault("id", i) writer.writerow(row) if i == 10001: - warnings.warn("Sending more than 10,000 records, the upper limit for the Census Geocoder. Request will likely fail") + warnings.warn( + "Sending more than 10,000 records, the upper limit for the Census Geocoder." + "Request will likely fail." + ) f.seek(0) - elif f is None: - raise ValueError("Need either data or a file for CensusGeocode.addressbatch") - try: form = MultipartEncoder( fields={ @@ -210,46 +253,50 @@ def _post_batch(self, data=None, f=None, **kwargs): ) headers = {"Content-Type": form.content_type} - with requests.post(url, data=form, timeout=kwargs.get("timeout"), headers=headers) as r: + with requests.post(url, data=form, timeout=timeout, headers=headers) as r: # return as list of dicts return self._parse_batch_result(r.text, returntype) - except RequestException as err: - raise err - finally: - f.close() + if f and not leave_open: + f.close() - def addressbatch(self, data, **kwargs): + def addressbatch( + self, data: Union[TextIO, str, Path, Iterable[Dict[str, Any]]], *, timeout: Optional[int] = None, **kwargs + ) -> List[ResultType]: """ Send either a CSV file or data to the addressbatch API. According to the Census, "there is currently an upper limit of 10,000 records per batch file." - If a file, can either be a file-like with a `read()` method, or a `str` that's a path to the - file. Either way, it must have no header and have fields id,street,city,state,zip + * If a file, can either be a file-like with a `read()` method, or a `Path` object or + `str` that's a path to the file. Either way, it must have no header and have fields + id, street, city, state, and zip. - If data, should be an iterable of dicts with the above fields (although ID is optional). + * If data, should be an iterable of dicts with the above fields (although ID is optional). """ - # Does data quack like a file handle? - if hasattr(data, "read"): - return self._post_batch(f=data, **kwargs) + if isinstance(data, (io.IOBase, TextIO)): + return self._post_batch(f=data, leave_open=True, timeout=timeout, **kwargs) - # If it is a string, assume it's a filename - if isinstance(data, str): - with open(data, "rb") as f: - return self._post_batch(f=f, **kwargs) + if isinstance(data, (str, Path)): + if isinstance(data, str) and not Path(data).exists(): + raise FileNotFoundError("File not found at path {str}") + f = open(data, "rb") + return self._post_batch(f=f, leave_open=False, timeout=timeout, **kwargs) - # Otherwise, assume an iterable of dicts - return self._post_batch(data=data, **kwargs) + if isinstance(data, Iterable): + return self._post_batch(data=data, leave_open=False, timeout=timeout, **kwargs) + raise TypeError( + f"Expected a file-like object, a path object or string, or a list of dicts; got {type(data).__name__}" + ) -class GeographyResult(dict): +class GeographyResult(Dict): """Wrapper for geography objects returned by the Census Geocoding API""" - def __init__(self, data): - self.input = data["result"].get("input", {}) + def __init__(self, data: Dict[str, Any]) -> None: + self.input: Union[str, int, float, List, Dict] = data["result"].get("input", {}) super().__init__(data["result"]["geographies"]) # create float coordinate tuples @@ -266,10 +313,9 @@ def __init__(self, data): geo["INTPT"] = () -class AddressResult(list): - +class AddressResult(List): """Wrapper for address objects returned by the Census Geocoding API""" - def __init__(self, data): - self.input = data["result"].get("input", {}) + def __init__(self, data: Dict[str, Any]) -> None: + self.input: Union[str, int, float, List, Dict] = data["result"].get("input", {}) super().__init__(data["result"]["addressMatches"]) From 95f246e75ee271a130bec611e50a6922bb4148f3 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 11:17:02 -0400 Subject: [PATCH 05/13] improve file not found error logic + message --- src/censusgeocode/censusgeocode.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/censusgeocode/censusgeocode.py b/src/censusgeocode/censusgeocode.py index 4aa05f3..2e036d6 100644 --- a/src/censusgeocode/censusgeocode.py +++ b/src/censusgeocode/censusgeocode.py @@ -279,10 +279,11 @@ def addressbatch( return self._post_batch(f=data, leave_open=True, timeout=timeout, **kwargs) if isinstance(data, (str, Path)): - if isinstance(data, str) and not Path(data).exists(): - raise FileNotFoundError("File not found at path {str}") - f = open(data, "rb") - return self._post_batch(f=f, leave_open=False, timeout=timeout, **kwargs) + if not Path(data).exists(): + err_msg = f"File not found at path {data}" + raise FileNotFoundError(err_msg) + data_file = open(data, "rb") + return self._post_batch(f=data_file, leave_open=False, timeout=timeout, **kwargs) if isinstance(data, Iterable): return self._post_batch(data=data, leave_open=False, timeout=timeout, **kwargs) From bb62c06bcb9f276308d55d6b4ee3052b16c9b699 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 11:23:22 -0400 Subject: [PATCH 06/13] add pytest requirement --- setup.cfg | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index c97dde7..e74b040 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,8 +36,12 @@ install_requires = python_requires = >=3.8,<4 [options.extras_require] -test = vcrpy>=4.1 -tests = vcrpy>=4.1 +test = + pytest>=6 + vcrpy>=4.1 +tests = + pytest>=6 + vcrpy>=4.1 [options.packages.find] where = src From 840905c29e4c14e2260c73884b24b73b09b87820 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 11:23:48 -0400 Subject: [PATCH 07/13] add test for addressbatch file not found --- tests/test_censusgeocode.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_censusgeocode.py b/tests/test_censusgeocode.py index 247305e..7747310 100644 --- a/tests/test_censusgeocode.py +++ b/tests/test_censusgeocode.py @@ -1,5 +1,5 @@ """Tests for censusgeocode""" - +import re import warnings # This file is part of censusgeocode. @@ -111,6 +111,19 @@ def test_addressbatch(cg, batch_input): assert resultdict_geo[3]["statefp"] == "36" +@pytest.mark.parametrize( + "bad_file", + ["tests/fixtures/nonexistent.csv", Path("tests/fixtures/nonexistent.csv")], + ids=["string", "pathlib.Path"], +) +def test_addressbatch_file_not_found(cg, bad_file): + """batch() function raises error when file not found.""" + with pytest.raises(FileNotFoundError, match=re.escape(f"File not found at path {bad_file}")): + cg.addressbatch(bad_file, returntype="locations") + + with pytest.raises(FileNotFoundError, match=re.escape(f"File not found at path {bad_file}")): + cg.addressbatch(bad_file, returntype="geographies") + def test_warning10k(cg): """Sending more than 10,000 records to batch raises a warning.""" warnings.simplefilter("error") From 1d69399bdf5cd0c64d3f66472b2a3468260c310e Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 11:25:21 -0400 Subject: [PATCH 08/13] call pytest in makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 186ca22..72cd8b0 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ install: ; pip install . -test: ; python -m unittest tests/test_*.py +test: ; python -m pytest tests deploy: build twine upload dist/* From 48058cbd5bc8902343ff475756a8f952274b0e11 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 18:11:26 -0400 Subject: [PATCH 09/13] remove unused fixture --- tests/conftest.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b5ec517..c25196e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,9 +15,3 @@ def cg(): """Provides an initialized CensusGeocode instance.""" return CensusGeocode() - - -@pytest.fixture -def batch_path(request): - """Provides either a string or a Path object for batch testing.""" - return request.param From a6cf7774294969e78c2c63de288b9502b360237f Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 18:19:50 -0400 Subject: [PATCH 10/13] cleanup imports --- setup.py | 1 - src/censusgeocode/__main__.py | 2 +- tests/conftest.py | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2f00cd0..1508682 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # This file is part of censusgeocode. # https://github.com/fitnr/censusgeocode diff --git a/src/censusgeocode/__main__.py b/src/censusgeocode/__main__.py index 8288d46..ef40d93 100644 --- a/src/censusgeocode/__main__.py +++ b/src/censusgeocode/__main__.py @@ -15,9 +15,9 @@ from . import __version__ from .censusgeocode import ( DEFAULT_BENCHMARK, + DEFAULT_TIMEOUT, DEFAULT_VINTAGE, CensusGeocode, - DEFAULT_TIMEOUT, ) diff --git a/tests/conftest.py b/tests/conftest.py index c25196e..cdc0884 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,7 @@ # Copyright (c) 2015-2026, Neil Freeman import pytest + from censusgeocode import CensusGeocode From d29d0a23df8bf104df9245834f84126aad4190db Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 18:20:31 -0400 Subject: [PATCH 11/13] cleanup tests --- tests/test_censusgeocode.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_censusgeocode.py b/tests/test_censusgeocode.py index 7747310..9e18782 100644 --- a/tests/test_censusgeocode.py +++ b/tests/test_censusgeocode.py @@ -56,8 +56,8 @@ def test_onelineaddress(cg): assert results[0] assert results[0]["geographies"]["Counties"][0]["BASENAME"] == "District of Columbia" - assert "Metropolitan Divisions" in results[0]["geographies"].keys() - assert "Alaska Native Village Statistical Areas" in results[0]["geographies"].keys() + assert "Metropolitan Divisions" in results[0]["geographies"] + assert "Alaska Native Village Statistical Areas" in results[0]["geographies"] @vcr.use_cassette("tests/fixtures/address-locations.yaml") @@ -124,6 +124,7 @@ def test_addressbatch_file_not_found(cg, bad_file): with pytest.raises(FileNotFoundError, match=re.escape(f"File not found at path {bad_file}")): cg.addressbatch(bad_file, returntype="geographies") + def test_warning10k(cg): """Sending more than 10,000 records to batch raises a warning.""" warnings.simplefilter("error") From 03bedde87b19bc0f0bc68d4d4f8bb13232e7a402 Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 18:20:50 -0400 Subject: [PATCH 12/13] add set vintage/benchmark method tests --- tests/test_censusgeocode.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tests/test_censusgeocode.py b/tests/test_censusgeocode.py index 9e18782..822e662 100644 --- a/tests/test_censusgeocode.py +++ b/tests/test_censusgeocode.py @@ -1,6 +1,4 @@ """Tests for censusgeocode""" -import re -import warnings # This file is part of censusgeocode. # https://github.com/fitnr/censusgeocode @@ -9,9 +7,15 @@ # http://opensource.org/licenses/LGPL-3.0 # Copyright (c) 2015-2026, Neil Freeman +import random +import re +import string +import warnings +from pathlib import Path + import pytest import vcr -from pathlib import Path + from censusgeocode import CensusGeocode from censusgeocode.censusgeocode import GeographyResult @@ -90,6 +94,20 @@ def test_benchmark_vintage(): assert result[0]["geographies"]["Census Tracts"][0]["GEOID"] == "11001006202" +def test_set_vintage(cg: CensusGeocode): + """Test changing vintage.""" + vint = random.choices(string.ascii_letters, k=8) + cg.set_vintage(vint) + assert cg.vintage == vint + + +def test_set_benchmark(cg: CensusGeocode): + """Test changing vintage.""" + bmark = random.choices(string.ascii_letters, k=8) + cg.set_benchmark(bmark) + assert cg.benchmark == bmark + + @vcr.use_cassette("tests/fixtures/address-batch.yaml") @pytest.mark.parametrize( "batch_input", From 7516a11fc372ee73c525b5ed6907a557b2059b4f Mon Sep 17 00:00:00 2001 From: Ramona T Date: Sat, 14 Mar 2026 18:21:14 -0400 Subject: [PATCH 13/13] update union/optional type syntax via __future__ import annotations --- src/censusgeocode/censusgeocode.py | 133 +++++++++++++++++++++-------- 1 file changed, 96 insertions(+), 37 deletions(-) diff --git a/src/censusgeocode/censusgeocode.py b/src/censusgeocode/censusgeocode.py index 2e036d6..f44e3b9 100644 --- a/src/censusgeocode/censusgeocode.py +++ b/src/censusgeocode/censusgeocode.py @@ -11,11 +11,13 @@ # http://opensource.org/licenses/LGPL-3.0 # Copyright (c) 2015-2026, Neil Freeman +from __future__ import annotations + import csv import io import warnings from pathlib import Path -from typing import Dict, List, Literal, Optional, TextIO, Union, Any, Iterable +from typing import Any, Dict, Iterable, List, Literal, TextIO, Union import requests from requests_toolbelt.multipart.encoder import MultipartEncoder @@ -74,22 +76,34 @@ def __init__(self, benchmark: str = DEFAULT_BENCHMARK, vintage: str = DEFAULT_VI self._benchmark = benchmark self._vintage = vintage - def _geturl(self, searchtype: SearchType, returntype: Optional[ReturnType] = "geographies") -> str: + def _geturl(self, searchtype: SearchType, returntype: ReturnType | None = "geographies") -> str: """Construct an URL for the geocoder.""" return self._url.format(returntype=returntype, searchtype=searchtype) def _fetch( self, searchtype: SearchType, - fields: Dict[ - Literal["vintage", "benchmark", "layers", "format", "x", "y", "address", "street", "city", "state", "zip"], - Optional[Union[str, float]], + fields: dict[ + Literal[ + "vintage", + "benchmark", + "layers", + "format", + "x", + "y", + "address", + "street", + "city", + "state", + "zip", + ], + str | float | None, ], *, - returntype: Optional[ReturnType] = "geographies", - timeout: Optional[int] = DEFAULT_TIMEOUT, + returntype: ReturnType | None = "geographies", + timeout: int | None = DEFAULT_TIMEOUT, **kwargs, - ) -> Union["AddressResult", "GeographyResult"]: + ) -> AddressResult | GeographyResult: """Fetch a response from the Geocoding API.""" fields["vintage"] = self.vintage fields["benchmark"] = self.benchmark @@ -117,12 +131,29 @@ def _fetch( raise ValueError(err_msg) from e def coordinates( - self, x: float, y: float, *, returntype: Optional[ReturnType] = "geographies", **kwargs - ) -> Union["AddressResult", "GeographyResult"]: + self, + x: float, + y: float, + *, + returntype: ReturnType | None = "geographies", + **kwargs, + ) -> AddressResult | GeographyResult: """Geocode a (lon, lat) coordinate.""" - fields: Dict[ - Literal["vintage", "benchmark", "layers", "format", "x", "y", "address", "street", "city", "state", "zip"], - Optional[Union[str, float]], + fields: dict[ + Literal[ + "vintage", + "benchmark", + "layers", + "format", + "x", + "y", + "address", + "street", + "city", + "state", + "zip", + ], + str | float | None, ] = {"x": x, "y": y} return self._fetch("coordinates", fields=fields, returntype=returntype, **kwargs) @@ -130,18 +161,30 @@ def coordinates( def address( self, street: str, - city: Optional[str] = None, - state: Optional[str] = None, + city: str | None = None, + state: str | None = None, *, - zip: Optional[str] = None, - zipcode: Optional[str] = None, - timeout: Optional[int] = DEFAULT_TIMEOUT, + zip: str | None = None, + zipcode: str | None = None, + timeout: int | None = DEFAULT_TIMEOUT, **kwargs, - ) -> Union["AddressResult", "GeographyResult"]: + ) -> AddressResult | GeographyResult: """Geocode an address.""" - fields: Dict[ - Literal["vintage", "benchmark", "layers", "format", "x", "y", "address", "street", "city", "state", "zip"], - Optional[Union[str, float]], + fields: dict[ + Literal[ + "vintage", + "benchmark", + "layers", + "format", + "x", + "y", + "address", + "street", + "city", + "state", + "zip", + ], + str | float | None, ] = { "street": street, "city": city, @@ -151,13 +194,25 @@ def address( return self._fetch(searchtype="address", fields=fields, timeout=timeout, **kwargs) - def onelineaddress(self, address: str, **kwargs) -> Union["AddressResult", "GeographyResult"]: + def onelineaddress(self, address: str, **kwargs) -> AddressResult | GeographyResult: """Geocode an an address passed as one string. e.g. "4600 Silver Hill Rd, Suitland, MD 20746" """ - fields: Dict[ - Literal["vintage", "benchmark", "layers", "format", "x", "y", "address", "street", "city", "state", "zip"], - Optional[Union[str, float]], + fields: dict[ + Literal[ + "vintage", + "benchmark", + "layers", + "format", + "x", + "y", + "address", + "street", + "city", + "state", + "zip", + ], + str | float | None, ] = { "address": address, } @@ -186,7 +241,7 @@ def vintage(self) -> str: See: https://geocoding.geo.census.gov/geocoder/vintages?form""" return self._vintage - def _parse_batch_result(self, data: str, returntype: ReturnType) -> List[ResultType]: + def _parse_batch_result(self, data: str, returntype: ReturnType) -> list[ResultType]: """Parse the batch address results returned from the Census Geocoding API""" try: fieldnames = self.batchfields[returntype] @@ -214,14 +269,14 @@ def parse(row): def _post_batch( self, - data: Optional[Iterable[Dict[str, Any]]] = None, - f: Optional[Union[io.IOBase, TextIO]] = None, + data: Iterable[dict[str, Any]] | None = None, + f: io.IOBase | TextIO | None = None, *, leave_open: bool = False, returntype: ReturnType = "geographies", - timeout: Optional[int] = DEFAULT_TIMEOUT, + timeout: int | None = DEFAULT_TIMEOUT, **kwargs, - ) -> List[ResultType]: + ) -> list[ResultType]: """Send batch address file to the Census Geocoding API""" url = self._geturl(searchtype="addressbatch", returntype=returntype) @@ -262,8 +317,12 @@ def _post_batch( f.close() def addressbatch( - self, data: Union[TextIO, str, Path, Iterable[Dict[str, Any]]], *, timeout: Optional[int] = None, **kwargs - ) -> List[ResultType]: + self, + data: TextIO | str | Path | Iterable[dict[str, Any]], + *, + timeout: int | None = None, + **kwargs, + ) -> list[ResultType]: """ Send either a CSV file or data to the addressbatch API. @@ -296,8 +355,8 @@ def addressbatch( class GeographyResult(Dict): """Wrapper for geography objects returned by the Census Geocoding API""" - def __init__(self, data: Dict[str, Any]) -> None: - self.input: Union[str, int, float, List, Dict] = data["result"].get("input", {}) + def __init__(self, data: dict[str, Any]) -> None: + self.input: str | int | float | list | dict = data["result"].get("input", {}) super().__init__(data["result"]["geographies"]) # create float coordinate tuples @@ -317,6 +376,6 @@ def __init__(self, data: Dict[str, Any]) -> None: class AddressResult(List): """Wrapper for address objects returned by the Census Geocoding API""" - def __init__(self, data: Dict[str, Any]) -> None: - self.input: Union[str, int, float, List, Dict] = data["result"].get("input", {}) + def __init__(self, data: dict[str, Any]) -> None: + self.input: str | int | float | list | dict = data["result"].get("input", {}) super().__init__(data["result"]["addressMatches"])