Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

______________________________________________________________________

<!-- (New changes here in list form) -->
- Add download functions for known data sources

## [0.2.1](https://github.com/RolnickLab/geospatial-tools/tree/0.2.1) (2025-09-17)

Expand Down
2 changes: 1 addition & 1 deletion configs/data_file_links.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ united_states_polygon:
url: "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_nation_20m.zip"

sentinel_2_tiling_grid:
description: ""
description: "Sentinel 2 tiling grid"
url: "https://sentiwiki.copernicus.eu/__attachments/1692737/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.zip"
68 changes: 68 additions & 0 deletions geospatial_tools/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from pathlib import Path

from geospatial_tools import DATA_DIR
from geospatial_tools.utils import download_url, get_yaml_config, unzip_file

# Keys of the entries in configs/data_file_links.yaml. Each value below must
# match its YAML key exactly, because it is used to look up the download URL.
USA_POLYGON = "united_states_polygon"
# NOTE(review): the constant name is spelled "TILLING" while its value (and the
# YAML key) use "tiling"; the name is kept as-is because other code refers to it.
SENTINEL_2_TILLING_GRID = "sentinel_2_tiling_grid"


def _download_from_link(
    target_download: str, output_name: str | None = None, output_directory: str | Path = DATA_DIR
) -> list[str | Path]:
    """
    Download the zip archive registered under a key of the ``data_file_links`` config and extract it.

    The archive is saved as ``<output_directory>/<output_name>.zip``, extracted into
    ``output_directory`` and then deleted, whether or not the extraction succeeded.

    Parameters
    ----------
    target_download
        Key of the entry to download; must exist in the ``data_file_links`` config
    output_name
        Base name (without extension) of the temporary archive. Defaults to ``target_download``
    output_directory
        Directory in which the archive is downloaded and extracted

    Returns
    -------
    List of extracted paths, as returned by ``unzip_file``

    Raises
    ------
    KeyError
        If ``target_download`` (or its ``url`` field) is missing from the config
    RuntimeError
        If ``download_url`` reports a failed download by returning ``None``
    """
    file_configs = get_yaml_config("data_file_links")
    url = file_configs[target_download]["url"]

    output_directory = Path(output_directory)
    # Make sure the destination exists before anything is written into it.
    output_directory.mkdir(parents=True, exist_ok=True)
    archive_path = output_directory / f"{output_name or target_download}.zip"

    downloaded_file = download_url(url=url, filename=archive_path)
    if downloaded_file is None:
        # Fail with an explicit message instead of an opaque error further down.
        raise RuntimeError(f"Download of [{target_download}] from [{url}] failed.")

    try:
        return unzip_file(downloaded_file, extract_to=output_directory)
    finally:
        # Always remove the archive: a corrupt file left behind would be picked up
        # again by the next call, since existing files are not re-downloaded.
        Path(downloaded_file).unlink(missing_ok=True)


def download_usa_polygon(output_name: str = USA_POLYGON, output_directory: str | Path = DATA_DIR) -> list[str | Path]:
    """
    Fetch and extract the United States polygon archive.

    Parameters
    ----------
    output_name
        Base name (without extension) given to the downloaded archive
    output_directory
        Directory in which the archive is downloaded and extracted

    Returns
    -------
    List of paths of the extracted files
    """
    return _download_from_link(
        target_download=USA_POLYGON,
        output_name=output_name,
        output_directory=output_directory,
    )


def download_s2_tiling_grid(
    output_name: str = SENTINEL_2_TILLING_GRID, output_directory: str | Path = DATA_DIR
) -> list[str | Path]:
    """
    Fetch and extract the Sentinel 2 tiling grid archive.

    Parameters
    ----------
    output_name
        Base name (without extension) given to the downloaded archive
    output_directory
        Directory in which the archive is downloaded and extracted

    Returns
    -------
    List of paths of the extracted files
    """
    return _download_from_link(
        target_download=SENTINEL_2_TILLING_GRID,
        output_name=output_name,
        output_directory=output_directory,
    )
23 changes: 15 additions & 8 deletions geospatial_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import json
import logging
import os
import pathlib
import sys
import zipfile
from pathlib import Path

import requests
import yaml
Expand Down Expand Up @@ -79,7 +79,7 @@ def get_yaml_config(yaml_config_file: str, logger: logging.Logger = LOGGER) -> d
"""

potential_paths = [
pathlib.Path(yaml_config_file),
Path(yaml_config_file),
CONFIGS / yaml_config_file,
CONFIGS / f"{yaml_config_file}.yaml",
CONFIGS / f"{yaml_config_file}.yml",
Expand Down Expand Up @@ -127,7 +127,7 @@ def get_json_config(json_config_file: str, logger=LOGGER) -> dict:
"""

potential_paths = [
pathlib.Path(json_config_file),
Path(json_config_file),
CONFIGS / json_config_file,
CONFIGS / f"{json_config_file}.json",
]
Expand Down Expand Up @@ -185,7 +185,7 @@ def create_crs(dataset_crs: str | int, logger=LOGGER):
return None


def download_url(url: str, filename: str | pathlib.Path, overwrite: bool = False, logger=LOGGER) -> pathlib.Path | None:
def download_url(url: str, filename: str | Path, overwrite: bool = False, logger=LOGGER) -> Path | None:
"""
This function downloads a file from a given URL.

Expand All @@ -204,7 +204,7 @@ def download_url(url: str, filename: str | pathlib.Path, overwrite: bool = False
-------
"""
if isinstance(filename, str):
filename = pathlib.Path(filename)
filename = Path(filename)

if filename.exists() and not overwrite:
logger.info(f"File [{filename}] already exists. Skipping download.")
Expand All @@ -221,7 +221,7 @@ def download_url(url: str, filename: str | pathlib.Path, overwrite: bool = False
return None


def unzip_file(zip_path: str | pathlib.Path, extract_to: str | pathlib.Path, logger: logging.Logger = LOGGER):
def unzip_file(zip_path: str | Path, extract_to: str | Path, logger: logging.Logger = LOGGER) -> list[str | Path]:
"""
This function unzips an archive to a specific directory.

Expand All @@ -233,16 +233,23 @@ def unzip_file(zip_path: str | pathlib.Path, extract_to: str | pathlib.Path, log
Path of directory to extract the zip file
logger
Logger instance

Returns
-------
List of unzipped paths
"""
if isinstance(zip_path, str):
zip_path = pathlib.Path(zip_path)
zip_path = Path(zip_path)
if isinstance(extract_to, str):
extract_to = pathlib.Path(extract_to)
extract_to = Path(extract_to)
extract_to.mkdir(parents=True, exist_ok=True)
extracted_files = []
with zipfile.ZipFile(zip_path, "r") as zip_ref:
for member in zip_ref.infolist():
zip_ref.extract(member, extract_to)
logger.info(f"Extracted: [{member.filename}]")
extracted_files.append(f"{extract_to}/{member.filename}")
return extracted_files


def create_date_range_for_specific_period(
Expand Down
1 change: 1 addition & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def get_paths(session):
main_package,
scripts,
],
"root": [package_path],
}


Expand Down
104 changes: 98 additions & 6 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ nox = "^2024.4.15"
autoflake = "^2.3.1"
autopep8 = "^2.3.2"
ruff = "^0.11.10"
mdformat = "^0.7.22"
mdformat-gfm = "^0.4.1"
mdformat-gfm-alerts = "^2.0.0"

[tool.poetry.group.lab.dependencies]
jupyterlab = "^4.0.10"
Expand Down
19 changes: 19 additions & 0 deletions tests/test_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pytest

from geospatial_tools.download import download_s2_tiling_grid, download_usa_polygon


@pytest.mark.skip(reason="Currently a problem with SSL certificate of host census.gov")
def test_download_1usa_polygon(tmp_path):
    """
    Check that the USA polygon archive extracts to the expected number of files.

    A failure here is usually caused by a problem with the host's SSL certificate,
    not by the code under test; the data can usually still be downloaded manually
    through a browser.
    """
    extracted = download_usa_polygon(output_directory=tmp_path)
    assert len(extracted) == 7


def test_download_s2_tilling_grid(tmp_path):
    """Check that the Sentinel 2 tiling grid archive extracts to a single entry."""
    extracted = download_s2_tiling_grid(output_directory=tmp_path)
    assert len(extracted) == 1