diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 8fd17a0..5724e1c 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -12,7 +12,7 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
-    container: ghcr.io/osgeo/gdal:ubuntu-small-3.10.3
+    container: ghcr.io/osgeo/gdal:ubuntu-small-3.11.4
     strategy:
       fail-fast: false
       matrix:
@@ -22,22 +22,31 @@ jobs:
       - name: Install system
         run: |
           apt-get update -qqy
-          apt-get install -y git python3-pip libpq5 libpq-dev r-base libtirpc-dev
+          apt-get install -y git python3-pip libpq5 libpq-dev r-base libtirpc-dev shellcheck
       - uses: actions/checkout@v4
         with:
           submodules: 'true'
+
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v3
         with:
           python-version: ${{ matrix.python-version }}
+
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install gdal[numpy]==3.10.3
+          python -m pip install gdal[numpy]==3.11.4
           python -m pip install -r requirements.txt
+
       - name: Lint with pylint
-        run: |
-          python3 -m pylint utils prepare_layers prepare_species threats
+        run: python3 -m pylint utils prepare_layers prepare_species threats
+
+      - name: Type checking with mypy
+        run: python3 -m mypy utils prepare_layers prepare_species threats
+
       - name: Tests
+        run: python3 -m pytest ./tests
+
+      - name: Script checks
         run: |
-          python3 -m pytest ./tests
+          shellcheck ./scripts/run.sh
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 30fd8a6..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "aoh-calculator"]
-	path = aoh-calculator
-	url = git@github.com:quantifyearth/aoh-calculator.git
diff --git a/.mypy.ini b/.mypy.ini
new file mode 100644
index 0000000..d8ac83e
--- /dev/null
+++ b/.mypy.ini
@@ -0,0 +1,4 @@
+[mypy]
+ignore_missing_imports = True
+explicit_package_bases = False
+no_namespace_packages = True
diff --git a/Dockerfile b/Dockerfile
index ade1030..913f289 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,13 +10,14 @@ WORKDIR /go/littlejohn
 RUN go mod tidy
 RUN go build
 
-FROM ghcr.io/osgeo/gdal:ubuntu-small-3.10.0
+FROM ghcr.io/osgeo/gdal:ubuntu-small-3.11.4
 
 RUN apt-get update -qqy && \
 	apt-get install -qy \
 		git \
 		cmake \
 		python3-pip \
+		shellcheck \
 		r-base \
 		libpq-dev \
 		libtirpc-dev \
@@ -27,7 +28,7 @@ COPY --from=reclaimerbuild /go/reclaimer/reclaimer /bin/reclaimer
 COPY --from=littlejohnbuild /go/littlejohn/littlejohn /bin/littlejohn
 
 RUN rm /usr/lib/python3.*/EXTERNALLY-MANAGED
-RUN pip install gdal[numpy]==3.10.0
+RUN pip install gdal[numpy]==3.11.4
 
 COPY requirements.txt /tmp/
 RUN pip install -r /tmp/requirements.txt
@@ -53,3 +54,5 @@ ENV PYTHONPATH=/root/star
 
 RUN python3 -m pytest ./tests
 RUN python3 -m pylint prepare_layers prepare_species utils tests
+RUN python3 -m mypy prepare_layers prepare_species utils tests
+RUN shellcheck ./scripts/run.sh
diff --git a/README.md b/README.md
index 93895b4..bc552b6 100644
--- a/README.md
+++ b/README.md
@@ -4,40 +4,41 @@ An implementation of the threat based [STAR biodiversity metric by Muir et al](h
 
 See [method.md](method.md) for a description of the methodology, or `scripts/run.sh` for how to execute the pipeline.
 
-# Running the pipeline
-
 ## Checking out the code
 
-This repository uses submodules, so once you have cloned it, you need to fetch the submodules:
+The code is available on GitHub and can be cloned from there:
 
 ```shell
-$ git clone https://github.com/quantifyearth/star.git
-$ cd star
-$ git submodule update --init --recursive
+$ git clone https://github.com/quantifyearth/STAR.git
+...
+$ cd STAR
 ```
 
-## Running the pipeline
+## Additional inputs
+
+There are some additional inputs required to run the pipeline, which should be placed in the directory you use to store the pipeline results.
 
-The easiest way to get started will be to run `scripts/run.sh` under a linux environment.
+* SpeciesList_generalisedRangePolygons.csv - A list of species with generalised ranges on the IUCN Redlist.
+* BL_Species_Elevations_2023.csv (optional) - corrections to the elevations of bird species on the IUCN Redlist, taken from the BirdLife data.
 
-### Running on Ubuntu
+The script also assumes you have a Postgres database with the IUCN Redlist database in it.
+
+## Running the pipeline
 
-The following extra utilities will need to be installed:
+There are two ways to run the pipeline. The easiest is to use Docker, if you have it available, as it will manage all the dependencies for you. Alternatively, you can check out the code and run it locally, but that requires a little more effort.
 
-* [Reclaimer](https://github.com/quantifyearth/reclaimer/) - a utility for downloading data from various primary sources.
-* [Littlejohn](https://github.com/quantifyearth/littlejohn/) - a utility to run jobs in parallel driven by a CSV file.
+### Running with Docker
 
-### Running in Docker
 
 There is included a docker file, which is based on the GDAL container image, which is set up to install everything ready to use. You can build that using:
 
-```
+```shell
 $ docker buildx build -t star .
 ```
 
 You can then invoke the run script using this. You should map an external folder into the container as a place to store the intermediary data and final results, and you should provide details about the Postgres instance with the IUCN redlist:
 
-```
+```shell
 $ docker run --rm -v /some/local/dir:/data \
 	-e DB_HOST=localhost \
 	-e DB_NAME=iucnredlist \
@@ -46,6 +47,66 @@ $ docker run --rm -v /some/local/dir:/data \
 	star ./scripts/run.sh
 ```
 
+### Running without Docker
+
+If you prefer not to use Docker, you will need:
+
+* Python3 >= 3.10
+* GDAL
+* R (required for validation)
+* [Reclaimer](https://github.com/quantifyearth/reclaimer/) - a Go tool for fetching data from Zenodo
+* [Littlejohn](https://github.com/quantifyearth/littlejohn/) - a Go tool for running scripts in parallel
+
+If you are using macOS please note that the default Python install that Apple ships is now several years out of date (Python 3.9, released Oct 2020) and you'll need to install a more recent version (for example, using [homebrew](https://brew.sh)).
+
+With those you should set up a Python virtual environment to install all the required packages. The one trick to this is you need to match the Python GDAL package to your installed GDAL version. For example, on my machine I did the following:
+
+```shell
+$ python3 -m venv ./venv
+$ . ./venv/bin/activate
+(venv) $ gdalinfo --version
+GDAL 3.11.3 "Eganville", released 2025/07/12
+(venv) $ pip install gdal[numpy]==3.11.3
+...
+(venv) $ pip install -r requirements.txt
+```
+
+You will also need to install the R stats packages required for the validation stage:
+
+```shell
+$ R -e "install.packages(c('lme4', 'lmerTest'), repos='https://cran.rstudio.com/')"
+```
+
+Before running the pipeline you will need to set several environment variables to tell the script where to store data and where the database with the IUCN Redlist is. You can set these manually, or we recommend using a tool like [direnv](https://direnv.net).
+
+```shell
+export DATADIR=[PATH WHERE YOU WANT THE RESULTS]
+export DB_HOST=localhost
+export DB_NAME=iucnredlist
+export DB_PASSWORD=supersecretpassword
+export DB_USER=postgres
+```
+
+Once you have all that you can then run the pipeline:
+
+```shell
+(venv) $ ./scripts/run.sh
+```
+
 # Credits
 
 The author of this package is greatly indebted to both [Francesca Ridley](https://www.ncl.ac.uk/nes/people/profile/francescaridley.html) from the University of Newcastle and [Simon Tarr](https://www.linkedin.com/in/simon-tarr-22069b209/) of the IUCN for their guidance and review.
+
+## Data Attribution
+
+The crosswalk table `data/crosswalk_bin_T.csv` was created by [Francesca Ridley](https://www.ncl.ac.uk/nes/people/profile/francescaridley.html) and is derived from:
+
+```
+Lumbierres, M., Dahal, P.R., Di Marco, M., Butchart, S.H.M., Donald, P.F.,
+& Rondinini, C. (2022). Translating habitat class to land cover to map area
+of habitat of terrestrial vertebrates. Conservation Biology, 36, e13851.
+https://doi.org/10.1111/cobi.13851
+```
+
+The paper is licensed under CC BY-NC. It is used in this STAR implementation to crosswalk between the IUCN Habitat classes in the Redlist and the land classes in the Copernicus data layers.
+
diff --git a/aoh-calculator b/aoh-calculator
deleted file mode 160000
index c24def9..0000000
--- a/aoh-calculator
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c24def960799f170a9812af31d4c0e2dc5940dbf
diff --git a/data/crosswalk_bin_T.csv b/data/crosswalk_bin_T.csv
new file mode 100644
index 0000000..f068cf8
--- /dev/null
+++ b/data/crosswalk_bin_T.csv
@@ -0,0 +1,18 @@
+CGLS100_name,CGLS100_value,Label,H_1,H_2,H_3,H_4,H_5,H_6,H_7,H_8,H_14.1,H_14.2,H_14.3,H_14.6,H_14.4,H_14.5,H_15
+CLS_20_shrubs,20,shrubs,0,1,1,0,0,0,U,1,0,0,0,0,0,0,0
+CLS_30_Herbaceous_vegetation,30,Herbaceous_vegetation,0,0,0,1,0,0,U,0,0,0,0,0,0,0,0
+CLS_40_CultivatedandManaged_VegetationAgriculture,40,CultivatedandManaged_VegetationAgriculture,0,0,0,1,1,0,U,0,1,1,0,0,0,0,0
+CLS_50_Urban_builtup,50,Urban_builtup,0,0,0,0,0,0,U,0,0,0,0,0,1,1,0
+CLS_60_bare_sparsevegetation,60,bare_sparsevegetation,0,0,1,0,0,1,U,1,0,0,0,0,0,0,0
+CLS_80_permanent_water,80,permanent_water,0,0,0,0,1,0,U,0,0,0,0,0,0,0,0
+CLS_90_Herbaceous_wetland,90,Herbaceous_wetland,0,0,0,0,1,0,U,0,0,0,0,0,0,0,1
+CLS_111_Closedforest_evergreen_needle,111,Closedforest_evergreen_needle,1,0,0,0,0,0,U,0,0,0,0,0,0,0,0
+CLS_112_Closedforest_evergreen_broad,112,Closedforest_evergreen_broad,1,0,0,0,0,0,U,0,0,0,0,0,0,0,0
+CLS_114_Closedforest_deciduous_broad,114,Closedforest_deciduous_broad,1,0,0,0,0,0,U,0,0,0,0,0,0,0,0
+CLS_115_Closedforest_mixed,115,Closedforest_mixed,1,0,0,0,0,0,U,0,0,0,0,0,0,0,0
+CLS_116_Closedforest_unknown,116,Closedforest_unknown,1,0,0,0,0,0,U,0,0,0,0,0,0,0,0
+CLS_121_Openforest_evergreen_needle,121,Openforest_evergreen_needle,1,0,0,0,0,1,U,0,0,0,0,0,0,0,0
+CLS_122_Openforest_evergreen_broad,122,Openforest_evergreen_broad,1,0,0,0,0,0,U,0,0,0,0,0,0,0,0
+CLS_124_Openforest_deciduous_broad,124,Openforest_deciduous_broad,0,1,0,0,0,0,U,0,0,0,0,0,0,0,0
+CLS_125_Openforest_mixed,125,Openforest_mixed,1,0,0,0,0,0,U,0,0,0,0,0,0,0,0
+CLS_126_Openforest_unknown,126,Openforest_unknown,0,0,0,0,0,0,U,0,0,0,0,0,0,0,0
diff --git a/method.md b/method.md
index f9d3fce..06daf8d 100644
--- a/method.md
+++ b/method.md
@@ -120,8 +120,12 @@ python3 ./prepare_layers/make_masks.py --habitat_layers /data/habitat_layers/cur
 To assist with provenance, we download the data from the Zenodo ID.
 
 ```shark-run:reclaimer
-curl -o FABDEM.zip https://data.bris.ac.uk/datasets/tar/s5hqmjcdj8yo2ibzi9b4ew3sn.zip
-...
+curl -o /data/FABDEM.zip https://data.bris.ac.uk/datasets/tar/s5hqmjcdj8yo2ibzi9b4ew3sn.zip
+```
+
+```shark-run:gdalonly
+python3 tbd.py --input /data/FABDEM.zip \
+    --output /data/elevation.tif
 ```
 
 Similarly to the habitat map we need to resample to 1km, however rather than picking the mean elevation, we select both the min and max elevation for each pixel, and then check whether the species is in that range when we calculate AoH.
@@ -214,4 +218,18 @@ python3 ./aoh-calculator/validation/validate_map_prevelence.py --collated_aoh_da
 
 ```shark-publish
 /data/validation/model_validation.csv
-```
\ No newline at end of file
+```
+
+## Threats
+
+```shark-run:aohbuilder
+python3 ./threats/threat_processing.py --speciesdata /data/species-info/* \
+  --aoh /data/aohs/ \
+  --output /data/threat_rasters
+
+python3 ./threats/threat_summation.py --threat_rasters /data/threat_rasters --output /data/threat_results
+```
+
+```shark-publish
+/data/threat_results
+```
diff --git a/prepare_layers/convert_crosswalk.py b/prepare_layers/convert_crosswalk.py
index 37926ff..4049d6e 100644
--- a/prepare_layers/convert_crosswalk.py
+++ b/prepare_layers/convert_crosswalk.py
@@ -1,4 +1,5 @@
 import argparse
+from pathlib import Path
 
 import pandas as pd
 
@@ -28,8 +29,8 @@
 }
 
 def convert_crosswalk(
-    original_path: str,
-    output_path: str,
+    original_path: Path,
+    output_path: Path,
 ) -> None:
     original = pd.read_csv(original_path)
 
@@ -56,14 +57,14 @@ def main() -> None:
     parser = argparse.ArgumentParser(description="Convert IUCN crosswalk to minimal common format.")
     parser.add_argument(
         '--original',
-        type=str,
+        type=Path,
         help="Original format",
         required=True,
         dest="original_path",
     )
     parser.add_argument(
         '--output',
-        type=str,
+        type=Path,
         help='Destination minimal file',
         required=True,
         dest='output_path',
diff --git a/prepare_layers/make_masks.py b/prepare_layers/make_masks.py
index f5d9756..2557280 100644
--- a/prepare_layers/make_masks.py
+++ b/prepare_layers/make_masks.py
@@ -1,62 +1,71 @@
 import argparse
 import os
 import sys
-from glob import glob
+from pathlib import Path
 from typing import Set
 
-import numpy as np
-from yirgacheffe.layers import RasterLayer
+import yirgacheffe as yg
+import yirgacheffe.operators as yo
 
 OPEN_SEA_LCC = "lcc_200.tif"
 NO_DATA_LCC = "lcc_0.tif"
 
 def prepare_mask(
-    layers: Set[str],
-    output_path: str,
+    layers: Set[Path],
+    output_path: Path,
     at_least: bool = True,
 ) -> None:
+    """Combine habitat layers into a single binary mask GeoTIFF.
+
+    The rasters in `layers` are summed, and each pixel of the result becomes
+    1.0 where the sum passes 0.5 and 0.0 elsewhere. With `at_least` the test
+    is `>= 0.5`, otherwise it is `> 0.5`. The mask is saved to `output_path`.
+    """
     assert layers
-    rasters = [RasterLayer.layer_from_file(x) for x in layers]
-
-    intersection = RasterLayer.find_intersection(rasters)
-    for r in rasters:
-        r.set_window_for_intersection(intersection)
+    rasters = [yg.read_raster(x) for x in layers]
 
     calc = rasters[0]
     for r in rasters[1:]:
         calc = calc + r
     if at_least:
-        calc = calc.numpy_apply(lambda a: np.where(a >= 0.5, 1.0, 0.0))
+        calc = yo.where(calc >= 0.5, 1.0, 0.0)
     else:
-        calc = calc.numpy_apply(lambda a: np.where(a > 0.5, 1.0, 0.0))
+        calc = yo.where(calc > 0.5, 1.0, 0.0)
 
-    with RasterLayer.empty_raster_layer_like(rasters[0], filename=output_path) as result:
-        calc.parallel_save(result)
+    calc.to_geotiff(output_path, parallelism=128)
 
 def prepare_masks(
-    habitat_layers_path: str,
-    output_directory_path: str,
+    habitat_layers_path: Path,
+    output_directory_path: Path,
 ) -> None:
+    """Build the marine and terrestrial masks from a directory of habitat layers.
+
+    Every `lcc_*.tif` in `habitat_layers_path` is considered: `lcc_200.tif`
+    alone forms `marine_mask.tif`, and all layers other than `lcc_200.tif` and
+    `lcc_0.tif` form `terrestrial_mask.tif`. Both masks are written to
+    `output_directory_path`, which is created if missing. Exits if no layers
+    are found.
+    """
     os.makedirs(output_directory_path, exist_ok=True)
 
-    layer_files = set(glob("lcc_*.tif", root_dir=habitat_layers_path))
+    layer_files = set(habitat_layers_path.glob("lcc_*.tif"))
     if not layer_files:
         sys.exit(f"Found no habitat layers in {habitat_layers_path}")
 
-    marine_layers = layer_files & set([OPEN_SEA_LCC])
-    terrerstrial_layers = layer_files - set([OPEN_SEA_LCC, NO_DATA_LCC])
+    marine_layers = {x for x in layer_files if x.name == OPEN_SEA_LCC}
+    terrestrial_layers = {x for x in layer_files if x.name not in [OPEN_SEA_LCC, NO_DATA_LCC]}
 
     assert len(marine_layers) == 1
-    assert len(terrerstrial_layers) == len(layer_files) - 2
+    assert len(terrestrial_layers) < len(layer_files)
 
     prepare_mask(
-        {os.path.join(habitat_layers_path, x) for x in marine_layers},
-        os.path.join(output_directory_path, "marine_mask.tif"),
+        marine_layers,
+        output_directory_path / "marine_mask.tif",
     )
 
     prepare_mask(
-        {os.path.join(habitat_layers_path, x) for x in terrerstrial_layers},
-        os.path.join(output_directory_path, "terrestrial_mask.tif"),
+        terrestrial_layers,
+        output_directory_path / "terrestrial_mask.tif",
         at_least=True,
     )
 
@@ -66,14 +75,14 @@ def main() -> None:
     parser = argparse.ArgumentParser(description="Generate terrestrial and marine masks.")
     parser.add_argument(
         '--habitat_layers',
-        type=str,
+        type=Path,
         help="directory with split and scaled habitat layers",
         required=True,
         dest="habitat_layers"
     )
     parser.add_argument(
         '--output_directory',
-        type=str,
+        type=Path,
         help="Folder for output mask layers",
         required=True,
         dest="output_directory"
diff --git a/prepare_species/apply_birdlife_data.py b/prepare_species/apply_birdlife_data.py
new file mode 100644
index 0000000..da65746
--- /dev/null
+++ b/prepare_species/apply_birdlife_data.py
@@ -0,0 +1,89 @@
+import argparse
+import math
+from pathlib import Path
+
+import aoh
+import geopandas as gpd
+import pandas as pd
+
+# Columns from current BirdLife data overrides:
+# SIS ID
+# Assessment ID
+# WBDB ID
+# Sequence
+# Scientific name
+# Common name
+# RL Category
+# PE
+# PEW
+# Min altitude (m)
+# Max altitude (m)
+# Occasional lower elevation
+# Occasional upper elevation
+
+def apply_birdlife_data(
+    geojson_directory_path: Path,
+    overrides_path: Path,
+) -> None:
+    """Apply BirdLife occasional elevation limits to per-species GeoJSON files.
+
+    Reads the overrides CSV and, for every row that provides an occasional lower
+    and/or upper elevation, rewrites `AVES/current/<SIS ID>.geojson` under
+    `geojson_directory_path` in place with the provided limit(s). Rows with
+    neither value, and rows with no matching GeoJSON file, are skipped.
+    """
+    overrides = pd.read_csv(overrides_path, encoding="latin1")
+
+    for _, row in overrides.iterrows():
+        lower = row["Occasional lower elevation"]
+        upper = row["Occasional upper elevation"]
+        if math.isnan(lower) and math.isnan(upper):
+            continue
+
+        path = geojson_directory_path / "AVES" / "current" / f'{row["SIS ID"]}.geojson'
+        if not path.exists():
+            continue
+
+        species_info = gpd.read_file(path)
+        # Only the first feature (index 0) of the file is read and written back.
+        data = species_info.loc[0].copy()
+
+        # Where no override is given, keep the existing limit (coerced to float).
+        data.elevation_lower = float(data.elevation_lower if math.isnan(lower) else lower)
+        data.elevation_upper = float(data.elevation_upper if math.isnan(upper) else upper)
+        # NOTE(review): extract_species_data_psql.py passes explicit elevation_max,
+        # elevation_min and elevation_seperation to tidy_data; here the library
+        # defaults are used - confirm that is intended.
+        data = aoh.tidy_data(data)
+
+        res = gpd.GeoDataFrame(data.to_frame().transpose(), crs=species_info.crs, geometry="geometry")
+        res.to_file(path, driver="GeoJSON")
+
+def main() -> None:
+    """Parse command line arguments and apply the BirdLife overrides."""
+    parser = argparse.ArgumentParser(
+        description="Apply BirdLife elevation overrides to per-species GeoJSON files.",
+    )
+    parser.add_argument(
+        '--geojsons',
+        type=Path,
+        help='Directory where per species Geojson is stored',
+        required=True,
+        dest='geojson_directory_path',
+    )
+    parser.add_argument(
+        '--overrides',
+        type=Path,
+        help="CSV of overrides",
+        required=True,
+        dest="overrides",
+    )
+    args = parser.parse_args()
+
+    apply_birdlife_data(
+        args.geojson_directory_path,
+        args.overrides
+    )
+
+if __name__ == "__main__":
+    main()
diff --git a/prepare_species/extract_species_data_psql.py b/prepare_species/extract_species_data_psql.py
index 0ceda78..43b65c7 100644
--- a/prepare_species/extract_species_data_psql.py
+++ b/prepare_species/extract_species_data_psql.py
@@ -1,14 +1,14 @@
 import argparse
-import importlib
 import json
 import logging
 import math
 import os
 from functools import partial
 from multiprocessing import Pool
-from typing import Any, List, Optional, Set, Tuple
+from pathlib import Path
+from typing import Any, Optional
 
-# import pyshark # pylint: disable=W0611
+import aoh
 import geopandas as gpd
 import pandas as pd
 import pyproj
@@ -16,14 +16,13 @@
 import shapely
 from postgis.psycopg import register
 
-aoh_cleaning = importlib.import_module("aoh-calculator.cleaning")
 
 logger = logging.getLogger(__name__)
 logging.basicConfig()
 logger.setLevel(logging.DEBUG)
 
 # To match the FABDEM elevation map we use
-# different range min/max/seperation
+# different range min/max/separation
 ELEVATION_MAX = 8580
 ELEVATION_MIN = -427
 ELEVATION_SPREAD = 12
@@ -31,6 +30,7 @@
 COLUMNS = [
     "id_no",
     "assessment_id",
+    "assessment_year",
     "season",
     "systems",
     "elevation_lower",
@@ -61,6 +61,7 @@
 SELECT
     assessments.sis_taxon_id as id_no,
     assessments.id as assessment_id,
+    DATE_PART('year', assessments.assessment_date) as assessment_year,
     assessments.possibly_extinct,
     assessments.possibly_extinct_in_the_wild,
     (assessment_supplementary_infos.supplementary_fields->>'ElevationLower.limit')::numeric AS elevation_lower,
@@ -180,36 +181,36 @@ def __getattr__(self, name: str) -> Any:
             return self.info[name]
         return None
 
-    def as_row(self) -> List:
+    def as_row(self) -> list:
         return [self.info[k] for k in self.REPORT_COLUMNS]
 
 def tidy_reproject_save(
     gdf: gpd.GeoDataFrame,
     report: SpeciesReport,
-    output_directory_path: str,
+    output_directory_path: Path,
     target_projection: Optional[str],
 ) -> None:
     src_crs = pyproj.CRS.from_epsg(4326)
     target_crs = pyproj.CRS.from_string(target_projection) if target_projection else src_crs
 
     graw = gdf.loc[0].copy()
-    grow = aoh_cleaning.tidy_data(
+    grow = aoh.tidy_data(
         graw,
         elevation_max=ELEVATION_MAX,
         elevation_min=ELEVATION_MIN,
         elevation_seperation=ELEVATION_SPREAD,
     )
     os.makedirs(output_directory_path, exist_ok=True)
-    output_path = os.path.join(output_directory_path, f"{grow.id_no}.geojson")
+    output_path = output_directory_path / f"{grow.id_no}.geojson"
     res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry")
     res_projected = res.to_crs(target_crs)
     res_projected.to_file(output_path, driver="GeoJSON")
     report.filename = output_path
 
 def process_systems(
-    systems_data: List[Tuple],
+    systems_data: list[tuple],
     report: SpeciesReport,
-) -> None:
+) -> list:
     if len(systems_data) == 0:
         raise ValueError("No systems found")
     if len(systems_data) > 1:
@@ -249,9 +250,9 @@ def process_systems(
 ]
 
 def process_threats(
-    threat_data: List,
+    threat_data: list[tuple[int, str, str]],
     report: SpeciesReport,
-) -> bool:
+) -> list[tuple[int, int]]:
     cleaned_threats = []
     for code, scope, severity in threat_data:
         if scope is None or scope.lower() == "unknown":
@@ -267,9 +268,9 @@ def process_threats(
     return cleaned_threats
 
 def process_habitats(
-    habitats_data: List[List[str]],
+    habitats_data: list[list[str]],
     report: SpeciesReport,
-) -> Set:
+) -> set:
     if len(habitats_data) == 0:
         # Promote to "Unknown"
         habitats_data = [["18"]]
@@ -295,7 +296,7 @@ def process_habitats(
     return habitats
 
 def process_geometries(
-    geometries_data: List[Tuple[int,shapely.Geometry]],
+    geometries_data: list[tuple[int, shapely.Geometry]],
     report: SpeciesReport,
 ) -> shapely.Geometry:
     if len(geometries_data) == 0:
@@ -326,17 +327,17 @@ def process_geometries(
 
 def process_row(
     class_name: str,
-    output_directory_path: str,
+    output_directory_path: Path,
     target_projection: Optional[str],
-    presence: Tuple[int],
-    row: Tuple,
-) -> Tuple:
+    presence: tuple[int, ...],
+    row: tuple,
+) -> SpeciesReport:
 
     connection = psycopg2.connect(DB_CONFIG)
     register(connection)
     cursor = connection.cursor()
 
-    id_no, assessment_id, possibly_extinct, possibly_extinct_in_the_wild, \
+    id_no, assessment_id, assessment_year, possibly_extinct, possibly_extinct_in_the_wild, \
         elevation_lower, elevation_upper, scientific_name, family_name, category = row
 
     report = SpeciesReport(id_no, assessment_id, scientific_name)
@@ -378,6 +379,7 @@ def process_row(
         [[
             id_no,
             assessment_id,
+            int(assessment_year) if assessment_year is not None else None,
             "all",
             systems,
             int(elevation_lower) if elevation_lower is not None else None,
@@ -398,7 +400,7 @@ def process_row(
     return report
 
 def apply_overrides(
-    overrides_path: str,
+    overrides_path: Path,
     results,
 ):
     overrides = pd.read_csv(overrides_path, encoding="latin1")
@@ -425,16 +427,16 @@ def apply_overrides(
 
 def extract_data_per_species(
     class_name: str,
-    overrides_path: Optional[str],
-    excludes_path: Optional[str],
-    output_directory_path: str,
+    overrides_path: Optional[Path],
+    excludes_path: Optional[Path],
+    output_directory_path: Path,
     target_projection: Optional[str],
 ) -> None:
 
     connection = psycopg2.connect(DB_CONFIG)
     cursor = connection.cursor()
 
-    excludes = tuple([])
+    excludes: tuple = tuple([])
     if excludes_path is not None:
         try:
             df = pd.read_csv(excludes_path)
@@ -446,7 +448,7 @@ def extract_data_per_species(
     # For STAR-R we need historic data, but for STAR-T we just need current.
     # for era, presence in [("current", (1, 2)), ("historic", (1, 2, 4, 5))]:
     for era, presence in [("current", (1, 2))]:
-        era_output_directory_path = os.path.join(output_directory_path, era)
+        era_output_directory_path = output_directory_path / era
 
         # You can't do NOT IN on an empty list in SQL
         if excludes:
@@ -471,17 +473,13 @@ def extract_data_per_species(
                 partial(process_row, class_name, era_output_directory_path, target_projection, presence),
                 results
             )
-        # reports = [
-        #     process_row(class_name,  era_output_directory_path, target_projection, presence, x)
-        #     for x in results[:10]
-        # ]
 
         reports_df = pd.DataFrame(
             [x.as_row() for x in reports],
             columns=SpeciesReport.REPORT_COLUMNS
         ).sort_values('id_no')
         os.makedirs(era_output_directory_path, exist_ok=True)
-        reports_df.to_csv(os.path.join(era_output_directory_path, "report.csv"), index=False)
+        reports_df.to_csv(era_output_directory_path / "report.csv", index=False)
 
 def main() -> None:
     parser = argparse.ArgumentParser(description="Process agregate species data to per-species-file.")
@@ -494,21 +492,21 @@ def main() -> None:
     )
     parser.add_argument(
         '--overrides',
-        type=str,
+        type=Path,
         help="CSV of overrides",
         required=False,
         dest="overrides",
     )
     parser.add_argument(
         '--excludes',
-        type=str,
+        type=Path,
         help="CSV of taxon IDs to not include",
         required=False,
         dest="excludes"
     )
     parser.add_argument(
         '--output',
-        type=str,
+        type=Path,
         help='Directory where per species GeoJSON is stored',
         required=True,
         dest='output_directory_path',
diff --git a/requirements.txt b/requirements.txt
index 7c9a8ef..720c71a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-alive-progress
 numpy
 geopandas
 postgis
@@ -7,15 +6,12 @@ psutil
 pymer4
 pyproj
 scikit-image
-requests
-zenodo_search
+yirgacheffe>=1.9
+aoh[validation]>=1.0
 
+# GDAL should be installed manually to match the version of the library installed on your machine
 gdal[numpy]
 
-git+https://github.com/quantifyearth/iucn_modlib
-git+https://github.com/quantifyearth/pyshark
-git+https://github.com/quantifyearth/yirgacheffe@4a2cab77f4a64e3f09497ee7098dc9ba499cda90
-
 pylint
 mypy
 pytest
diff --git a/scripts/run.sh b/scripts/run.sh
index 2ce4863..d88e05b 100755
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -7,7 +7,34 @@
 # https://github.com/quantifyearth/reclaimer - used to download inputs from Zenodo directly
 # https://github.com/quantifyearth/littlejohn - used to run batch jobs in parallel
 
+# Set shell script to exit on first error (-e) and to output commands being run to make
+# reviewing logs easier (-x)
 set -e
+set -x
+
+# We know we use two Go tools, so add go/bin to our path as in slurm world they're likely
+# to be installed locally
+export PATH="${PATH}":"${HOME}"/go/bin
+if ! hash littlejohn 2>/dev/null; then
+    echo "Please ensure littlejohn is available"
+    exit 1
+fi
+if ! hash reclaimer 2>/dev/null; then
+    echo "Please ensure reclaimer is available"
+    exit 1
+fi
+
+# Detect if we're running under SLURM
+if [[ -n "${SLURM_JOB_ID}" ]]; then
+    # Slurm users will probably need to customise this
+    # shellcheck disable=SC1091
+    source "${HOME}"/venvs/star/bin/activate
+    cd "${HOME}"/dev/star
+    PROCESS_COUNT="${SLURM_JOB_CPUS_PER_NODE}"
+else
+    PROCESS_COUNT=$(getconf _NPROCESSORS_ONLN)
+fi
+echo "Using ${PROCESS_COUNT} threads."
 
 if [ -z "${DATADIR}" ]; then
     echo "Please specify $DATADIR"
@@ -19,96 +46,100 @@ if [ -z "${VIRTUAL_ENV}" ]; then
     exit 1
 fi
 
-export CPUS=`getconf _NPROCESSORS_ONLN`
-export THREADS=$(($CPUS / 2))
-echo "Using $THREADS threads."
-
 declare -a TAXALIST=("AMPHIBIA" "AVES" "MAMMALIA" "REPTILIA")
 
+if [ ! -d "${DATADIR}" ]; then
+    mkdir "${DATADIR}"
+fi
+
 # Get habitat layer and prepare for use
-if [ ! -d ${DATADIR}/habitat_layers ]; then
-    if [ ! -f ${DATADIR}/habitat/raw.tif ]; then
+if [ ! -d "${DATADIR}"/habitat_layers ]; then
+    if [ ! -f "${DATADIR}"/habitat/raw.tif ]; then
         echo "Fetching habitat map..."
-        reclaimer zenodo --zenodo_id 3939050 --filename PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif --output ${DATADIR}/habitat/raw.tif
+        reclaimer zenodo --zenodo_id 3939050 \
+                         --filename PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif \
+                         --output "${DATADIR}"/habitat/raw.tif
     fi
 
     echo "Processing habitat map..."
-    python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/raw.tif \
-                                                --scale 1000.0 \
-                                                --projection "ESRI:54009" \
-                                                --output ${DATADIR}/tmp_habitat_layers/current
-    mv ${DATADIR}/tmp_habitat_layers ${DATADIR}/habitat_layers
+    aoh-habitat-process --habitat "${DATADIR}"/habitat/raw.tif \
+                        --scale 1000.0 \
+                        --projection "ESRI:54009" \
+                        --output "${DATADIR}"/tmp_habitat_layers/current
+    mv "${DATADIR}"/tmp_habitat_layers "${DATADIR}"/habitat_layers
 fi
 
-if [ ! -d ${DATADIR}/masks ]; then
+if [ ! -d "${DATADIR}"/masks ]; then
     echo "Processing masks..."
-    python3 ./prepare_layers/make_masks.py --habitat_layers ${DATADIR}/habitat_layers/current \
-                                        --output_directory ${DATADIR}/masks
+    python3 ./prepare_layers/make_masks.py --habitat_layers "${DATADIR}"/habitat_layers/current \
+                                        --output_directory "${DATADIR}"/masks
 fi
 
 # Fetch and prepare the elevation layers
-if [[ ! -f ${DATADIR}/elevation/elevation-max-1k.tif || ! -f ${DATADIR}/elevation/elevation-min-1k.tif ]]; then
-    if [ ! -f ${DATADIR}/elevation/elevation.tif ]; then
+if [[ ! -f "${DATADIR}"/elevation/elevation-max-1k.tif || ! -f "${DATADIR}"/elevation/elevation-min-1k.tif ]]; then
+    if [ ! -f "${DATADIR}"/elevation/elevation.tif ]; then
         echo "Fetching elevation map..."
-        reclaimer zenodo --zenodo_id 5719984  --filename dem-100m-esri54017.tif --output ${DATADIR}/elevation/elevation.tif
+        reclaimer zenodo --zenodo_id 5719984  --filename dem-100m-esri54017.tif --output "${DATADIR}"/elevation/elevation.tif
     fi
-    if [ ! -f ${DATADIR}/elevation/elevation-max-1k.tif ]; then
+    if [ ! -f "${DATADIR}"/elevation/elevation-max-1k.tif ]; then
         echo "Generating elevation max layer..."
-        gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation/elevation.tif ${DATADIR}/elevation/elevation-max-1k.tif
+        gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 "${DATADIR}"/elevation/elevation.tif "${DATADIR}"/elevation/elevation-max-1k.tif
     fi
-    if [ ! -f ${DATADIR}/elevation/elevation-min-1k.tif ]; then
+    if [ ! -f "${DATADIR}"/elevation/elevation-min-1k.tif ]; then
         echo "Generating elevation min layer..."
-        gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation/elevation.tif ${DATADIR}/elevation/elevation-min-1k.tif
+        gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 "${DATADIR}"/elevation/elevation.tif "${DATADIR}"/elevation/elevation-min-1k.tif
     fi
 fi
 
 # Generate the crosswalk table
-if [ ! -f ${DATADIR}/crosswalk.csv ]; then
+if [ ! -f "${DATADIR}"/crosswalk.csv ]; then
     echo "Generating crosswalk table..."
-    python3 ./prepare_layers/convert_crosswalk.py --original ${PWD}/data/crosswalk_bin_T.csv --output ${DATADIR}/crosswalk.csv
+    python3 ./prepare_layers/convert_crosswalk.py --original ./data/crosswalk_bin_T.csv --output "${DATADIR}"/crosswalk.csv
 fi
 
 # Get species data per taxa from IUCN data
 for TAXA in "${TAXALIST[@]}"
 do
-    echo "Extracting species data for ${TAXA}..."
-    python3 ./prepare_species/extract_species_data_psql.py --class ${TAXA} --output ${DATADIR}/species-info/${TAXA}/ --projection "ESRI:54009" --excludes ${DATADIR}/SpeciesList_generalisedRangePolygons.csv
+    if [ ! -d "${DATADIR}"/species-info/"${TAXA}"/ ]; then
+        echo "Extracting species data for ${TAXA}..."
+        python3 ./prepare_species/extract_species_data_psql.py --class "${TAXA}" --output "${DATADIR}"/species-info/"${TAXA}"/ --projection "ESRI:54009" --excludes "${DATADIR}"/SpeciesList_generalisedRangePolygons.csv
+    fi
 done
 
-if [ -f data/BL_Species_Elevations_2023.csv ]; then
+if [ -f "${DATADIR}"/BL_Species_Elevations_2023.csv ]; then
     echo "Applying birdlife data..."
-    python3 ./prepare_species/apply_birdlife_data.py --geojsons ${DATADIR}/species-info/AVES --overrides data/BL_Species_Elevations_2023.csv
+    python3 ./prepare_species/apply_birdlife_data.py --geojsons "${DATADIR}"/species-info/AVES --overrides "${DATADIR}"/BL_Species_Elevations_2023.csv
 fi
 
 echo "Generating AoH task list..."
-python3 ./utils/aoh_generator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/aohbatch.csv
+python3 ./utils/aoh_generator.py --input "${DATADIR}"/species-info --datadir "${DATADIR}" --output "${DATADIR}"/aohbatch.csv
 
 echo "Generating AoHs..."
-littlejohn -j ${THREADS} -o ${DATADIR}/aohbatch.log -c ${DATADIR}/aohbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./aoh-calculator/aohcalc.py
+littlejohn -j "${PROCESS_COUNT}" -o "${DATADIR}"/aohbatch.log -c "${DATADIR}"/aohbatch.csv "${VIRTUAL_ENV}"/bin/aoh-calc
 
 # Calculate predictors from AoHs
 echo "Generating species richness..."
-python3 ./aoh-calculator/summaries/species_richness.py --aohs_folder ${DATADIR}/aohs/current/ \
-                                                       --output ${DATADIR}/summaries/species_richness.tif
+aoh-species-richness --aohs_folder "${DATADIR}"/aohs/current/ \
+                     --output "${DATADIR}"/summaries/species_richness.tif
 echo "Generating endemism..."
-python3 ./aoh-calculator/summaries/endemism.py --aohs_folder ${DATADIR}/aohs/current/ \
-                                               --species_richness ${DATADIR}/summaries/species_richness.tif \
-                                               --output ${DATADIR}/summaries/endemism.tif
+aoh-endemism --aohs_folder "${DATADIR}"/aohs/current/ \
+             --species_richness "${DATADIR}"/summaries/species_richness.tif \
+             --output "${DATADIR}"/summaries/endemism.tif
 
 # Aoh Validation
 echo "Collating validation data..."
-python3 ./aoh-calculator/validation/collate_data.py --aoh_results ${DATADIR}/aohs/current/ \
-                                                    --output ${DATADIR}/validation/aohs.csv
+aoh-collate-data --aoh_results "${DATADIR}"/aohs/current/ \
+                 --output "${DATADIR}"/validation/aohs.csv
 echo "Calculating model validation..."
-python3 ./aoh-calculator/validation/validate_map_prevalence.py --collated_aoh_data ${DATADIR}/validation/aohs.csv \
-                                                               --output ${DATADIR}/validation/model_validation.csv
+aoh-validate-prevalence --collated_aoh_data "${DATADIR}"/validation/aohs.csv \
+                        --output "${DATADIR}"/validation/model_validation.csv
 
 # Threats
 echo "Generating threat task list..."
-python3 ./utils/threats_generator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/threatbatch.csv
+python3 ./utils/threats_generator.py --input "${DATADIR}"/species-info --datadir "${DATADIR}" --output "${DATADIR}"/threatbatch.csv
 
 echo "Generating threat rasters..."
-littlejohn -j ${THREADS} -o ${DATADIR}/threatbatch.log -c ${DATADIR}/threatbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./threats/threat_processing.py
+littlejohn -j "${PROCESS_COUNT}" -o "${DATADIR}"/threatbatch.log -c "${DATADIR}"/threatbatch.csv "${VIRTUAL_ENV}"/bin/python3 -- ./threats/threat_processing.py
 
 echo "Summarising threats..."
-python3 ./threats/threat_summation.py --threat_rasters ${DATADIR}/threat_rasters --output ${DATADIR}/threat_results
+python3 ./threats/threat_summation.py --threat_rasters "${DATADIR}"/threat_rasters --output "${DATADIR}"/threat_results
diff --git a/threats/threat_processing.py b/threats/threat_processing.py
index 2a0adca..0e04c73 100644
--- a/threats/threat_processing.py
+++ b/threats/threat_processing.py
@@ -2,22 +2,23 @@
 import json
 import os
 import sys
+from pathlib import Path
 
 import geopandas as gpd
+import yirgacheffe as yg
 from pyogrio.errors import DataSourceError
-from yirgacheffe.layers import RasterLayer
 
 def threat_processing_per_species(
-    species_data_path: str,
-    aoh_path: str,
-    output_directory_path: str,
+    species_data_path: Path,
+    aoh_path: Path,
+    output_directory_path: Path,
 ) -> None:
     try:
         data = gpd.read_file(species_data_path)
     except DataSourceError:
         sys.exit(f"Failed to read {species_data_path}")
 
-    with RasterLayer.layer_from_file(aoh_path) as aoh:
+    with yg.read_raster(aoh_path) as aoh:
 
         os.makedirs(output_directory_path, exist_ok=True)
 
@@ -26,8 +27,7 @@ def threat_processing_per_species(
         threat_data = json.loads(data.threats[0])
 
         try:
-            aoh_base, _ = os.path.splitext(aoh_path)
-            aoh_data_path = aoh_base + ".json"
+            aoh_data_path = aoh_path.with_suffix(".json")
             with open(aoh_data_path, "r", encoding="UTF-8") as f:
                 aoh_data = json.load(f)
             aoh_total = aoh_data["aoh_total"]
@@ -46,11 +46,10 @@ def threat_processing_per_species(
             per_threat_per_species_score = weighted_species * proportional_threat_weight
             print(per_threat_per_species_score.sum())
 
-            threat_dir_path = os.path.join(output_directory_path, str(threat_id))
+            threat_dir_path = output_directory_path / str(threat_id)
             os.makedirs(threat_dir_path, exist_ok=True)
-            output_path = os.path.join(threat_dir_path, f"{taxon_id}.tif")
-            with RasterLayer.empty_raster_layer_like(aoh, filename=output_path) as result:
-                per_threat_per_species_score.save(result)
+            output_path = threat_dir_path / f"{taxon_id}.tif"
+            per_threat_per_species_score.to_geotiff(output_path)
 
 def main() -> None:
     os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0"
@@ -58,21 +57,21 @@ def main() -> None:
     parser = argparse.ArgumentParser(description="Calculate per species threat layers")
     parser.add_argument(
         '--speciesdata',
-        type=str,
+        type=Path,
         help="Single species/seasonality geojson.",
         required=True,
         dest="species_data_path"
     )
     parser.add_argument(
         '--aoh',
-        type=str,
+        type=Path,
         help="AoH raster  of speices.",
         required=True,
         dest="aoh_path"
     )
     parser.add_argument(
         '--output',
-        type=str,
+        type=Path,
         help='Directory where per species/threat layers are stored',
         required=True,
         dest='output_directory_path',
diff --git a/threats/threat_summation.py b/threats/threat_summation.py
index 54bf6a5..5b93ede 100644
--- a/threats/threat_summation.py
+++ b/threats/threat_summation.py
@@ -5,20 +5,17 @@
 import time
 from multiprocessing import Manager, Process, Queue, cpu_count
 from pathlib import Path
-from typing import List
 
-from yirgacheffe.layers import RasterLayer  # type: ignore
+import yirgacheffe as yg
+from yirgacheffe.layers import RasterLayer
 from osgeo import gdal
 
 gdal.SetCacheMax(1024 * 1024 * 32)
 
 def worker(
-    filename: str,
-    result_dir: str,
+    output_tif: Path,
     input_queue: Queue,
 ) -> None:
-    output_tif = os.path.join(result_dir, filename)
-
     merged_result = None
 
     while True:
@@ -26,7 +23,7 @@ def worker(
         if path is None:
             break
 
-        with RasterLayer.layer_from_file(path) as partial_raster:
+        with yg.read_raster(path) as partial_raster:
             if merged_result is None:
                 merged_result = RasterLayer.empty_raster_layer_like(partial_raster)
                 cleaned_raster = partial_raster.nan_to_num()
@@ -38,24 +35,22 @@ def worker(
                 merged_result = temp
 
     if merged_result:
-        final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif)
-        merged_result.save(final)
+        merged_result.to_geotiff(output_tif)
 
 def raster_sum(
-    images_list: List[Path],
-    output_filename: str,
+    images_list: list[Path],
+    output_filename: Path,
     processes_count: int
 ) -> None:
-    result_dir, filename = os.path.split(output_filename)
-    os.makedirs(result_dir, exist_ok=True)
+    os.makedirs(output_filename.parent, exist_ok=True)
 
-    with tempfile.TemporaryDirectory() as tempdir:
+    with tempfile.TemporaryDirectory() as tempdir_str:
+        tempdir = Path(tempdir_str)
         with Manager() as manager:
             source_queue = manager.Queue()
 
             workers = [Process(target=worker, args=(
-                f"{index}.tif",
-                tempdir,
+                tempdir / f"{index}.tif",
                 source_queue
             )) for index in range(processes_count)]
             for worker_process in workers:
@@ -80,8 +75,7 @@ def raster_sum(
 
             # here we should have now a set of images in tempdir to merge
             single_worker = Process(target=worker, args=(
-                filename,
-                result_dir,
+                output_filename,
                 source_queue
             ))
             single_worker.start()
@@ -103,17 +97,17 @@ def raster_sum(
                 time.sleep(1)
 
 def reduce_to_next_level(
-    rasters_directory: str,
-    output_directory: str,
+    rasters_directory: Path,
+    output_directory: Path,
     processes_count: int,
 ) -> None:
 
-    files = list(Path(rasters_directory).glob("**/*.tif"))
+    files = list(rasters_directory.glob("**/*.tif"))
     print(f"total items: {len(files)}")
     if not files:
         sys.exit(f"No files in {rasters_directory}, aborting")
 
-    buckets = {}
+    buckets: dict[str,list[Path]] = {}
     for filename in files:
         code, _ = os.path.splitext(filename.name)
         next_level_threat_id = ".".join(code.split('.')[:-1])
@@ -126,22 +120,22 @@ def reduce_to_next_level(
 
     print(f"Found {len(buckets)} threats at current level:")
     for code, files in buckets.items():
-        target_output = os.path.join(output_directory, f"{code}.tif")
+        target_output = output_directory / f"{code}.tif"
         print(f"processing {code}: {len(files)} items")
         raster_sum(files, target_output, processes_count)
 
 def reduce_from_species(
-    rasters_directory: str,
-    output_directory: str,
+    rasters_directory: Path,
+    output_directory: Path,
     processes_count: int,
 ) -> None:
 
-    files = list(Path(rasters_directory).glob("**/*.tif"))
+    files = list(rasters_directory.glob("**/*.tif"))
     print(f"total items: {len(files)}")
     if not files:
         sys.exit(f"No files in {rasters_directory}, aborting")
 
-    buckets = {}
+    buckets: dict[str,list[Path]] = {}
     for filename in files:
         threat_code = filename.parts[-2]
         levels = threat_code.split('.')
@@ -159,31 +153,30 @@ def reduce_from_species(
 
     print(f"Found {len(buckets)} threats at current level:")
     for code, files in buckets.items():
-        target_output = os.path.join(output_directory, f"{code}.tif")
+        target_output = output_directory / f"{code}.tif"
         print(f"processing {code}: {len(files)} items")
         raster_sum(files, target_output, processes_count)
 
-
 def threat_summation(
-    rasters_directory: str,
-    output_directory: str,
+    rasters_directory: Path,
+    output_directory: Path,
     processes_count: int,
 ) -> None:
     os.makedirs(output_directory, exist_ok=True)
 
     # All these files are at level3 to start with, so first make level2
     print("processing level 2")
-    level2_target = os.path.join(output_directory, "level2")
+    level2_target = output_directory / "level2"
     reduce_from_species(rasters_directory, level2_target, processes_count)
 
     # Now reduce level2 to level1
     print("processing level 1")
-    level1_target = os.path.join(output_directory, "level1")
+    level1_target = output_directory / "level1"
     reduce_to_next_level(level2_target, level1_target, processes_count)
 
     # Now build a final top level STAR
     print("processing level 0")
-    final_target = os.path.join(output_directory, "level0")
+    final_target = output_directory / "level0"
     reduce_to_next_level(level1_target, final_target, processes_count)
 
 
@@ -191,14 +184,14 @@ def main() -> None:
     parser = argparse.ArgumentParser(description="Generates the combined, and level 1 and level 2 threat rasters.")
     parser.add_argument(
         "--threat_rasters",
-        type=str,
+        type=Path,
         required=True,
         dest="rasters_directory",
         help="GeoTIFF file containing level three per species threats"
     )
     parser.add_argument(
         "--output",
-        type=str,
+        type=Path,
         required=True,
         dest="output_directory",
         help="Destination directory file for results."
diff --git a/utils/aoh_generator.py b/utils/aoh_generator.py
index 1392f36..2408cf0 100644
--- a/utils/aoh_generator.py
+++ b/utils/aoh_generator.py
@@ -7,12 +7,11 @@
 import pandas as pd
 
 def aoh_generator(
-    input_dir: str,
-    data_dir: str,
-    output_csv_path: str
+    input_dir: Path,
+    data_dir: Path,
+    output_csv_path: Path,
 ):
-    taxa_dirs = Path(input_dir).glob("[!.]*")
-    data_dir = Path(data_dir)
+    taxa_dirs = input_dir.glob("[!.]*")
 
     res = []
     for taxa_dir_path in taxa_dirs:
@@ -49,21 +48,21 @@ def main() -> None:
     parser = argparse.ArgumentParser(description="Species and seasonality generator.")
     parser.add_argument(
         '--input',
-        type=str,
+        type=Path,
         help="directory with taxa folders of species info",
         required=True,
         dest="input_dir"
     )
     parser.add_argument(
         '--datadir',
-        type=str,
+        type=Path,
         help="directory for results",
         required=True,
         dest="data_dir",
     )
     parser.add_argument(
         '--output',
-        type=str,
+        type=Path,
         help="name of output file for csv",
         required=True,
         dest="output"
diff --git a/utils/collect_validation_data.py b/utils/collect_validation_data.py
index 278d71d..49a3563 100644
--- a/utils/collect_validation_data.py
+++ b/utils/collect_validation_data.py
@@ -1,13 +1,14 @@
 import argparse
 import os
 import shutil
+from pathlib import Path
 
 import pandas as pd
 
 def collect_validation_data(
-    model_results_path: str,
-    data_dir: str,
-    output_dir: str,
+    model_results_path: Path,
+    data_dir: Path,
+    output_dir: Path,
 ) -> None:
     model_results = pd.read_csv(model_results_path)
     os.makedirs(output_dir, exist_ok=True)
@@ -29,21 +30,21 @@ def main() -> None:
     parser = argparse.ArgumentParser(description="Collected range/AOH for species that failed validation")
     parser.add_argument(
         '--model_results',
-        type=str,
+        type=Path,
         help="directory with taxa folders of species info",
         required=True,
         dest="model_results_path"
     )
     parser.add_argument(
         '--datadir',
-        type=str,
+        type=Path,
         help="directory for results",
         required=True,
         dest="data_dir",
     )
     parser.add_argument(
         '--output',
-        type=str,
+        type=Path,
         help="name of output directory",
         required=True,
         dest="output"
diff --git a/utils/threats_generator.py b/utils/threats_generator.py
index 7b2a281..6538e8c 100644
--- a/utils/threats_generator.py
+++ b/utils/threats_generator.py
@@ -7,12 +7,11 @@
 import pandas as pd
 
 def threats_generator(
-    input_dir: str,
-    data_dir: str,
-    output_csv_path: str
+    input_dir: Path,
+    data_dir: Path,
+    output_csv_path: Path,
 ):
-    taxa_dirs = Path(input_dir).glob("[!.]*")
-    data_dir = Path(data_dir)
+    taxa_dirs = input_dir.glob("[!.]*")
 
     res = []
     for taxa_dir_path in taxa_dirs:
@@ -40,24 +39,24 @@ def threats_generator(
     df.to_csv(output_csv_path, index=False)
 
 def main() -> None:
-    parser = argparse.ArgumentParser(description="threat tasts generator.")
+    parser = argparse.ArgumentParser(description="threat tasks generator.")
     parser.add_argument(
         '--input',
-        type=str,
+        type=Path,
         help="directory with taxa folders of species info",
         required=True,
         dest="input_dir"
     )
     parser.add_argument(
         '--datadir',
-        type=str,
+        type=Path,
         help="directory for results",
         required=True,
         dest="data_dir",
     )
     parser.add_argument(
         '--output',
-        type=str,
+        type=Path,
         help="name of output file for csv",
         required=True,
         dest="output"