diff --git a/prepare_species/common.py b/prepare_species/common.py index 718c27e..cd98390 100644 --- a/prepare_species/common.py +++ b/prepare_species/common.py @@ -24,7 +24,10 @@ "full_habitat_code", "scientific_name", "family_name", + "order_name", "class_name", + "phylum_name", + "kingdom_name", "threats", "category", "category_weight", @@ -225,7 +228,7 @@ def tidy_reproject_save( elevation_seperation=ELEVATION_SPREAD, ) os.makedirs(output_directory_path, exist_ok=True) - output_path = output_directory_path / f"{grow.id_no}.geojson" + output_path = output_directory_path / f"range_T{grow.id_no}A{grow.assessment_id}_{grow.season}.geojson" res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry") # Ensure proper dtypes for JSON serialization diff --git a/prepare_species/extract_species_data_psql.py b/prepare_species/extract_species_data_psql.py index 3dfa314..ac5694d 100644 --- a/prepare_species/extract_species_data_psql.py +++ b/prepare_species/extract_species_data_psql.py @@ -46,6 +46,9 @@ (assessment_supplementary_infos.supplementary_fields->>'ElevationUpper.limit')::numeric AS elevation_upper, taxons.scientific_name, taxons.family_name, + taxons.order_name, + taxons.phylum_name, + taxons.kingdom_name, red_list_category_lookup.code FROM assessments @@ -133,7 +136,8 @@ def process_row( cursor = connection.cursor() id_no, assessment_id, assessment_year, possibly_extinct, possibly_extinct_in_the_wild, \ - elevation_lower, elevation_upper, scientific_name, family_name, category = row + elevation_lower, elevation_upper, scientific_name, family_name, \ + order_name, phylum_name, kingdom_name, category = row report = SpeciesReport(id_no, assessment_id, scientific_name) report.has_api_data = True @@ -212,7 +216,10 @@ def process_row( '|'.join(habitats_list), scientific_name, family_name, + order_name, class_name, + phylum_name, + kingdom_name, json_ready_threats, category, category_weight, diff --git
a/prepare_species/extract_species_data_redlist.py b/prepare_species/extract_species_data_redlist.py index 8ebdcf0..f38c98d 100644 --- a/prepare_species/extract_species_data_redlist.py +++ b/prepare_species/extract_species_data_redlist.py @@ -182,6 +182,9 @@ def process_species( assessment_year = assessment_dict['assessment_date'].year category = assessment_dict['red_list_category'] family_name = assessment_dict['family_name'] + order_name = assessment_dict['order_name'] + phylum_name = assessment_dict['phylum_name'] + kingdom_name = assessment_dict['kingdom_name'] possibly_extinct = assessment_dict['possibly_extinct'] possibly_extinct_in_the_wild = assessment_dict['possibly_extinct_in_the_wild'] infrarank = assessment_dict['infrarank'] @@ -261,7 +264,10 @@ def process_species( '|'.join(habitats_list), scientific_name, family_name, + order_name, class_name, + phylum_name, + kingdom_name, json_ready_threats, category, CATEGORY_WEIGHTS[category], diff --git a/requirements.txt b/requirements.txt index b96fe18..f25a72b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ scikit-image redlistapi boto3 yirgacheffe>=1.12,<2.0 -aoh[validation]>=2.0.3,<3.0 +aoh[validation]>=2.1.0,<3.0 # Snakemake workflow management snakemake>=8.0 diff --git a/threats/threat_processing.py b/threats/threat_processing.py index f2347c7..36fbc88 100644 --- a/threats/threat_processing.py +++ b/threats/threat_processing.py @@ -25,6 +25,7 @@ def threat_processing_per_species( os.makedirs(output_directory_path, exist_ok=True) taxon_id = data.id_no[0] + assessment_id = data.assessment_id[0] # Due to validation we generate AOHs for many more species than # is needed for STAR, but we need to ensure those don't slip into @@ -54,7 +55,7 @@ def threat_processing_per_species( threat_dir_path = output_directory_path / str(threat_id) os.makedirs(threat_dir_path, exist_ok=True) - output_path = threat_dir_path / f"{taxon_id}.tif" + output_path = threat_dir_path / 
f"threat_T{taxon_id}A{assessment_id}.tif" per_threat_per_species_score.to_geotiff(output_path) # This script generates a bunch of rasters, but snakemake needs one diff --git a/workflow/rules/aoh.smk b/workflow/rules/aoh.smk index 160a70a..3f6cbe1 100644 --- a/workflow/rules/aoh.smk +++ b/workflow/rules/aoh.smk @@ -53,7 +53,7 @@ def aoh_species_inputs(wildcards): / "species-info" / wildcards.taxa / SCENARIO - / f"{wildcards.species_id}.geojson", + / f"range_{wildcards.species_id}.geojson", # Base layers (precious - won't trigger rebuilds) "habitat_sentinel": ancient( DATADIR / "habitat_layers" / SCENARIO / ".habitat_complete" @@ -88,11 +88,11 @@ rule generate_aoh: unpack(aoh_species_inputs), output: # Only declare JSON as output - TIF is optional (not created for empty AOHs) - metadata=DATADIR / "aohs" / SCENARIO / "{taxa}" / "{species_id}_all.json", + metadata=DATADIR / "aohs" / SCENARIO / "{taxa}" / "aoh_{species_id}.json", params: habitat_dir=DATADIR / "habitat_layers" / SCENARIO, log: - DATADIR / "logs" / "aoh" / "{taxa}" / "{species_id}_all.log", + DATADIR / "logs" / "aoh" / "{taxa}" / "{species_id}.log", resources: # Limit concurrent AOH jobs if needed (e.g., for memory) aoh_slots=1, @@ -126,7 +126,7 @@ def get_species_ids_for_taxa(wildcards): taxa=wildcards.taxa ).output[0] geojson_dir = Path(checkpoint_output).parent - return [p.stem for p in geojson_dir.glob("*.geojson")] + return [p.stem[6:] for p in geojson_dir.glob("range_*.geojson")] def get_all_aoh_metadata_for_taxa(wildcards): @@ -135,7 +135,7 @@ def get_all_aoh_metadata_for_taxa(wildcards): """ species_ids = get_species_ids_for_taxa(wildcards) return [ - DATADIR / "aohs" / SCENARIO / wildcards.taxa / f"{sid}_all.json" + DATADIR / "aohs" / SCENARIO / wildcards.taxa / f"aoh_{sid}.json" for sid in species_ids ] diff --git a/workflow/rules/threats.smk b/workflow/rules/threats.smk index 608934f..bcc7764 100644 --- a/workflow/rules/threats.smk +++ b/workflow/rules/threats.smk @@ -39,9 +39,9 @@ def 
get_star_species_for_taxa(wildcards): star_species = [] for geojson_path in geojson_dir.glob("*.geojson"): - species_id = geojson_path.stem + species_id = geojson_path.stem[6:] aoh_path = ( - DATADIR / "aohs" / SCENARIO / wildcards.taxa / f"{species_id}_all.tif" + DATADIR / "aohs" / SCENARIO / wildcards.taxa / f"aoh_{species_id}.tif" ) # Check if species should be in STAR and has an AOH @@ -88,8 +88,8 @@ rule generate_threat_rasters: / "species-info" / "{taxa}" / SCENARIO - / "{species_id}.geojson", - aoh=DATADIR / "aohs" / SCENARIO / "{taxa}" / "{species_id}_all.tif", + / "range_{species_id}.geojson", + aoh=DATADIR / "aohs" / SCENARIO / "{taxa}" / "aoh_{species_id}.tif", output: # Sentinel file since actual outputs depend on species' threats sentinel=DATADIR / "threat_rasters" / "{taxa}" / ".{species_id}_complete", diff --git a/workflow/rules/validation.smk b/workflow/rules/validation.smk index b45eed1..932a5fe 100644 --- a/workflow/rules/validation.smk +++ b/workflow/rules/validation.smk @@ -96,7 +96,8 @@ rule validate_gbif_occurrences: gbif_data=lambda wildcards: DATADIR / "validation" / "occurrences" - / wildcards.taxa, + / wildcards.taxa + / "points.csv", species_data=lambda wildcards: DATADIR / "species-info" / wildcards.taxa @@ -106,7 +107,7 @@ DATADIR / "logs" / "validate_gbif_{taxa}.log", shell: """ - aoh-validate-occurrences \ + aoh-validate-occurrences \ --gbif_data_path {params.gbif_data} \ --species_data {params.species_data} \ --aoh_results {params.aoh_results} \