Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions .github/workflows/create_data_packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,5 @@ jobs:
for dir in oeps/data/package_rules/*/; do
name=$(basename "$dir")
echo "Creating data package: $name"
extra=""
if [ "$name" = "DSuite2023" ]; then
extra="--skip-foreign-keys"
fi
flask create-data-package -c "$name" --zip --upload --skip-validation --stable-name $extra --overwrite
flask create-data-package -c "$name" --zip --upload --stable-name --overwrite
done
51 changes: 49 additions & 2 deletions backend/oeps/clients/frictionless.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,54 @@ def create_from_rules(
gs = self.registry.geodata_sources.get(list(gs_lookup.keys())[0])
geodata_sources.append(gs)

df = gs.get_blank_dataframe()

# Tabular geography key table for Frictionless FK checks. Validating FKs against the
# shapefile resource fails (FileResource has no row_stream); see issue #311.
if not skip_foreign_keys:
keys_resource_name = f"{rules_file.stem}-geography-keys"
keys_filename = f"{rules_file.stem}-geography-keys.csv"
keys_path_rel = f"data/{keys_filename}"
geo_key_col = "ZCTA5" if rules_file.stem == "zcta" else "FIPS"
keys_schema_obj = {
"primaryKey": "HEROP_ID",
"fields": [
{
"title": "HEROP_ID",
"name": "HEROP_ID",
"type": "string",
"example": "050US01001",
"description": "A derived unique id corresponding to the relevant geographic unit.",
"metadata": "Geographic_Boundaries",
},
{
"title": "ZCTA5" if geo_key_col == "ZCTA5" else "FIPS",
"name": geo_key_col,
"type": "string",
"example": "22001",
"description": (
"Zip Code for this geographic unit."
if geo_key_col == "ZCTA5"
else "FIPS code for this geographic unit."
),
"metadata": "Geographic_Boundaries",
},
],
}
keys_resource = {
"name": keys_resource_name,
"title": f"Geography keys ({rules_file.stem})",
"format": "csv",
"mediatype": "text/csv",
"path": keys_path_rel,
"schema": f"schemas/{keys_resource_name}.json",
}
keys_outpath = Path(d_path, keys_filename)
df[["HEROP_ID", geo_key_col]].to_csv(keys_outpath, index=False)
write_json(keys_schema_obj, Path(self.path, keys_resource["schema"]))
self.schema["resources"].append(keys_resource)
self.clean_data_resource(keys_resource)

ts_resource = {
"name": rules_file.stem,
"title": rules_file.stem,
Expand All @@ -139,7 +187,7 @@ def create_from_rules(
resource_schema["foreignKeys"] = [{
"fields": "HEROP_ID",
"reference": {
"resource": gs.name,
"resource": f"{rules_file.stem}-geography-keys",
"fields": "HEROP_ID"
}
}]
Expand All @@ -162,7 +210,6 @@ def create_from_rules(
"metadata": "Geographic_Boundaries"
})

df = gs.get_blank_dataframe()
for row in self.rules_rows:
## skip HEROP_ID and FIPS, as they are already in the blank df
if row["name"] in ["HEROP_ID", "FIPS"]:
Expand Down
7 changes: 3 additions & 4 deletions backend/oeps/commands/create_data_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@
"--skip-foreign-keys",
is_flag=True,
default=False,
help="Don't define foreign keys in the output data package. This is needed to avoid validation errors that "
"occur when Shapefiles are used in foreign keys.",
help="Don't define foreign keys in the output data package and omit geography-keys CSV resources. "
"By default, FKs reference a tabular geography-keys file (not the shapefile) so validation can run.",
)
@click.option(
"--skip-validation",
Expand Down Expand Up @@ -106,8 +106,7 @@ def create_data_package(

The resulting package will be validated against the `frictionless` standard using that Python library.

`--skip-foreign-keys` to skip the creation of foreign keys--useful because foreign keys to shapefiles break
validation.
`--skip-foreign-keys` to omit foreign keys and geography-keys tables (packages without relational metadata).

`--skip-validation` to skip the final step of running validation on the output package.
"""
Expand Down