From 4e150d03a56634ded37ff3b34788dbf70acf4198 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 6 Apr 2025 15:06:34 +0000 Subject: [PATCH 1/6] chore(release): 1.0.0 [skip ci] # 1.0.0 (2025-04-06) ### Bug Fixes * **572:** fix webpack warnings + fix linting issues ([#574](https://github.com/adamzev/clean-and-green-philly/issues/574)) ([fbfd459](https://github.com/adamzev/clean-and-green-philly/commit/fbfd459ba147e51a2fd54bee61d5ca63fb74eb98)) * Add focus styling to header links ([a99065c](https://github.com/adamzev/clean-and-green-philly/commit/a99065cba1607b981702e7aa7e654f24c108ad6e)) * Add href to dropdown items to increase link size ([2eb4667](https://github.com/adamzev/clean-and-green-philly/commit/2eb4667e7b68f45489c51ea10aa7d15afd2a3c77)) * closing tooltip popup on map closes property details ([432e8c0](https://github.com/adamzev/clean-and-green-philly/commit/432e8c0a14e1e9cfa199b030019c0730a24741c6)) * correct the scrolling on the map page ([a850237](https://github.com/adamzev/clean-and-green-philly/commit/a8502372e243057e2b06559cb042d94ea894415a)) * Remove nested link and button ([d93e8bd](https://github.com/adamzev/clean-and-green-philly/commit/d93e8bd52d80cd8d5b1a30910aaa4b168a11b57a)) * Update footer markup structure for accessibility ([d5c2a33](https://github.com/adamzev/clean-and-green-philly/commit/d5c2a334f0e5c297f117dca5dc2972b56d7b3542)) ### Features * **247:** make the mapbox legend more legible ([bcf6915](https://github.com/adamzev/clean-and-green-philly/commit/bcf691566fc26e834547a43efd863de58abdf3d4)) * **293:** sort features by priority of high, medium, low ([8cd1058](https://github.com/adamzev/clean-and-green-philly/commit/8cd105872feb0065aa3d207d42dc5df76ef63dbc)) * **305:** expand the header width of single property table ([b25261e](https://github.com/adamzev/clean-and-green-philly/commit/b25261e5d9e26bdbd9694aa03b4964771528dd3b)) * **395:** add white background to backbutton bar ([a65f052](https://github.com/adamzev/clean-and-green-philly/commit/a65f052ca2e5c0eac2737f85b4997af6e993f750)) * **395:** make property selection back button sticky ([6601201](https://github.com/adamzev/clean-and-green-philly/commit/6601201fcf943f8e64a1341327095e9b7edc6b08)) * **457:** add filter count to button ([b35145a](https://github.com/adamzev/clean-and-green-philly/commit/b35145a1df5f2995294f829e0b7cde20ba0e0f29)) * **535:** add close download view button ([#579](https://github.com/adamzev/clean-and-green-philly/issues/579)) ([9bd2c53](https://github.com/adamzev/clean-and-green-philly/commit/9bd2c53aa3b49da8136fa6e7e850c4dc4d5e2d74)) * **544:** adjust remove property section ([#580](https://github.com/adamzev/clean-and-green-philly/issues/580)) ([900af8e](https://github.com/adamzev/clean-and-green-philly/commit/900af8e6fbfc89bd4ed83b17c9eed0a1d5a3de3b)) * **568:** restyle the property table ([#577](https://github.com/adamzev/clean-and-green-philly/issues/577)) ([2c8cc92](https://github.com/adamzev/clean-and-green-philly/commit/2c8cc92b7f28aecdbe65412476691baa50254292)) * Add arrow icon to link ([6bce3fa](https://github.com/adamzev/clean-and-green-philly/commit/6bce3fa84ad9827f2117a2d51ed2f2665e83481b)) * Add content to sidebar ([23d5c52](https://github.com/adamzev/clean-and-green-philly/commit/23d5c52c3fcd1eb1eff902e7d1fd37ab2fe41b6d)) * Add council district ([e576218](https://github.com/adamzev/clean-and-green-philly/commit/e5762185840f0bbc2289fede8481f5b39a16087d)) * Add extra data to single detail page ([48e34fe](https://github.com/adamzev/clean-and-green-philly/commit/48e34fe0a6317fc8fc0f1f5e745dd165000d437f)) * Add font ([c1a8b93](https://github.com/adamzev/clean-and-green-philly/commit/c1a8b93561dfc67c8eed31b3c8befa2b8ae984ec)) * Add font to map ([1491518](https://github.com/adamzev/clean-and-green-philly/commit/14915180857a482a25d96f9f45f7c838082e2d71)) * Add font to search field ([3b08069](https://github.com/adamzev/clean-and-green-philly/commit/3b080690ba92007545467bc2ce4bf3bcad26c899)) * Add info cards ([8170944](https://github.com/adamzev/clean-and-green-philly/commit/8170944fdb8d8be36af062012967323047bda748)) * Add missing data and atlas link; style table ([8c260d4](https://github.com/adamzev/clean-and-green-philly/commit/8c260d48e015ed6c8c29cc10b8936753cc013ff0)) * Add percentage, update column widths ([41caf77](https://github.com/adamzev/clean-and-green-philly/commit/41caf77c55eb0a6336abc80047ed83584d250a64)) * Add priority color square ([ad5a9fa](https://github.com/adamzev/clean-and-green-philly/commit/ad5a9faff3b9596256e680b3d6d95c54caefc7a7)) * add redirect from map to find-properties ([439ed6c](https://github.com/adamzev/clean-and-green-philly/commit/439ed6c7291251aed9751431cd5bdc2fa6883a15)) * Add row scope ([f945979](https://github.com/adamzev/clean-and-green-philly/commit/f945979edaee2ad5a003a5b5560a89cc25717f3b)) * Add spacing below Back button ([7de48c5](https://github.com/adamzev/clean-and-green-philly/commit/7de48c50f177c0275cebac0a4a7d9dfce8d2cd2c)) * Address a few more links ([b3b6733](https://github.com/adamzev/clean-and-green-philly/commit/b3b673375f1472f15d80bffeff378c973d6cb49d)) * Adjust table per requirements ([36e6cfa](https://github.com/adamzev/clean-and-green-philly/commit/36e6cfa5934defadcf1a8b8ad1c09ffe609d0ec5)) * Change Find Properties and Learn More to links ([1de0b47](https://github.com/adamzev/clean-and-green-philly/commit/1de0b474811ab2d4281d94c6a9d5882a8ebb0c13)) * Close sidebar if clicking on empty map ([612117d](https://github.com/adamzev/clean-and-green-philly/commit/612117d6c54bc62baaae4f773013e196232c2dba)) * Navigate to map on click; show popups ([183b6a2](https://github.com/adamzev/clean-and-green-philly/commit/183b6a224d651726a6e465d7c9549ef437626f9e)) * Open sidebar from map ([5474367](https://github.com/adamzev/clean-and-green-philly/commit/54743673e99ffd6b939009ad54e4d0a4252ba044)) * Pass selected property to map view ([663f142](https://github.com/adamzev/clean-and-green-philly/commit/663f14236ccb9246efa8882c0dfa44e8cbcba0f7)) * underline links ([5f23d09](https://github.com/adamzev/clean-and-green-philly/commit/5f23d09da9a7258b284ccfef54b1be0ee2a8e1f8)) * Update aria label and role ([765e5a4](https://github.com/adamzev/clean-and-green-philly/commit/765e5a4fef1941e93815ae4789093e047183101c)) --- CHANGELOG.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccb9f33e..88c33b83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,52 @@ +# 1.0.0 (2025-04-06) + + +### Bug Fixes + +* **572:** fix webpack warnings + fix linting issues ([#574](https://github.com/adamzev/clean-and-green-philly/issues/574)) ([fbfd459](https://github.com/adamzev/clean-and-green-philly/commit/fbfd459ba147e51a2fd54bee61d5ca63fb74eb98)) +* Add focus styling to header links ([a99065c](https://github.com/adamzev/clean-and-green-philly/commit/a99065cba1607b981702e7aa7e654f24c108ad6e)) +* Add href to dropdown items to increase link size ([2eb4667](https://github.com/adamzev/clean-and-green-philly/commit/2eb4667e7b68f45489c51ea10aa7d15afd2a3c77)) +* closing tooltip popup on map closes property details ([432e8c0](https://github.com/adamzev/clean-and-green-philly/commit/432e8c0a14e1e9cfa199b030019c0730a24741c6)) +* correct the scrolling on the map page ([a850237](https://github.com/adamzev/clean-and-green-philly/commit/a8502372e243057e2b06559cb042d94ea894415a)) +* Remove nested link and button ([d93e8bd](https://github.com/adamzev/clean-and-green-philly/commit/d93e8bd52d80cd8d5b1a30910aaa4b168a11b57a)) +* Update footer markup structure for accessibility ([d5c2a33](https://github.com/adamzev/clean-and-green-philly/commit/d5c2a334f0e5c297f117dca5dc2972b56d7b3542)) + + +### Features + +* **247:** make the mapbox legend more legible ([bcf6915](https://github.com/adamzev/clean-and-green-philly/commit/bcf691566fc26e834547a43efd863de58abdf3d4)) +* **293:** sort features by priority of high, medium, low ([8cd1058](https://github.com/adamzev/clean-and-green-philly/commit/8cd105872feb0065aa3d207d42dc5df76ef63dbc)) +* **305:** expand the header width of single property table ([b25261e](https://github.com/adamzev/clean-and-green-philly/commit/b25261e5d9e26bdbd9694aa03b4964771528dd3b)) +* **395:** add white background to backbutton bar ([a65f052](https://github.com/adamzev/clean-and-green-philly/commit/a65f052ca2e5c0eac2737f85b4997af6e993f750)) +* **395:** make property selection back button sticky ([6601201](https://github.com/adamzev/clean-and-green-philly/commit/6601201fcf943f8e64a1341327095e9b7edc6b08)) +* **457:** add filter count to button ([b35145a](https://github.com/adamzev/clean-and-green-philly/commit/b35145a1df5f2995294f829e0b7cde20ba0e0f29)) +* **535:** add close download view button ([#579](https://github.com/adamzev/clean-and-green-philly/issues/579)) ([9bd2c53](https://github.com/adamzev/clean-and-green-philly/commit/9bd2c53aa3b49da8136fa6e7e850c4dc4d5e2d74)) +* **544:** adjust remove property section ([#580](https://github.com/adamzev/clean-and-green-philly/issues/580)) ([900af8e](https://github.com/adamzev/clean-and-green-philly/commit/900af8e6fbfc89bd4ed83b17c9eed0a1d5a3de3b)) +* **568:** restyle the property table ([#577](https://github.com/adamzev/clean-and-green-philly/issues/577)) ([2c8cc92](https://github.com/adamzev/clean-and-green-philly/commit/2c8cc92b7f28aecdbe65412476691baa50254292)) +* Add arrow icon to link ([6bce3fa](https://github.com/adamzev/clean-and-green-philly/commit/6bce3fa84ad9827f2117a2d51ed2f2665e83481b)) +* Add content to sidebar ([23d5c52](https://github.com/adamzev/clean-and-green-philly/commit/23d5c52c3fcd1eb1eff902e7d1fd37ab2fe41b6d)) +* Add council district ([e576218](https://github.com/adamzev/clean-and-green-philly/commit/e5762185840f0bbc2289fede8481f5b39a16087d)) +* Add extra data to single detail page ([48e34fe](https://github.com/adamzev/clean-and-green-philly/commit/48e34fe0a6317fc8fc0f1f5e745dd165000d437f)) +* Add font ([c1a8b93](https://github.com/adamzev/clean-and-green-philly/commit/c1a8b93561dfc67c8eed31b3c8befa2b8ae984ec)) +* Add font to map ([1491518](https://github.com/adamzev/clean-and-green-philly/commit/14915180857a482a25d96f9f45f7c838082e2d71)) +* Add font to search field ([3b08069](https://github.com/adamzev/clean-and-green-philly/commit/3b080690ba92007545467bc2ce4bf3bcad26c899)) +* Add info cards ([8170944](https://github.com/adamzev/clean-and-green-philly/commit/8170944fdb8d8be36af062012967323047bda748)) +* Add missing data and atlas link; style table ([8c260d4](https://github.com/adamzev/clean-and-green-philly/commit/8c260d48e015ed6c8c29cc10b8936753cc013ff0)) +* Add percentage, update column widths ([41caf77](https://github.com/adamzev/clean-and-green-philly/commit/41caf77c55eb0a6336abc80047ed83584d250a64)) +* Add priority color square ([ad5a9fa](https://github.com/adamzev/clean-and-green-philly/commit/ad5a9faff3b9596256e680b3d6d95c54caefc7a7)) +* add redirect from map to find-properties ([439ed6c](https://github.com/adamzev/clean-and-green-philly/commit/439ed6c7291251aed9751431cd5bdc2fa6883a15)) +* Add row scope ([f945979](https://github.com/adamzev/clean-and-green-philly/commit/f945979edaee2ad5a003a5b5560a89cc25717f3b)) +* Add spacing below Back button ([7de48c5](https://github.com/adamzev/clean-and-green-philly/commit/7de48c50f177c0275cebac0a4a7d9dfce8d2cd2c)) +* Address a few more links ([b3b6733](https://github.com/adamzev/clean-and-green-philly/commit/b3b673375f1472f15d80bffeff378c973d6cb49d)) +* Adjust table per requirements ([36e6cfa](https://github.com/adamzev/clean-and-green-philly/commit/36e6cfa5934defadcf1a8b8ad1c09ffe609d0ec5)) +* Change Find Properties and Learn More to links ([1de0b47](https://github.com/adamzev/clean-and-green-philly/commit/1de0b474811ab2d4281d94c6a9d5882a8ebb0c13)) +* Close sidebar if clicking on empty map ([612117d](https://github.com/adamzev/clean-and-green-philly/commit/612117d6c54bc62baaae4f773013e196232c2dba)) +* Navigate to map on click; show popups ([183b6a2](https://github.com/adamzev/clean-and-green-philly/commit/183b6a224d651726a6e465d7c9549ef437626f9e)) +* Open sidebar from map ([5474367](https://github.com/adamzev/clean-and-green-philly/commit/54743673e99ffd6b939009ad54e4d0a4252ba044)) +* Pass selected property to map view ([663f142](https://github.com/adamzev/clean-and-green-philly/commit/663f14236ccb9246efa8882c0dfa44e8cbcba0f7)) +* underline links ([5f23d09](https://github.com/adamzev/clean-and-green-philly/commit/5f23d09da9a7258b284ccfef54b1be0ee2a8e1f8)) +* Update aria label and role ([765e5a4](https://github.com/adamzev/clean-and-green-philly/commit/765e5a4fef1941e93815ae4789093e047183101c)) + # 1.0.0 (2025-03-23) ### Bug Fixes From 3125fae636ef8209ee851a0d35290048be37c50f Mon Sep 17 00:00:00 2001 From: Adam Levin Date: Thu, 27 Mar 2025 12:07:29 -0400 Subject: [PATCH 2/6] feat: allow non-credentialed users and cloud env credentials Allow creating db url by specifing port and password (to be used in Cloud Run to avoid duplicate secret management of POSTGRES_PASSWORD and VACANT_LOTS_DB) Add additional logic to psql to user localhost in Cloud Run environments Add a new bucket manager that should work in Cloud Run, with file credentials and in read-only mode for non-credentialed users Changed slack reporters to not report with a message rather than erroring if no credentials exist --- data/docker-compose.yml | 3 +- data/src/config/psql.py | 34 +++++- data/src/new_etl/classes/bucket_manager.py | 63 ++++++++++ data/src/new_etl/classes/featurelayer.py | 32 ++--- data/src/new_etl/classes/slack_reporters.py | 129 ++++++++++++-------- data/src/streetview.py | 4 + data/src/test/test_slack_error_reporter.py | 18 ++- 7 files changed, 196 insertions(+), 87 deletions(-) create mode 100644 data/src/new_etl/classes/bucket_manager.py diff --git a/data/docker-compose.yml b/data/docker-compose.yml index 5737d39a..ae51e505 100644 --- a/data/docker-compose.yml +++ b/data/docker-compose.yml @@ -6,7 +6,8 @@ services: image: vacant-lots-proj:latest environment: - GOOGLE_APPLICATION_CREDENTIALS=/app/service-account-key.json - - VACANT_LOTS_DB + - POSTGRES_PASSWORD + - POSTGRES_PORT=5434 - CLEAN_GREEN_GOOGLE_KEY - PYTHONUNBUFFERED=1 - GOOGLE_CLOUD_BUCKET_NAME diff --git a/data/src/config/psql.py b/data/src/config/psql.py index f4818363..a7a63955 100644 --- a/data/src/config/psql.py +++ b/data/src/config/psql.py @@ -1,12 +1,34 @@ import os -from config.config import is_docker from sqlalchemy import create_engine -url: str = ( - os.environ["VACANT_LOTS_DB"].replace("localhost", "host.docker.internal") - if is_docker() - else os.environ["VACANT_LOTS_DB"] -) +from config.config import is_docker + +# Detect if running in Cloud Run: +is_cloud_run = "K_SERVICE" in os.environ or "CLOUD_RUN_JOB" in os.environ + +# Use host.docker.internal when running locally in Docker +# except when running in Cloud Run +if is_docker() and not is_cloud_run: + host = "host.docker.internal" +else: + host = "localhost" + + +if os.getenv("VACANT_LOTS_DB"): + # Use the provided database URL + url = os.getenv("VACANT_LOTS_DB") +else: + # Use the specified port, pw, db and user to construct the URL + pw = os.environ["POSTGRES_PASSWORD"] + port = os.getenv("POSTGRES_PORT", "5432") + db = os.getenv("POSTGRES_DB", "vacantlotdb") + user = os.getenv("POSTGRES_USER", "postgres") + url: str = f"postgresql://{user}:{pw}@{host}:{port}/{db}" + print( + f"Connecting to database with URL: postgresql://{user}:****@{host}:{port}/{db}" + ) + + local_engine = create_engine(url) conn = local_engine.connect() diff --git a/data/src/new_etl/classes/bucket_manager.py b/data/src/new_etl/classes/bucket_manager.py new file mode 100644 index 00000000..ee272d7b --- /dev/null +++ b/data/src/new_etl/classes/bucket_manager.py @@ -0,0 +1,63 @@ +import logging as log +import os + +from google.cloud import storage + +from config.config import log_level + +log.basicConfig(level=log_level) + + +class GCSBucketManager: + """ + A manager for interacting with a Google Cloud Storage bucket. + + This class initializes a bucket client using Application Default Credentials, + an optional service account key, or falls back to an anonymous client for read-only access. + """ + + def __init__( + self, bucket_name: str = None, credential_path: str = None, client=None + ): + """ + Initialize the GCSBucketManager. + + Args: + bucket_name (str): Name of the bucket. Defaults to the environment variable + 'GOOGLE_CLOUD_BUCKET_NAME' or "cleanandgreenphl" if not set. + credential_path (str): Optional path to a service account credentials file. + client: Optional storage. Client instance for dependency injection in testing. + """ + self.bucket_name = bucket_name or os.getenv( + "GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl" + ) + + self.read_only = False + + if client is not None: + self._client = client + else: + self._client = self._init_client(credential_path) + + self.bucket = self._client.bucket(self.bucket_name) + + def _init_client(self, credential_path: str = None): + """ + Attempt to initialize the storage client using a credential file, application default + credentials or fall back to anonymous/read-only. + """ + project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly") + credentials_path = credential_path or "/app/service-account-key.json" + + if os.path.exists(credentials_path): + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path + + try: + # This will use application default credentials if GOOGLE_APPLICATION_CREDENTIALS is not set + return storage.Client(project=project_name) + + except Exception as e: + log.warning(f"Failed to initialize client with service account key: {e}") + log.warning("Falling back to anonymous client (read-only mode)") + self.read_only = True + return storage.Client.create_anonymous_client() diff --git a/data/src/new_etl/classes/featurelayer.py b/data/src/new_etl/classes/featurelayer.py index 4cc26d28..b64c4201 100644 --- a/data/src/new_etl/classes/featurelayer.py +++ b/data/src/new_etl/classes/featurelayer.py @@ -9,7 +9,6 @@ import requests import sqlalchemy as sa from google.cloud import storage -from google.cloud.storage.bucket import Bucket from shapely import wkb from tqdm import tqdm @@ -21,29 +20,26 @@ write_production_tiles_file, ) from config.psql import conn, local_engine +from new_etl.classes.bucket_manager import GCSBucketManager from new_etl.database import to_postgis_with_schema from new_etl.loaders import load_carto_data, load_esri_data log.basicConfig(level=log_level) -def google_cloud_bucket() -> Bucket: - """Build the google cloud bucket with name configured in your environ or default of cleanandgreenphl - - Returns: - Bucket: the gcp bucket +def google_cloud_bucket(require_write_access: bool = False) -> storage.Bucket | None: """ + Initialize a Google Cloud Storage bucket client using Application Default Credentials. + If a writable bucket is requested and the user does not have write access None is returned. - credentials_path = os.path.expanduser("/app/service-account-key.json") - if not os.path.exists(credentials_path): - raise FileNotFoundError(f"Credentials file not found at {credentials_path}") - - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path - bucket_name = os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl") - project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly") + Args: + require_write_access (bool): Whether it is required that the bucket should be writable. Defaults to False. + """ - storage_client = storage.Client(project=project_name) - return storage_client.bucket(bucket_name) + bucket_manager = GCSBucketManager() + if require_write_access and bucket_manager.read_only: + return None + return bucket_manager.bucket class FeatureLayer: @@ -149,6 +145,7 @@ def load_data(self): ] # Save GeoDataFrame to PostgreSQL and configure it as a hypertable + print("Saving GeoDataFrame to PostgreSQL...") to_postgis_with_schema(self.gdf, self.psql_table, conn) except Exception as e: @@ -358,6 +355,11 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None: f"{temp_merged_pmtiles} is {file_size} bytes in size but should be at least {min_tiles_file_size_in_bytes}. Therefore, we are not uploading any files to the GCP bucket. The file may be corrupt or incomplete." ) + bucket = google_cloud_bucket(require_write_access=True) + if bucket is None: + print("Skipping PMTiles upload due to read-only bucket access.") + return + # Upload PMTiles to Google Cloud Storage bucket = google_cloud_bucket() for file in write_files: diff --git a/data/src/new_etl/classes/slack_reporters.py b/data/src/new_etl/classes/slack_reporters.py index 6ae08517..97ba2d3d 100644 --- a/data/src/new_etl/classes/slack_reporters.py +++ b/data/src/new_etl/classes/slack_reporters.py @@ -1,12 +1,15 @@ -from sqlalchemy import text import os -from slack_sdk import WebClient import pandas as pd +from slack_sdk import WebClient +from sqlalchemy import text def send_dataframe_profile_to_slack( - df: pd.DataFrame, df_name: str, channel="clean-and-green-philly-pipeline" + df: pd.DataFrame, + df_name: str, + channel="clean-and-green-philly-pipeline", + slack_token: str | None = None, ): """ Profiles a DataFrame and sends the QC profile summary to a Slack channel. @@ -15,7 +18,13 @@ def send_dataframe_profile_to_slack( df (pd.DataFrame): The DataFrame to profile. df_name (str): The name of the DataFrame being profiled. channel (str): The Slack channel to post the message to. + slack_token (str): The Slack API token. If not provided, it will be read from the environment. """ + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping QC profile report to Slack.") + return # Step 1: Profile the DataFrame profile_summary = {} @@ -58,27 +67,33 @@ def send_dataframe_profile_to_slack( message += f" - `{col}`: {unique_count} unique values\n" # Step 3: Send to Slack - token = os.getenv("CAGP_SLACK_API_TOKEN") - if token: - client = WebClient(token=token) - try: - client.chat_postMessage( - channel=channel, - text=message, - username="QC Reporter", - ) - print(f"QC profile for `{df_name}` sent to Slack successfully.") - except Exception as e: - print(f"Failed to send QC profile for `{df_name}` to Slack: {e}") - else: - raise ValueError("Slack API token not found in environment variables.") + client = WebClient(token=token) + try: + client.chat_postMessage( + channel=channel, + text=message, + username="QC Reporter", + ) + print(f"QC profile for `{df_name}` sent to Slack successfully.") + except Exception as e: + print(f"Failed to send QC profile for `{df_name}` to Slack: {e}") -def send_pg_stats_to_slack(conn): +def send_pg_stats_to_slack(conn, slack_token: str | None = None): """ Report total sizes for all hypertables using hypertable_detailed_size and send the result to a Slack channel. + + Args: + conn: SQLAlchemy connection to the PostgreSQL database. + slack_token (str): The Slack API token. If not provided, it will be read from the environment. """ + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping PostgreSQL stats report to Slack.") + return + # Step 1: Get all hypertable names hypertable_query = """ SELECT hypertable_name @@ -115,20 +130,19 @@ def send_pg_stats_to_slack(conn): message += f"- {size['hypertable']}: {total_size}\n" # Step 4: Send to Slack - token = os.getenv("CAGP_SLACK_API_TOKEN") - if token: - client = WebClient(token=token) - client.chat_postMessage( - channel="clean-and-green-philly-pipeline", - text=message, - username="PG Stats Reporter", - ) - else: - raise ValueError("Slack API token not found in environment variables.") + client = WebClient(token=token) + client.chat_postMessage( + channel="clean-and-green-philly-pipeline", + text=message, + username="PG Stats Reporter", + ) def send_diff_report_to_slack( - diff_summary: str, report_url: str, channel="clean-and-green-philly-pipeline" + diff_summary: str, + report_url: str, + channel="clean-and-green-philly-pipeline", + slack_token: str | None = None, ): """ Sends a difference report summary to a Slack channel. @@ -137,41 +151,48 @@ def send_diff_report_to_slack( diff_summary (str): The summary of differences to post. report_url (str): The URL to the detailed difference report. channel (str): The Slack channel to post the message to. + slack_token (str): The Slack API token. If not provided, it will be read from the environment. """ print( f"send_diff_report_to_slack called with:\ndiff_summary:\n{diff_summary}\nreport_url: {report_url}" ) + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping diff report to Slack.") + return # Step 1: Format the message message = f"*Data Difference Report*\n\n{diff_summary}\n\nDetailed report: <{report_url}|View Report>" print(f"Formatted Slack message:\n{message}") # Step 2: Send the message to Slack - token = os.getenv("CAGP_SLACK_API_TOKEN") - if token: - client = WebClient(token=token) - try: - client.chat_postMessage( - channel=channel, - text=message, - username="Diff Reporter", - ) - print("Diff report sent to Slack successfully.") - except Exception as e: - print(f"Failed to send diff report to Slack: {e}") - else: - raise ValueError("Slack API token not found in environment variables.") + client = WebClient(token=token) + try: + client.chat_postMessage( + channel=channel, + text=message, + username="Diff Reporter", + ) + print("Diff report sent to Slack successfully.") + except Exception as e: + print(f"Failed to send diff report to Slack: {e}") -def send_error_to_slack(error_message: str) -> None: +def send_error_to_slack(error_message: str, slack_token: str | None = None) -> None: """Send error message to Slack.""" - token: str | None = os.getenv("CAGP_SLACK_API_TOKEN") # token can be None - if token: - client = WebClient(token=token) - client.chat_postMessage( - channel="clean-and-green-philly-back-end", # Replace with actual Slack channel ID - text=error_message, - username="Backend Error Reporter", - ) - else: - raise ValueError("Slack API token not found in environment variables.") + + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping error report to Slack.") + print("Error message:") + print(error_message) + return + + client = WebClient(token=token) + client.chat_postMessage( + channel="clean-and-green-philly-back-end", # Replace with actual Slack channel ID + text=error_message, + username="Backend Error Reporter", + ) diff --git a/data/src/streetview.py b/data/src/streetview.py index cd35763b..384e6e51 100644 --- a/data/src/streetview.py +++ b/data/src/streetview.py @@ -9,6 +9,10 @@ # Configure Google bucket = google_cloud_bucket() +# when switching over to new_etl, this will need to be updated to use the new bucket manager +# bucket = google_cloud_bucket(require_write_access=True) +# if bucket is None: +# do a dry run or exit key = os.environ["CLEAN_GREEN_GOOGLE_KEY"] bucket_name = bucket.name diff --git a/data/src/test/test_slack_error_reporter.py b/data/src/test/test_slack_error_reporter.py index 4a30653b..04d9d938 100644 --- a/data/src/test/test_slack_error_reporter.py +++ b/data/src/test/test_slack_error_reporter.py @@ -5,17 +5,18 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/..") -from classes.slack_error_reporter import ( +from new_etl.classes.slack_reporters import ( send_error_to_slack, ) # Ensure correct file import class TestSlackNotifier(unittest.TestCase): @patch( - "classes.slack_error_reporter.WebClient.chat_postMessage" + "new_etl.classes.slack_reporters.WebClient.chat_postMessage" ) # Correct patching @patch( - "classes.slack_error_reporter.os.getenv", return_value="mock_slack_token" + "new_etl.classes.slack_reporters.os.getenv", + return_value="mock_slack_token", ) # Correct patching def test_send_error_to_slack(self, _mock_getenv, mock_slack_post): """Test that Slack error reporting is triggered correctly.""" @@ -23,7 +24,7 @@ def test_send_error_to_slack(self, _mock_getenv, mock_slack_post): error_message = "Test error message" # Call the Slack notification function - send_error_to_slack(error_message) + send_error_to_slack(error_message, slack_token="test_token") # Verify the Slack API call was made with the correct parameters mock_slack_post.assert_called_once_with( @@ -33,18 +34,13 @@ def test_send_error_to_slack(self, _mock_getenv, mock_slack_post): ) @patch( - "classes.slack_error_reporter.WebClient.chat_postMessage" + "new_etl.classes.slack_reporters.WebClient.chat_postMessage" ) # Correct patching @patch( - "classes.slack_error_reporter.os.getenv", return_value=None + "new_etl.classes.slack_reporters.os.getenv", return_value=None ) # Simulate missing Slack token def test_no_error_no_slack_message(self, _mock_getenv, mock_slack_post): """Test that Slack notification is not triggered if there's no error.""" - - # Call the Slack notification function (with no valid token) - with self.assertRaises(ValueError): - send_error_to_slack("Test error message") - # Ensure Slack's chat_postMessage was not called due to missing token mock_slack_post.assert_not_called() From cf08d088bd90e5f603b88dbf2a789df6c523e0d8 Mon Sep 17 00:00:00 2001 From: Adam Levin Date: Thu, 27 Mar 2025 21:11:22 -0400 Subject: [PATCH 3/6] chore: add VACANT_LOT_DB back to docker-compose env --- data/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/data/docker-compose.yml b/data/docker-compose.yml index ae51e505..d662d718 100644 --- a/data/docker-compose.yml +++ b/data/docker-compose.yml @@ -6,6 +6,7 @@ services: image: vacant-lots-proj:latest environment: - GOOGLE_APPLICATION_CREDENTIALS=/app/service-account-key.json + - VACANT_LOTS_DB - POSTGRES_PASSWORD - POSTGRES_PORT=5434 - CLEAN_GREEN_GOOGLE_KEY From 7167ebc70db99d567359f68d5ecc5499e09883b8 Mon Sep 17 00:00:00 2001 From: Adam Levin Date: Mon, 31 Mar 2025 09:55:22 -0400 Subject: [PATCH 4/6] chore: modify data_diff's send_to_slack method Print rather than error is Slack credentials are missing --- data/src/new_etl/classes/bucket_manager.py | 7 +++- data/src/new_etl/classes/data_diff.py | 39 ++++++++++++---------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/data/src/new_etl/classes/bucket_manager.py b/data/src/new_etl/classes/bucket_manager.py index ee272d7b..045571aa 100644 --- a/data/src/new_etl/classes/bucket_manager.py +++ b/data/src/new_etl/classes/bucket_manager.py @@ -48,12 +48,17 @@ def _init_client(self, credential_path: str = None): """ project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly") credentials_path = credential_path or "/app/service-account-key.json" + is_credentials_file = os.path.exists(credentials_path) - if os.path.exists(credentials_path): + if is_credentials_file: os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path try: # This will use application default credentials if GOOGLE_APPLICATION_CREDENTIALS is not set + if is_credentials_file: + print(f"Using service account key at {credentials_path}") + else: + print("Using application default credentials") return storage.Client(project=project_name) except Exception as e: diff --git a/data/src/new_etl/classes/data_diff.py b/data/src/new_etl/classes/data_diff.py index 3dc40bfb..59ab9b90 100644 --- a/data/src/new_etl/classes/data_diff.py +++ b/data/src/new_etl/classes/data_diff.py @@ -1,7 +1,8 @@ -from slack_sdk import WebClient +import os + import pandas as pd +from slack_sdk import WebClient from sqlalchemy import text -import os from config.psql import conn @@ -115,27 +116,31 @@ def generate_diff(self): self.summary_text = "\n".join(summary_lines) - def send_to_slack(self, channel="clean-and-green-philly-pipeline"): + def send_to_slack( + self, channel="clean-and-green-philly-pipeline", slack_token=None + ): """ Sends the diff summary to a Slack channel. Args: channel (str): The Slack channel to post the message to. """ - token = os.getenv("CAGP_SLACK_API_TOKEN") - if token: - client = WebClient(token=token) - try: - client.chat_postMessage( - channel=channel, - text=f"*Data Difference Report*\n\n{self.summary_text}", - username="Diff Reporter", - ) - print("Diff report sent to Slack successfully.") - except Exception as e: - print(f"Failed to send diff report to Slack: {e}") - else: - raise ValueError("Slack API token not found in environment variables.") + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping sending diff report to Slack.") + return + + client = WebClient(token=token) + try: + client.chat_postMessage( + channel=channel, + text=f"*Data Difference Report*\n\n{self.summary_text}", + username="Diff Reporter", + ) + print("Diff report sent to Slack successfully.") + except Exception as e: + print(f"Failed to send diff report to Slack: {e}") def run(self, send_to_slack=True, slack_channel="clean-and-green-philly-pipeline"): """ From e23ceb740e358b80e44396bf98602c554cbc9a0c Mon Sep 17 00:00:00 2001 From: Adam Levin Date: Sat, 26 Apr 2025 17:22:37 -0400 Subject: [PATCH 5/6] Update CHANGELOG.md --- CHANGELOG.md | 49 ------------------------------------------------- 1 file changed, 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88c33b83..ccb9f33e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,52 +1,3 @@ -# 1.0.0 (2025-04-06) - - -### Bug Fixes - -* **572:** fix webpack warnings + fix linting issues ([#574](https://github.com/adamzev/clean-and-green-philly/issues/574)) ([fbfd459](https://github.com/adamzev/clean-and-green-philly/commit/fbfd459ba147e51a2fd54bee61d5ca63fb74eb98)) -* Add focus styling to header links ([a99065c](https://github.com/adamzev/clean-and-green-philly/commit/a99065cba1607b981702e7aa7e654f24c108ad6e)) -* Add href to dropdown items to increase link size ([2eb4667](https://github.com/adamzev/clean-and-green-philly/commit/2eb4667e7b68f45489c51ea10aa7d15afd2a3c77)) -* closing tooltip popup on map closes property details ([432e8c0](https://github.com/adamzev/clean-and-green-philly/commit/432e8c0a14e1e9cfa199b030019c0730a24741c6)) -* correct the scrolling on the map page ([a850237](https://github.com/adamzev/clean-and-green-philly/commit/a8502372e243057e2b06559cb042d94ea894415a)) -* Remove nested link and button ([d93e8bd](https://github.com/adamzev/clean-and-green-philly/commit/d93e8bd52d80cd8d5b1a30910aaa4b168a11b57a)) -* Update footer markup structure for accessibility ([d5c2a33](https://github.com/adamzev/clean-and-green-philly/commit/d5c2a334f0e5c297f117dca5dc2972b56d7b3542)) - - -### Features - -* **247:** make the mapbox legend more legible ([bcf6915](https://github.com/adamzev/clean-and-green-philly/commit/bcf691566fc26e834547a43efd863de58abdf3d4)) -* **293:** sort features by priority of high, medium, low ([8cd1058](https://github.com/adamzev/clean-and-green-philly/commit/8cd105872feb0065aa3d207d42dc5df76ef63dbc)) -* **305:** expand the header width of single property table ([b25261e](https://github.com/adamzev/clean-and-green-philly/commit/b25261e5d9e26bdbd9694aa03b4964771528dd3b)) -* **395:** add white background to backbutton bar ([a65f052](https://github.com/adamzev/clean-and-green-philly/commit/a65f052ca2e5c0eac2737f85b4997af6e993f750)) -* **395:** make property selection back button sticky ([6601201](https://github.com/adamzev/clean-and-green-philly/commit/6601201fcf943f8e64a1341327095e9b7edc6b08)) -* **457:** add filter count to button ([b35145a](https://github.com/adamzev/clean-and-green-philly/commit/b35145a1df5f2995294f829e0b7cde20ba0e0f29)) -* **535:** add close download view button ([#579](https://github.com/adamzev/clean-and-green-philly/issues/579)) ([9bd2c53](https://github.com/adamzev/clean-and-green-philly/commit/9bd2c53aa3b49da8136fa6e7e850c4dc4d5e2d74)) -* **544:** adjust remove property section ([#580](https://github.com/adamzev/clean-and-green-philly/issues/580)) ([900af8e](https://github.com/adamzev/clean-and-green-philly/commit/900af8e6fbfc89bd4ed83b17c9eed0a1d5a3de3b)) -* **568:** restyle the property table ([#577](https://github.com/adamzev/clean-and-green-philly/issues/577)) ([2c8cc92](https://github.com/adamzev/clean-and-green-philly/commit/2c8cc92b7f28aecdbe65412476691baa50254292)) -* Add arrow icon to link ([6bce3fa](https://github.com/adamzev/clean-and-green-philly/commit/6bce3fa84ad9827f2117a2d51ed2f2665e83481b)) -* Add content to sidebar ([23d5c52](https://github.com/adamzev/clean-and-green-philly/commit/23d5c52c3fcd1eb1eff902e7d1fd37ab2fe41b6d)) -* Add council district ([e576218](https://github.com/adamzev/clean-and-green-philly/commit/e5762185840f0bbc2289fede8481f5b39a16087d)) -* Add extra data to single detail page ([48e34fe](https://github.com/adamzev/clean-and-green-philly/commit/48e34fe0a6317fc8fc0f1f5e745dd165000d437f)) -* Add font ([c1a8b93](https://github.com/adamzev/clean-and-green-philly/commit/c1a8b93561dfc67c8eed31b3c8befa2b8ae984ec)) -* Add font to map ([1491518](https://github.com/adamzev/clean-and-green-philly/commit/14915180857a482a25d96f9f45f7c838082e2d71)) -* Add font to search field ([3b08069](https://github.com/adamzev/clean-and-green-philly/commit/3b080690ba92007545467bc2ce4bf3bcad26c899)) -* Add info cards ([8170944](https://github.com/adamzev/clean-and-green-philly/commit/8170944fdb8d8be36af062012967323047bda748)) -* Add missing data and atlas link; style table ([8c260d4](https://github.com/adamzev/clean-and-green-philly/commit/8c260d48e015ed6c8c29cc10b8936753cc013ff0)) -* Add percentage, update column widths ([41caf77](https://github.com/adamzev/clean-and-green-philly/commit/41caf77c55eb0a6336abc80047ed83584d250a64)) -* Add priority color square ([ad5a9fa](https://github.com/adamzev/clean-and-green-philly/commit/ad5a9faff3b9596256e680b3d6d95c54caefc7a7)) -* add redirect from map to find-properties ([439ed6c](https://github.com/adamzev/clean-and-green-philly/commit/439ed6c7291251aed9751431cd5bdc2fa6883a15)) -* Add row scope ([f945979](https://github.com/adamzev/clean-and-green-philly/commit/f945979edaee2ad5a003a5b5560a89cc25717f3b)) -* Add spacing below Back button ([7de48c5](https://github.com/adamzev/clean-and-green-philly/commit/7de48c50f177c0275cebac0a4a7d9dfce8d2cd2c)) -* Address a few more links ([b3b6733](https://github.com/adamzev/clean-and-green-philly/commit/b3b673375f1472f15d80bffeff378c973d6cb49d)) -* Adjust table per requirements ([36e6cfa](https://github.com/adamzev/clean-and-green-philly/commit/36e6cfa5934defadcf1a8b8ad1c09ffe609d0ec5)) -* Change Find Properties and Learn More to links ([1de0b47](https://github.com/adamzev/clean-and-green-philly/commit/1de0b474811ab2d4281d94c6a9d5882a8ebb0c13)) -* Close sidebar if clicking on empty map ([612117d](https://github.com/adamzev/clean-and-green-philly/commit/612117d6c54bc62baaae4f773013e196232c2dba)) -* Navigate to map on click; show popups ([183b6a2](https://github.com/adamzev/clean-and-green-philly/commit/183b6a224d651726a6e465d7c9549ef437626f9e)) -* Open sidebar from map ([5474367](https://github.com/adamzev/clean-and-green-philly/commit/54743673e99ffd6b939009ad54e4d0a4252ba044)) -* Pass selected property to map view ([663f142](https://github.com/adamzev/clean-and-green-philly/commit/663f14236ccb9246efa8882c0dfa44e8cbcba0f7)) -* underline links ([5f23d09](https://github.com/adamzev/clean-and-green-philly/commit/5f23d09da9a7258b284ccfef54b1be0ee2a8e1f8)) -* Update aria label and role ([765e5a4](https://github.com/adamzev/clean-and-green-philly/commit/765e5a4fef1941e93815ae4789093e047183101c)) - # 1.0.0 (2025-03-23) ### Bug Fixes From d802e51a5a5b296d3b219666c636d0a4e65cf2d4 Mon Sep 17 00:00:00 2001 From: Adam Levin Date: Sun, 27 Apr 2025 15:47:59 -0400 Subject: [PATCH 6/6] chore: made credentials optional for current/old pipeline The pipeline can be run locally but will not write changes to the bucket and will not report errors to slack without credentials. Previously, the code would error when credentials were missing. Added BACKUP_SCHEMA=False and made FORCE_RELOAD=True as the default. These are temporary fixes and will not be relevant once postgres is removed. --- data/src/classes/backup_archive_database.py | 12 ++-- data/src/classes/bucket_manager.py | 68 +++++++++++++++++++++ data/src/classes/diff_report.py | 33 ++++++---- data/src/classes/featurelayer.py | 46 +++++++------- data/src/classes/slack_error_reporter.py | 25 ++++---- data/src/config/config.py | 5 +- data/src/config/psql.py | 48 ++++++++------- data/src/script.py | 12 ++-- data/src/test/test_slack_error_reporter.py | 14 ++--- 9 files changed, 176 insertions(+), 87 deletions(-) create mode 100644 data/src/classes/bucket_manager.py diff --git a/data/src/classes/backup_archive_database.py b/data/src/classes/backup_archive_database.py index 94302971..0d0c51f4 100644 --- a/data/src/classes/backup_archive_database.py +++ b/data/src/classes/backup_archive_database.py @@ -4,16 +4,17 @@ from datetime import datetime, timedelta import sqlalchemy as sa +from sqlalchemy import inspect + +from classes.featurelayer import google_cloud_bucket from config.config import ( log_level, max_backup_schema_days, - tiles_file_id_prefix, tile_file_backup_directory, + tiles_file_id_prefix, ) from config.psql import conn, local_engine, url from data_utils.utils import mask_password -from sqlalchemy import inspect -from classes.featurelayer import google_cloud_bucket log.basicConfig(level=log_level) @@ -125,7 +126,10 @@ def is_backup_schema_exists(self) -> bool: def backup_tiles_file(self): """backup the main tiles file to a timestamped copy in the backup/ folder in GCP""" - bucket = google_cloud_bucket() + bucket = google_cloud_bucket(require_write_access=True) + if not bucket: + log.warning("No GCP bucket found. Skipping tiles file backup.") + return count: int = 0 for blob in bucket.list_blobs(prefix=tiles_file_id_prefix): suffix: str = "_" + self.timestamp_string diff --git a/data/src/classes/bucket_manager.py b/data/src/classes/bucket_manager.py new file mode 100644 index 00000000..045571aa --- /dev/null +++ b/data/src/classes/bucket_manager.py @@ -0,0 +1,68 @@ +import logging as log +import os + +from google.cloud import storage + +from config.config import log_level + +log.basicConfig(level=log_level) + + +class GCSBucketManager: + """ + A manager for interacting with a Google Cloud Storage bucket. + + This class initializes a bucket client using Application Default Credentials, + an optional service account key, or falls back to an anonymous client for read-only access. + """ + + def __init__( + self, bucket_name: str = None, credential_path: str = None, client=None + ): + """ + Initialize the GCSBucketManager. + + Args: + bucket_name (str): Name of the bucket. Defaults to the environment variable + 'GOOGLE_CLOUD_BUCKET_NAME' or "cleanandgreenphl" if not set. + credential_path (str): Optional path to a service account credentials file. + client: Optional storage. Client instance for dependency injection in testing. + """ + self.bucket_name = bucket_name or os.getenv( + "GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl" + ) + + self.read_only = False + + if client is not None: + self._client = client + else: + self._client = self._init_client(credential_path) + + self.bucket = self._client.bucket(self.bucket_name) + + def _init_client(self, credential_path: str = None): + """ + Attempt to initialize the storage client using a credential file, application default + credentials or fall back to anonymous/read-only. + """ + project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly") + credentials_path = credential_path or "/app/service-account-key.json" + is_credentials_file = os.path.exists(credentials_path) + + if is_credentials_file: + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path + + try: + # This will use application default credentials if GOOGLE_APPLICATION_CREDENTIALS is not set + if is_credentials_file: + print(f"Using service account key at {credentials_path}") + else: + print("Using application default credentials") + return storage.Client(project=project_name) + + except Exception as e: + log.warning(f"Failed to initialize client with service account key: {e}") + log.warning("Falling back to anonymous client (read-only mode)") + self.read_only = True + return storage.Client.create_anonymous_client() diff --git a/data/src/classes/diff_report.py b/data/src/classes/diff_report.py index 102d0352..def32159 100644 --- a/data/src/classes/diff_report.py +++ b/data/src/classes/diff_report.py @@ -5,6 +5,8 @@ import subprocess from email.mime.text import MIMEText +from slack_sdk import WebClient + from classes.backup_archive_database import backup_schema_name from classes.featurelayer import google_cloud_bucket from config.config import ( @@ -16,7 +18,6 @@ ) from config.psql import conn, url from data_utils.utils import mask_password -from slack_sdk import WebClient log.basicConfig(level=log_level) @@ -213,20 +214,30 @@ def compare_table(self, diff_table: DiffTable) -> str: output = complete_process.stdout.decode() return re.sub(r"\nExtra-Info:.*", "", output, flags=re.DOTALL) - def send_report_to_slack(self): + def send_report_to_slack(self, slack_token=None): """ post the summary report to the slack channel if configured. """ - if report_to_slack_channel: - token = os.environ["CAGP_SLACK_API_TOKEN"] - client = WebClient(token=token) - - # Send a message - client.chat_postMessage( - channel=report_to_slack_channel, - text=self.report, - username="CAGP Diff Bot", + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not report_to_slack_channel: + log.warning( + "Skipping Slack reporting. Configure report_to_slack_channel in config.py to enable." ) + return + if not token: + log.warning( + "Skipping Slack reporting. Configure CAGP_SLACK_API_TOKEN in environment to enable." + ) + return + + client = WebClient(token=token) + + # Send a message + client.chat_postMessage( + channel=report_to_slack_channel, + text=self.report, + username="CAGP Diff Bot", + ) def email_report(self): """ diff --git a/data/src/classes/featurelayer.py b/data/src/classes/featurelayer.py index 4eb30e3b..487846fb 100644 --- a/data/src/classes/featurelayer.py +++ b/data/src/classes/featurelayer.py @@ -7,6 +7,11 @@ import pandas as pd import requests import sqlalchemy as sa +from esridump.dumper import EsriDumper +from google.cloud import storage +from shapely import Point, wkb + +from classes.bucket_manager import GCSBucketManager from config.config import ( FORCE_RELOAD, USE_CRS, @@ -15,31 +20,22 @@ write_production_tiles_file, ) from config.psql import conn, local_engine -from esridump.dumper import EsriDumper -from google.cloud import storage -from google.cloud.storage.bucket import Bucket -from shapely import Point, wkb log.basicConfig(level=log_level) -def google_cloud_bucket() -> Bucket: - """Build the google cloud bucket with name configured in your environ or default of cleanandgreenphl - - Returns: - Bucket: the gcp bucket +def google_cloud_bucket(require_write_access: bool = False) -> storage.Bucket | None: + """ + Initialize a Google Cloud Storage bucket client using Application Default Credentials. + If a writable bucket is requested and the user does not have write access None is returned. + Args: + require_write_access (bool): Whether it is required that the bucket should be writable. Defaults to False. """ - credentials_path = os.path.expanduser("/app/service-account-key.json") - - if not os.path.exists(credentials_path): - raise FileNotFoundError(f"Credentials file not found at {credentials_path}") - - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path - bucket_name = os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl") - project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly") - storage_client = storage.Client(project=project_name) - return storage_client.bucket(bucket_name) + bucket_manager = GCSBucketManager() + if require_write_access and bucket_manager.read_only: + return None + return bucket_manager.bucket class FeatureLayer: @@ -58,7 +54,6 @@ def __init__( from_xy=False, use_wkb_geom_field=None, cols: list[str] = None, - bucket: Bucket = None, ): self.name = name self.esri_rest_urls = ( @@ -75,7 +70,6 @@ def __init__( self.psql_table = name.lower().replace(" ", "_") self.input_crs = "EPSG:4326" if not from_xy else USE_CRS self.use_wkb_geom_field = use_wkb_geom_field - self.bucket = bucket or google_cloud_bucket() inputs = [self.esri_rest_urls, self.carto_sql_queries, self.gdf] non_none_inputs = [i for i in inputs if i is not None] @@ -330,7 +324,13 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None: df_no_geom.to_parquet(temp_parquet) # Upload Parquet to Google Cloud Storage - blob_parquet = self.bucket.blob(f"{tiles_file_id_prefix}.parquet") + bucket = google_cloud_bucket(require_write_access=True) + if bucket is None: + print( + "Skipping Parquest and PMTiles upload due to read-only bucket access." + ) + return + blob_parquet = bucket.blob(f"{tiles_file_id_prefix}.parquet") try: blob_parquet.upload_from_filename(temp_parquet) parquet_size = os.stat(temp_parquet).st_size @@ -399,7 +399,7 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None: # Upload PMTiles to Google Cloud Storage for file in write_files: - blob = self.bucket.blob(file) + blob = bucket.blob(file) try: blob.upload_from_filename(temp_merged_pmtiles) print(f"PMTiles upload successful for {file}!") diff --git a/data/src/classes/slack_error_reporter.py b/data/src/classes/slack_error_reporter.py index 1c443d4b..e0d02bd4 100644 --- a/data/src/classes/slack_error_reporter.py +++ b/data/src/classes/slack_error_reporter.py @@ -1,16 +1,19 @@ import os + from slack_sdk import WebClient -def send_error_to_slack(error_message: str) -> None: +def send_error_to_slack(error_message: str, slack_token: str | None = None) -> None: """Send error message to Slack.""" - token: str | None = os.getenv("CAGP_SLACK_API_TOKEN") # token can be None - if token: - client = WebClient(token=token) - client.chat_postMessage( - channel="clean-and-green-philly-back-end", # Replace with actual Slack channel ID - text=error_message, - username="Backend Error Reporter", - ) - else: - raise ValueError("Slack API token not found in environment variables.") + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping QC profile report to Slack.") + return + + client = WebClient(token=token) + client.chat_postMessage( + channel="clean-and-green-philly-back-end", # Replace with actual Slack channel ID + text=error_message, + username="Backend Error Reporter", + ) diff --git a/data/src/config/config.py b/data/src/config/config.py index 4b240cf2..a9d2fe56 100644 --- a/data/src/config/config.py +++ b/data/src/config/config.py @@ -1,9 +1,12 @@ import logging from pathlib import Path -FORCE_RELOAD = False +FORCE_RELOAD = True """ During the data load, whether to query the various GIS API services for the data to load into the postgres tables. If True, will query the API services, backup the database, reload the database and report on data differences. If false will read the data from postgres.""" +BACKUP_SCHEMA = False +""" Whether to backup the database schema before loading the data in script.py. """ + USE_CRS = "EPSG:2272" """ the standard geospatial code for Pennsylvania South (ftUS) """ diff --git a/data/src/config/psql.py b/data/src/config/psql.py index a7a63955..2f21924a 100644 --- a/data/src/config/psql.py +++ b/data/src/config/psql.py @@ -4,31 +4,33 @@ from config.config import is_docker -# Detect if running in Cloud Run: -is_cloud_run = "K_SERVICE" in os.environ or "CLOUD_RUN_JOB" in os.environ - -# Use host.docker.internal when running locally in Docker -# except when running in Cloud Run -if is_docker() and not is_cloud_run: - host = "host.docker.internal" -else: - host = "localhost" - -if os.getenv("VACANT_LOTS_DB"): - # Use the provided database URL - url = os.getenv("VACANT_LOTS_DB") -else: - # Use the specified port, pw, db and user to construct the URL - pw = os.environ["POSTGRES_PASSWORD"] - port = os.getenv("POSTGRES_PORT", "5432") - db = os.getenv("POSTGRES_DB", "vacantlotdb") - user = os.getenv("POSTGRES_USER", "postgres") - url: str = f"postgresql://{user}:{pw}@{host}:{port}/{db}" - print( - f"Connecting to database with URL: postgresql://{user}:****@{host}:{port}/{db}" - ) +def get_db_url(): + # Detect if running in Cloud Run: + is_cloud_run = "K_SERVICE" in os.environ or "CLOUD_RUN_JOB" in os.environ + # Use host.docker.internal when running locally in Docker + # except when running in Cloud Run + host = "localhost" + if is_docker() and not is_cloud_run: + host = "host.docker.internal" + + if os.getenv("VACANT_LOTS_DB"): + # Use the provided database URL + url = os.getenv("VACANT_LOTS_DB") + url = url.replace("localhost", host) + else: + # Use the specified port, pw, db and user to construct the URL + pw = os.environ["POSTGRES_PASSWORD"] + port = os.getenv("POSTGRES_PORT", "5432") + db = os.getenv("POSTGRES_DB", "vacantlotdb") + user = os.getenv("POSTGRES_USER", "postgres") + url: str = f"postgresql://{user}:{pw}@{host}:{port}/{db}" + print(f"Set database url to: postgresql://{user}:****@{host}:{port}/{db}") + return url + + +url = get_db_url() local_engine = create_engine(url) conn = local_engine.connect() diff --git a/data/src/script.py b/data/src/script.py index cf42ace6..e6adb155 100644 --- a/data/src/script.py +++ b/data/src/script.py @@ -1,9 +1,11 @@ import sys import time +import traceback from classes.backup_archive_database import BackupArchiveDatabase from classes.diff_report import DiffReport -from config.config import FORCE_RELOAD, tiles_file_id_prefix +from classes.slack_error_reporter import send_error_to_slack +from config.config import BACKUP_SCHEMA, FORCE_RELOAD, tiles_file_id_prefix from config.psql import conn from data_utils.access_process import access_process from data_utils.city_owned_properties import city_owned_properties @@ -16,10 +18,10 @@ from data_utils.gun_crimes import gun_crimes from data_utils.imm_dang_buildings import imm_dang_buildings from data_utils.l_and_i import l_and_i -from data_utils.owner_type import owner_type from data_utils.nbhoods import nbhoods from data_utils.negligent_devs import negligent_devs from data_utils.opa_properties import opa_properties +from data_utils.owner_type import owner_type from data_utils.park_priority import park_priority from data_utils.phs_properties import phs_properties from data_utils.ppr_properties import ppr_properties @@ -30,10 +32,6 @@ from data_utils.unsafe_buildings import unsafe_buildings from data_utils.vacant_properties import vacant_properties -import traceback - -from classes.slack_error_reporter import send_error_to_slack - # Ensure the directory containing awkde is in the Python path awkde_path = "/usr/src/app" if awkde_path not in sys.path: @@ -66,7 +64,7 @@ # backup sql schema if we are reloading data backup: BackupArchiveDatabase = None - if FORCE_RELOAD: + if FORCE_RELOAD and BACKUP_SCHEMA: # first archive any remaining backup that may exist from a previous run that errored backup = BackupArchiveDatabase() if backup.is_backup_schema_exists(): diff --git a/data/src/test/test_slack_error_reporter.py b/data/src/test/test_slack_error_reporter.py index 04d9d938..9b79a323 100644 --- a/data/src/test/test_slack_error_reporter.py +++ b/data/src/test/test_slack_error_reporter.py @@ -5,17 +5,17 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/..") -from new_etl.classes.slack_reporters import ( - send_error_to_slack, -) # Ensure correct file import +from classes.slack_error_reporter import ( + send_error_to_slack, # Ensure correct file import +) class TestSlackNotifier(unittest.TestCase): @patch( - "new_etl.classes.slack_reporters.WebClient.chat_postMessage" + "classes.slack_error_reporter.WebClient.chat_postMessage" ) # Correct patching @patch( - "new_etl.classes.slack_reporters.os.getenv", + "classes.slack_error_reporter.os.getenv", return_value="mock_slack_token", ) # Correct patching def test_send_error_to_slack(self, _mock_getenv, mock_slack_post): @@ -34,10 +34,10 @@ def test_send_error_to_slack(self, _mock_getenv, mock_slack_post): ) @patch( - "new_etl.classes.slack_reporters.WebClient.chat_postMessage" + "classes.slack_error_reporter.WebClient.chat_postMessage" ) # Correct patching @patch( - "new_etl.classes.slack_reporters.os.getenv", return_value=None + "classes.slack_error_reporter.os.getenv", return_value=None ) # Simulate missing Slack token def test_no_error_no_slack_message(self, _mock_getenv, mock_slack_post): """Test that Slack notification is not triggered if there's no error."""