diff --git a/.gcp/terraforms/bigquery.tf b/.gcp/terraforms/bigquery.tf
new file mode 100644
index 0000000..d713bba
--- /dev/null
+++ b/.gcp/terraforms/bigquery.tf
@@ -0,0 +1,90 @@
+# ------------------------------------------------------------
+# OPS EXTERNALIZED TABLES (For metadata caching)
+# ------------------------------------------------------------
+
+resource "google_bigquery_connection" "biglake_connection" {
+  connection_id = "ops_biglake_connection"
+  location      = var.region
+  friendly_name = "BigLake Connection for GCS Parquet Scanning"
+  cloud_resource {} # exposes cloud_resource[0].service_account_id, which is granted bucket read access below
+}
+
+# Grant the BigLake connection's service account read-only (objectViewer) access to the pipeline bucket
+resource "google_storage_bucket_iam_member" "biglake_storage_viewer" {
+  bucket = google_storage_bucket.ops_pipeline_bucket.name
+  role   = "roles/storage.objectViewer"
+  member = "serviceAccount:${google_bigquery_connection.biglake_connection.cloud_resource[0].service_account_id}"
+}
+
+resource "google_bigquery_dataset" "silver_dataset" {
+  dataset_id = var.bq_dataset_id
+  location   = var.region
+
+  delete_contents_on_destroy = false # destroying the dataset fails while it still contains tables
+}
+
+locals {
+  external_tables = [ # one external table per entry; each name is also the Parquet file prefix under contracted/
+    "df_orders",
+    "df_customers",
+    "df_order_items",
+    "df_products",
+    "df_payments"
+  ]
+}
+
+resource "google_bigquery_table" "external_tables" {
+  for_each   = toset(local.external_tables)
+  dataset_id = google_bigquery_dataset.silver_dataset.dataset_id
+  table_id   = each.key
+
+  # Schema is autodetected from the matched Parquet files; apply might throw an error if contracted/ has no matching files yet
+  external_data_configuration {
+    autodetect    = true
+    source_format = "PARQUET"
+    connection_id = google_bigquery_connection.biglake_connection.name
+    source_uris   = ["gs://${google_storage_bucket.ops_pipeline_bucket.name}/contracted/${each.key}_*.parquet"]
+
+    # Cache refresh is triggered manually by the pipeline. NOTE(review): no max_staleness is set on this table - confirm the metadata cache is actually used
+    metadata_cache_mode = "MANUAL"
+  }
+  lifecycle {
+    prevent_destroy = true
+  }
+}
+
+
+# ------------------------------------------------------------
+# BIGQUERY SEMANTIC DATASETS (For table versioning)
+# ------------------------------------------------------------
+
+locals {
+  # Expiration for versioned tables: 31 days expressed in milliseconds (31 * 86,400,000)
+  one_month_ms = 2678400000
+
+  semantic_datasets = [
+    "seller_semantic",
+    "customer_semantic",
+    "product_semantic"
+  ]
+}
+
+resource "google_bigquery_dataset" "semantic_datasets" {
+  for_each   = toset(local.semantic_datasets)
+  dataset_id = each.key
+  location   = var.region
+
+  delete_contents_on_destroy  = false
+  default_table_expiration_ms = local.one_month_ms # default lifetime for tables created without their own expiration
+
+  description = "Semantic layer for ${each.key}. Tables expire after 1 month."
+
+  labels = {
+    env   = var.environment
+    layer = "semantic"
+  }
+
+  lifecycle {
+    prevent_destroy = true # Terraform rejects any plan that would destroy these datasets
+  }
+}
diff --git a/.gcp/terraforms/iam_bindings.tf b/.gcp/terraforms/iam_bindings.tf
index 31c87f2..1e56bec 100644
--- a/.gcp/terraforms/iam_bindings.tf
+++ b/.gcp/terraforms/iam_bindings.tf
@@ -24,7 +24,9 @@ locals {
     "roles/monitoring.admin",                # Manage Monitoring in monitoring.tf
     "roles/logging.configWriter",            # Required for log-based alert policies
     "roles/iam.serviceAccountAdmin",         # Manage Alert policies in monitoring.tf
-    "roles/iam.admin"                        # Manage Iam roles
+    "roles/iam.admin",                       # Manage Iam roles
+    "roles/bigquery.admin",                  # Manage BigQuery datasets and views
+    "roles/serviceusage.serviceUsageAdmin",  # Manage APIs
   ]
 }
 
@@ -74,6 +76,21 @@ resource "google_storage_bucket_iam_member" "pipeline_runner_pipeline_access" {
   member = "serviceAccount:${google_service_account.platform_accounts["ops-pipeline-sa"].email}"
 }
 
+# Project-level BigQuery roles granted to the pipeline runner service account (ops-pipeline-sa)
+locals {
+  pipeline_bq_roles = [
+    "roles/bigquery.dataEditor",
+    "roles/bigquery.jobUser"
+  ]
+}
+
+resource "google_project_iam_member" "pipeline_runner_bq_access" {
+  for_each = toset(local.pipeline_bq_roles) # one project IAM member per role in the list
+  project  = var.project_id
+  role     = each.key
+  member   = "serviceAccount:${google_service_account.platform_accounts["ops-pipeline-sa"].email}"
+}
+
 
 # ------------------------------------------------------------
 # GOOGLE SERVICE AGENTS (Pub/Sub)
diff --git a/.gcp/terraforms/jobs.tf b/.gcp/terraforms/jobs.tf
index dbe25fa..e0f2d00 100644
--- a/.gcp/terraforms/jobs.tf
+++ b/.gcp/terraforms/jobs.tf
@@ -16,22 +16,49 @@ resource "google_cloud_run_v2_job" "pipeline" {
 
         resources {
           limits = {
-            cpu    = "2"
+            cpu    = "4"
             memory = "8Gi"
           }
         }
         env {
           name  = "POLARS_MAX_THREADS"
-          value = "2"
+          value = "4"
+        }
+        env {
+          name  = "GCP_REGION"
+          value = var.region
+        }
+        env {
+          name  = "BQ_DATASET_ID"
+          value = var.bq_dataset_id
+        }
+        env {
+          name  = "GCP_PROJECT"
+          value = var.project_id
+        }
+
+        volume_mounts {
+          name       = "ephemeral-disk-1"
+          mount_path = "/tmp"
+        }
+      }
+
+      volumes {
+        name = "ephemeral-disk-1"
+        empty_dir {
+          size_limit = "10Gi"
         }
       }
     }
   }
   lifecycle {
     ignore_changes = [
+      # Github ci-infra updates image every update
       template[0].template[0].containers[0].image,
       client,
-      client_version
+      client_version,
+      # Block terraform from defaulting medium to MEMORY, DISK isn't supported by provider yet
+      template[0].template[0].volumes[0].empty_dir[0].medium
     ]
   }
 }
diff --git a/.gcp/terraforms/main.tf b/.gcp/terraforms/main.tf
index 7fd8ccc..7dc97e5 100644
--- a/.gcp/terraforms/main.tf
+++ b/.gcp/terraforms/main.tf
@@ -31,6 +31,8 @@ locals {
     "cloudscheduler.googleapis.com",
     "iamcredentials.googleapis.com",
     "drive.googleapis.com",
+    "bigquery.googleapis.com",
+    "bigqueryconnection.googleapis.com",
   ]
 }
 
diff --git a/.gcp/terraforms/storage.tf b/.gcp/terraforms/storage.tf
index 999f2d1..5130e70 100644
--- a/.gcp/terraforms/storage.tf
+++ b/.gcp/terraforms/storage.tf
@@ -46,3 +46,4 @@ resource "google_storage_bucket" "ops_pipeline_bucket" {
     }
   }
 }
+
diff --git a/.gcp/terraforms/variables.tf b/.gcp/terraforms/variables.tf
index 71f0294..5bac088 100644
--- a/.gcp/terraforms/variables.tf
+++ b/.gcp/terraforms/variables.tf
@@ -4,7 +4,7 @@ variable "project_id" {
 }
 
 variable "region" {
-  description = "The Default GCP region"
+  description = "The Project GCP region"
   type        = string
   default     = "us-east1"
 }
@@ -24,3 +24,8 @@ variable "alert_email_map" {
   description = "List of emails to receive pipeline alerts"
   sensitive   = true
 }
+
+variable "bq_dataset_id" {
+  description = "BigQuery dataset containing externalized GCS tables"
+  type        = string # no default: must be supplied (the ci-infra workflow sets TF_VAR_bq_dataset_id)
+}
diff --git a/.github/workflows/ci-infra.yml b/.github/workflows/ci-infra.yml
index 48b4db6..21639cb 100644
--- a/.github/workflows/ci-infra.yml
+++ b/.github/workflows/ci-infra.yml
@@ -55,4 +55,5 @@ jobs:
           TF_VAR_region: ${{ env.REGION }}
           TF_VAR_github_repo: ${{ env.GITHUB_REPO }}
           TF_VAR_alert_email_map: ${{ secrets.ALERT_EMAIL_MAP }}
+          TF_VAR_bq_dataset_id: ${{secrets.BQ_DATASET_ID}}
         run: terraform apply -auto-approve
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index f6246d9..a4c1ffa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,9 +7,11 @@ __pycache__/
 runtime/
 /data/raw
 data/published/
+data/id_mapping/
 data/run_artifact
 data/contracted/
 assets/benchmarks/benchmark.py
+docker-compose.benchmark.yml
 
 # local editor configs
 pyrightconfig.json
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 3acec9e..54275d1 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -5,5 +5,6 @@
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
     "python-envs.defaultEnvManager": "ms-python.python:conda",
-    "python-envs.defaultPackageManager": "ms-python.python:conda"
+    "python-envs.defaultPackageManager": "ms-python.python:conda",
+    "python.terminal.activateEnvironment": false
 }
\ No newline at end of file
diff --git a/README.md b/README.md
index f536a0c..542e504 100644
--- a/README.md
+++ b/README.md
@@ -16,9 +16,11 @@ This project solves that challenge by delivering a highly resilient, event-drive
 
 To eliminate the risk of cross-run data contamination and memory bloat, the pipeline employs a defensive state-management strategy where local compute environments are strictly temporary:
 * **Stateless Orchestration:** Every execution operates within an isolated, deterministic `run_id` workspace that is aggressively purged post-run.
+* **Primitive Integer Pipeline:** Optimizes high-volume joins by mapping 36-byte UUID strings to 4-byte UInt32 surrogates, reducing join-key memory overhead by ~16x and protecting the serverless memory ceiling.
 * **Cloud Sync & Purge:** After processing data into the Silver layer, the system syncs the output to Cloud Storage, purging the local environment.
-* **Historical Context Pull:** It then safely re-downloads the complete historical state for Gold layer aggregation, ensuring every run builds analytical models in a clean, untainted environment.
+* **Historical Context Pull:** It then safely streams the complete historical state for Gold layer aggregation, ensuring every run builds analytical models in a clean, untainted environment.
 * **Linear Gating:** Stages are strictly gated; failure at any tier (Ingestion, Contract, or Assembly) prevents downstream processing and ensures partial data is never promoted.
+* **BigQuery Atomic Swap:** Final semantic models are delivered via Authorized Views that atomically swap pointers to new data versions, providing zero-downtime connectivity for BI consumers.
 * **Resource-Optimized Compute:** Leverages a highly efficient lazy-evaluation engine to process large-scale datasets seamlessly within the strict memory constraints of serverless environments.
 
 ### Event-Driven Cloud Infrastructure
@@ -42,23 +44,35 @@ The pipeline does not just move data; it actively defends the analytical layer f
 
 **Silver (The Contract Layer)**
 * **Philosophy (Subtractive-Only Logic):** The pipeline never guesses, imputes, or "repairs" bad data. If a record violates the contract, it is explicitly dropped, and the loss is logged in the telemetry report.
+* **Primitive Integer Pipeline:** Optimizes downstream high-volume joins by mapping 36-byte UUID strings to 4-byte UInt32 surrogates, reducing join-key memory overhead by ~16x and ensuring the pipeline stays within serverless memory constraints.
 * **Role-Based Rules:** Tables are classified by role (`event_fact`, `transaction_detail`, `entity_reference`) and subjected to specific registry rules (e.g., deduplication, non-null assertions).
 * **Referential Integrity (Cascade Cleanup):** The pipeline tracks invalidated parent IDs (e.g., malformed `order_id`s) and propagates them downstream. If an order is dropped, all associated child records (like line items) are cascade-dropped to prevent orphan data from polluting joins.
 * **Schema Freeze:** Output files are strictly cast to predefined data types and projected to contain only approved columns before being written to Cloud Storage.
 
 **Gold (The Semantic Layer)**
-* **Purpose:** Business-ready Fact and Dimension tables modeled for entity-centric and cohort analysis (Customers, Sellers, Products).
-* **Strict Grain Enforcement:**
-    * **Temporal:** All fact tables are deterministically aligned to an ISO-Week grain (`W-MON`).
-    * **Entity:** The engine validates that Dimension tables contain exactly one row per `Entity_ID`, and Fact tables contain exactly one row per `(Entity_ID, order_year_week)`.
-* **Lineage Integrity:** The Semantic builder aggressively checks that the assembled data belongs to a single `run_id`. Cross-run data contamination triggers a terminal failure, preventing poisoned data from ever reaching production.
+* **Purpose:** High-fidelity analytical modeling through advanced integration and entity-centric aggregation. The Gold layer is partitioned into two distinct stages to maintain a strict separation between integration logic and business metrics.
+* **Stage I: Assembly (The Analytical Backbone)**
+    * **Role:** Integrates normalized relational tables (`orders`, `items`, `payments`) into a unified, analytical "Event" dataset.
+    * **Invariants:** Guaranteed 1:1 grain per `order_id_int`. It performs analytical flattening and calculates fulfillment lead times while enforcing referential integrity (e.g., purging orders without items).
+    * **Dimension Extraction:** Generates strictly deduplicated reference tables for Customers and Products, ensuring a single source of truth for entity attributes.
+* **Stage II: Semantic (The Business Logic Engine)**
+    * **Role:** Transforms unified order-grain events into specialized Fact and Dimension modules tailored for cohort and entity-centric analysis (Sellers, Customers, Products).
+    * **Strict Grain Enforcement:**
+        * **Temporal:** All fact tables are deterministically aligned to an ISO-Week grain (`W-MON`).
+        * **Entity-Fact:** Strictly 1 row per `(Entity_ID, order_year_week)`.
+        * **Entity-Dim:** Strictly 1 row per `Entity_ID`.
+* **Technical Invariants:**
+    * **Integer Key Optimization:** Both stages leverage the Primitive Integer Pipeline for grouping and joins, maintaining a constant memory profile by avoiding string-based hash tables.
+    * **Schema Freeze:** Both stages' output files are strictly cast to predefined data types and projected to contain only approved columns.
 
 ### Validation Gates & Deployment Integrity
 
 * **Dual-Pass Validation Strategy:**
     * **Initial Validation (Raw Gate):** The orchestrator evaluates raw snapshots. At this stage, `warnings` (like duplicate IDs or nulls) are tolerated and passed down to the Contract Stage for subtractive cleanup. Only fatal structural errors abort the run.
     * **Post-Contract Revalidation (Silver Gate):** After contract rules are applied, the system re-runs validation. In this phase, `warnings` are escalated to fatal. Because the contract stage guarantees a clean schema, any remaining warnings trigger a terminal `RuntimeError`, halting the pipeline immediately to prevent downstream corruption.
-* **Atomic Publishing Lifecycle:** The pipeline protects the Gold layer by writing intermediate analytical models to isolated temporary directories during computation. Only when *all* semantic modules successfully finish processing does the system execute an atomic publish via `latest_version.json` pointer updates, guaranteeing that partial or incomplete data is never served to dashboards.
+* **Atomic Publishing Lifecycle:** 
+    * **Staged Execution (Isolated Buffer):** The pipeline protects the Gold layer by writing intermediate analytical models to isolated temporary directories during computation. Only when all semantic modules successfully finish processing does the system execute a multi-system atomic publish.
+    * **Atomic Deployment (BigQuery View Swap):** This multi-system swap redirects BigQuery Authorized Views to fresh External Tables and updates the `latest_version.json` manifest, ensuring BI tools like Power BI always query complete, validated datasets without downtime.
 * **Comprehensive Telemetry:**
     * **End-to-End Traceability:** A single `run_id` is propagated through all raw snapshots, metadata logs, and published artifacts to provide absolute lineage tracking.
     * **Resilient Logging:** Even in the event of a fatal crash, the orchestrator's `finally` block guarantees that partial logs and stage reports are synced back to cloud storage before the local workspace is purged, ensuring debuggability.
@@ -69,58 +83,58 @@ The pipeline is explicitly engineered to process massive datasets within the rig
 
 ### GCP Stress-Test Metrics (Scaling Efficiency)
 
-| 18M Snapshot (8GiB / 2 vCPU) | 36M Snapshot (16GiB / 4 vCPU) |
-| :---: | :---: |
-| ![engine-performance-8gb](/assets/screenshots/engine-performance-8gb-2cpu.png) | ![engine-performance-16gb](/assets/screenshots/engine-performance-16gb-4cpu.png) |
+| 40M Snapshot (8GB / 4 vCPU) with mounted temporary disk |
+| :---: |
+| ![engine-performance-8gb](/assets/screenshots/engine-performance-8gb-4cpu.png) |
 
-> Benchmark data: [`18m_stats_log.csv`](/assets/benchmarks/polars/18mrows_dataset_stats_log.csv) and [`36m_stats_log.csv`](/assets/benchmarks/polars/36mrows_dataset_stats_log.csv)
+> Benchmark data: [`40m_stats_log.csv`](/assets/benchmarks/polars/40mrows_dataset_stats_log.csv)
+> Dataset: [`Dataset Information`](/data/README.md)
 
-| Metric | 18M Rows (8GB / 2 vCPU) | 36M Rows (16GB / 4 vCPU) |
-| :--- | :--- | :--- |
-| **Throughput (Processing)** | ~116,000 Rows / Second | ~220,000 Rows / Second |
-| **Total Runtime (Wall-Clock)** | 02m 34s | 02m 43s |
-| **Memory Tax (Fixed)** | ~1.5 GiB | ~1.5 GiB |
-| **Effective Data Headroom** | ~6.5 GiB | ~14.5 GiB |
+| Metric | Data | 
+|:---|:---|
+| Dataset |~40 Million Rows / ~5.3 GB Parquet|
+| Provision Spec | 8 GB RAM / 4 vCPU |
+| Efficiency (Processing) | ~307k Rows / Second |
+| Total Runtime (Wall-Clock) | 130 Seconds |
 
-*   **Near-Linear Performance Scaling:** Doubling the compute and dataset size results in only a 9-second increase in wall-clock time, effectively doubling the throughput as the Polars engine saturates the additional vCPUs.
-*   **Predictable Capacity:** Identifying the "Memory Tax" (OS/IO overhead) allows for precise resource governance, ensuring jobs never fail due to unpredictable Signal 9 (OOM) events.
+*   **Maximized Memory Density:** The **Primitive Integer Pipeline** maps 36-byte UUID strings to 4-byte UInt32 keys, shrinking join-key memory overhead by ~16x. This allowed a ~5.34GB analytical model (40M rows) to be processed entirely within the 8GB RAM limit.
+*   **Near-Linear Performance Scaling:** The Polars engine saturates the available vCPUs, yielding ultra-high throughput (307k rows/s) during streaming execution.
 *   **Zero-Idle Economics:** 100% serverless execution ensures zero billable time during idle periods, significantly reducing the Total Cost of Ownership (TCO) compared to dedicated cluster solutions.
 
 ### Cost Efficiency & Free-Tier
 
-The pipeline's processing speed allows for a full analytical rebuild of 36M rows while remaining comfortably within the **GCP Cloud Run Free Tier** (180k vCPU-sec, 360k GiB-sec). This means a small-to-mid-sized organization can run this production-grade pipeline multiple times a day with **zero compute costs.**
+The pipeline's processing speed allows for a full analytical rebuild of 40M rows while remaining comfortably within the **GCP Cloud Run Free Tier** (180k vCPU-sec, 360k GB-sec). This means a small-to-mid-sized organization can run this production-grade pipeline multiple times a day with **zero compute costs.**
 
-| Compute Provision | Dataset | vCPU-Seconds / Run | GiB-Seconds / Run | Monthly Free-Tier Runs |
+| Compute Provision | Dataset | vCPU-Seconds / Run | GB-Seconds / Run | Monthly Free-Tier Runs |
 | :--- | :--- | :--- | :--- | :--- |
-| **8 GiB / 2 vCPU** | ~18m rows | 308 | 1,232 | **~292 Runs / Month** |
-| **16 GiB / 4 vCPU** | ~36m rows | 652 | 2,608 | **~138 Runs / Month** |
-| **32 GiB / 8 vCPU** | ~72m rows | 1,304 | 5,216 | **~69 Runs / Month** |
+| **8 GB / 4 vCPU** | ~40M rows | 520 | 1,040 | **~346 Runs / Month** |
+| **16 GB / 6 vCPU** | ~80M rows | 1,040 | 2,773 | **~129 Runs / Month** |
+| **32 GB / 8 vCPU** | ~160M rows | 2,080 | 8,320 | **~43 Runs / Month** |
 
-> *Calculations based on verified benchmarks. Even at the highest 32GiB tier, the pipeline can execute a full state rebuild twice daily for $0*
+> *Calculations based on verified benchmarks. Even at the highest 32GB tier, the pipeline can execute a full state rebuild over 43 times per month for $0 within the GCP free tier.*
 
 ### Measurement Methodology
 *   **Performance Profiling:** Captured from production telemetry via the pipeline's native `run_duration` metadata, calculating the precise delta between `started_at` and `completed_at` timestamps.
-*   **Memory Utilization:** Monitored via an integrated [`psutil.virtual_memory().used`](/assets/benchmarks/polars/README.md) profiling implementation to verify the actual resource footprint and confirm the physical ceiling for 8GiB/16GiB provision.
-*   **Throughput Efficiency:** Leverages Polars streaming evaluation to maintain high throughput and minimize CPU idle time during GCS I/O, providing a significant performance advantage over traditional eager-loading engines.
+*   **Memory Utilization:** Monitored via an integrated [`psutil.virtual_memory().used`](/assets/benchmarks/polars/README.md) profiling implementation to verify the actual resource footprint and confirm the physical ceiling for 8GB provision.
 
 ### **Scaling Roadmap: From Serverless to Enterprise Lakehouse**
 
 To ensure the architecture survives the transition from millions to billions of rows, the pipeline is designed to evolve across three validated scaling paths. This roadmap prioritizes cost-efficiency at low volumes while providing a clear architectural pivot for enterprise-scale workloads.
 
-#### **Stage 1: Temporal Sharding (Vertical Efficiency)**
-*   **Strategy:** Refactor the `Assemble` stage to iterate through **yearly batch partitions** while `Semantic` stage to **streams output directly** to a GCS staging location.
-*   **Publish Evolution:** Moves to a **Partitioned Atomic Swap**. Yearly shards are streamed directly to a staged GCS version prefix. The `Integrity Gate` validates cloud-side completeness before the `latest_version.json` pointer is updated.
-*   **Trade-off:** **Latency vs. Memory.** Significantly increases total wall-clock time due to repeated I/O cycles, but allows 32GiB instances to process 100M+ rows by isolating join-intensity to specific temporal shards.
+#### **Stage 1: Incremental Delta Propagation (Efficiency Pivot)**
+*   **Strategy:** Transition from a "Full Rebuild" batch model to a **Stateless Delta Propagation** model using Polars' streaming engine to process only newly arrived `.parquet` deltas.
+*   **Optimization:** Leverages the existing BigQuery View infrastructure to perform "Last-Mile" merging of incremental updates with the historical state, eliminating the need for redundant full-table re-reads.
+*   **Trade-off:** **Operational Complexity vs. Compute Cost.** Reduces GCS I/O and CPU time by 80-90% for daily runs, but requires more sophisticated state-tracking in the metadata layer.
 
-#### **Stage 2: Incremental Delta Architecture (Event-Driven)**
-*   **Strategy:** Transition from a "Full Rebuild" batch model to a **Stateless Delta Propagation** model, processing only active deltas.
-*   **Publish Evolution:** Moves to a **Checkpoint-based Commit**. Folder-based versioning is replaced by an atomic merge into the Gold layer. The "Pointer" evolves into a metadata watermark signifying data freshness to downstream consumers.
-*   **Trade-off:** **Simplicity vs. Scale.** Eliminates memory constraints and reduces runtime costs, but sacrifices easy "point-in-time" folder recovery. Requires "Last-Mile" deduplication logic (e.g., SQL Views) for downstream consumers.
+#### **Stage 2: Event-Driven Real-Time Streaming (Latency Pivot)**
+*   **Strategy:** Integrate GCS Pub/Sub notifications with **Cloud Run streaming sinks** to trigger sub-minute validation and assembly.
+*   **Architecture:** Moves from a daily batch schedule to a continuous ingestion loop where each file upload triggers a micro-run. The BigQuery Atomic View Swap acts as the transactional boundary, ensuring dashboards always see the latest validated data without waiting for the daily window.
+*   **Trade-off:** **Responsiveness vs. Throughput.** Provides near real-time insights but increases the frequency of small I/O operations.
 
 #### **Stage 3: BigQuery "Engine-as-a-Service" (The Enterprise Pivot)**
-*   **Strategy:** Offload the `Assemble` and `Semantic` compute layers entirely to **BigQuery (ELT Pattern)**.
-*   **Publish Evolution:** Moves to a **Atomic View Redirection**. The Python "Gatekeeper" builds semantics in a staging dataset and runs SQL-driven integrity checks. Publication is achieved by an atomic swap of a BigQuery Authorized View, replacing the file-based pointer system.
-*   **Trade-off:** **Cost vs. Capability.** Provides an infinite scaling ceiling and removes all local infrastructure bounds, but introduces higher cost-per-query overhead and requires transitioning from local Parquet files to managed cloud storage.
+*   **Strategy:** Offload the high-volume `Assemble` and `Semantic` compute layers entirely to **BigQuery (ELT Pattern)** using SQL-driven logic.
+*   **Scalability:** Provides an infinite scaling ceiling (Petabyte-scale) and removes all local infrastructure bounds, while the Python pipeline acts as an "Air-Traffic Controller" managing integrity gates and view swaps.
+*   **Trade-off:** **Scalability vs. Vendor Lock-in.** Simplifies the compute environment but moves the primary cost from serverless RAM to BigQuery slot usage.
 
 
 ## Observability & Alerting
@@ -135,7 +149,7 @@ The custom Cloud Monitoring dashboard tracks granular operational metrics to pro
 **Pipeline Job Metrics:**
 1. **Workflow Execution Traffic:** Measures the volume of finished pipeline runs.
 2. **Execution Status Ratio:** Tracks the count of `SUCCESS` vs. `FAILED` runs to monitor overall reliability.
-3. **Memory Allocation Bottlenecks:** Plots the actual Cloud Run memory usage against a hardcoded 4GB horizontal threshold to visualize proximity to OOM exhaustion.
+3. **Memory Allocation Bottlenecks:** Plots the actual Cloud Run memory usage against a hardcoded 8GB horizontal threshold to visualize proximity to OOM exhaustion.
 
 **Extractor Job Metrics:**
 1. **Drive Extractor Latency:** Tracks the billable instance time of the extractor job (the most accurate proxy for API usage cost, as the extractor utilizes the Drive API continuously during runtime).
diff --git a/assets/benchmarks/polars/18mrows_dataset_stats_log.csv b/assets/benchmarks/polars/18mrows_dataset_stats_log.csv
deleted file mode 100644
index 1dcd5dd..0000000
--- a/assets/benchmarks/polars/18mrows_dataset_stats_log.csv
+++ /dev/null
@@ -1,156 +0,0 @@
-view,timestamp,logger,memory,unit
-DEFAULT,2026-04-05T15:17:39.094288Z,METRIC_MEM:,838.05,MB
-DEFAULT,2026-04-05T15:17:40.094015Z,METRIC_MEM:,885.5,MB
-DEFAULT,2026-04-05T15:17:41.094492Z,METRIC_MEM:,902.29,MB
-DEFAULT,2026-04-05T15:17:42.095390Z,METRIC_MEM:,1021.48,MB
-DEFAULT,2026-04-05T15:17:43.096006Z,METRIC_MEM:,1090.54,MB
-DEFAULT,2026-04-05T15:17:44.096644Z,METRIC_MEM:,1175.03,MB
-DEFAULT,2026-04-05T15:17:45.096868Z,METRIC_MEM:,1239.12,MB
-DEFAULT,2026-04-05T15:17:46.098148Z,METRIC_MEM:,1305.58,MB
-DEFAULT,2026-04-05T15:17:47.098659Z,METRIC_MEM:,1366.32,MB
-DEFAULT,2026-04-05T15:17:48.099119Z,METRIC_MEM:,1434.21,MB
-DEFAULT,2026-04-05T15:17:49.099548Z,METRIC_MEM:,1494.41,MB
-DEFAULT,2026-04-05T15:17:50.099927Z,METRIC_MEM:,1558.38,MB
-DEFAULT,2026-04-05T15:17:51.100508Z,METRIC_MEM:,1623.3,MB
-DEFAULT,2026-04-05T15:17:52.100953Z,METRIC_MEM:,1686.8,MB
-DEFAULT,2026-04-05T15:17:53.101235Z,METRIC_MEM:,1749.76,MB
-DEFAULT,2026-04-05T15:17:54.101841Z,METRIC_MEM:,1813.02,MB
-DEFAULT,2026-04-05T15:17:55.102002Z,METRIC_MEM:,1876.75,MB
-DEFAULT,2026-04-05T15:17:56.101989Z,METRIC_MEM:,1939.24,MB
-DEFAULT,2026-04-05T15:17:57.102025Z,METRIC_MEM:,2002.38,MB
-DEFAULT,2026-04-05T15:17:58.102325Z,METRIC_MEM:,2057.79,MB
-DEFAULT,2026-04-05T15:17:59.102856Z,METRIC_MEM:,2121.08,MB
-DEFAULT,2026-04-05T15:18:00.111702Z,METRIC_MEM:,2183.94,MB
-DEFAULT,2026-04-05T15:18:01.112002Z,METRIC_MEM:,2246.98,MB
-DEFAULT,2026-04-05T15:18:02.112465Z,METRIC_MEM:,2309.6,MB
-DEFAULT,2026-04-05T15:18:03.112922Z,METRIC_MEM:,2381.02,MB
-DEFAULT,2026-04-05T15:18:04.113281Z,METRIC_MEM:,2443.89,MB
-DEFAULT,2026-04-05T15:18:05.113580Z,METRIC_MEM:,2503.56,MB
-DEFAULT,2026-04-05T15:18:06.114007Z,METRIC_MEM:,2565.33,MB
-DEFAULT,2026-04-05T15:18:07.114544Z,METRIC_MEM:,2636.52,MB
-DEFAULT,2026-04-05T15:18:08.115021Z,METRIC_MEM:,2688.11,MB
-DEFAULT,2026-04-05T15:18:09.115407Z,METRIC_MEM:,2751.13,MB
-DEFAULT,2026-04-05T15:18:10.116230Z,METRIC_MEM:,2814.24,MB
-DEFAULT,2026-04-05T15:18:11.116344Z,METRIC_MEM:,2873.65,MB
-DEFAULT,2026-04-05T15:18:12.116918Z,METRIC_MEM:,3059.93,MB
-DEFAULT,2026-04-05T15:18:13.117293Z,METRIC_MEM:,3313.13,MB
-DEFAULT,2026-04-05T15:18:14.117668Z,METRIC_MEM:,3622.96,MB
-DEFAULT,2026-04-05T15:18:15.117599Z,METRIC_MEM:,3910.37,MB
-DEFAULT,2026-04-05T15:18:16.117708Z,METRIC_MEM:,4121.29,MB
-DEFAULT,2026-04-05T15:18:17.117574Z,METRIC_MEM:,4370.69,MB
-DEFAULT,2026-04-05T15:18:18.117917Z,METRIC_MEM:,4486.84,MB
-DEFAULT,2026-04-05T15:18:19.118298Z,METRIC_MEM:,4700.68,MB
-DEFAULT,2026-04-05T15:18:20.118662Z,METRIC_MEM:,4763.57,MB
-DEFAULT,2026-04-05T15:18:21.119092Z,METRIC_MEM:,4878.82,MB
-DEFAULT,2026-04-05T15:18:22.119512Z,METRIC_MEM:,4885.33,MB
-DEFAULT,2026-04-05T15:18:23.119906Z,METRIC_MEM:,5020.86,MB
-DEFAULT,2026-04-05T15:18:24.120397Z,METRIC_MEM:,4984.23,MB
-DEFAULT,2026-04-05T15:18:25.120751Z,METRIC_MEM:,4914.77,MB
-DEFAULT,2026-04-05T15:18:26.121326Z,METRIC_MEM:,4899.29,MB
-DEFAULT,2026-04-05T15:18:27.121629Z,METRIC_MEM:,5030.85,MB
-DEFAULT,2026-04-05T15:18:28.122106Z,METRIC_MEM:,5132.49,MB
-DEFAULT,2026-04-05T15:18:29.122520Z,METRIC_MEM:,5390.58,MB
-DEFAULT,2026-04-05T15:18:30.123148Z,METRIC_MEM:,4796.01,MB
-DEFAULT,2026-04-05T15:18:31.123418Z,METRIC_MEM:,4350.85,MB
-DEFAULT,2026-04-05T15:18:32.123873Z,METRIC_MEM:,4539.49,MB
-DEFAULT,2026-04-05T15:18:33.124349Z,METRIC_MEM:,4668.3,MB
-DEFAULT,2026-04-05T15:18:34.124620Z,METRIC_MEM:,4796.89,MB
-DEFAULT,2026-04-05T15:18:35.124609Z,METRIC_MEM:,4944.93,MB
-DEFAULT,2026-04-05T15:18:36.124705Z,METRIC_MEM:,5001.48,MB
-DEFAULT,2026-04-05T15:18:37.124716Z,METRIC_MEM:,5149.01,MB
-DEFAULT,2026-04-05T15:18:38.125250Z,METRIC_MEM:,5257.33,MB
-DEFAULT,2026-04-05T15:18:39.128174Z,METRIC_MEM:,5386.73,MB
-DEFAULT,2026-04-05T15:18:40.127702Z,METRIC_MEM:,5283.05,MB
-DEFAULT,2026-04-05T15:18:41.128200Z,METRIC_MEM:,5429.89,MB
-DEFAULT,2026-04-05T15:18:42.128853Z,METRIC_MEM:,5615.36,MB
-DEFAULT,2026-04-05T15:18:43.129231Z,METRIC_MEM:,5757.95,MB
-DEFAULT,2026-04-05T15:18:44.129594Z,METRIC_MEM:,5779.03,MB
-DEFAULT,2026-04-05T15:18:45.130128Z,METRIC_MEM:,5901.58,MB
-DEFAULT,2026-04-05T15:18:46.130602Z,METRIC_MEM:,5883.96,MB
-DEFAULT,2026-04-05T15:18:47.131164Z,METRIC_MEM:,5858.47,MB
-DEFAULT,2026-04-05T15:18:48.131824Z,METRIC_MEM:,5792.43,MB
-DEFAULT,2026-04-05T15:18:49.132486Z,METRIC_MEM:,5744.39,MB
-DEFAULT,2026-04-05T15:18:50.133137Z,METRIC_MEM:,5701.54,MB
-DEFAULT,2026-04-05T15:18:51.133544Z,METRIC_MEM:,5626.5,MB
-DEFAULT,2026-04-05T15:18:52.134033Z,METRIC_MEM:,5644.95,MB
-DEFAULT,2026-04-05T15:18:53.134312Z,METRIC_MEM:,5595.89,MB
-DEFAULT,2026-04-05T15:18:54.134773Z,METRIC_MEM:,5604.79,MB
-DEFAULT,2026-04-05T15:18:55.134682Z,METRIC_MEM:,5545.34,MB
-DEFAULT,2026-04-05T15:18:56.134782Z,METRIC_MEM:,5478.24,MB
-DEFAULT,2026-04-05T15:18:57.134596Z,METRIC_MEM:,5473.36,MB
-DEFAULT,2026-04-05T15:18:58.134867Z,METRIC_MEM:,5630.43,MB
-DEFAULT,2026-04-05T15:18:59.135234Z,METRIC_MEM:,5692.07,MB
-DEFAULT,2026-04-05T15:19:00.135694Z,METRIC_MEM:,5561.45,MB
-DEFAULT,2026-04-05T15:19:01.136174Z,METRIC_MEM:,5544.65,MB
-DEFAULT,2026-04-05T15:19:02.136578Z,METRIC_MEM:,5575.79,MB
-DEFAULT,2026-04-05T15:19:03.136949Z,METRIC_MEM:,5544.28,MB
-DEFAULT,2026-04-05T15:19:04.137511Z,METRIC_MEM:,5540,MB
-DEFAULT,2026-04-05T15:19:05.137967Z,METRIC_MEM:,5541.6,MB
-DEFAULT,2026-04-05T15:19:06.138332Z,METRIC_MEM:,5549.2,MB
-DEFAULT,2026-04-05T15:19:07.138981Z,METRIC_MEM:,5489.71,MB
-DEFAULT,2026-04-05T15:19:08.139470Z,METRIC_MEM:,5471.11,MB
-DEFAULT,2026-04-05T15:19:09.139825Z,METRIC_MEM:,5431.8,MB
-DEFAULT,2026-04-05T15:19:10.140261Z,METRIC_MEM:,5320.22,MB
-DEFAULT,2026-04-05T15:19:11.140891Z,METRIC_MEM:,4346.55,MB
-DEFAULT,2026-04-05T15:19:12.141460Z,METRIC_MEM:,2485.19,MB
-DEFAULT,2026-04-05T15:19:13.141774Z,METRIC_MEM:,2588.53,MB
-DEFAULT,2026-04-05T15:19:14.142218Z,METRIC_MEM:,2806.17,MB
-DEFAULT,2026-04-05T15:19:15.142033Z,METRIC_MEM:,2963.23,MB
-DEFAULT,2026-04-05T15:19:16.142183Z,METRIC_MEM:,3216.75,MB
-DEFAULT,2026-04-05T15:19:17.142126Z,METRIC_MEM:,3407.09,MB
-DEFAULT,2026-04-05T15:19:18.142371Z,METRIC_MEM:,3624.29,MB
-DEFAULT,2026-04-05T15:19:19.142658Z,METRIC_MEM:,3956.66,MB
-DEFAULT,2026-04-05T15:19:20.143131Z,METRIC_MEM:,4189.17,MB
-DEFAULT,2026-04-05T15:19:21.143696Z,METRIC_MEM:,4211.23,MB
-DEFAULT,2026-04-05T15:19:22.143941Z,METRIC_MEM:,4211.42,MB
-DEFAULT,2026-04-05T15:19:23.144386Z,METRIC_MEM:,3863.35,MB
-DEFAULT,2026-04-05T15:19:24.144796Z,METRIC_MEM:,2536.34,MB
-DEFAULT,2026-04-05T15:19:25.145214Z,METRIC_MEM:,2660.13,MB
-DEFAULT,2026-04-05T15:19:26.145782Z,METRIC_MEM:,2836.82,MB
-DEFAULT,2026-04-05T15:19:27.146128Z,METRIC_MEM:,2584.15,MB
-DEFAULT,2026-04-05T15:19:28.146666Z,METRIC_MEM:,2558.5,MB
-DEFAULT,2026-04-05T15:19:29.147126Z,METRIC_MEM:,2608.82,MB
-DEFAULT,2026-04-05T15:19:30.147580Z,METRIC_MEM:,2667.48,MB
-DEFAULT,2026-04-05T15:19:31.148120Z,METRIC_MEM:,2916.17,MB
-DEFAULT,2026-04-05T15:19:32.148692Z,METRIC_MEM:,2942.69,MB
-DEFAULT,2026-04-05T15:19:33.148913Z,METRIC_MEM:,3120.53,MB
-DEFAULT,2026-04-05T15:19:34.149279Z,METRIC_MEM:,3296.05,MB
-DEFAULT,2026-04-05T15:19:35.149299Z,METRIC_MEM:,3372.83,MB
-DEFAULT,2026-04-05T15:19:36.149242Z,METRIC_MEM:,3573.39,MB
-DEFAULT,2026-04-05T15:19:37.149163Z,METRIC_MEM:,3607.4,MB
-DEFAULT,2026-04-05T15:19:38.149425Z,METRIC_MEM:,3747.48,MB
-DEFAULT,2026-04-05T15:19:39.149925Z,METRIC_MEM:,4006.93,MB
-DEFAULT,2026-04-05T15:19:40.150442Z,METRIC_MEM:,4343.77,MB
-DEFAULT,2026-04-05T15:19:41.150760Z,METRIC_MEM:,4695.14,MB
-DEFAULT,2026-04-05T15:19:42.151181Z,METRIC_MEM:,5070.16,MB
-DEFAULT,2026-04-05T15:19:43.151938Z,METRIC_MEM:,6073.37,MB
-DEFAULT,2026-04-05T15:19:44.153628Z,METRIC_MEM:,6354,MB
-DEFAULT,2026-04-05T15:19:45.153962Z,METRIC_MEM:,5669.24,MB
-DEFAULT,2026-04-05T15:19:46.155840Z,METRIC_MEM:,4217.47,MB
-DEFAULT,2026-04-05T15:19:47.156260Z,METRIC_MEM:,4248.13,MB
-DEFAULT,2026-04-05T15:19:48.156769Z,METRIC_MEM:,4326.8,MB
-DEFAULT,2026-04-05T15:19:49.157253Z,METRIC_MEM:,4379.6,MB
-DEFAULT,2026-04-05T15:19:50.157524Z,METRIC_MEM:,4434.73,MB
-DEFAULT,2026-04-05T15:19:51.158025Z,METRIC_MEM:,4496.63,MB
-DEFAULT,2026-04-05T15:19:52.158544Z,METRIC_MEM:,4474.88,MB
-DEFAULT,2026-04-05T15:19:53.159087Z,METRIC_MEM:,2964.78,MB
-DEFAULT,2026-04-05T15:19:54.159493Z,METRIC_MEM:,3233.5,MB
-DEFAULT,2026-04-05T15:19:55.159420Z,METRIC_MEM:,3510.62,MB
-DEFAULT,2026-04-05T15:19:56.159305Z,METRIC_MEM:,3812.53,MB
-DEFAULT,2026-04-05T15:19:57.159372Z,METRIC_MEM:,4118.51,MB
-DEFAULT,2026-04-05T15:19:58.159660Z,METRIC_MEM:,4441.35,MB
-DEFAULT,2026-04-05T15:19:59.160048Z,METRIC_MEM:,4737.43,MB
-DEFAULT,2026-04-05T15:20:00.160439Z,METRIC_MEM:,5069.43,MB
-DEFAULT,2026-04-05T15:20:01.160982Z,METRIC_MEM:,5400,MB
-DEFAULT,2026-04-05T15:20:02.161400Z,METRIC_MEM:,5691.34,MB
-DEFAULT,2026-04-05T15:20:03.161772Z,METRIC_MEM:,5747.48,MB
-DEFAULT,2026-04-05T15:20:04.162242Z,METRIC_MEM:,5785.51,MB
-DEFAULT,2026-04-05T15:20:05.162640Z,METRIC_MEM:,5753.39,MB
-DEFAULT,2026-04-05T15:20:06.163097Z,METRIC_MEM:,3100.74,MB
-DEFAULT,2026-04-05T15:20:07.163918Z,METRIC_MEM:,1582.75,MB
-DEFAULT,2026-04-05T15:20:08.163899Z,METRIC_MEM:,1665,MB
-DEFAULT,2026-04-05T15:20:09.164133Z,METRIC_MEM:,1763.7,MB
-DEFAULT,2026-04-05T15:20:10.164601Z,METRIC_MEM:,1760.18,MB
-DEFAULT,2026-04-05T15:20:11.164990Z,METRIC_MEM:,1762.74,MB
-DEFAULT,2026-04-05T15:20:12.165458Z,METRIC_MEM:,1739.09,MB
-DEFAULT,2026-04-05T15:20:13.165808Z,METRIC_MEM:,1625.09,MB
\ No newline at end of file
diff --git a/assets/benchmarks/polars/36mrows_dataset_stats_log.csv b/assets/benchmarks/polars/36mrows_dataset_stats_log.csv
deleted file mode 100644
index 8f0913d..0000000
--- a/assets/benchmarks/polars/36mrows_dataset_stats_log.csv
+++ /dev/null
@@ -1,165 +0,0 @@
-view,timestamp,logger,memory,unit
-DEFAULT,2026-04-09T21:43:40.178678Z,METRIC_MEM:,2812.83,MB
-DEFAULT,2026-04-09T21:43:41.178984Z,METRIC_MEM:,2868.2,MB
-DEFAULT,2026-04-09T21:43:42.179341Z,METRIC_MEM:,2931.05,MB
-DEFAULT,2026-04-09T21:43:43.179687Z,METRIC_MEM:,2994.48,MB
-DEFAULT,2026-04-09T21:43:44.179962Z,METRIC_MEM:,3049.48,MB
-DEFAULT,2026-04-09T21:43:45.180339Z,METRIC_MEM:,3111.88,MB
-DEFAULT,2026-04-09T21:43:46.180721Z,METRIC_MEM:,3175.25,MB
-DEFAULT,2026-04-09T21:43:47.181107Z,METRIC_MEM:,3246.77,MB
-DEFAULT,2026-04-09T21:43:48.181558Z,METRIC_MEM:,3309.72,MB
-DEFAULT,2026-04-09T21:43:49.181470Z,METRIC_MEM:,3364.92,MB
-DEFAULT,2026-04-09T21:43:50.181366Z,METRIC_MEM:,3427.7,MB
-DEFAULT,2026-04-09T21:43:51.181351Z,METRIC_MEM:,3483.11,MB
-DEFAULT,2026-04-09T21:43:52.181694Z,METRIC_MEM:,3554.01,MB
-DEFAULT,2026-04-09T21:43:53.182108Z,METRIC_MEM:,3609.08,MB
-DEFAULT,2026-04-09T21:43:54.182588Z,METRIC_MEM:,3672.5,MB
-DEFAULT,2026-04-09T21:43:55.182979Z,METRIC_MEM:,3726.8,MB
-DEFAULT,2026-04-09T21:43:56.183486Z,METRIC_MEM:,3790.45,MB
-DEFAULT,2026-04-09T21:43:57.183846Z,METRIC_MEM:,3854.01,MB
-DEFAULT,2026-04-09T21:43:58.184338Z,METRIC_MEM:,3916.96,MB
-DEFAULT,2026-04-09T21:43:59.184674Z,METRIC_MEM:,3980.06,MB
-DEFAULT,2026-04-09T21:44:00.185069Z,METRIC_MEM:,4043.16,MB
-DEFAULT,2026-04-09T21:44:01.185349Z,METRIC_MEM:,4098.34,MB
-DEFAULT,2026-04-09T21:44:02.185794Z,METRIC_MEM:,4161.24,MB
-DEFAULT,2026-04-09T21:44:03.190091Z,METRIC_MEM:,4224.66,MB
-DEFAULT,2026-04-09T21:44:04.186657Z,METRIC_MEM:,4287.29,MB
-DEFAULT,2026-04-09T21:44:05.187081Z,METRIC_MEM:,4342.34,MB
-DEFAULT,2026-04-09T21:44:06.187557Z,METRIC_MEM:,4405.66,MB
-DEFAULT,2026-04-09T21:44:07.187831Z,METRIC_MEM:,4467.94,MB
-DEFAULT,2026-04-09T21:44:08.188119Z,METRIC_MEM:,4523.98,MB
-DEFAULT,2026-04-09T21:44:09.188007Z,METRIC_MEM:,4586.83,MB
-DEFAULT,2026-04-09T21:44:10.187799Z,METRIC_MEM:,4649.84,MB
-DEFAULT,2026-04-09T21:44:11.187840Z,METRIC_MEM:,4705.04,MB
-DEFAULT,2026-04-09T21:44:12.187947Z,METRIC_MEM:,4768.01,MB
-DEFAULT,2026-04-09T21:44:13.188380Z,METRIC_MEM:,4831.27,MB
-DEFAULT,2026-04-09T21:44:14.189078Z,METRIC_MEM:,4894.65,MB
-DEFAULT,2026-04-09T21:44:15.189546Z,METRIC_MEM:,4958.21,MB
-DEFAULT,2026-04-09T21:44:16.189898Z,METRIC_MEM:,5020.09,MB
-DEFAULT,2026-04-09T21:44:17.190829Z,METRIC_MEM:,5075.54,MB
-DEFAULT,2026-04-09T21:44:18.191150Z,METRIC_MEM:,5138.73,MB
-DEFAULT,2026-04-09T21:44:19.191518Z,METRIC_MEM:,5182.56,MB
-DEFAULT,2026-04-09T21:44:20.197169Z,METRIC_MEM:,5623.91,MB
-DEFAULT,2026-04-09T21:44:21.197587Z,METRIC_MEM:,6033.96,MB
-DEFAULT,2026-04-09T21:44:22.201299Z,METRIC_MEM:,6488.75,MB
-DEFAULT,2026-04-09T21:44:23.198486Z,METRIC_MEM:,6785.55,MB
-DEFAULT,2026-04-09T21:44:24.198818Z,METRIC_MEM:,6895.43,MB
-DEFAULT,2026-04-09T21:44:25.199286Z,METRIC_MEM:,7071.41,MB
-DEFAULT,2026-04-09T21:44:26.199620Z,METRIC_MEM:,7220.95,MB
-DEFAULT,2026-04-09T21:44:27.200130Z,METRIC_MEM:,7458.04,MB
-DEFAULT,2026-04-09T21:44:28.200477Z,METRIC_MEM:,7530.45,MB
-DEFAULT,2026-04-09T21:44:29.200378Z,METRIC_MEM:,7652.43,MB
-DEFAULT,2026-04-09T21:44:30.200421Z,METRIC_MEM:,7838.43,MB
-DEFAULT,2026-04-09T21:44:31.200275Z,METRIC_MEM:,8050.17,MB
-DEFAULT,2026-04-09T21:44:32.200504Z,METRIC_MEM:,8270.6,MB
-DEFAULT,2026-04-09T21:44:33.200943Z,METRIC_MEM:,8439.16,MB
-DEFAULT,2026-04-09T21:44:34.201362Z,METRIC_MEM:,8777.46,MB
-DEFAULT,2026-04-09T21:44:35.201918Z,METRIC_MEM:,8934.88,MB
-DEFAULT,2026-04-09T21:44:36.202212Z,METRIC_MEM:,8910.29,MB
-DEFAULT,2026-04-09T21:44:37.202609Z,METRIC_MEM:,7358.61,MB
-DEFAULT,2026-04-09T21:44:38.204970Z,METRIC_MEM:,7661.91,MB
-DEFAULT,2026-04-09T21:44:39.209060Z,METRIC_MEM:,7835.91,MB
-DEFAULT,2026-04-09T21:44:40.209130Z,METRIC_MEM:,7907.15,MB
-DEFAULT,2026-04-09T21:44:41.204203Z,METRIC_MEM:,8154.41,MB
-DEFAULT,2026-04-09T21:44:42.209313Z,METRIC_MEM:,8405.19,MB
-DEFAULT,2026-04-09T21:44:43.209838Z,METRIC_MEM:,7903.66,MB
-DEFAULT,2026-04-09T21:44:44.210360Z,METRIC_MEM:,8222.37,MB
-DEFAULT,2026-04-09T21:44:45.210692Z,METRIC_MEM:,8588.28,MB
-DEFAULT,2026-04-09T21:44:46.213686Z,METRIC_MEM:,9032.81,MB
-DEFAULT,2026-04-09T21:44:47.211559Z,METRIC_MEM:,9398.2,MB
-DEFAULT,2026-04-09T21:44:48.211871Z,METRIC_MEM:,9777.2,MB
-DEFAULT,2026-04-09T21:44:49.211810Z,METRIC_MEM:,10187.28,MB
-DEFAULT,2026-04-09T21:44:50.211790Z,METRIC_MEM:,10544.79,MB
-DEFAULT,2026-04-09T21:44:51.211702Z,METRIC_MEM:,10918.25,MB
-DEFAULT,2026-04-09T21:44:52.226469Z,METRIC_MEM:,11645.3,MB
-DEFAULT,2026-04-09T21:44:53.226946Z,METRIC_MEM:,12692.11,MB
-DEFAULT,2026-04-09T21:44:54.227541Z,METRIC_MEM:,13956.78,MB
-DEFAULT,2026-04-09T21:44:55.228051Z,METRIC_MEM:,13874.34,MB
-DEFAULT,2026-04-09T21:44:56.228511Z,METRIC_MEM:,12523.92,MB
-DEFAULT,2026-04-09T21:44:57.228953Z,METRIC_MEM:,12711.82,MB
-DEFAULT,2026-04-09T21:44:58.229409Z,METRIC_MEM:,12737.88,MB
-DEFAULT,2026-04-09T21:44:59.229860Z,METRIC_MEM:,12783.1,MB
-DEFAULT,2026-04-09T21:45:00.230271Z,METRIC_MEM:,12872.66,MB
-DEFAULT,2026-04-09T21:45:01.230679Z,METRIC_MEM:,12877.08,MB
-DEFAULT,2026-04-09T21:45:02.231040Z,METRIC_MEM:,12811.36,MB
-DEFAULT,2026-04-09T21:45:03.231455Z,METRIC_MEM:,12722.5,MB
-DEFAULT,2026-04-09T21:45:04.237488Z,METRIC_MEM:,12728.71,MB
-DEFAULT,2026-04-09T21:45:05.232293Z,METRIC_MEM:,12771.59,MB
-DEFAULT,2026-04-09T21:45:06.232823Z,METRIC_MEM:,12869.63,MB
-DEFAULT,2026-04-09T21:45:07.237757Z,METRIC_MEM:,12988.25,MB
-DEFAULT,2026-04-09T21:45:08.233418Z,METRIC_MEM:,13095.33,MB
-DEFAULT,2026-04-09T21:45:09.233396Z,METRIC_MEM:,13201.29,MB
-DEFAULT,2026-04-09T21:45:10.236817Z,METRIC_MEM:,13288.67,MB
-DEFAULT,2026-04-09T21:45:11.237004Z,METRIC_MEM:,13399.59,MB
-DEFAULT,2026-04-09T21:45:12.237337Z,METRIC_MEM:,13502.8,MB
-DEFAULT,2026-04-09T21:45:13.237766Z,METRIC_MEM:,13616.84,MB
-DEFAULT,2026-04-09T21:45:14.240824Z,METRIC_MEM:,13711.62,MB
-DEFAULT,2026-04-09T21:45:15.241266Z,METRIC_MEM:,13820.56,MB
-DEFAULT,2026-04-09T21:45:16.241645Z,METRIC_MEM:,13937.32,MB
-DEFAULT,2026-04-09T21:45:17.242003Z,METRIC_MEM:,14028.76,MB
-DEFAULT,2026-04-09T21:45:18.242372Z,METRIC_MEM:,14126.38,MB
-DEFAULT,2026-04-09T21:45:19.245015Z,METRIC_MEM:,14220.46,MB
-DEFAULT,2026-04-09T21:45:20.243002Z,METRIC_MEM:,14323.86,MB
-DEFAULT,2026-04-09T21:45:21.245997Z,METRIC_MEM:,14424.54,MB
-DEFAULT,2026-04-09T21:45:22.247559Z,METRIC_MEM:,7915.41,MB
-DEFAULT,2026-04-09T21:45:23.246817Z,METRIC_MEM:,8489.96,MB
-DEFAULT,2026-04-09T21:45:24.247168Z,METRIC_MEM:,8911.7,MB
-DEFAULT,2026-04-09T21:45:25.249494Z,METRIC_MEM:,9354.19,MB
-DEFAULT,2026-04-09T21:45:26.249975Z,METRIC_MEM:,9780.51,MB
-DEFAULT,2026-04-09T21:45:27.250390Z,METRIC_MEM:,10217.01,MB
-DEFAULT,2026-04-09T21:45:28.250624Z,METRIC_MEM:,10676.55,MB
-DEFAULT,2026-04-09T21:45:29.250558Z,METRIC_MEM:,11116.09,MB
-DEFAULT,2026-04-09T21:45:30.250662Z,METRIC_MEM:,11445.64,MB
-DEFAULT,2026-04-09T21:45:31.250691Z,METRIC_MEM:,11473.34,MB
-DEFAULT,2026-04-09T21:45:32.256485Z,METRIC_MEM:,10635.77,MB
-DEFAULT,2026-04-09T21:45:33.256709Z,METRIC_MEM:,9477.71,MB
-DEFAULT,2026-04-09T21:45:34.256519Z,METRIC_MEM:,8338.32,MB
-DEFAULT,2026-04-09T21:45:35.260632Z,METRIC_MEM:,8521.45,MB
-DEFAULT,2026-04-09T21:45:36.260800Z,METRIC_MEM:,8241.01,MB
-DEFAULT,2026-04-09T21:45:37.260872Z,METRIC_MEM:,8706.99,MB
-DEFAULT,2026-04-09T21:45:38.258259Z,METRIC_MEM:,8889.8,MB
-DEFAULT,2026-04-09T21:45:39.258646Z,METRIC_MEM:,9144.98,MB
-DEFAULT,2026-04-09T21:45:40.261191Z,METRIC_MEM:,9366.04,MB
-DEFAULT,2026-04-09T21:45:41.259351Z,METRIC_MEM:,9586.33,MB
-DEFAULT,2026-04-09T21:45:42.259706Z,METRIC_MEM:,9807.19,MB
-DEFAULT,2026-04-09T21:45:43.260068Z,METRIC_MEM:,10050.3,MB
-DEFAULT,2026-04-09T21:45:44.260485Z,METRIC_MEM:,10235.79,MB
-DEFAULT,2026-04-09T21:45:45.260779Z,METRIC_MEM:,10445.2,MB
-DEFAULT,2026-04-09T21:45:46.261136Z,METRIC_MEM:,10634.31,MB
-DEFAULT,2026-04-09T21:45:47.261490Z,METRIC_MEM:,10886.51,MB
-DEFAULT,2026-04-09T21:45:48.265978Z,METRIC_MEM:,11324.49,MB
-DEFAULT,2026-04-09T21:45:49.266059Z,METRIC_MEM:,12017.47,MB
-DEFAULT,2026-04-09T21:45:50.266025Z,METRIC_MEM:,11914.96,MB
-DEFAULT,2026-04-09T21:45:51.266082Z,METRIC_MEM:,11647.12,MB
-DEFAULT,2026-04-09T21:45:52.272392Z,METRIC_MEM:,11648.13,MB
-DEFAULT,2026-04-09T21:45:53.269008Z,METRIC_MEM:,9406.49,MB
-DEFAULT,2026-04-09T21:45:54.269512Z,METRIC_MEM:,9458.97,MB
-DEFAULT,2026-04-09T21:45:55.269918Z,METRIC_MEM:,9560.43,MB
-DEFAULT,2026-04-09T21:45:56.270231Z,METRIC_MEM:,9660.94,MB
-DEFAULT,2026-04-09T21:45:57.270653Z,METRIC_MEM:,9766.71,MB
-DEFAULT,2026-04-09T21:45:58.271050Z,METRIC_MEM:,9849.17,MB
-DEFAULT,2026-04-09T21:45:59.303917Z,METRIC_MEM:,9507.84,MB
-DEFAULT,2026-04-09T21:46:00.313037Z,METRIC_MEM:,9122.2,MB
-DEFAULT,2026-04-09T21:46:01.309538Z,METRIC_MEM:,9528.65,MB
-DEFAULT,2026-04-09T21:46:02.310012Z,METRIC_MEM:,9809.38,MB
-DEFAULT,2026-04-09T21:46:03.310421Z,METRIC_MEM:,10074.81,MB
-DEFAULT,2026-04-09T21:46:04.310900Z,METRIC_MEM:,10459.12,MB
-DEFAULT,2026-04-09T21:46:05.311218Z,METRIC_MEM:,10760.92,MB
-DEFAULT,2026-04-09T21:46:06.311655Z,METRIC_MEM:,11069.43,MB
-DEFAULT,2026-04-09T21:46:07.312043Z,METRIC_MEM:,11441.79,MB
-DEFAULT,2026-04-09T21:46:08.312403Z,METRIC_MEM:,11540.78,MB
-DEFAULT,2026-04-09T21:46:09.312344Z,METRIC_MEM:,11546.04,MB
-DEFAULT,2026-04-09T21:46:10.312541Z,METRIC_MEM:,10711.32,MB
-DEFAULT,2026-04-09T21:46:11.312428Z,METRIC_MEM:,9515.68,MB
-DEFAULT,2026-04-09T21:46:12.312786Z,METRIC_MEM:,8679.4,MB
-DEFAULT,2026-04-09T21:46:13.313152Z,METRIC_MEM:,8724.61,MB
-DEFAULT,2026-04-09T21:46:14.313501Z,METRIC_MEM:,8833.48,MB
-DEFAULT,2026-04-09T21:46:15.313877Z,METRIC_MEM:,8843.11,MB
-DEFAULT,2026-04-09T21:46:16.314200Z,METRIC_MEM:,8844.81,MB
-DEFAULT,2026-04-09T21:46:17.315061Z,METRIC_MEM:,8844.75,MB
-DEFAULT,2026-04-09T21:46:18.315452Z,METRIC_MEM:,8845.94,MB
-DEFAULT,2026-04-09T21:46:19.315830Z,METRIC_MEM:,8850.8,MB
-DEFAULT,2026-04-09T21:46:20.316259Z,METRIC_MEM:,8860.11,MB
-DEFAULT,2026-04-09T21:46:21.316618Z,METRIC_MEM:,8866.31,MB
-DEFAULT,2026-04-09T21:46:22.316957Z,METRIC_MEM:,8858.84,MB
-DEFAULT,2026-04-09T21:46:23.317325Z,METRIC_MEM:,8746.22,MB
\ No newline at end of file
diff --git a/assets/benchmarks/polars/40mrows_dataset_stats_log.csv b/assets/benchmarks/polars/40mrows_dataset_stats_log.csv
new file mode 100644
index 0000000..c4c326d
--- /dev/null
+++ b/assets/benchmarks/polars/40mrows_dataset_stats_log.csv
@@ -0,0 +1,131 @@
+view,timestamp,logger,memory,unit
+DEFAULT,2026-04-24T06:06:58.207798Z,METRIC_MEM:,434.28,MB
+DEFAULT,2026-04-24T06:06:59.207850Z,METRIC_MEM:,509.68,MB
+DEFAULT,2026-04-24T06:07:00.208299Z,METRIC_MEM:,831.41,MB
+DEFAULT,2026-04-24T06:07:01.208631Z,METRIC_MEM:,961.37,MB
+DEFAULT,2026-04-24T06:07:02.209024Z,METRIC_MEM:,1103.34,MB
+DEFAULT,2026-04-24T06:07:03.209345Z,METRIC_MEM:,1188.03,MB
+DEFAULT,2026-04-24T06:07:04.209737Z,METRIC_MEM:,1449.39,MB
+DEFAULT,2026-04-24T06:07:05.210167Z,METRIC_MEM:,1590.39,MB
+DEFAULT,2026-04-24T06:07:06.210532Z,METRIC_MEM:,1771.91,MB
+DEFAULT,2026-04-24T06:07:07.210839Z,METRIC_MEM:,1914.38,MB
+DEFAULT,2026-04-24T06:07:08.211239Z,METRIC_MEM:,2056.89,MB
+DEFAULT,2026-04-24T06:07:09.211568Z,METRIC_MEM:,2175.38,MB
+DEFAULT,2026-04-24T06:07:10.212214Z,METRIC_MEM:,2356.97,MB
+DEFAULT,2026-04-24T06:07:11.212420Z,METRIC_MEM:,2531.37,MB
+DEFAULT,2026-04-24T06:07:12.212775Z,METRIC_MEM:,2673.6,MB
+DEFAULT,2026-04-24T06:07:13.223192Z,METRIC_MEM:,3255.59,MB
+DEFAULT,2026-04-24T06:07:14.223502Z,METRIC_MEM:,4054.9,MB
+DEFAULT,2026-04-24T06:07:15.223724Z,METRIC_MEM:,4033.88,MB
+DEFAULT,2026-04-24T06:07:16.224079Z,METRIC_MEM:,4035.61,MB
+DEFAULT,2026-04-24T06:07:17.223893Z,METRIC_MEM:,4035.61,MB
+DEFAULT,2026-04-24T06:07:18.223768Z,METRIC_MEM:,4035.61,MB
+DEFAULT,2026-04-24T06:07:19.223720Z,METRIC_MEM:,4037.49,MB
+DEFAULT,2026-04-24T06:07:20.223688Z,METRIC_MEM:,4037.49,MB
+DEFAULT,2026-04-24T06:07:21.224059Z,METRIC_MEM:,4037.97,MB
+DEFAULT,2026-04-24T06:07:22.224378Z,METRIC_MEM:,4037.97,MB
+DEFAULT,2026-04-24T06:07:23.224735Z,METRIC_MEM:,4037.97,MB
+DEFAULT,2026-04-24T06:07:24.225030Z,METRIC_MEM:,4038.95,MB
+DEFAULT,2026-04-24T06:07:25.226138Z,METRIC_MEM:,4038.95,MB
+DEFAULT,2026-04-24T06:07:26.226603Z,METRIC_MEM:,4038.89,MB
+DEFAULT,2026-04-24T06:07:27.226915Z,METRIC_MEM:,4038.1,MB
+DEFAULT,2026-04-24T06:07:28.227237Z,METRIC_MEM:,4037.22,MB
+DEFAULT,2026-04-24T06:07:29.227583Z,METRIC_MEM:,4038.28,MB
+DEFAULT,2026-04-24T06:07:30.227920Z,METRIC_MEM:,4037.22,MB
+DEFAULT,2026-04-24T06:07:31.228230Z,METRIC_MEM:,4037.38,MB
+DEFAULT,2026-04-24T06:07:32.228553Z,METRIC_MEM:,4041.57,MB
+DEFAULT,2026-04-24T06:07:33.228834Z,METRIC_MEM:,4041.36,MB
+DEFAULT,2026-04-24T06:07:34.229175Z,METRIC_MEM:,4041.36,MB
+DEFAULT,2026-04-24T06:07:35.229526Z,METRIC_MEM:,4042.84,MB
+DEFAULT,2026-04-24T06:07:36.229733Z,METRIC_MEM:,4042.84,MB
+DEFAULT,2026-04-24T06:07:37.229503Z,METRIC_MEM:,4046.88,MB
+DEFAULT,2026-04-24T06:07:38.229947Z,METRIC_MEM:,4100.14,MB
+DEFAULT,2026-04-24T06:07:39.229831Z,METRIC_MEM:,4143.25,MB
+DEFAULT,2026-04-24T06:07:40.230001Z,METRIC_MEM:,4165.57,MB
+DEFAULT,2026-04-24T06:07:41.230428Z,METRIC_MEM:,4168.71,MB
+DEFAULT,2026-04-24T06:07:42.250201Z,METRIC_MEM:,4193.07,MB
+DEFAULT,2026-04-24T06:07:43.253966Z,METRIC_MEM:,4207.2,MB
+DEFAULT,2026-04-24T06:07:44.250842Z,METRIC_MEM:,4206.1,MB
+DEFAULT,2026-04-24T06:07:45.251184Z,METRIC_MEM:,4256.93,MB
+DEFAULT,2026-04-24T06:07:46.251637Z,METRIC_MEM:,4257.46,MB
+DEFAULT,2026-04-24T06:07:47.252061Z,METRIC_MEM:,4273.06,MB
+DEFAULT,2026-04-24T06:07:48.252264Z,METRIC_MEM:,4277.84,MB
+DEFAULT,2026-04-24T06:07:49.252628Z,METRIC_MEM:,4287.83,MB
+DEFAULT,2026-04-24T06:07:50.252933Z,METRIC_MEM:,4299.75,MB
+DEFAULT,2026-04-24T06:07:51.253352Z,METRIC_MEM:,4305.93,MB
+DEFAULT,2026-04-24T06:07:52.253787Z,METRIC_MEM:,4339.72,MB
+DEFAULT,2026-04-24T06:07:53.262896Z,METRIC_MEM:,4354.3,MB
+DEFAULT,2026-04-24T06:07:54.259356Z,METRIC_MEM:,4362.78,MB
+DEFAULT,2026-04-24T06:07:55.259685Z,METRIC_MEM:,4385,MB
+DEFAULT,2026-04-24T06:07:56.259922Z,METRIC_MEM:,4385.66,MB
+DEFAULT,2026-04-24T06:07:57.259775Z,METRIC_MEM:,4409.23,MB
+DEFAULT,2026-04-24T06:07:58.259651Z,METRIC_MEM:,4411.26,MB
+DEFAULT,2026-04-24T06:07:59.259557Z,METRIC_MEM:,4434.75,MB
+DEFAULT,2026-04-24T06:08:00.259799Z,METRIC_MEM:,4459.57,MB
+DEFAULT,2026-04-24T06:08:01.262089Z,METRIC_MEM:,4492.48,MB
+DEFAULT,2026-04-24T06:08:02.262503Z,METRIC_MEM:,4493.22,MB
+DEFAULT,2026-04-24T06:08:03.262963Z,METRIC_MEM:,4506.8,MB
+DEFAULT,2026-04-24T06:08:04.263388Z,METRIC_MEM:,4540.05,MB
+DEFAULT,2026-04-24T06:08:05.263680Z,METRIC_MEM:,4623.99,MB
+DEFAULT,2026-04-24T06:08:06.264028Z,METRIC_MEM:,4698.93,MB
+DEFAULT,2026-04-24T06:08:07.264313Z,METRIC_MEM:,4770.75,MB
+DEFAULT,2026-04-24T06:08:08.264810Z,METRIC_MEM:,4872.98,MB
+DEFAULT,2026-04-24T06:08:09.265137Z,METRIC_MEM:,5062.45,MB
+DEFAULT,2026-04-24T06:08:10.265502Z,METRIC_MEM:,5209.61,MB
+DEFAULT,2026-04-24T06:08:11.265831Z,METRIC_MEM:,5337.89,MB
+DEFAULT,2026-04-24T06:08:12.270969Z,METRIC_MEM:,5616.41,MB
+DEFAULT,2026-04-24T06:08:13.271402Z,METRIC_MEM:,6073.16,MB
+DEFAULT,2026-04-24T06:08:14.271755Z,METRIC_MEM:,6514.41,MB
+DEFAULT,2026-04-24T06:08:15.272127Z,METRIC_MEM:,6931.82,MB
+DEFAULT,2026-04-24T06:08:16.273910Z,METRIC_MEM:,7545.53,MB
+DEFAULT,2026-04-24T06:08:17.273802Z,METRIC_MEM:,7579.41,MB
+DEFAULT,2026-04-24T06:08:18.273648Z,METRIC_MEM:,7575.39,MB
+DEFAULT,2026-04-24T06:08:19.273546Z,METRIC_MEM:,7581.73,MB
+DEFAULT,2026-04-24T06:08:20.273690Z,METRIC_MEM:,7569.69,MB
+DEFAULT,2026-04-24T06:08:21.274034Z,METRIC_MEM:,7586.38,MB
+DEFAULT,2026-04-24T06:08:22.274427Z,METRIC_MEM:,7584.85,MB
+DEFAULT,2026-04-24T06:08:23.274720Z,METRIC_MEM:,7565.12,MB
+DEFAULT,2026-04-24T06:08:24.275157Z,METRIC_MEM:,7512.23,MB
+DEFAULT,2026-04-24T06:08:25.275555Z,METRIC_MEM:,7295.88,MB
+DEFAULT,2026-04-24T06:08:26.276061Z,METRIC_MEM:,7148.72,MB
+DEFAULT,2026-04-24T06:08:27.276404Z,METRIC_MEM:,6975.2,MB
+DEFAULT,2026-04-24T06:08:28.277060Z,METRIC_MEM:,6634.11,MB
+DEFAULT,2026-04-24T06:08:29.277427Z,METRIC_MEM:,6463.36,MB
+DEFAULT,2026-04-24T06:08:30.277957Z,METRIC_MEM:,6270.87,MB
+DEFAULT,2026-04-24T06:08:31.278482Z,METRIC_MEM:,6107.61,MB
+DEFAULT,2026-04-24T06:08:32.279156Z,METRIC_MEM:,5950.92,MB
+DEFAULT,2026-04-24T06:08:33.279385Z,METRIC_MEM:,5947.23,MB
+DEFAULT,2026-04-24T06:08:34.279685Z,METRIC_MEM:,5924.39,MB
+DEFAULT,2026-04-24T06:08:35.286375Z,METRIC_MEM:,5910.64,MB
+DEFAULT,2026-04-24T06:08:36.283635Z,METRIC_MEM:,6774.96,MB
+DEFAULT,2026-04-24T06:08:37.283545Z,METRIC_MEM:,6806.27,MB
+DEFAULT,2026-04-24T06:08:38.283430Z,METRIC_MEM:,6821.47,MB
+DEFAULT,2026-04-24T06:08:39.283389Z,METRIC_MEM:,6821.45,MB
+DEFAULT,2026-04-24T06:08:40.283475Z,METRIC_MEM:,6897.86,MB
+DEFAULT,2026-04-24T06:08:41.284863Z,METRIC_MEM:,6937.82,MB
+DEFAULT,2026-04-24T06:08:42.284236Z,METRIC_MEM:,6847.93,MB
+DEFAULT,2026-04-24T06:08:43.284623Z,METRIC_MEM:,6862.59,MB
+DEFAULT,2026-04-24T06:08:44.285121Z,METRIC_MEM:,6866.46,MB
+DEFAULT,2026-04-24T06:08:45.285655Z,METRIC_MEM:,6864.15,MB
+DEFAULT,2026-04-24T06:08:46.286183Z,METRIC_MEM:,6852.66,MB
+DEFAULT,2026-04-24T06:08:47.286737Z,METRIC_MEM:,6906.88,MB
+DEFAULT,2026-04-24T06:08:48.287239Z,METRIC_MEM:,6609.81,MB
+DEFAULT,2026-04-24T06:08:49.294612Z,METRIC_MEM:,6459.63,MB
+DEFAULT,2026-04-24T06:08:50.291134Z,METRIC_MEM:,6522.66,MB
+DEFAULT,2026-04-24T06:08:51.291500Z,METRIC_MEM:,6507.25,MB
+DEFAULT,2026-04-24T06:08:52.291873Z,METRIC_MEM:,6512.25,MB
+DEFAULT,2026-04-24T06:08:53.292217Z,METRIC_MEM:,6523.08,MB
+DEFAULT,2026-04-24T06:08:54.292612Z,METRIC_MEM:,6523.29,MB
+DEFAULT,2026-04-24T06:08:55.292877Z,METRIC_MEM:,6431,MB
+DEFAULT,2026-04-24T06:08:56.293090Z,METRIC_MEM:,6451.77,MB
+DEFAULT,2026-04-24T06:08:57.293038Z,METRIC_MEM:,6456.9,MB
+DEFAULT,2026-04-24T06:08:58.292924Z,METRIC_MEM:,6453.93,MB
+DEFAULT,2026-04-24T06:08:59.292781Z,METRIC_MEM:,6450.25,MB
+DEFAULT,2026-04-24T06:09:00.292920Z,METRIC_MEM:,6452.43,MB
+DEFAULT,2026-04-24T06:09:01.293289Z,METRIC_MEM:,6449.31,MB
+DEFAULT,2026-04-24T06:09:02.293620Z,METRIC_MEM:,6292.76,MB
+DEFAULT,2026-04-24T06:09:03.293983Z,METRIC_MEM:,6290.23,MB
+DEFAULT,2026-04-24T06:09:04.294267Z,METRIC_MEM:,6285.38,MB
+DEFAULT,2026-04-24T06:09:05.294589Z,METRIC_MEM:,6281.16,MB
+DEFAULT,2026-04-24T06:09:06.294899Z,METRIC_MEM:,6274.43,MB
+DEFAULT,2026-04-24T06:09:07.295234Z,METRIC_MEM:,6274.39,MB
\ No newline at end of file
diff --git a/assets/benchmarks/polars/README.md b/assets/benchmarks/polars/README.md
index 2de9c2f..d79caab 100644
--- a/assets/benchmarks/polars/README.md
+++ b/assets/benchmarks/polars/README.md
@@ -2,7 +2,7 @@
 
 This section details the methodology used to capture the memory metrics in the [`GCP Stress-Test Metrics (Scaling Efficiency)`](/README.md#gcp-stress-test-metrics-scaling-efficiency)
 
-The telemetry logger below was added **temporarily** to the orchestrator for a specific benchmarking run. This code was pushed directly to the Cloud Artifact Registry as an experimental image tag (`mem-record`) and is not part of the permanent git repository history.
+The telemetry logger below was added to the orchestrator for a specific benchmarking run. 
 
 ```python
 import psutil
@@ -28,12 +28,16 @@ finally:
     stop_event.set()
     logger_thread.join()
 ```
-Since `psutil` requires C-extensions to compile, the **Dockerfile** was modified to include the necessary build tools and the package itself. This allowed for benchmarking without altering the project's permanent `requirements.txt`.
+Since `psutil` requires C-extensions to compile, the **Dockerfile** was modified to include the necessary build tools and the package itself. This allowed for benchmarking without altering the project's permanent [`requirements.txt`](/data_pipeline/requirements.txt).
 
 ```docker
 FROM python:3.11-slim
 ENV # Environments...
 
+WORKDIR /app
+
+COPY data_pipeline/requirements.txt .
+
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     gcc \
@@ -42,7 +46,8 @@ RUN apt-get update && \
     apt-get purge -y --auto-remove gcc python3-dev && \
     rm -rf /var/lib/apt/lists/*
 
-WORKDIR /app
+COPY data_pipeline/ ./data_pipeline/
+ENV PYTHONPATH=/app
 
 # the rest of docker code...
 
diff --git a/assets/diagrams/01-pipeline-orchestration-diagram.png b/assets/diagrams/01-pipeline-orchestration-diagram.png
index 26f3237..0b98f9b 100644
Binary files a/assets/diagrams/01-pipeline-orchestration-diagram.png and b/assets/diagrams/01-pipeline-orchestration-diagram.png differ
diff --git a/assets/diagrams/03-contract-stage-diagram.png b/assets/diagrams/03-contract-stage-diagram.png
index 64a2b90..51d4236 100644
Binary files a/assets/diagrams/03-contract-stage-diagram.png and b/assets/diagrams/03-contract-stage-diagram.png differ
diff --git a/assets/screenshots/engine-performance-16gb-4cpu.png b/assets/screenshots/engine-performance-16gb-4cpu.png
deleted file mode 100644
index dbf638b..0000000
Binary files a/assets/screenshots/engine-performance-16gb-4cpu.png and /dev/null differ
diff --git a/assets/screenshots/engine-performance-8gb-2cpu.png b/assets/screenshots/engine-performance-8gb-2cpu.png
deleted file mode 100644
index 8129e52..0000000
Binary files a/assets/screenshots/engine-performance-8gb-2cpu.png and /dev/null differ
diff --git a/assets/screenshots/engine-performance-8gb-4cpu.png b/assets/screenshots/engine-performance-8gb-4cpu.png
new file mode 100644
index 0000000..c7b5952
Binary files /dev/null and b/assets/screenshots/engine-performance-8gb-4cpu.png differ
diff --git a/data/README.md b/data/README.md
index 7ad4e8c..85adc48 100644
--- a/data/README.md
+++ b/data/README.md
@@ -1,21 +1,32 @@
 # Data & Synthetic Benchmarks
 
-This directory serves as the local state provider for the pipeline when executing in a non-cloud environment. It mimics the structure of the Google Cloud Storage (GCS) buckets, allowing for high-fidelity local simulation and performance benchmarking.
+This directory serves as the local state provider for the pipeline when executing in a non-cloud environment. It mimics the structure of the Google Cloud Storage (GCS) buckets.
 
 ## Synthetic Dataset
-To replicate the high-volume environment described in the [GCP Stress-Test Metrics (Scaling Efficiency)](/README.md#gcp-stress-test-metrics-scaling-efficiency) section, you can download the 36M-row synthetic dataset here: [**Kaggle Dataset Link**](https://www.kaggle.com/datasets/melvidabryan/e-commerce-synthetic-dataset)
+To replicate the high-volume environment described in the [GCP Stress-Test Metrics (Scaling Efficiency)](/README.md#gcp-stress-test-metrics-scaling-efficiency) section, you can download the 40M-row synthetic dataset here: [**Kaggle Dataset Link**](https://www.kaggle.com/datasets/melvidabryan/e-commerce-synthetic-dataset)
 
->*Note: This upload contains the **Contracted Version** of the dataset. The original "Raw" state—totaling approximately 24GB of unrefined CSVs was omitted to prioritize transfer efficiency.*
+> *Note: This upload contains the **Contracted Version** of the dataset. The original "Raw" state, totaling approximately 26 GB of unrefined CSVs, was omitted to prioritize transfer efficiency.*
 
-### File Structure & Purpose
-The dataset is divided into two primary directories to facilitate different stages of pipeline testing:
+## File Structure & Purpose
+The dataset is divided into three primary directories to facilitate different stages of pipeline testing:
 
 | Directory | Files | Description |
 | :--- | :--- | :--- |
-| `contracted/` | 110 files | **Production-Scale Test:** The full 36M row dataset (~4.04 GB) formatted to strict enterprise schema requirements. |
-| `raw/` | 5 files | **Delta Sample (Validation):** Small-scale samples (~10k rows each) representing **daily incoming deltas**. These files are intentionally "noisy" to exhibit the full range of injected data quality errors. |
+| `contracted/` | 125 files | **Production-Scale Test:** The full 40M-row dataset (~5.34 GB) formatted to strict schema requirements. |
+| `id_mapping/customer_id/` | 1 file | **Metadata Registry:** Central lookup mapping Customer UUIDs to Uint32 surrogate keys.  |
+| `id_mapping/order_id/` | 40 files | **Metadata Registry (Sharded):** Fragmented lookup (40M+ keys) to test high-cardinality ID resolution. |
+| `id_mapping/product_id/` | 1 file | **Metadata Registry:**  Central lookup mapping Product UUIDs to Uint32 surrogate keys. |
+| `id_mapping/seller_id/` | 1 file |  **Metadata Registry:** Central lookup mapping Seller UUIDs to Uint32 surrogate keys. |
+| `raw/` | 5 files | **Delta Sample (Validation):** Small-scale samples (~20k rows each) representing **daily incoming deltas**. These files are intentionally "noisy" to exhibit the full range of injected data quality errors. |
 
-### Included Tables
+---
+
+### ID Mapping & Surrogate Key Simulation
+The `id_mapping/` directory acts as a simulated metadata registrar for surrogate key generation. The pipeline utilizes these registries to resolve raw source UUIDs into memory-efficient Uint32 identifiers while enforcing global deduplication and referential integrity.
+
+To benchmark ***[`mapping`](/data_pipeline/contract/id_registrar.py) throughput and memory footprint***, the `order_id` registry is partitioned into 40 sharded files (1M rows each). This fragmentation simulates the ingestion pressure of high-cardinality transactional data (40M+ unique keys) on serverless compute. Dimension-level registries (Customer, Product, Seller) remain unfragmented, as their lower cardinality is insufficient to trigger the resource-exhaustion thresholds required for these performance benchmarks.
+
+## Included Tables
 
 The dataset provides a complete relational snapshot of an e-commerce ecosystem:
 
@@ -28,7 +39,8 @@ The dataset provides a complete relational snapshot of an e-commerce ecosystem:
 ## Local Execution Setup
 1.  Extract the downloaded dataset archive.
 2.  Copy the `raw/` and `contracted/` directories into this `data/` folder.
-3.  The `RunContext` manager is configured to strictly recognize `.parquet` and `.csv` extensions; all other file types are ignored to prevent ingestion noise.
+3.  Use the commented-out local path in [`RunContext.create()`](../data_pipeline/shared/run_context.py#L62).
+4.  The `RunContext` manager is configured to strictly recognize `.parquet` and `.csv` extensions; all other file types are ignored to prevent ingestion noise.
 
 **Execute the local pipeline:**
 ```
diff --git a/data_extract/shared/utils.py b/data_extract/shared/utils.py
index 8d606ef..d5a1f00 100644
--- a/data_extract/shared/utils.py
+++ b/data_extract/shared/utils.py
@@ -14,7 +14,6 @@
 from datetime import datetime as dt
 from zoneinfo import ZoneInfo
 
-
 GoogleDriveService: TypeAlias = Any
 
 # ------------------------------------------------------------
diff --git a/data_pipeline/Dockerfile b/data_pipeline/Dockerfile
index 78c327f..6fdcc9f 100644
--- a/data_pipeline/Dockerfile
+++ b/data_pipeline/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.11-slim
+FROM python:3.12-slim
 
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
@@ -7,8 +7,18 @@ WORKDIR /app
 
 COPY data_pipeline/requirements.txt .
 
-RUN pip install --no-cache-dir -r requirements.txt
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    gcc \
+    python3-dev && \
+    pip install --no-cache-dir -r requirements.txt && \
+    apt-get purge -y --auto-remove gcc python3-dev && \
+    rm -rf /var/lib/apt/lists/*
+
 
 COPY data_pipeline/ ./data_pipeline/
 
+
+ENV PYTHONPATH=/app
+
 CMD ["python", "-m", "data_pipeline.run_pipeline"]
\ No newline at end of file
diff --git a/data_pipeline/assembly/assembly_executor.py b/data_pipeline/assembly/assembly_executor.py
index e088b60..e754945 100644
--- a/data_pipeline/assembly/assembly_executor.py
+++ b/data_pipeline/assembly/assembly_executor.py
@@ -3,11 +3,15 @@
 # =============================================================================
 
 import gc
+from typing import Dict
 import ctypes
 import platform
-from typing import Dict
 from data_pipeline.shared.run_context import RunContext
-from data_pipeline.shared.loader_exporter import load_historical_table, export_file
+from data_pipeline.shared.loader_exporter import (
+    load_historical_data,
+    scan_gcs_uris_from_bigquery,
+    export_file,
+)
 from data_pipeline.shared.modeling_configs import DIMENSION_REFERENCES
 from data_pipeline.assembly.assembly_logic import (
     init_report,
@@ -141,8 +145,14 @@ def orchestrate_event_assembly(run_context: RunContext, report: Dict) -> bool:
 
     except Exception as e:
         log_error(f"Unexpected error processing event assembly: {e}", report)
+        report["status"] = "failed"
+        return False
 
     finally:
+        if "lf_derived" in locals():
+            del lf_derived  # type: ignore
+        if "lf_freezed" in locals():
+            del lf_freezed
         force_gc()
 
     return True
@@ -178,21 +188,26 @@ def orchestrate_dimension_refs(run_context: RunContext, report: Dict) -> bool:
         report[table] = {"dim_reference": False, "export": False}
         tracker = report[table]
 
-        lf_raw = None
-        df_dim = None
-
         try:
-            lf_raw = load_historical_table(
-                run_context.contracted_path,
-                table,
-                log_info=lambda msg: loaded_data(msg, report),
-            )
+            # Switch between local and gcp IO
+            if run_context.bq_project_id == "PROJECT_ID_NOT_DETECTED":
+                lf_raw = load_historical_data(
+                    base_path=run_context.storage_contracted_path, table_name=table
+                )
+            else:
+                lf_raw = scan_gcs_uris_from_bigquery(
+                    project_id=run_context.bq_project_id,
+                    dataset_id=run_context.bq_dataset_id,
+                    table_id=table,
+                    log_info=lambda msg: loaded_data(msg, report),
+                )
 
             if lf_raw is None:
                 return False
 
             primary_key = config.get("primary_key", [])
             require_col = config.get("required_column", [])
+            dtypes = config.get("dtypes", {})
 
             ok, df_dim = task_wrapper(
                 report=report,
@@ -202,6 +217,7 @@ def orchestrate_dimension_refs(run_context: RunContext, report: Dict) -> bool:
                 lf=lf_raw,
                 primary_key=primary_key,
                 req_column=require_col,
+                dtypes=dtypes,
             )
 
             if not ok:
@@ -222,21 +238,21 @@ def orchestrate_dimension_refs(run_context: RunContext, report: Dict) -> bool:
             log_info(f"Export dimension reference:{table} successfully", report)
 
         except FileNotFoundError as e:
-            log_error(f"File not found for dimension table {table}: {str(e)}", report)
+            log_error(f"File not found for dimension table {table}: {e}", report)
 
             return False
 
         except Exception as e:
             log_error(
-                f"Unexpected error processing dimension table {table}: {str(e)}", report
+                f"Unexpected error processing dimension table {table}: {e}", report
             )
             return False
 
         finally:
-            if lf_raw is not None:
-                del lf_raw
-            if df_dim is not None:
-                del df_dim
+            if "lf_raw" in locals():
+                del lf_raw  # type: ignore
+            if "df_dim" in locals():
+                del df_dim  # type: ignore
             gc.collect()
 
     return True
diff --git a/data_pipeline/assembly/assembly_logic.py b/data_pipeline/assembly/assembly_logic.py
index 08303e2..86d2325 100644
--- a/data_pipeline/assembly/assembly_logic.py
+++ b/data_pipeline/assembly/assembly_logic.py
@@ -6,7 +6,10 @@
 from pathlib import Path
 from typing import Dict, Callable, Any, List
 from data_pipeline.shared.run_context import RunContext
-from data_pipeline.shared.loader_exporter import load_historical_table
+from data_pipeline.shared.loader_exporter import (
+    load_historical_data,
+    scan_gcs_uris_from_bigquery,
+)
 from data_pipeline.shared.modeling_configs import ASSEMBLE_SCHEMA, ASSEMBLE_DTYPES
 
 EVENT_TABLES = ["df_orders", "df_order_items", "df_payments"]
@@ -47,20 +50,18 @@ def loaded_data(message: str, report: Dict[str, list[str]]) -> None:
 
 def merge_data(tables: Dict) -> pl.LazyFrame:
     """
-    Core event assembly join and grain enforcement using Hash-Join optimization.
+    Core event assembly and grain enforcement using the Primitive Integer Pipeline.
 
     Contract:
-    - Inner joins 'df_orders' with 'df_order_items' to ensure analytical relevance.
-    - Left joins 'df_payments' to capture financial metadata.
-    - Subtractive Filtering: Discards orders lacking corresponding item records.
+    - Integer-Join: Leverages pre-mapped UInt32/UInt64 IDs (order_id_int) to execute memory-efficient joins.
+    - Grain Enforcement: Ensures a strict 1:1 analytical grain through pre-aggregation of children.
 
     Optimization Logic:
-    - Hash-Join: Maps high-cardinality UUIDs to UInt64 hashes to reduce Join Hash Table memory.
-    - Pre-aggregation: Sums payments and deduplicates items BEFORE joining to guarantee a strict 1:1 grain and prevent Cartesian row explosions.
+    - Primitive Integer Pipeline: Eliminates 36-byte UUID string overhead in Hash Tables, reducing memory footprint by >60%.
     - Early Projection: Selects required columns at the source to minimize join width.
 
     Invariants:
-    - Dataset Grain: Strictly one row per 'order_id'.
+    - Dataset Grain: Strictly one row per 'order_id_int'.
 
     Outputs:
     - Merged LazyFrame containing joined order, item, and payment data.
@@ -72,8 +73,8 @@ def merge_data(tables: Dict) -> pl.LazyFrame:
     pl.enable_string_cache()
 
     col_orders = [
-        "order_id",
-        "customer_id",
+        "order_id_int",
+        "customer_id_int",
         "order_status",
         "order_purchase_timestamp",
         "order_approved_at",
@@ -84,22 +85,17 @@ def merge_data(tables: Dict) -> pl.LazyFrame:
     # Pre-aggregate Tables
     lf_payments_agg = (
         tables["df_payments"]
-        .with_columns(join_key=pl.col("order_id").hash())
-        .group_by("join_key")
+        .group_by("order_id_int")
         .agg(order_revenue=pl.col("payment_value").sum())
     )
 
     lf_items_agg = (
         tables["df_order_items"]
-        .with_columns(
-            join_key=pl.col("order_id").hash(),
-            product_id=pl.col("product_id").cast(pl.Categorical),
-            seller_id=pl.col("seller_id").cast(pl.Categorical),
-        )
-        .group_by("join_key")
+        .select(["order_id_int", "product_id_int", "seller_id_int"])
+        .group_by("order_id_int")
         .agg(
-            product_id=pl.col("product_id").first(),
-            seller_id=pl.col("seller_id").first(),
+            product_id_int=pl.col("product_id_int").first(),
+            seller_id_int=pl.col("seller_id_int").first(),
         )
     )
 
@@ -107,15 +103,12 @@ def merge_data(tables: Dict) -> pl.LazyFrame:
         tables["df_orders"]
         .select(col_orders)
         .with_columns(
-            join_key=pl.col("order_id").hash(),
             order_status=pl.col("order_status").cast(pl.Categorical),
         )
     )
 
-    df_merged = (
-        lf_orders.join(lf_items_agg, on="join_key", how="inner")
-        .join(lf_payments_agg, on="join_key", how="left")
-        .drop("join_key")
+    df_merged = lf_orders.join(lf_items_agg, on="order_id_int", how="inner").join(
+        lf_payments_agg, on="order_id_int", how="left"
     )
 
     return df_merged
@@ -161,7 +154,7 @@ def derive_fields(lf: pl.LazyFrame) -> pl.LazyFrame:
         )
         .dt.total_days()
         .cast(pl.Int16),
-        order_date=pl.col("order_purchase_timestamp").dt.date(),
+        order_date=pl.col("order_purchase_timestamp").dt.date().cast(pl.Datetime("us")),
         order_year_week=pl.col("order_purchase_timestamp")
         .dt.strftime("%G-W%V")
         .cast(pl.Categorical),
@@ -190,15 +183,24 @@ def freeze_schema(lf: pl.LazyFrame) -> pl.LazyFrame:
     Failures:
     - [Structural] Raises RuntimeError if input frame lacks columns required by 'ASSEMBLE_SCHEMA'.
     """
+
     current_columns = lf.collect_schema().names()
 
     missing_cols = set(ASSEMBLE_SCHEMA) - set(current_columns)
     if missing_cols:
         raise RuntimeError(f"missing required columns: {sorted(missing_cols)}")
 
-    lf_contract = lf.select(ASSEMBLE_SCHEMA).cast(pl.Schema(ASSEMBLE_DTYPES))
+    lf_contract = lf.select(ASSEMBLE_SCHEMA)
+
+    datetime_cols = [
+        col for col, dtype in ASSEMBLE_DTYPES.items() if isinstance(dtype, pl.Datetime)
+    ]
+
+    lf_contract = lf_contract.with_columns(
+        [pl.col(col).dt.cast_time_unit("us") for col in datetime_cols]
+    )
 
-    return lf_contract
+    return lf_contract.cast(pl.Schema(ASSEMBLE_DTYPES))
 
 
 # ------------------------------------------------------------
@@ -210,12 +212,14 @@ def dimension_references(
     lf: pl.LazyFrame,
     primary_key: list[str],
     req_column: list[str],
+    dtypes: dict,
 ) -> pl.LazyFrame:
     """
     Extracts a unique reference dataset from a historical source.
 
     Contract:
     - Subtractive Filtering: Selects specified 'req_column' set and enforces uniqueness.
+    - Type Enforcement: Casts columns to the formats defined in the provided 'dtypes' schema.
 
     Invariants:
     - Dataset Grain: Strictly one row per 'primary_key'.
@@ -227,7 +231,7 @@ def dimension_references(
     - [Structural] Crashes if input LazyFrame lacks 'primary_key' or 'req_column'.
     """
 
-    lf_dim = lf.select(req_column).unique(subset=primary_key)
+    lf_dim = lf.select(req_column).unique(subset=primary_key).cast(pl.Schema(dtypes))
 
     return lf_dim
 
@@ -280,7 +284,7 @@ def task_wrapper(
         return True, result
 
     except Exception as e:
-        log_error(f"Step {step_name} failed: {(e)}", report)
+        log_error(f"Step {step_name} failed: {e}", report)
         status_tracker[step_name] = False
         return False, None
 
@@ -292,10 +296,10 @@ def task_wrapper(
 
 def load_event_table(run_context: RunContext, report: Dict) -> Any:
     """
-    Batch-loads core event tables required for assembly.
+    Batch-loads core event tables required for assembly from local storage or BigQuery.
 
     Contract:
-    - Hydrate: Iterates through EVENT_TABLES and loads Parquet files from 'contracted_path'.
+    - Hydrate: Iterates through EVENT_TABLES, loading each via load_historical_data when no BigQuery project is detected, otherwise via scan_gcs_uris_from_bigquery.
 
     Outputs:
     - Dict keyed by table name containing loaded LazyFrames.
@@ -304,22 +308,31 @@ def load_event_table(run_context: RunContext, report: Dict) -> Any:
     - [Operational] Returns None if any required table is missing or fails to load.
     """
 
-    contracted_path = run_context.contracted_path
     tables = {}
 
     for table_name in EVENT_TABLES:
         try:
-            df = load_historical_table(
-                contracted_path,
-                table_name,
-                log_info=lambda msg: loaded_data(msg, report),
-            )
+            # Switch between local and gcp IO
+            if run_context.bq_project_id == "PROJECT_ID_NOT_DETECTED":
+                df = load_historical_data(
+                    base_path=run_context.storage_contracted_path,
+                    table_name=table_name,
+                    log_info=lambda msg: loaded_data(msg, report),
+                )
+
+            else:
+                df = scan_gcs_uris_from_bigquery(
+                    project_id=run_context.bq_project_id,
+                    dataset_id=run_context.bq_dataset_id,
+                    table_id=table_name,
+                    log_info=lambda msg: loaded_data(msg, report),
+                )
 
             if df is not None:
                 tables[table_name] = df
 
         except Exception as e:
-            log_error(f"Required table {table_name} not found: {e}", report)
+            log_error(f"Required table {table_name} not found : {e}", report)
             return None
 
     if len(tables) < len(EVENT_TABLES):
diff --git a/data_pipeline/contract/contract_executor.py b/data_pipeline/contract/contract_executor.py
index beeecbd..c0a77b8 100644
--- a/data_pipeline/contract/contract_executor.py
+++ b/data_pipeline/contract/contract_executor.py
@@ -2,42 +2,45 @@
 # Contract Stage Executor
 # =============================================================================
 
+import polars as pl
 from data_pipeline.shared.run_context import RunContext
 from data_pipeline.shared.loader_exporter import load_single_delta, export_file
+from data_pipeline.assembly.assembly_executor import force_gc
 from data_pipeline.shared.table_configs import TABLE_CONFIG
 from data_pipeline.contract.registry import ROLE_STEPS
+from data_pipeline.contract.id_registrar import ID_ENTITY_MAP
 
 
 def apply_contract(
     run_context: RunContext,
     table_name: str,
+    master_mappings: dict[str, pl.LazyFrame],
     invalid_order_ids: set | None = None,
     valid_order_ids: set | None = None,
 ) -> tuple[dict, set, set]:
     """
-    Main entry point for the Raw-to-Contracted Stage.
+    Orchestrates the Raw-to-Contracted transformation for a specific logical table.
 
     Workflow:
     1. Resolve: Identifies table metadata (role, schema, keys) from the central registry.
     2. Hydrate: Fetches the raw snapshot from the lake's snapshot zone.
     3. Delegate: Iteratively applies atomic logic rules (Deduplication, Chronology, Null-checks).
     4. Validate: Executes 'enforce_schema' as the terminal structural gate.
-    5. Promote: Persists the contract-compliant dataset to the Silver (contracted) zone.
+    5. Map: Joins against pre-calculated Discovery mappings to enrich UUIDs with UInt32 integer IDs.
+    6. Promote: Persists the contract-compliant dataset to the Silver (contracted) zone.
 
     Operational Guarantees:
     - Subtractive Only: Exclusively filters rows or casts types; never mutates business values.
     - Referential Safety: Propagates invalidated keys across table boundaries to ensure consistent pruning.
-    - Structural Finality: Guarantees output parity with the ASSEMBLE_SCHEMA specification.
+    - Structural Finality: Guarantees output parity with the Silver layer specification, including required integer IDs.
 
     Side Effects:
     - Persists a Parquet artifact to the contracted directory.
-    - Updates newly invalidated 'order_id' sets for downstream cross-table pruning.
+    - Updates invalidated 'order_id' sets for downstream cross-table pruning.
 
     Failure Behavior:
     - Traps logic-step exceptions; logs errors to the report and halts the current table's processing.
-
-    Returns:
-        tuple: (Stage Report Dict, Newly Invalidated IDs Set, Validated Order IDs Set)
+    - Raises RuntimeError if an ID mapping join fails, to prevent downstream schema corruption.
     """
 
     report = {
@@ -67,7 +70,6 @@ def apply_contract(
     if table_name not in TABLE_CONFIG:
         report["status"] = "failed"
         report["errors"].append(f"Unknown table: {table_name}")
-
         return report, invalid_ids, valid_ids
 
     base_path = run_context.raw_snapshot_path
@@ -77,7 +79,6 @@ def apply_contract(
     dtypes = config.get("dtypes", {})
 
     df, filename = load_single_delta(base_path=base_path, table_name=table_name)
-
     if df is None:
         report["status"] = "failed"
         report["errors"].append("Failed to load logical table")
@@ -88,32 +89,26 @@ def apply_contract(
     role = config["role"]
 
     for step in ROLE_STEPS[role]:
-
         contract = step["contract"]
-        args = []
 
-        if "non_nullable" in step["args"]:
-            args.append(non_nullable)
+        args = [
+            non_nullable if "non_nullable" in step["args"] else None,
+            invalid_order_ids if "invalid_order_ids" in step["args"] else None,
+            valid_order_ids if "valid_order_ids" in step["args"] else None,
+        ]
 
-        if "invalid_order_ids" in step["args"]:
-            args.append(invalid_order_ids)
-
-        if "valid_order_ids" in step["args"]:
-            args.append(valid_order_ids)
+        # Remove args not needed for the current registry loop
+        args = [arg for arg in args if arg is not None]
 
         if "required_column" in step["args"]:
-            args.append(required_column)
-            args.append(dtypes)
+            args.extend([required_column, dtypes])
 
         try:
-
             if step["return_invalid_ids"]:
                 df, removed, new_invalid = contract(df, *args)
                 invalid_ids |= new_invalid
-
             else:
                 df, removed = contract(df, *args)
-
             report[step["metric"]] += removed
 
         except Exception as e:
@@ -126,14 +121,33 @@ def apply_contract(
     report["final_rows"] = len(df)
 
     if table_name == "df_orders":
-        valid_ids = set(df["order_id"])
+        valid_ids = set(df.get_column("order_id"))
 
-    output_path = run_context.contracted_path / f"{filename}.parquet"
+    df_lf = df.lazy()
+
+    try:
+        # Attach the mapped integer ID columns to the dataframe
+        for entity_col, tables in ID_ENTITY_MAP.items():
+            if table_name in tables and entity_col in master_mappings:
+                df_lf = df_lf.join(
+                    master_mappings[entity_col], on=entity_col, how="left"
+                )
 
-    if not export_file(df, output_path):
+    # Fail fast rather than corrupt downstream data (NOTE(review): lazy joins may only raise at collect() — confirm)
+    except Exception as e:
+        raise RuntimeError(f"Mapping Uint32 to UUIDs Failed: {e}") from e
+
+    output_path = run_context.contracted_path / f"{filename}.parquet"
+    if not export_file(df_lf.collect(), output_path):
         report["status"] = "failed"
         report["errors"].append("Export failed")
+        return report, invalid_ids, valid_ids
 
-    report["status"] = "success"
+    if "df" in locals():
+        del df
+    if "df_lf" in locals():
+        del df_lf
+    force_gc()
 
+    report["status"] = "success"
     return report, invalid_ids, valid_ids
diff --git a/data_pipeline/contract/contract_logic.py b/data_pipeline/contract/contract_logic.py
index 6a826f8..0a6ecc0 100644
--- a/data_pipeline/contract/contract_logic.py
+++ b/data_pipeline/contract/contract_logic.py
@@ -2,13 +2,16 @@
 # Contract Stage logic
 # =============================================================================
 
-
-import pandas as pd
+import polars as pl
 from typing import List
 from data_pipeline.shared.table_configs import REQUIRED_TIMESTAMPS, TIMESTAMP_FORMATS
 
+# ------------------------------------------------------------
+# CONTRACT LOGIC
+# ------------------------------------------------------------
+
 
-def deduplicate_exact_events(df: pd.DataFrame) -> tuple[pd.DataFrame, int]:
+def deduplicate_exact_events(df: pl.DataFrame) -> tuple[pl.DataFrame, int]:
     """
     Enforces record-level uniqueness across the entire row schema.
 
@@ -23,34 +26,32 @@ def deduplicate_exact_events(df: pd.DataFrame) -> tuple[pd.DataFrame, int]:
     - Tuple: (Filtered DataFrame, Integer count of dropped rows).
 
     Failures:
-    - [Structural] Crashes if input is not a pandas DataFrame.
+    - [Structural] Crashes if input is not a polars DataFrame.
     """
 
-    initial_count = len(df)
-    duplicated_mask = df.duplicated()
+    initial_count = df.height
+    duplicated_mask = df.is_duplicated()
+    removed_count = 0
 
     if duplicated_mask.any():
 
-        df = df.drop_duplicates()
-        removed_count = initial_count - len(df)
-
-    else:
-        removed_count = 0
+        df = df.unique()
+        removed_count = initial_count - df.height
 
     return df, removed_count
 
 
-def remove_unparsable_timestamps(df: pd.DataFrame) -> tuple[pd.DataFrame, int, set]:
+def remove_unparsable_timestamps(df: pl.DataFrame) -> tuple[pl.DataFrame, int, set]:
     """
-    Enforces parseability for system-critical temporal fields.
+    Enforces temporal completeness for system-critical fields.
 
     Contract:
-    - Evaluates all columns defined in REQUIRED_TIMESTAMPS.
-    - Subtractive Filtering: Drops any row containing at least one NaT/unparsable value in target columns.
+    - Data Presence: Evaluates the columns defined in REQUIRED_TIMESTAMPS for null or unparsable values.
+    - Subtractive Filtering: Drops any row with a null or unparsable value in a required timestamp column.
 
     Invariants:
-    - Type Safety: Does not cast types permanently; performs internal validation only.
-    - Lineage: Emits 'order_id' of failing rows to enable cascade pruning downstream.
+    - Post-Normalization: Operates on the guarantee that the I/O layer has already standardized resolution to microseconds.
+    - Referential Integrity: Emits 'order_id' of failing rows to enable cascade pruning downstream.
 
     Outputs:
     - Tuple: (Filtered DataFrame, Count of dropped rows, Set of invalid order_ids).
@@ -59,36 +60,36 @@ def remove_unparsable_timestamps(df: pd.DataFrame) -> tuple[pd.DataFrame, int, s
     - [Structural] Crashes if REQUIRED_TIMESTAMPS columns are missing from the DataFrame.
     """
 
-    initial_count = len(df)
-    unparsable_mask = pd.Series(False, index=df.index)
+    initial_count = df.height
+    invalid_order_ids = set()
+    remove_count = 0
 
+    exprs = []
     for col in REQUIRED_TIMESTAMPS:
-        ts = pd.to_datetime(
-            df[col],
-            format=TIMESTAMP_FORMATS[col],
-            errors="coerce",
-        )
-
-        # accumulate True for every NaT
-        unparsable_mask |= ts.isna()
+        if col in df.columns:
 
-    invalid_order_ids = set()
-    if unparsable_mask.any():
+            if df.schema[col] == pl.String:
+                fmt = TIMESTAMP_FORMATS.get(col)
+                exprs.append(
+                    pl.col(col).str.to_datetime(format=fmt, strict=False).is_null()
+                )
+            else:
+                exprs.append(pl.col(col).is_null())
 
-        invalid_order_ids = set(df.loc[unparsable_mask, "order_id"])
+    unparsable_mask = df.select(pl.any_horizontal(exprs)).to_series()
 
-        df = df[~unparsable_mask]
-        remove_count = initial_count - len(df)
+    if unparsable_mask.any():
 
-    else:
-        remove_count = 0
+        invalid_order_ids = set(df.filter(unparsable_mask).get_column("order_id"))
+        df = df.filter(~unparsable_mask)
+        remove_count = initial_count - df.height
 
     return df, remove_count, invalid_order_ids
 
 
-def remove_impossible_timestamps(df: pd.DataFrame) -> tuple[pd.DataFrame, int, set]:
+def remove_impossible_timestamps(df: pl.DataFrame) -> tuple[pl.DataFrame, int, set]:
     """
-    Enforces logical chronology for the order lifecycle.
+    Enforces logical chronology for the order lifecycle using Polars expressions.
 
     Contract:
     - Chronological Gate: Order Approval Date >= Order Purchase Date AND Order Delivery Date >= Order Purchase Date.
@@ -96,6 +97,7 @@ def remove_impossible_timestamps(df: pd.DataFrame) -> tuple[pd.DataFrame, int, s
 
     Invariants:
     - Temporal Alignment: Ensures all orders have a positive or zero lead time.
+    - Clean Code: Leverages direct Polars comparison logic without manual type-checking overhead.
 
     Outputs:
     - Tuple: (Filtered DataFrame, Count of dropped rows, Set of invalid order_ids).
@@ -104,30 +106,29 @@ def remove_impossible_timestamps(df: pd.DataFrame) -> tuple[pd.DataFrame, int, s
     - [Structural] Crashes if lifecycle timestamp columns are missing.
     """
 
-    purchase_ts = pd.to_datetime(df["order_purchase_timestamp"])
-    approved_ts = pd.to_datetime(df["order_approved_at"])
-    delivered_ts = pd.to_datetime(df["order_delivered_timestamp"])
-
-    invalid_mask = (approved_ts < purchase_ts) | (delivered_ts < purchase_ts)
-    initial_count = len(df)
-
+    initial_count = df.height
     invalid_order_ids = set()
-    if invalid_mask.any():
+    remove_count = 0
 
-        invalid_order_ids = set(df.loc[invalid_mask, "order_id"])
+    invalid_mask = df.select(
+        violation=(
+            (pl.col("order_approved_at") < pl.col("order_purchase_timestamp"))
+            | (pl.col("order_delivered_timestamp") < pl.col("order_purchase_timestamp"))
+        ).fill_null(False)
+    ).get_column("violation")
 
-        df = df[~invalid_mask]
-        remove_count = initial_count - len(df)
+    if invalid_mask.any():
+        invalid_order_ids = set(df.filter(invalid_mask).get_column("order_id"))
 
-    else:
-        remove_count = 0
+        df = df.filter(~invalid_mask)
+        remove_count = initial_count - df.height
 
     return df, remove_count, invalid_order_ids
 
 
 def remove_rows_with_null_constraint(
-    df: pd.DataFrame, non_nullable_column: List[str]
-) -> tuple[pd.DataFrame, int, set]:
+    df: pl.DataFrame, non_nullable_column: List[str]
+) -> tuple[pl.DataFrame, int, set]:
     """
     Enforces mandatory data presence (NOT NULL) for a dynamic column list.
 
@@ -145,27 +146,27 @@ def remove_rows_with_null_constraint(
     - [Structural] Crashes if 'non_nullable_column' names are not in the DataFrame.
     """
 
-    initial_count = len(df)
+    initial_count = df.height
     invalid_ids = set()
+    removed_count = 0
 
-    column_nulls = df[non_nullable_column].isna().any(axis=1)
+    column_nulls = df.select(
+        pl.any_horizontal([pl.col(col).is_null() for col in non_nullable_column])
+    ).to_series()
 
     if column_nulls.any():
         if "order_id" in df.columns:
-            invalid_ids = set(df.loc[column_nulls, "order_id"])
+            invalid_ids = set(df.filter(column_nulls).get_column("order_id"))
 
-        df = df[~column_nulls]
-        removed_count = initial_count - len(df)
-
-    else:
-        removed_count = 0
+        df = df.filter(~column_nulls)
+        removed_count = initial_count - df.height
 
     return df, removed_count, invalid_ids
 
 
 def cascade_drop_by_order_id(
-    df: pd.DataFrame, invalid_order_ids: set
-) -> tuple[pd.DataFrame, int]:
+    df: pl.DataFrame, invalid_order_ids: set
+) -> tuple[pl.DataFrame, int]:
     """
     Enforces referential cleanup based on a blacklist of compromised keys.
 
@@ -183,17 +184,18 @@ def cascade_drop_by_order_id(
     - [Structural] Crashes if 'order_id' column is missing.
     """
 
-    initial_count = len(df)
+    initial_count = df.height
+    removed_count = 0
 
-    df = df[~df["order_id"].isin(invalid_order_ids)]
-    removed = initial_count - len(df)
+    df = df.filter(~pl.col("order_id").is_in(invalid_order_ids))
+    removed_count = initial_count - df.height
 
-    return df, removed
+    return df, removed_count
 
 
 def enforce_parent_reference(
-    df: pd.DataFrame, valid_order_ids: set
-) -> tuple[pd.DataFrame, int]:
+    df: pl.DataFrame, valid_order_ids: set
+) -> tuple[pl.DataFrame, int]:
     """
     Enforces referential integrity based on a whitelist of validated keys.
 
@@ -205,25 +207,27 @@ def enforce_parent_reference(
     - Data Reliability: Guarantees that every child record has a corresponding valid parent.
 
     Outputs:
-    - Tuple: (Filtered DataFrame, Integer count of dropped rows).
+    - Tuple: (Filtered DataFrame, Integer count of dropped rows).
 
     Failures:
     - [Structural] Crashes if 'order_id' column is missing.
     """
-    initial_count = len(df)
+
+    initial_count = df.height
+    removed_count = 0
 
     if not valid_order_ids:
-        return df, 0
+        return df, removed_count
 
-    df = df[df["order_id"].isin(valid_order_ids)]
-    removed = initial_count - len(df)
+    df = df.filter(pl.col("order_id").is_in(valid_order_ids))
+    removed_count = initial_count - df.height
 
-    return df, removed
+    return df, removed_count
 
 
 def enforce_schema(
-    df: pd.DataFrame, required_column: List[str], dtypes: dict
-) -> tuple[pd.DataFrame, int]:
+    df: pl.DataFrame, required_column: List[str], dtypes: dict
+) -> tuple[pl.DataFrame, int]:
     """
     Finalizes the structural contract via schema projection and type casting.
 
@@ -242,11 +246,28 @@ def enforce_schema(
     - [Structural] Crashes if required columns are missing or if dtypes are incompatible.
     """
 
-    initial_col_count = len(df.columns)
+    initial_col_count = df.width
 
-    df = df[required_column]
-    df = df.astype(dtypes)
+    valid_cols = [col for col in required_column if col in df.columns]
 
-    removed = initial_col_count - len(df.columns)
+    exprs = []
+    for col in valid_cols:
+        target_dtype = dtypes.get(col)
 
-    return df, removed
+        if target_dtype == pl.Datetime:
+
+            if df.schema[col] == pl.String:
+                fmt = TIMESTAMP_FORMATS.get(col)
+                exprs.append(pl.col(col).str.to_datetime(format=fmt, strict=False))
+            else:
+                exprs.append(pl.col(col))
+
+        elif target_dtype:
+            exprs.append(pl.col(col).cast(target_dtype))
+        else:
+            exprs.append(pl.col(col))
+
+    df = df.select(exprs)
+    removed_count = initial_col_count - df.width
+
+    return df, removed_count
diff --git a/data_pipeline/contract/id_registrar.py b/data_pipeline/contract/id_registrar.py
new file mode 100644
index 0000000..c863171
--- /dev/null
+++ b/data_pipeline/contract/id_registrar.py
@@ -0,0 +1,242 @@
+# =============================================================================
+# UUIDs to Integers Mappings - Discovery First Architecture
+# =============================================================================
+
+import polars as pl
+from pathlib import Path
+from data_pipeline.shared.run_context import RunContext
+from data_pipeline.shared.storage_adapter import (
+    promote_new_mapping_files,
+    check_gcs_path_exists,
+)
+
+# Maps each entity ID column to the tables in which that column appears
+ID_ENTITY_MAP = {
+    "order_id": ["df_orders", "df_order_items", "df_payments"],
+    "customer_id": ["df_orders", "df_customers"],
+    "product_id": ["df_order_items", "df_products"],
+    "seller_id": ["df_order_items"],
+}
+
+# -----------------------------------------------------------------------------
+# DISCOVERY HELPERS
+# -----------------------------------------------------------------------------
+
+
+def discover_uuids(raw_path: Path, tables: list[str], col: str) -> pl.Series:
+    """
+    Scans raw snapshot files to identify the unique set of UUIDs present in the current run.
+
+    Contract:
+    - Multi-Format Support: Detects and scans both .parquet and .csv extensions.
+    - Defensive Schema: Uses 'infer_schema_length=0' for CSVs and casts every source to pl.String.
+    - Subtractive Consolidation: Aggregates IDs from all relevant tables, drops nulls, and de-duplicates.
+
+    Invariants:
+    - Type Safety: Always returns a pl.Series of dtype pl.String (Parquet columns are cast explicitly).
+    - Ordering: The result is sorted so repeated scans of the same files yield the same sequence.
+    - Empty Handling: Returns an empty Series with correct column name/type if no files are found.
+
+    Outputs:
+    - Unique, null-free, sorted pl.Series of string UUIDs.
+    """
+    all_uuids = []
+
+    for table in tables:
+        for ext in ["parquet", "csv"]:
+            # any() stops at the first match; the file list itself is never needed
+            if not any(raw_path.glob(f"{table}_*.{ext}")):
+                continue
+
+            table_glob = str(raw_path / f"{table}_*.{ext}").replace("\\", "/")
+            if ext == "parquet":
+                lf = pl.scan_parquet(table_glob)
+            else:
+                lf = pl.scan_csv(table_glob, infer_schema_length=0)
+
+            all_uuids.append(lf.select(pl.col(col).cast(pl.String)).drop_nulls())
+
+    if not all_uuids:
+        return pl.Series(col, [], dtype=pl.String)
+
+    return pl.concat(all_uuids).unique().sort(col).collect().get_column(col)
+
+
+def lookup_mapping_storage(
+    storage_glob: str, col: str, batch_uuids: pl.Series
+) -> tuple[pl.DataFrame, int]:
+    """
+    Fetches the stored mappings for this batch's UUIDs together with the highest integer ID in storage.
+
+    Contract:
+    - Sequence Head: Reports the maximum stored '<col>_int' value, or 0 when the aggregation yields None.
+
+    Optimization Logic:
+    - Early Projection: Only the UUID column and its integer column are selected from the scan.
+    - Semi-Join Hydration: Stored rows are kept only when their UUID occurs in 'batch_uuids'.
+    - Single Collect: Both lazy plans are handed to pl.collect_all in one call.
+
+    Invariants:
+    - Integrity: At most one stored row is returned per UUID (unique on the UUID column).
+    - Columns: The mapping frame holds exactly [col, '<col>_int'].
+
+    Outputs:
+    - tuple: (known_mappings_df, current_max_id_int)
+
+    Failures:
+    - No error handling here: anything raised while scanning or collecting 'storage_glob' propagates.
+    """
+    id_column = f"{col}_int"
+    projected_cols = [col, id_column]
+    registry = pl.scan_parquet(storage_glob, use_statistics=True)
+    registry = registry.select(projected_cols)
+
+    # Single-column lazy frame holding the UUIDs seen in the current batch
+    wanted = pl.DataFrame({col: batch_uuids}).lazy()
+
+    # Plan 1: stored rows whose UUID is part of the batch, one row per UUID
+    matched = registry.join(wanted, on=col, how="semi")
+    matched_plan = matched.unique(subset=[col])
+
+    # Plan 2: the largest integer ID found in storage
+    head_plan = registry.select(pl.col(id_column).max())
+
+    mappings_df, head_df = pl.collect_all([matched_plan, head_plan])
+
+    # item() is None when the max aggregation produced a null; fall back to 0
+    head_value = head_df.item()
+    if head_value is None:
+        head_value = 0
+
+    return mappings_df, head_value
+
+
+def generate_and_persist_delta(
+    missing_uuids: pl.Series,
+    current_max: int,
+    col: str,
+    runtime_dir: Path,
+    run_id: str,
+) -> pl.DataFrame:
+    """
+    Assigns consecutive integer IDs to unseen UUIDs and writes them to a run-scoped Parquet delta.
+
+    Contract:
+    - Sequence Generation: The i-th UUID (0-based) receives the UInt32 value current_max + 1 + i.
+    - Local Persistence: The delta is written to <runtime_dir>/<col>/map_<run_id>.parquet.
+
+    Invariants:
+    - Ordering: IDs follow the order of 'missing_uuids' as given by the caller.
+    - Lineage: Delta filename includes the run_id for traceability.
+
+    Outputs:
+    - pl.DataFrame with the columns [col, '<col>_int'].
+
+    Failures:
+    - No error handling here: directory creation or Parquet write errors propagate.
+    """
+    first_id = current_max + 1
+    stop_id = first_id + missing_uuids.len()
+
+    # Half-open range [first_id, stop_id): exactly one integer per UUID
+    id_sequence = pl.int_range(first_id, stop_id, dtype=pl.UInt32)
+
+    uuid_frame = pl.DataFrame({col: missing_uuids})
+    new_mappings = uuid_frame.with_columns(id_sequence.alias(f"{col}_int"))
+
+    # One sub-directory per ID column; missing parents are created on demand
+    target_dir = runtime_dir / col
+    target_dir.mkdir(parents=True, exist_ok=True)
+    new_mappings.write_parquet(target_dir / f"map_{run_id}.parquet")
+
+    return new_mappings
+
+
+# -----------------------------------------------------------------------------
+# MAIN ORCHESTRATOR (ENTRY POINT)
+# -----------------------------------------------------------------------------
+
+
+def extract_entity_mappings(run_context: RunContext) -> dict[str, pl.LazyFrame]:
+    """
+    Orchestrates the global ID discovery and mapping resolution for the entire pipeline run.
+
+    Workflow:
+    1. Discover: Scans raw sources for the UUIDs of every ID column listed in ID_ENTITY_MAP.
+    2. Hydrate: Loads stored mappings for only the discovered UUIDs (when the storage path exists).
+    3. Resolve: Determines which UUIDs are "New" and dispatches them for ID generation.
+    4. Promote: Hands the local delta directory to promote_new_mapping_files once all columns are done.
+
+    Outputs:
+    - dict keyed by ID column name; each value is a LazyFrame of known + newly generated mappings.
+    - ID columns with no discovered UUIDs are skipped and therefore absent from the dict.
+
+    Side Effects:
+    - Creates local Parquet deltas under run_context.contracted_path / "id_mapping".
+    - Passes that directory and run_context.storage_mapping_path to promote_new_mapping_files.
+
+    Failure Behavior:
+    - Lookup/generation errors are re-raised as RuntimeError; discovery and promotion errors propagate as-is.
+    """
+    master_mappings = {}
+
+    raw_path = run_context.raw_snapshot_path
+    mapping_dest = run_context.storage_mapping_path
+    runtime_dir = run_context.contracted_path / "id_mapping"
+    # Normalize backslashes to forward slashes before testing for a gs:// destination
+    dest_str = str(mapping_dest).replace("\\", "/")
+    is_gcs = dest_str.startswith("gs://")
+
+    for entity_col, tables in ID_ENTITY_MAP.items():
+
+        # Discover this column's UUIDs in the raw snapshot; skip the column when none are found
+        batch_uuids = discover_uuids(raw_path, tables, entity_col)
+        if batch_uuids.len() == 0:
+            continue
+
+        col_storage_dir = f"{dest_str}/{entity_col}"
+        storage_glob = f"{col_storage_dir}/*.parquet"
+
+        # Existence check: GCS helper for gs:// destinations, local filesystem otherwise
+        mapping_exists = (
+            check_gcs_path_exists(col_storage_dir)
+            if is_gcs
+            else Path(col_storage_dir).exists()
+        )
+
+        try:
+            if mapping_exists:
+                known_df, current_max = lookup_mapping_storage(
+                    storage_glob, entity_col, batch_uuids
+                )
+                # Keep only the batch UUIDs that have no stored mapping yet
+                missing_uuids = batch_uuids.filter(
+                    ~batch_uuids.is_in(known_df.get_column(entity_col))
+                )
+            else:
+                known_df = pl.DataFrame(
+                    {entity_col: [], f"{entity_col}_int": []},
+                    schema={entity_col: pl.String, f"{entity_col}_int": pl.UInt32},
+                )
+                missing_uuids = batch_uuids
+                current_max = 0
+
+            # Generate IDs for unseen UUIDs and expose known + new mappings as one LazyFrame
+            if missing_uuids.len() > 0:
+                new_df = generate_and_persist_delta(
+                    missing_uuids,
+                    current_max,
+                    entity_col,
+                    runtime_dir,
+                    run_context.run_id,
+                )
+                master_mappings[entity_col] = pl.concat([known_df, new_df]).lazy()
+            else:
+                master_mappings[entity_col] = known_df.lazy()
+
+        except Exception as e:
+            raise RuntimeError(f"Master Mapping Failure: {e}") from e
+    # Runs once after the loop, regardless of whether any delta was written during this run
+    promote_new_mapping_files(runtime_dir, mapping_dest)
+
+    return master_mappings
diff --git a/data_pipeline/publish/publish_executor.py b/data_pipeline/publish/publish_executor.py
index 4fffc23..0c1573d 100644
--- a/data_pipeline/publish/publish_executor.py
+++ b/data_pipeline/publish/publish_executor.py
@@ -8,6 +8,7 @@
     run_integrity_gate,
     promote_semantic_version,
     activate_published_version,
+    swap_bigquery_view,
     log_info,
 )
 
@@ -18,16 +19,18 @@ def execute_publish_lifecycle(run_context: RunContext) -> Dict:
 
     Workflow:
     1. Validate: Executes the 'Integrity Gate' to ensure all semantic artifacts exist and are schema-compliant.
-    2. Promote: Transfers validated artifacts to the permanent versioned publication zone.
-    3. Delegate: Triggers the atomic pointer swap to activate the new version for BI consumers.
+    2. Promote: Transfers validated artifacts to the permanent versioned publication zone (GCS).
+    3. Synchronize: Updates BigQuery External Tables and Views to point to the newly promoted version.
+    4. Activate: Triggers the atomic pointer swap (_latest.json) to update the version pointer for file-system consumers.
 
     Operational Guarantees:
-    - Atomicity: The 'latest' version pointer is updated ONLY after successful promotion of all artifacts.
+    - Multi-System Atomicity: The BI views and file-system pointers are updated ONLY after successful promotion of all artifacts.
     - Immutability: Once published, a versioned directory is treated as a static, read-only snapshot.
-    - Fail-Fast: Any failure in validation or promotion immediately halts the lifecycle.
+    - Fail-Fast: Any failure in validation, promotion, or SQL sync immediately halts the lifecycle.
 
     Side Effects:
     - Persists a new versioned directory (v{run_id}) in the publication zone.
+    - Mutates BigQuery External Tables and Views to update the stable BI layer.
     - Mutates the 'latest_version.json' manifest to update the global version pointer.
 
     Failure Behavior:
@@ -64,6 +67,19 @@ def fail_step(step_name):
 
     log_info("Semantic artifacts promoted successfully", promote_semantic)
 
+    update_sql_view = swap_bigquery_view(run_context)
+    report["steps"]["sql_view"] = update_sql_view
+
+    if update_sql_view["status"] == "failed":
+        return fail_step("sql_view")
+
+    # Do not log a success message when the BigQuery swap reported that it was skipped
+    if any("Skipping" in info for info in update_sql_view["info"]):
+        pass
+
+    else:
+        log_info("BigQuery views updated successfully", update_sql_view)
+
     published_activation = activate_published_version(run_context)
     report["steps"]["activation"] = published_activation
 
diff --git a/data_pipeline/publish/publish_logic.py b/data_pipeline/publish/publish_logic.py
index 91dad08..a49812d 100644
--- a/data_pipeline/publish/publish_logic.py
+++ b/data_pipeline/publish/publish_logic.py
@@ -3,7 +3,8 @@
 # =============================================================================
 
 import polars as pl
-from datetime import datetime as dt
+from datetime import datetime as dt, timezone
+from google.cloud import bigquery
 from contextlib import suppress
 from pathlib import Path
 import json
@@ -168,6 +169,75 @@ def promote_semantic_version(run_context: RunContext) -> Dict:
     return report
 
 
+# ------------------------------------------------------------
+# PUBLISHED SQL VIEW
+# ------------------------------------------------------------
+
+
+def swap_bigquery_view(run_context: RunContext, location: str | None = None) -> Dict:
+    """
+    Replaces each BigQuery 'published_' view so that it selects from an external table of the current run.
+
+    Contract:
+    - Versioned Tables: Creates (or replaces) one external table per semantic table, suffixed with _v{run_id}.
+    - Stable Views: Replaces the 'published_<table>' view to select from that versioned table.
+
+    Invariants:
+    - Ordering: Each DDL statement is awaited with .result(), so a view is replaced only after its table DDL ran.
+    - Not Transactional: Tables are processed one by one; a failure midway leaves earlier views already swapped.
+    - Cloud Only: Returns early without BigQuery calls when latest_pointer_path does not start with 'gs://'.
+
+    Outputs:
+    - Dict: Report from init_report(); "status" is set to "failed" when any exception is raised.
+    """
+    report = init_report()
+    latest_path = run_context.latest_pointer_path
+    published_uri = run_context.storage_published_path
+
+    if not str(latest_path).startswith("gs://"):
+        log_info("Skipping BigQuery swap (Local Storage detected)", report)
+
+        return report
+    # NOTE(review): os.getenv needs a module-level `import os`, which this hunk does not show - confirm
+    # Location precedence: explicit argument, then the GCP_REGION env var, then "us-east1"
+    effective_location = location or os.getenv("GCP_REGION", "us-east1")
+
+    try:
+
+        client = bigquery.Client(location=effective_location)
+        run_id = run_context.run_id
+        project = client.project
+
+        for module_name, module_config in SEMANTIC_MODULES.items():
+            for table_name in module_config["tables"]:
+
+                # External table over this run's published Parquet files for the table
+                table_ddl = f"""
+                CREATE OR REPLACE EXTERNAL TABLE `{project}.{module_name}.{table_name}_v{run_id}`
+                OPTIONS (
+                    format = 'PARQUET',
+                    uris = ['{published_uri}/v{run_id}/{module_name}/{table_name}_*.parquet']
+                )
+                """
+
+                # Repoint the stable 'published_' view at the versioned table
+                view_ddl = f"""
+                CREATE OR REPLACE VIEW `{project}.{module_name}.published_{table_name}` AS
+                SELECT * FROM `{project}.{module_name}.{table_name}_v{run_id}`
+                """
+
+                client.query(table_ddl, location=effective_location).result()
+                client.query(view_ddl, location=effective_location).result()
+
+            log_info(f"BigQuery swap successful for module: {module_name}", report)
+
+    except Exception as e:
+        report["status"] = "failed"
+        log_error(f"BigQuery Swap Failed: {e}", report)
+
+    return report
+
+
 # ------------------------------------------------------------
 # PUBLISHED ATOMIC POINTER
 # ------------------------------------------------------------
@@ -203,7 +273,7 @@ def activate_published_version(run_context: RunContext) -> Dict:
         "run_year": run_dt.year,
         "run_month": run_dt.month,
         "run_week_of_month": (run_dt.day - 1) // 7 + 1,
-        "published_at": dt.utcnow().isoformat(),
+        "published_at": dt.now(timezone.utc).isoformat(),
     }
 
     # LOCAL storage
diff --git a/data_pipeline/requirements.txt b/data_pipeline/requirements.txt
index a642083..0c1f211 100644
--- a/data_pipeline/requirements.txt
+++ b/data_pipeline/requirements.txt
@@ -1,4 +1,6 @@
-pandas==2.1.4
 polars==1.39.0
 pyarrow==19.0.0
-google-cloud-storage
\ No newline at end of file
+google-cloud-storage
+google-cloud-bigquery>=3.0.0
+google-cloud-bigquery-storage>=2.36.0
+psutil==5.9.8
\ No newline at end of file
diff --git a/data_pipeline/run_pipeline.py b/data_pipeline/run_pipeline.py
index 639da90..3f00ed6 100644
--- a/data_pipeline/run_pipeline.py
+++ b/data_pipeline/run_pipeline.py
@@ -4,17 +4,19 @@
 
 
 from pathlib import Path
-from datetime import datetime as dt
+from datetime import datetime as dt, timezone
 import json
 import os
 import shutil
-import gc
 
+from concurrent.futures import ThreadPoolExecutor
+from google.cloud import bigquery
 from data_pipeline.shared.table_configs import TABLE_CONFIG
 from data_pipeline.shared.run_context import RunContext
 from data_pipeline.validation.validation_executor import apply_validation
 from data_pipeline.contract.contract_executor import apply_contract
-from data_pipeline.assembly.assembly_executor import assemble_events
+from data_pipeline.contract.id_registrar import extract_entity_mappings
+from data_pipeline.assembly.assembly_executor import assemble_events, force_gc
 from data_pipeline.semantic.semantic_executor import build_semantic_layer
 from data_pipeline.publish.publish_executor import execute_publish_lifecycle
 
@@ -22,9 +24,20 @@
     download_raw_snapshot,
     upload_run_artifacts,
     upload_contracted_directory,
-    download_contracted_datasets,
 )
 
+import psutil
+import threading
+import time
+
+
+def memory_logger(stop_event: threading.Event) -> None:
+    """Temporary: Logs system-wide used RAM to stdout about once per second until stop_event is set."""
+    while not stop_event.is_set():
+        mem_mb = psutil.virtual_memory().used / (1024 * 1024)
+        print(f"METRIC_MEM: {mem_mb:.2f} MB")
+        stop_event.wait(1)  # interruptible pause: returns as soon as stop_event is set
+
 
 # ------------------------------------------------------------
 # SUPPORTING UTILITIES
@@ -72,9 +85,9 @@ def initialize_metadata(run_context: RunContext) -> None:
 
     payload = {
         "run_id": run_context.run_id,
-        "pipeline_version": "v5",
+        "pipeline_version": "v5.1",
         "status": "RUNNING",
-        "started_at": dt.utcnow().isoformat(),
+        "started_at": dt.now(timezone.utc).isoformat(),
         "run_year": run_dt.year,
         "run_month": run_dt.month,
         "run_day": run_dt.day,
@@ -102,7 +115,7 @@ def finalize_metadata(run_context: RunContext, status: str) -> None:
         payload = json.load(file)
 
     start_time = dt.fromisoformat(payload["started_at"])
-    completion_time = dt.utcnow()
+    completion_time = dt.now(timezone.utc)
 
     payload["status"] = status
     payload["completed_at"] = completion_time.isoformat()
@@ -116,6 +129,46 @@ def finalize_metadata(run_context: RunContext, status: str) -> None:
     persist_json(run_context.metadata_path, payload)
 
 
+def refresh_bq_external_cache(run_context: RunContext) -> None:
+    """
+    Forces BigQuery to refresh the external metadata cache of every table listed in TABLE_CONFIG.
+
+    Contract:
+    - Execution: Calls BQ.REFRESH_EXTERNAL_METADATA_CACHE once per table, in parallel, via one shared client.
+    - Skip: Returns without touching BigQuery when the GCP_REGION environment variable is unset.
+    - Best-Effort: Refresh errors are caught and printed; success is printed only if every call completed.
+
+    Invariants:
+    - State Sync: Intended to let downstream stages (like Assembly via Storage Read API) see the
+      newly contracted Parquet files without waiting for BigQuery's metadata cache to refresh on its own.
+    """
+    project_id = run_context.bq_project_id
+    dataset_id = run_context.bq_dataset_id
+    location = os.getenv("GCP_REGION", "MISSING_REGION")
+
+    if location == "MISSING_REGION":
+        print("[INFO] Skipping BigQuery cache refresh (Local Storage detected)")
+
+        return
+
+    client = bigquery.Client(project=project_id, location=location)
+
+    def refresh_table(table_name):
+        table_path = f"{project_id}.{dataset_id}.{table_name}"
+        query = f"CALL BQ.REFRESH_EXTERNAL_METADATA_CACHE('{table_path}')"
+        client.query(query).result()
+
+    try:
+        # Drain the iterator: executor.map only re-raises worker errors when results are read
+        with ThreadPoolExecutor(max_workers=len(TABLE_CONFIG)) as executor:
+            list(executor.map(refresh_table, TABLE_CONFIG))
+
+    except Exception as e:
+        print(f"Failed to refresh BigQuery cache for {dataset_id}: {e}")
+    else:
+        print(f"Successfully refreshed BigQuery cache for {dataset_id}")
+
+
 # ------------------------------------------------------------
 # STAGE WRAPPERS
 # ------------------------------------------------------------
@@ -133,6 +186,9 @@ def run_initial_validation_stage(run_context) -> None:
 def run_contract_application_stage(run_context) -> tuple[set, set]:
     report = []
 
+    # Resolve UUID-to-integer mappings for every ID column (order_id, product_id, etc.) found in the raw_snapshot directory
+    master_mappings = extract_entity_mappings(run_context)
+
     # Accumulates set of invalid order_ids and valid order_ids, and apply to child tables.
     invalid_ids = set()
     valid_ids = set()
@@ -140,15 +196,15 @@ def run_contract_application_stage(run_context) -> tuple[set, set]:
     # NOTE: TABLE_CONFIG order must list parent first before its children.
     for table_name in TABLE_CONFIG:
 
-        contract, new_inv, new_val = apply_contract(
-            run_context, table_name, invalid_ids, valid_ids
+        contract_rep, new_inv, new_val = apply_contract(
+            run_context, table_name, master_mappings, invalid_ids, valid_ids
         )
 
         invalid_ids |= new_inv
         if new_val:
             valid_ids = new_val
 
-        report.append(contract)
+        report.append(contract_rep)
 
     stage_logger(run_context, stage="contract_application", report=report)
     return invalid_ids, valid_ids
@@ -198,33 +254,46 @@ def run_prepublishing_validation_stage(run_context) -> None:
 
 def main() -> None:
     """
-    Ultimate authority for the end-to-end data pipeline lifecycle.
+    Ultimate authority for the end-to-end data pipeline lifecycle coordination.
 
     Workflow:
-        1.  Initialization: Resolve RunContext and instantiate global run metadata.
-        2.  Ingestion: Synchronize the raw data snapshot from Cloud Storage to local workspace.
-        3.  Gate I (Validation): Assert raw data sanity; fail-fast on fatal structural errors.
-        4.  Processing (Contract): Execute subtractive filtering and Silver-layer schema freezing.
-        5.  Gate II (Revalidation): Defensive check to ensure 'contracted' data is valid.
-        6.  Persistence (Sync Upload): Promote local contracted artifacts to the Cloud Silver Storage.
-        7.  Resource Reclamation: Purge transient directories (raw/contracted) to optimize memory.
-        8.  Hydration (Sync Download): Restore local environment with the full accumulated Silver state.
-        9.  Integration (Assembly): Flatten relational data into a unified Gold event layer.
-        10. Modeling (Semantic): Build entity-centric analytical modules (Fact/Dim).
-        11. Gate III (Pre-Publish): Final verification of semantic artifact completeness.
-        12. Activation (Publish): Atomic swap of the production 'latest' version pointer.
-        13. Finalization: Persist all telemetry/logs to Cloud and purge the local workspace.
+    1. Resolve: Instantiate RunContext and initialize background memory telemetry.
+    2. Hydrate (Raw): Synchronize the raw data snapshot from Cloud Storage to local workspace.
+    3. Initialize: Register the run commencement and capture initial metadata.
+    4. Validate (Raw): Assert raw data sanity and fail-fast on structural errors.
+    5. Contract: Execute subtractive filtering and schema freezing for Silver-layer datasets.
+    6. Revalidate: Defensive check to ensure contracted artifacts meet downstream requirements.
+    7. Promote (Silver): Persist delta contracted datasets to Cloud Silver Storage.
+    8. Synchronize (BQ): Force refresh of BigQuery external metadata cache for immediate visibility.
+    9. Purge (Local): Reclaim local disk and RAM by evicting raw/contracted sources before Assembly.
+    10. Assemble: Flatten relational data into a unified Gold event layer using Storage Read API.
+    11. Model (Semantic): Build entity-centric analytical modules (Fact/Dim).
+    12. Publish: Execute final lifecycle validation, BigQuery view swap, and atomic pointer swap for the 'latest' version.
+    13. Finalize: Update terminal metadata, upload all stage reports/telemetry, and purge workspace.
 
     Operational Guarantees:
-    - Defensive Integrity: No data moves to 'Assembly' without passing 'Revalidation'.
-    - Silver Continuity: Uses a Cloud-Sync loop to ensure Assembly operates on the full delta state.
-    - Resource Stewardship: Mandatory local cleanup via global 'finally' block to prevent disk leaks.
-    - Traceability: Enforces atomic 'run_id' consistency across all 13 lifecycle steps.
-    - Visibility: Guarantees cloud-upload of stage reports even in partial failure scenarios.
+    - Defensive Integrity: Prevents promotion of invalid data to Silver/Gold layers via multi-gate validation.
+    - Memory Efficiency: Enforces deterministic 'Purge' and 'GC' cycles between heavy processing stages.
+    - Traceability: Maintains strict run_id consistency across local and Cloud artifact lineages.
+    - Resilience: Guarantees telemetry upload (logs/metadata) even during catastrophic stage failures.
+
+    Side Effects:
+    - Writes local stage reports and metadata to the run-specific workspace.
+    - Mutates Cloud Storage (Silver layer artifacts, Run Artifacts).
+    - Calls the BigQuery system procedure that refreshes the external Metadata Cache.
+    - Swaps production environment pointers during the Publish stage.
+
+    Failure Behavior:
+    - Crash: Any stage RuntimeError triggers a 'FAILED' status update and immediate local cleanup.
+    - Recovery: Logs are persisted to Cloud before exit to enable post-mortem analysis.
     """
 
     run_context = RunContext.create()
 
+    stop_event = threading.Event()
+    logger_thread = threading.Thread(target=memory_logger, args=(stop_event,))
+    logger_thread.start()
+
     # Pre-start cleaning
     if os.path.exists(run_context.workspace_root):
         shutil.rmtree(run_context.workspace_root, ignore_errors=True)
@@ -241,21 +310,20 @@ def main() -> None:
         # Persist delta contracted datasets to silver storage
         upload_contracted_directory(run_context)
 
+        # Refresh BQ Metadata Cache after uploading is complete
+        refresh_bq_external_cache(run_context)
+
         # Clear RAM memory from previous stages
         if os.path.exists(run_context.raw_snapshot_path):
             shutil.rmtree(run_context.raw_snapshot_path)
             shutil.rmtree(run_context.contracted_path)
-        gc.collect()
-
-        # Recreate path and download contract data from silver storage
-        run_context.contracted_path.mkdir(parents=True, exist_ok=True)
-        download_contracted_datasets(run_context)
+        force_gc()
 
         run_assemble_events_stage(run_context)
-        gc.collect()
+        force_gc()
 
         run_semantic_modeling_stage(run_context)
-        gc.collect()
+        force_gc()
 
         run_prepublishing_validation_stage(run_context)
 
@@ -267,13 +335,16 @@ def main() -> None:
         raise
 
     finally:
+        stop_event.set()
+        logger_thread.join()
+
         # Persist run artifacts (logs/metadata) Pass or Fail
         upload_run_artifacts(run_context)
 
         # Clean RAM memory for next run
         if os.path.exists(run_context.workspace_root):
             shutil.rmtree(run_context.workspace_root)
-        gc.collect()
+        force_gc()
 
 
 if __name__ == "__main__":
diff --git a/data_pipeline/semantic/registry.py b/data_pipeline/semantic/registry.py
index 5de0ada..9d07aec 100644
--- a/data_pipeline/semantic/registry.py
+++ b/data_pipeline/semantic/registry.py
@@ -22,7 +22,6 @@
     PRODUCT_DIM_DTYPES,
 )
 
-
 SEMANTIC_MODULES = {
     "seller_semantic": {
         "builder": build_seller_semantic,
diff --git a/data_pipeline/semantic/semantic_executor.py b/data_pipeline/semantic/semantic_executor.py
index ee2b631..dd9da1f 100644
--- a/data_pipeline/semantic/semantic_executor.py
+++ b/data_pipeline/semantic/semantic_executor.py
@@ -2,11 +2,10 @@
 # Semantic Modeling Stage Executor
 # =============================================================================
 
-import gc
 import polars as pl
 from typing import Dict
 from data_pipeline.shared.run_context import RunContext
-from data_pipeline.shared.loader_exporter import load_historical_table, export_file
+from data_pipeline.shared.loader_exporter import load_assembled_data, export_file
 from data_pipeline.semantic.registry import SEMANTIC_MODULES
 from data_pipeline.assembly.assembly_logic import (
     init_report,
@@ -15,6 +14,7 @@
     loaded_data,
     task_wrapper,
 )
+from data_pipeline.assembly.assembly_executor import force_gc
 
 
 def validate_and_freeze_table(lf: pl.LazyFrame, table: dict) -> pl.LazyFrame:
@@ -118,7 +118,7 @@ def orchestrate_module(
         print(f"[INFO] Module {module_name}: build_stage completed successfully.")
 
     except Exception as e:
-        log_error(f"Step build_stage failed: {str(e)}", report)
+        log_error(f"Step build_stage failed: {e}", report)
         report["status"] = "failed"
 
         return False
@@ -168,18 +168,24 @@ def orchestrate_module(
 
         except FileExistsError as e:
             log_error(f"Unexpected table returned {table_name}: {e}", report)
+            report["status"] = "failed"
+            return False
 
         except Exception as e:
             log_error(f"Unexpected error processing {table_name}: {e}", report)
+            report["status"] = "failed"
+            return False
 
         finally:
             if "lf_frozen" in locals():
                 del lf_frozen
-            del df_table
-            gc.collect()
+            if "df_table" in locals():
+                del df_table
+            force_gc()
 
-    del builder_output
-    gc.collect()
+    if "builder_output" in locals():
+        del builder_output
+    force_gc()
 
     log_info(f"Export Module: {module_name} Successfully", report)
     module_report[module_name]["export"] = True
@@ -214,7 +220,7 @@ def build_semantic_layer(run_context: RunContext) -> Dict:
     report = init_report()
     report["modules"] = {}
 
-    df_assembled = load_historical_table(
+    df_assembled = load_assembled_data(
         base_path=run_context.assembled_path,
         table_name="assembled_events",
         log_info=lambda msg: loaded_data(msg, report),
@@ -238,7 +244,8 @@ def build_semantic_layer(run_context: RunContext) -> Dict:
             report["status"] = "failed"
             return report
 
-    del df_assembled
-    gc.collect()
+    if "df_assembled" in locals():
+        del df_assembled
+    force_gc()
 
     return report
diff --git a/data_pipeline/semantic/semantic_logic.py b/data_pipeline/semantic/semantic_logic.py
index 5f743d1..a6527df 100644
--- a/data_pipeline/semantic/semantic_logic.py
+++ b/data_pipeline/semantic/semantic_logic.py
@@ -5,7 +5,7 @@
 import polars as pl
 from typing import Dict
 from data_pipeline.shared.run_context import RunContext
-from data_pipeline.shared.loader_exporter import load_historical_table
+from data_pipeline.shared.loader_exporter import load_assembled_data
 
 # ------------------------------------------------------------
 # SELLER SEMANTIC BUILDER
@@ -14,21 +14,20 @@
 
 def build_seller_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
     """
-    Constructs the Seller-centric analytical layer from assembled events.
+    Constructs the Seller-centric analytical layer from assembled integer-mapped events.
 
     Contract:
     - Subtractive Filtering: Selects strictly required columns for performance.
     - Transformation: Derives week_start_date and boolean status flags.
-    - Aggregation: Computes weekly performance metrics (revenue, lead times, delays) per seller.
+    - Aggregation: Computes weekly performance metrics (revenue, lead times, delays) per seller using optimized Integer keys.
 
     Optimization Logic:
-    - Streaming Projection: Selects required columns for aggregation, allowing the streaming engine to push projection through the plan.
-    - Non-Blocking Aggregation: Executes aggregations in a streaming fashion, maintaining a constant memory profile.
-    - Categorical Handling: Utilizes categorical grouping keys to maintain optimized performance during non-blocking aggregation.
+    - Integer Key Optimization: Utilizes UInt32/UInt64 grouping keys (seller_id_int) to maintain a constant memory profile during non-blocking aggregation.
+    - Metric Downcasting: Enforces Int16 for counts/days and Float32 for revenue to minimize row width during streaming.
 
     Invariants:
-    - Fact Grain: Strictly 1 row per ('seller_id', 'order_year_week').
-    - Dimension Grain: Strictly 1 row per 'seller_id'.
+    - Fact Grain: Strictly 1 row per ('seller_id_int', 'order_year_week').
+    - Dimension Grain: Strictly 1 row per 'seller_id_int'.
     - Temporal: Aligns all metrics to ISO-week start dates (Monday).
 
     Outputs:
@@ -39,21 +38,18 @@ def build_seller_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
     """
 
     needed_cols = [
-        "seller_id",
+        "seller_id_int",
         "order_year_week",
         "order_date",
         "order_status",
-        "order_id",
+        "order_id_int",
         "order_revenue",
         "lead_time_days",
         "delivery_delay_days",
         "approval_lag_days",
     ]
 
-    lf_filtered = lf.select(needed_cols).with_columns(
-        seller_id=pl.col("seller_id").cast(pl.Categorical),
-        order_year_week=pl.col("order_year_week").cast(pl.Categorical),
-    )
+    lf_filtered = lf.select(needed_cols)
 
     seller_weekly_fact = (
         lf_filtered.with_columns(
@@ -61,10 +57,10 @@ def build_seller_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
             is_delivered=pl.col("order_status").eq("delivered"),
             is_cancelled=pl.col("order_status").eq("cancelled"),
         )
-        .group_by(["seller_id", "order_year_week"])
+        .group_by(["seller_id_int", "order_year_week"])
         .agg(
             week_start_date=pl.col("week_start_date").min(),
-            weekly_order_count=pl.col("order_id").count().cast(pl.Int16),
+            weekly_order_count=pl.col("order_id_int").count().cast(pl.Int16),
             weekly_delivered_orders=pl.col("is_delivered").sum().cast(pl.Int16),
             weekly_cancelled_orders=pl.col("is_cancelled").sum().cast(pl.Int16),
             weekly_revenue=pl.col("order_revenue").sum().cast(pl.Float32),
@@ -80,7 +76,7 @@ def build_seller_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
         )
     )
 
-    seller_dim = lf_filtered.group_by("seller_id").agg(
+    seller_dim = lf_filtered.group_by("seller_id_int").agg(
         first_order_date=pl.col("order_date").min(),
         first_order_year_week=pl.col("order_year_week").min(),
     )
@@ -100,22 +96,21 @@ def build_seller_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
 
 def build_customer_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
     """
-    Constructs the Customer-centric analytical layer from assembled events.
+    Constructs the Customer-centric analytical layer from assembled integer-mapped events.
 
     Contract:
     - Subtractive Filtering: Selects strictly required columns for performance.
     - Transformation: Derives week_start_date and boolean status flags.
-    - Aggregation: Computes weekly performance metrics (revenue, lead times, delays) per customer.
+    - Aggregation: Computes weekly performance metrics (revenue, lead times, delays) per customer using optimized Integer keys.
     - Hydration: Loads historical customer dimension table from the assembly zone.
 
     Optimization Logic:
-    - Streaming Projection: Selects required columns for aggregation, allowing the streaming engine to push projection through the plan.
-    - Non-Blocking Aggregation: Executes aggregations in a streaming fashion, maintaining a constant memory profile.
-    - Categorical Handling: Utilizes categorical grouping keys to maintain optimized performance during non-blocking aggregation.
+    - Integer Key Optimization: Utilizes UInt32/UInt64 grouping keys (customer_id_int) to maintain a constant memory profile during non-blocking aggregation.
+    - Metric Downcasting: Enforces Int16 for counts/days and Float32 for revenue to minimize row width.
 
     Invariants:
-    - Fact Grain: Strictly 1 row per ('customer_id', 'order_year_week').
-    - Dimension Grain: Strictly 1 row per 'customer_id'.
+    - Fact Grain: Strictly 1 row per ('customer_id_int', 'order_year_week').
+    - Dimension Grain: Strictly 1 row per 'customer_id_int'.
 
     Outputs:
     - Dict containing 'customer_weekly_fact' (LazyFrame) and 'customer_dim' (LazyFrame).
@@ -126,22 +121,18 @@ def build_customer_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
     """
 
     needed_cols = [
-        "customer_id",
+        "customer_id_int",
         "order_year_week",
         "order_date",
         "order_status",
-        "order_id",
+        "order_id_int",
         "order_revenue",
         "lead_time_days",
         "delivery_delay_days",
         "approval_lag_days",
     ]
 
-    # Cast grouping keys to Categorical to reduce hash table memory pressure
-    lf_filtered = lf.select(needed_cols).with_columns(
-        customer_id=pl.col("customer_id").cast(pl.Categorical),
-        order_year_week=pl.col("order_year_week").cast(pl.Categorical),
-    )
+    lf_filtered = lf.select(needed_cols)
 
     customer_weekly_fact = (
         lf_filtered.with_columns(
@@ -149,10 +140,10 @@ def build_customer_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
             is_delivered=pl.col("order_status").eq("delivered"),
             is_cancelled=pl.col("order_status").eq("cancelled"),
         )
-        .group_by(["customer_id", "order_year_week"])
+        .group_by(["customer_id_int", "order_year_week"])
         .agg(
             week_start_date=pl.col("week_start_date").min(),
-            weekly_order_count=pl.col("order_id").count().cast(pl.Int16),
+            weekly_order_count=pl.col("order_id_int").count().cast(pl.Int16),
             weekly_delivered_orders=pl.col("is_delivered").sum().cast(pl.Int16),
             weekly_cancelled_orders=pl.col("is_cancelled").sum().cast(pl.Int16),
             weekly_revenue=pl.col("order_revenue").sum().cast(pl.Float32),
@@ -168,7 +159,7 @@ def build_customer_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
         )
     )
 
-    customer_dim = load_historical_table(
+    customer_dim = load_assembled_data(
         base_path=run_context.assembled_path, table_name="df_customers"
     )
 
@@ -187,22 +178,21 @@ def build_customer_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
 
 def build_product_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
     """
-    Constructs the Product-centric analytical layer from assembled events.
+    Constructs the Product-centric analytical layer from assembled integer-mapped events.
 
     Contract:
     - Subtractive Filtering: Selects strictly required columns for performance.
     - Transformation: Derives week_start_date and boolean status flags.
-    - Aggregation: Computes weekly performance metrics (revenue, lead times, delays) per product.
+    - Aggregation: Computes weekly performance metrics (revenue, lead times, delays) per product using optimized Integer keys.
     - Hydration: Loads historical product dimension table from the assembly zone.
 
     Optimization Logic:
-    - Streaming Projection: Selects required columns for aggregation, allowing the streaming engine to push projection through the plan.
-    - Non-Blocking Aggregation: Executes aggregations in a streaming fashion, maintaining a constant memory profile.
-    - Categorical Handling: Utilizes categorical grouping keys to maintain optimized performance during non-blocking aggregation.
+    - Integer Key Optimization: Utilizes UInt32/UInt64 grouping keys (product_id_int) to maintain a constant memory profile during non-blocking aggregation.
+    - Metric Downcasting: Enforces Int16 for counts/days and Float32 for revenue to minimize row width.
 
     Invariants:
-    - Fact Grain: Strictly 1 row per ('product_id', 'order_year_week').
-    - Dimension Grain: Strictly 1 row per 'product_id'.
+    - Fact Grain: Strictly 1 row per ('product_id_int', 'order_year_week').
+    - Dimension Grain: Strictly 1 row per 'product_id_int'.
 
     Outputs:
     - Dict containing 'product_weekly_fact' (LazyFrame) and 'product_dim' (LazyFrame).
@@ -213,21 +203,18 @@ def build_product_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
     """
 
     needed_cols = [
-        "product_id",
+        "product_id_int",
         "order_year_week",
         "order_date",
         "order_status",
-        "order_id",
+        "order_id_int",
         "order_revenue",
         "lead_time_days",
         "delivery_delay_days",
         "approval_lag_days",
     ]
 
-    lf_filtered = lf.select(needed_cols).with_columns(
-        product_id=pl.col("product_id").cast(pl.Categorical),
-        order_year_week=pl.col("order_year_week").cast(pl.Categorical),
-    )
+    lf_filtered = lf.select(needed_cols)
 
     product_weekly_fact = (
         lf_filtered.with_columns(
@@ -235,10 +222,10 @@ def build_product_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
             is_delivered=pl.col("order_status").eq("delivered"),
             is_cancelled=pl.col("order_status").eq("cancelled"),
         )
-        .group_by(["product_id", "order_year_week"])
+        .group_by(["product_id_int", "order_year_week"])
         .agg(
             week_start_date=pl.col("week_start_date").min(),
-            weekly_order_count=pl.col("order_id").count().cast(pl.Int16),
+            weekly_order_count=pl.col("order_id_int").count().cast(pl.Int16),
             weekly_delivered_orders=pl.col("is_delivered").sum().cast(pl.Int16),
             weekly_cancelled_orders=pl.col("is_cancelled").sum().cast(pl.Int16),
             weekly_revenue=pl.col("order_revenue").sum().cast(pl.Float32),
@@ -254,7 +241,7 @@ def build_product_semantic(lf: pl.LazyFrame, run_context: RunContext) -> Dict:
         )
     )
 
-    product_dim = load_historical_table(
+    product_dim = load_assembled_data(
         base_path=run_context.assembled_path, table_name="df_products"
     )
 
diff --git a/data_pipeline/shared/loader_exporter.py b/data_pipeline/shared/loader_exporter.py
index 49cd2f9..0cb8972 100644
--- a/data_pipeline/shared/loader_exporter.py
+++ b/data_pipeline/shared/loader_exporter.py
@@ -4,13 +4,98 @@
 
 from pathlib import Path
 import polars as pl
-import pandas as pd
 from typing import Optional, Callable, Tuple, Any
+from google.cloud import bigquery
+
+
+def normalize_datetimes(lf: pl.LazyFrame | pl.DataFrame) -> Any:
+    """
+    Casts every Datetime column of a frame to microsecond ('us') resolution.
+
+    Contract:
+    - Discovery: Reads the schema (collect_schema() for a LazyFrame, .schema for a DataFrame)
+      and keeps the columns whose dtype is a pl.Datetime.
+    - Transformation: Applies dt.cast_time_unit("us") to each of those columns.
+
+    Invariants:
+    - Pass-Through: Hands back the very same 'lf' object when no Datetime column exists.
+    - Environment Neutrality: Prevents 'Datetime(ns) != Datetime(us)' resolution mismatches
+      between local development and cloud production environments.
+
+    Outputs:
+    - LazyFrame or DataFrame (matching input type) with resolution-standardized temporal fields.
+    """
+
+    if isinstance(lf, pl.LazyFrame):
+        frame_schema = lf.collect_schema()
+    else:
+        frame_schema = lf.schema
+
+    temporal = [c for c, t in frame_schema.items() if isinstance(t, pl.Datetime)]
+    if len(temporal) == 0:
+        return lf
+
+    return lf.with_columns([pl.col(c).dt.cast_time_unit("us") for c in temporal])
+
+
+def scan_gcs_uris_from_bigquery(
+    project_id: str,
+    dataset_id: str,
+    table_id: str,
+    log_info: Optional[Callable[[str], None]] = None,
+) -> pl.LazyFrame:
+    """
+    Builds a lazy Polars scan over the GCS files backing a BigQuery External Table.
+
+    Contract:
+    - Discovery: Runs 'SELECT DISTINCT _FILE_NAME' against the table to list its backing file URIs.
+    - Scan: Opens each URI with pl.scan_parquet, applies 'normalize_datetimes', and concatenates
+      the per-file scans with how="vertical_relaxed".
+    - Errors: Raises ValueError on the sentinel project ID or an empty listing; failures inside
+      discovery/scan setup are passed to 'log_info' (if given) and re-raised.
+
+    Invariants:
+    - Cost: Discovery is a BigQuery query job; row data is read by Polars from the listed URIs.
+    - Laziness: Rows are not collected here (schema resolution may still touch each file; confirm).
+
+    Outputs:
+    - A pl.LazyFrame ready for downstream streaming processing.
+    """
+
+    if project_id == "PROJECT_ID_NOT_DETECTED":
+        raise ValueError(
+            "Project ID is set to 'PROJECT_ID_NOT_DETECTED'. Pipeline environment variables are likely missing."
+        )
+
+    try:
+        client = bigquery.Client(project=project_id)
+        table_ref = f"{project_id}.{dataset_id}.{table_id}"
+
+        file_query = f"SELECT DISTINCT _FILE_NAME FROM `{table_ref}`"
+        query_result = client.query(file_query).result()
+        uris = [row[0] for row in query_result]
+
+        if not uris:
+            raise ValueError(f"No source URIs found in external table {table_ref}.")
+
+        lfs = [normalize_datetimes(pl.scan_parquet(uri)) for uri in uris]
+        lf = pl.concat(lfs, how="vertical_relaxed")
+
+    except Exception as e:
+        if log_info:
+            log_info(f"Failed to initialize stream for {dataset_id}.{table_id}: {e}")
+        raise
+
+    if log_info:
+        log_info(
+            f"Connected to GCS Stream via BigQuery: {dataset_id}.{table_id} ({len(uris)} URIs)"
+        )
+
+    return lf
 
 
 FILE_LOADERS = {
-    ".csv": lambda path: pd.read_csv(path),
-    ".parquet": lambda path: pd.read_parquet(path),
+    ".csv": lambda path: pl.read_csv(path),
+    ".parquet": lambda path: pl.read_parquet(path),
 }
 
 
@@ -23,15 +108,20 @@ def load_single_delta(
     Loads the chronologically most recent delta for a logical table.
 
     Contract:
-    - Scans 'base_path' for files matching the 'table_name' prefix.
-    - Identifies the target file via alphanumeric sorting of the date suffix (YYYY_MM_DD).
+    - Discovery: Scans 'base_path' for files matching the 'table_name' prefix.
+    - Selection: Identifies the target file via alphanumeric sorting of the date suffix (YYYY_MM_DD).
+    - Normalization: Automatically applies 'normalize_datetimes' to enforce microsecond resolution.
 
     Invariants:
     - Recency: Only the latest snapshot is returned; historical deltas are ignored.
     - Format Support: Handles .csv and .parquet (prioritizing Parquet).
+    - Eager Load: Reads the target file fully into memory; the lazy hop is used only for datetime normalization.
+
+    Outputs:
+    - Tuple containing (pl.DataFrame, str: file_name).
 
     Failures:
-    - Raises FileNotFoundError if no matching artifacts are found.
+    - [Operational] Raises FileNotFoundError if no matching artifacts are found.
     """
 
     base_path = Path(base_path)
@@ -56,6 +146,7 @@ def load_single_delta(
     loader = FILE_LOADERS[target_file.suffix.lower()]
 
     df = loader(target_file)
+    df = normalize_datetimes(df.lazy()).collect()
 
     if log_info:
         log_info(f"Loaded: {target_file.name} ({len(df)} rows)")
@@ -63,38 +154,89 @@ def load_single_delta(
     return df, file_name
 
 
-def load_historical_table(
+def load_historical_data(
     base_path: Path | str,
     table_name: str,
     log_info: Optional[Callable[[str], None]] = None,
 ) -> pl.LazyFrame:
     """
-    Aggregates matching artifacts into a single cumulative LazyFrame.
+    Aggregates matching historical artifacts into a single cumulative LazyFrame for the Assembly stage.
 
     Contract:
-    - Performs a multi-file scan of all Parquet artifacts matching 'table_name'.
-    - Queues files for lazy evaluation rather than loading them into memory.
+    - Discovery: Performs a multi-file glob of all Parquet artifacts matching 'table_name'.
+    - Normalize-at-Source: Scans and normalizes resolution (Datetime[us]) for every file individually before concatenation.
+    - Safety: Prevents 'Datetime(ns) != Datetime(us)' resolution mismatches that occur when mixing local and cloud Parquet files.
+
+    Invariants:
+    - Zero-Loss: Concatenates all identified files into a single unified stream.
+    - Lazy Execution: Returns a planned LazyFrame without triggering disk I/O.
 
     Outputs:
-    - Returns a pl.LazyFrame ready for downstream transformations.
+    - Returns a pl.LazyFrame ready for downstream joins and aggregations.
+
+    Failures:
+    - [Operational] Raises FileNotFoundError if no Parquet files match the table name in base_path.
     """
     base_path = Path(base_path)
 
-    files = [str(f) for f in base_path.glob(f"{table_name}*.parquet")]
+    all_files = list(base_path.glob(f"{table_name}*.parquet"))
 
-    if not files:
+    if not all_files:
         raise FileNotFoundError(f"No Parquet files found for {table_name}")
 
-    lf_unified = pl.scan_parquet(files)
+    lfs = [normalize_datetimes(pl.scan_parquet(file)) for file in all_files]
+    lf_unified = pl.concat(lfs, how="vertical_relaxed")
 
     if log_info:
         log_info(
-            f"Scanned: {table_name} ({len(files)} files queued for lazy evaluation)"
+            f"Hybrid Scan: {table_name} ({len(all_files)} total files queued for lazy evaluation)"
         )
 
     return lf_unified
 
 
+def load_assembled_data(
+    base_path: Path | str,
+    table_name: str,
+    log_info: Optional[Callable[[str], None]] = None,
+) -> pl.LazyFrame:
+    """
+    Optimized loader for high-volume assembled datasets targeting the Semantic stage.
+
+    Contract:
+    - Discovery: Passes the glob pattern string to pl.scan_parquet instead of an explicit file list.
+    - Path Handling: Accepts 'base_path' as Path or str (coerced via Path()), like 'load_historical_data'.
+    - Normalization: Applies 'normalize_datetimes' to the unified scan result.
+
+    Invariants:
+    - Consistency: Assumes uniform resolution across assembled files (standardized by the Assembly stage).
+    - Validation: Performs a quick existence check before initiating the lazy scan.
+
+    Outputs:
+    - A pl.LazyFrame optimized for streaming through semantic model construction.
+
+    Failures:
+    - [Operational] Raises FileNotFoundError if no Parquet files matching the pattern are found.
+    """
+
+    base_path = Path(base_path)  # a plain str would break the '/' join and .glob() below
+    pattern = str(base_path / f"{table_name}*.parquet")
+    if not any(base_path.glob(f"{table_name}*.parquet")):
+        raise FileNotFoundError(f"No Parquet files found for {table_name}")
+
+    lf = normalize_datetimes(
+        pl.scan_parquet(
+            pattern,
+            cast_options=pl.ScanCastOptions(datetime_cast="nanosecond-downcast"),
+        )
+    )
+
+    if log_info:
+        log_info(f"Scanned: {table_name} for lazy evaluation")
+
+    return lf
+
+
 def export_file(
     df: Any,
     output_path: Path,
@@ -106,18 +248,21 @@ def export_file(
     Persists DataFrames or LazyFrames to disk using standardized formats.
 
     Contract:
-    - Automates directory creation for the target 'output_path'.
-    - Enforces Parquet with Brotli compression as the internal standard.
+    - Hydrate: Automatically ensures parent directories for 'output_path' exist.
+    - Persist: Enforces Parquet with compression as the internal standard.
 
     Optimization Logic:
-    - Streaming Sink: When provided with a pl.LazyFrame, uses sink_parquet() to
-      stream data in chunks, bypassing full in-memory materialization.
+    - Streaming Sink: If 'df' is a LazyFrame, uses 'sink_parquet()' to execute
+      non-blocking writes directly from the query plan to disk.
 
     Invariants:
-    - Compression: Parquet exports always utilize 'brotli' to optimize storage.
+    - Compression: Utilizes 'brotli' for DataFrames and 'snappy' for LazyFrame streaming sinks.
+
+    Outputs:
+    - Boolean: True if write succeeded, False on I/O exception.
 
-    Returns:
-        bool: True if write succeeded, False on I/O exception.
+    Failures:
+    - [Operational] Returns False and logs to 'log_error' if disk I/O fails or permissions are denied.
     """
 
     output_path = Path(output_path)
@@ -126,18 +271,27 @@ def export_file(
         output_path.parent.mkdir(parents=True, exist_ok=True)
         row_count = 0
 
-        if isinstance(df, pd.DataFrame):
-            df.to_parquet(
-                output_path, index=index, engine="pyarrow", compression="brotli"
-            )
-            row_count = len(df)
-
-        elif isinstance(df, pl.DataFrame):
+        if isinstance(df, pl.DataFrame):
+            df = normalize_datetimes(df)
             df.write_parquet(output_path, compression="brotli")
             row_count = len(df)
 
         elif isinstance(df, pl.LazyFrame):
-            df.sink_parquet(output_path, compression="snappy")
+            df = normalize_datetimes(df)
+
+            try:
+                pa_schema = df.limit(0).collect().to_arrow().schema
+                df.sink_parquet(
+                    output_path, compression="snappy", arrow_schema=pa_schema
+                )
+
+            except Exception as e:
+                print(
+                    f"[WARNING] Arrow schema override failed, falling back to native sink:{e}"
+                )
+
+                df.sink_parquet(output_path, compression="snappy")
+
             row_count = "streaming"
 
         else:
diff --git a/data_pipeline/shared/modeling_configs.py b/data_pipeline/shared/modeling_configs.py
index 22da458..2f2870d 100644
--- a/data_pipeline/shared/modeling_configs.py
+++ b/data_pipeline/shared/modeling_configs.py
@@ -12,11 +12,11 @@
 
 # Assemble events enforced schema and dtypes
 ASSEMBLE_SCHEMA = [
-    "order_id",
+    "order_id_int",
     "order_revenue",
-    "seller_id",
-    "customer_id",
-    "product_id",
+    "seller_id_int",
+    "customer_id_int",
+    "product_id_int",
     "order_status",
     "order_purchase_timestamp",
     "order_approved_at",
@@ -29,20 +29,20 @@
 ]
 
 ASSEMBLE_DTYPES: Mapping[str, pl.DataType] = {
-    "order_id": pl.String(),
+    "order_id_int": pl.UInt32(),
     "order_revenue": pl.Float32(),
-    "seller_id": pl.String(),
-    "customer_id": pl.String(),
-    "product_id": pl.String(),
+    "seller_id_int": pl.UInt32(),
+    "customer_id_int": pl.UInt32(),
+    "product_id_int": pl.UInt32(),
     "order_status": pl.Categorical(),
-    "order_purchase_timestamp": pl.Datetime(),
-    "order_approved_at": pl.Datetime(),
-    "order_delivered_timestamp": pl.Datetime(),
+    "order_purchase_timestamp": pl.Datetime(time_unit="us"),
+    "order_approved_at": pl.Datetime(time_unit="us"),
+    "order_delivered_timestamp": pl.Datetime(time_unit="us"),
     "lead_time_days": pl.Int16(),
     "approval_lag_days": pl.Int16(),
     "delivery_delay_days": pl.Int16(),
-    "order_date": pl.Datetime(),
-    "order_year_week": pl.String(),
+    "order_date": pl.Datetime(time_unit="us"),
+    "order_year_week": pl.Categorical(),
 }
 
 # ------------------------------------------------------------
@@ -52,8 +52,11 @@
 dimension_table = ["df_customers", "df_products"]
 DIMENSION_REFERENCES = {
     table: {
-        "primary_key": TABLE_CONFIG[table]["primary_key"],
-        "required_column": TABLE_CONFIG[table]["required_column"],
+        "primary_key": [key + "_int" for key in TABLE_CONFIG[table]["primary_key"]],
+        "required_column": [
+            key + "_int" if "_id" in key else key
+            for key in TABLE_CONFIG[table]["required_column"]
+        ],
     }
     for table in dimension_table
 }
@@ -65,21 +68,21 @@
 
 # Seller dimension enforced schema and dtypes
 SELLER_DIM_SCHEMA = [
-    "seller_id",
+    "seller_id_int",
     "first_order_date",
     "first_order_year_week",
 ]
 
 SELLER_DIM_DTYPES: Mapping[str, pl.DataType] = {
-    "seller_id": pl.String(),
-    "first_order_date": pl.Datetime(),
-    "first_order_year_week": pl.String(),
+    "seller_id_int": pl.UInt32(),
+    "first_order_date": pl.Datetime(time_unit="us"),
+    "first_order_year_week": pl.Categorical(),
 }
 
 
 # Seller Facts enforced schema and dtypes
 SELLER_FACT_SCHEMA = [
-    "seller_id",
+    "seller_id_int",
     "order_year_week",
     "week_start_date",
     "weekly_order_count",
@@ -94,9 +97,9 @@
 ]
 
 SELLER_FACT_DTYPES: Mapping[str, pl.DataType] = {
-    "seller_id": pl.String(),
-    "order_year_week": pl.String(),
-    "week_start_date": pl.Datetime(),
+    "seller_id_int": pl.UInt32(),
+    "order_year_week": pl.Categorical(),
+    "week_start_date": pl.Datetime(time_unit="us"),
     "weekly_order_count": pl.Int16(),
     "weekly_delivered_orders": pl.Int16(),
     "weekly_cancelled_orders": pl.Int16(),
@@ -115,7 +118,7 @@
 
 # Customer Dimension and dtypes
 CUSTOMER_DIM_SCHEMA = [
-    "customer_id",
+    "customer_id_int",
     "customer_state",
     "customer_city",
     "customer_segment",
@@ -123,16 +126,16 @@
 ]
 
 CUSTOMER_DIM_DTYPES: Mapping[str, pl.DataType] = {
-    "customer_id": pl.String(),
+    "customer_id_int": pl.UInt32(),
     "customer_state": pl.Categorical(),
     "customer_city": pl.Categorical(),
     "customer_segment": pl.Categorical(),
-    "account_creation_date": pl.Datetime(),
+    "account_creation_date": pl.Datetime(time_unit="us"),
 }
 
 # Customer Fact and dtypes
 CUSTOMER_FACT_SCHEMA = [
-    "customer_id",
+    "customer_id_int",
     "order_year_week",
     "week_start_date",
     "weekly_order_count",
@@ -147,9 +150,9 @@
 ]
 
 CUSTOMER_FACT_DTYPES: Mapping[str, pl.DataType] = {
-    "customer_id": pl.String(),
-    "order_year_week": pl.String(),
-    "week_start_date": pl.Datetime(),
+    "customer_id_int": pl.UInt32(),
+    "order_year_week": pl.Categorical(),
+    "week_start_date": pl.Datetime(time_unit="us"),
     "weekly_order_count": pl.Int16(),
     "weekly_delivered_orders": pl.Int16(),
     "weekly_cancelled_orders": pl.Int16(),
@@ -168,7 +171,7 @@
 
 # Product Dim and dtypes
 PRODUCT_DIM_SCHEMA = [
-    "product_id",
+    "product_id_int",
     "product_category_name",
     "product_length_cm",
     "product_height_cm",
@@ -179,7 +182,7 @@
 ]
 
 PRODUCT_DIM_DTYPES: Mapping[str, pl.DataType] = {
-    "product_id": pl.String(),
+    "product_id_int": pl.UInt32(),
     "product_category_name": pl.Categorical(),
     "product_length_cm": pl.Float32(),
     "product_height_cm": pl.Float32(),
@@ -192,7 +195,7 @@
 
 # Product Fact and dtypes
 PRODUCT_FACT_SCHEMA = [
-    "product_id",
+    "product_id_int",
     "order_year_week",
     "week_start_date",
     "weekly_order_count",
@@ -208,9 +211,9 @@
 
 
 PRODUCT_FACT_DTYPES: Mapping[str, pl.DataType] = {
-    "product_id": pl.String(),
-    "order_year_week": pl.String(),
-    "week_start_date": pl.Datetime(),
+    "product_id_int": pl.UInt32(),
+    "order_year_week": pl.Categorical(),
+    "week_start_date": pl.Datetime(time_unit="us"),
     "weekly_order_count": pl.Int16(),
     "weekly_delivered_orders": pl.Int16(),
     "weekly_cancelled_orders": pl.Int16(),
diff --git a/data_pipeline/shared/run_context.py b/data_pipeline/shared/run_context.py
index d027ea6..08a17bc 100644
--- a/data_pipeline/shared/run_context.py
+++ b/data_pipeline/shared/run_context.py
@@ -5,12 +5,12 @@
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Callable
-from datetime import datetime
+from datetime import datetime as dt, timezone
 import uuid
 
 
 def _generate_run_id() -> str:
-    timestamp = datetime.utcnow().strftime("%Y%m%dT%H%M%S")
+    timestamp = dt.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
     random_suffix = uuid.uuid4().hex[:6]
     return f"{timestamp}_{random_suffix}"
 
@@ -49,11 +49,16 @@ class RunContext:
     # Storage paths
     storage_raw_path: str
     storage_contracted_path: str
+    storage_mapping_path: str
     storage_published_path: str
     version_path: str
     latest_pointer_path: str
     storage_runs_path: str
 
+    # BigQuery coordinates
+    bq_project_id: str
+    bq_dataset_id: str
+
     # NOTE: base =./runtime and storage= ./data were local test paths
     @classmethod
     def create(
@@ -62,6 +67,8 @@ def create(
         storage: str | Path = "gs://ops-pipeline-storage-dev",  # "./data",
         run_id: str | None = None,
         run_id_factory: Callable[[], str] | None = None,
+        bq_project_id: str | None = None,
+        bq_dataset_id: str | None = None,
     ) -> "RunContext":
         """
         Factory method for instantiating a fresh execution context.
@@ -74,6 +81,8 @@ def create(
             RunContext: An initialized context with all path mappings resolved.
         """
 
+        import os
+
         base_path = Path(base)
 
         if run_id is None:
@@ -95,11 +104,19 @@ def create(
         storage_root = str(storage)
         storage_raw_path = f"{storage_root}/raw"
         storage_contracted_path = f"{storage_root}/contracted"
+        storage_mapping_path = f"{storage_root}/id_mapping"
         storage_published_path = f"{storage_root}/published"
         version_path = f"{storage_published_path}/v{run_id}"
         latest_pointer_path = f"{storage_published_path}/_latest.json"
         storage_runs_path = f"{storage_root}/run_artifact/{run_id}"
 
+        # Explicit arguments take precedence; otherwise read the environment and
+        # fall back to sentinel strings when the variable is unset or empty.
+        if not bq_project_id:
+            bq_project_id = os.getenv("GCP_PROJECT") or "PROJECT_ID_NOT_DETECTED"
+        if not bq_dataset_id:
+            bq_dataset_id = os.getenv("BQ_DATASET_ID") or "BQ_DATASET_ID_NOT_DETECTED"
+
         return cls(
             run_id=run_id,
             # Workspace paths
@@ -115,10 +132,14 @@ def create(
             # Storage paths
             storage_raw_path=storage_raw_path,
             storage_contracted_path=storage_contracted_path,
+            storage_mapping_path=storage_mapping_path,
             storage_published_path=storage_published_path,
             version_path=version_path,
             latest_pointer_path=latest_pointer_path,
             storage_runs_path=storage_runs_path,
+            # BigQuery
+            bq_project_id=bq_project_id,
+            bq_dataset_id=bq_dataset_id,
         )
 
     def initialize_directories(self) -> None:
diff --git a/data_pipeline/shared/storage_adapter.py b/data_pipeline/shared/storage_adapter.py
index bfdc522..7c1e367 100644
--- a/data_pipeline/shared/storage_adapter.py
+++ b/data_pipeline/shared/storage_adapter.py
@@ -23,6 +23,18 @@ def _split_gcs_path(path: str):
     return bucket, prefix
 
 
+def check_gcs_path_exists(gcs_uri: str) -> bool:
+    """
+    Reports whether at least one blob exists under the given GCS prefix (a 'directory' check).
+    """
+    gcs_client = storage.Client()
+    bucket_name, prefix = _split_gcs_path(gcs_uri)
+
+    # A single listed blob is enough to prove the prefix is populated.
+    matches = gcs_client.bucket(bucket_name).list_blobs(prefix=prefix, max_results=1)
+    return any(True for _ in matches)
+
+
 def download_raw_snapshot(run_context: RunContext) -> None:
     """
     Synchronizes the raw data snapshot from Cloud Storage to the local workspace.
@@ -141,6 +153,7 @@ def upload_contracted_directory(run_context: RunContext) -> None:
 
     Contract:
     - Synchronizes the local 'contracted/' directory to 'storage_contracted_path'.
+    - Excludes the 'id_mapping' directory to prevent cross-contamination.
     - Purpose: Archives newly cleaned data for delta accumulation and historical lineage.
     """
 
@@ -167,40 +180,81 @@ def upload_contracted_directory(run_context: RunContext) -> None:
     bucket = client.bucket(bucket_name)
 
     for file in source.rglob("*"):
-        if file.is_file():
+        if file.is_file() and "id_mapping" not in file.parts:
 
             blob = bucket.blob(f"{prefix}/{file.relative_to(source)}")
             blob.upload_from_filename(file)
 
 
-def download_contracted_datasets(run_context: RunContext) -> None:
+# NOTE: Legacy architecture helper, retain for fallback.
+# def download_contracted_datasets(run_context: RunContext) -> None:
+#     """
+#     Populate the reconstructed local contracted/ with full historical delta set from Silver Cloud storage.
+
+#     Contract:
+#     - Downloads the full accumulated Silver state from 'storage_contracted_path'.
+#     """
+
+#     source = run_context.storage_contracted_path
+#     destination = run_context.contracted_path
+
+#     # Local filesystem case
+#     if not str(source).startswith("gs://"):
+#         shutil.copytree(source, destination, dirs_exist_ok=True)
+#         return
+
+#     # GCS case
+#     client = storage.Client()
+
+#     bucket_name, prefix = _split_gcs_path(source)
+
+#     bucket = client.bucket(bucket_name)
+
+#     for blob in bucket.list_blobs(prefix=prefix):
+#         if blob.name.endswith("/"):
+#             continue
+
+#         target = destination / Path(blob.name).name
+#         target.parent.mkdir(parents=True, exist_ok=True)
+
+#         blob.download_to_filename(target)
+
+
+def promote_new_mapping_files(runtime_dir: Path, destination: Path | str) -> None:
     """
-    Populate the reconstructed local contracted/ with full historical delta set from Silver Cloud storage.
+    Promotes new UUID mapping Parquet files from 'runtime_dir' to 'destination' (local path or gs:// URI).
 
     Contract:
-    - Downloads the full accumulated Silver state from 'storage_contracted_path'.
+    - Recursively collects every '*.parquet' file under 'runtime_dir'; does nothing if it does not exist.
+    - Writes each file to 'destination' at the same relative path (local copy or GCS upload).
     """
 
-    source = run_context.storage_contracted_path
-    destination = run_context.contracted_path
+    if not runtime_dir.exists():
+        return
+
+    destination_str = str(destination).replace("\\", "/")
 
     # Local filesystem case
-    if not str(source).startswith("gs://"):
-        shutil.copytree(source, destination, dirs_exist_ok=True)
+    if not destination_str.startswith("gs://"):
+        dest_base = Path(destination)
+
+        for file in runtime_dir.rglob("*.parquet"):
+            if file.is_file():
+                # Mirror the file's path relative to runtime_dir under the destination
+                # (e.g., destination/order_id/run_id.parquet)
+                relative_path = file.relative_to(runtime_dir)
+                target_path = dest_base / relative_path
+                target_path.parent.mkdir(parents=True, exist_ok=True)
+                shutil.copy2(file, target_path)
         return
 
     # GCS case
     client = storage.Client()
-
-    bucket_name, prefix = _split_gcs_path(source)
-
+    bucket_name, prefix = _split_gcs_path(destination_str)
     bucket = client.bucket(bucket_name)
 
-    for blob in bucket.list_blobs(prefix=prefix):
-        if blob.name.endswith("/"):
-            continue
-
-        target = destination / Path(blob.name).name
-        target.parent.mkdir(parents=True, exist_ok=True)
-
-        blob.download_to_filename(target)
+    for file in runtime_dir.rglob("*.parquet"):
+        if file.is_file():
+            relative_path = file.relative_to(runtime_dir).as_posix()
+            blob = bucket.blob(f"{prefix}/{relative_path}")
+            blob.upload_from_filename(str(file))
diff --git a/data_pipeline/shared/table_configs.py b/data_pipeline/shared/table_configs.py
index e68c7c4..0f939b9 100644
--- a/data_pipeline/shared/table_configs.py
+++ b/data_pipeline/shared/table_configs.py
@@ -2,6 +2,8 @@
 # Table configuration for Validation and Contract stage
 # =============================================================================
 
+import polars as pl
+
 TABLE_CONFIG = {
     "df_orders": {
         "role": "event_fact",
@@ -22,13 +24,13 @@
             "order_purchase_timestamp",
         ],
         "dtypes": {
-            "order_id": "string",
-            "customer_id": "string",
-            "order_status": "category",
-            "order_purchase_timestamp": "datetime64[ns]",
-            "order_approved_at": "datetime64[ns]",
-            "order_delivered_timestamp": "datetime64[ns]",
-            "order_estimated_delivery_date": "datetime64[ns]",
+            "order_id": pl.String,
+            "customer_id": pl.String,
+            "order_status": pl.Categorical,
+            "order_purchase_timestamp": pl.Datetime(time_unit="us"),
+            "order_approved_at": pl.Datetime(time_unit="us"),
+            "order_delivered_timestamp": pl.Datetime(time_unit="us"),
+            "order_estimated_delivery_date": pl.Datetime(time_unit="us"),
         },
     },
     "df_order_items": {
@@ -47,10 +49,10 @@
             "price",
         ],
         "dtypes": {
-            "order_id": "string",
-            "product_id": "string",
-            "seller_id": "string",
-            "price": "float32",
+            "order_id": pl.String,
+            "product_id": pl.String,
+            "seller_id": pl.String,
+            "price": pl.Float32,
         },
     },
     "df_customers": {
@@ -71,11 +73,11 @@
             "account_creation_date",
         ],
         "dtypes": {
-            "customer_id": "string",
-            "customer_state": "category",
-            "customer_city": "category",
-            "customer_segment": "category",
-            "account_creation_date": "datetime64[ns]",
+            "customer_id": pl.String,
+            "customer_state": pl.Categorical,
+            "customer_city": pl.Categorical,
+            "customer_segment": pl.Categorical,
+            "account_creation_date": pl.Datetime(time_unit="us"),
         },
     },
     "df_payments": {
@@ -90,8 +92,8 @@
             "payment_value",
         ],
         "dtypes": {
-            "order_id": "string",
-            "payment_value": "float32",
+            "order_id": pl.String,
+            "payment_value": pl.Float32,
         },
     },
     "df_products": {
@@ -118,14 +120,14 @@
             "supplier_tier",
         ],
         "dtypes": {
-            "product_id": "string",
-            "product_category_name": "category",
-            "product_length_cm": "float32",
-            "product_height_cm": "float32",
-            "product_width_cm": "float32",
-            "product_fragility_index": "category",
-            "product_weight_g": "float32",
-            "supplier_tier": "category",
+            "product_id": pl.String,
+            "product_category_name": pl.Categorical,
+            "product_length_cm": pl.Float32,
+            "product_height_cm": pl.Float32,
+            "product_width_cm": pl.Float32,
+            "product_fragility_index": pl.Categorical,
+            "product_weight_g": pl.Float32,
+            "supplier_tier": pl.Categorical,
         },
     },
 }
diff --git a/data_pipeline/validation/validation_executor.py b/data_pipeline/validation/validation_executor.py
index 55fc2f6..d3e3729 100644
--- a/data_pipeline/validation/validation_executor.py
+++ b/data_pipeline/validation/validation_executor.py
@@ -3,7 +3,7 @@
 # =============================================================================
 
 from typing import Dict
-import pandas as pd
+import polars as pl
 from pathlib import Path
 from data_pipeline.shared.loader_exporter import load_single_delta
 from data_pipeline.shared.table_configs import TABLE_CONFIG
@@ -46,7 +46,7 @@ def apply_validation(run_context: RunContext, base_path: Path | None = None) ->
 
     report = init_report()
 
-    tables: Dict[str, pd.DataFrame] = {}
+    tables: Dict[str, pl.DataFrame] = {}
     loaded_table_names = set()
 
     # Get assigned table configs
@@ -66,12 +66,12 @@ def apply_validation(run_context: RunContext, base_path: Path | None = None) ->
         tables[table_name] = df
 
         if not run_base_validations(
-            df,
-            table_name,
-            config["primary_key"],
-            config["required_column"],
-            config["non_nullable_column"],
-            report,
+            df=df,
+            table_name=table_name,
+            primary_key=config["primary_key"],
+            required_column=config["required_column"],
+            non_nullable_column=config["non_nullable_column"],
+            report=report,
         ):
             continue
 
diff --git a/data_pipeline/validation/validation_logic.py b/data_pipeline/validation/validation_logic.py
index 6122fd3..c55e61f 100644
--- a/data_pipeline/validation/validation_logic.py
+++ b/data_pipeline/validation/validation_logic.py
@@ -3,7 +3,8 @@
 # =============================================================================
 
 from typing import Dict, List
-import pandas as pd
+import polars as pl
+import polars.selectors as cs
 from data_pipeline.shared.table_configs import (
     REQUIRED_TIMESTAMPS,
     TIMESTAMP_FORMATS,
@@ -44,7 +45,7 @@ def log_error(message: str, report: Dict[str, List[str]]) -> None:
 
 
 def run_base_validations(
-    df: pd.DataFrame,
+    df: pl.DataFrame,
     table_name: str,
     primary_key: List[str],
     required_column: List[str],
@@ -52,15 +53,16 @@ def run_base_validations(
     report: Dict[str, List[str]],
 ) -> bool:
     """
-    Enforces foundational structural integrity for a logical table.
+    Enforces foundational structural integrity using Polars-native expressions.
 
     Contract:
     - Mandatory Schema: All 'required_column' names must exist in the DataFrame.
     - Uniqueness: Enforces primary key uniqueness and detects conflicting duplicates.
-    - Non-Nullability: Columns in 'non_nullable_column' must not contain NaN values.
+    - Non-Nullability: Columns in 'non_nullable_column' must not contain Null values.
 
     Invariants:
     - Diagnostic Safety: Read-only; does not mutate the input DataFrame.
+    - Performance: Leverages Polars lazy-style evaluations for memory efficiency.
 
     Outputs:
     - Boolean: True if all mandatory structural checks pass.
@@ -69,7 +71,7 @@ def run_base_validations(
     - [Structural] Logs findings to 'report["errors"]' and returns False for missing columns, empty datasets, or PK conflicts.
     """
 
-    if df.empty:
+    if df.is_empty():
         log_error(f"{table_name}: dataset is empty", report)
 
         return False
@@ -92,20 +94,15 @@ def run_base_validations(
 
         return False
 
-    duplicate_mask = df.duplicated(subset=primary_key, keep=False)
-    if duplicate_mask.any():
+    duplicate_mask = df.select(pl.col(primary_key).is_duplicated()).to_series()
 
-        duplicate_rows = df[duplicate_mask]
+    if duplicate_mask.any():
 
-        # Count of rows per PK
-        pk_group_size = duplicate_rows.groupby(primary_key, dropna=False).size()
+        duplicate_rows = df.filter(duplicate_mask)
 
         # number of unique rows per PK (full row comparison)
-        pk_unique_rows = (
-            duplicate_rows.drop_duplicates().groupby(primary_key, dropna=False).size()
-        )
-
-        conflicting = (pk_unique_rows > 1).any()
+        pk_unique_rows = duplicate_rows.unique().group_by(primary_key).len()
+        conflicting = (pk_unique_rows.get_column("len") > 1).any()
 
         if conflicting:
             log_error(
@@ -114,7 +111,11 @@ def run_base_validations(
             )
             return False
 
-        repairable_count = int((pk_group_size - 1).sum())  # Exclude 1st PK occurrence
+        # Count of rows per PK
+        pk_group_size = duplicate_rows.group_by(primary_key).len()
+        repairable_count = int(
+            (pk_group_size.get_column("len") - 1).sum()
+        )  # Exclude 1st PK occurrence
 
         if repairable_count > 0:
             log_warning(
@@ -122,28 +123,35 @@ def run_base_validations(
                 report,
             )
 
-    duplicate_columns = df.columns[df.columns.duplicated()].tolist()
+    columns = df.columns
+    duplicate_columns = [col for idx, col in enumerate(columns) if col in columns[:idx]]
     if duplicate_columns:
         log_warning(
             f"{table_name}: duplicate column names detected: {duplicate_columns}",
             report,
         )
 
-    pk_null_count = df[primary_key].isnull().any(axis=1).sum()
+    pk_null_count = (
+        df.select(pl.any_horizontal(pl.col(primary_key).is_null())).to_series().sum()
+    )
+
     if pk_null_count > 0:
         log_warning(
             f"{table_name}: {pk_null_count} rows with null primary key values", report
         )
 
     # Null rows in non nullable columns
-    column_nulls = df[non_nullable_column].isna().sum()
+    if non_nullable_column:
+        column_nulls = df.select(pl.col(non_nullable_column).null_count()).row(
+            0, named=True
+        )
 
-    for col, count in column_nulls.items():
-        if count > 0:
-            log_warning(
-                f"{table_name}: {count} null values in non-nullable column {col}",
-                report,
-            )
+        for col, count in column_nulls.items():
+            if count > 0:
+                log_warning(
+                    f"{table_name}: {count} null values in non-nullable column {col}",
+                    report,
+                )
 
     return True
 
@@ -154,17 +162,18 @@ def run_base_validations(
 
 
 def run_event_fact_validations(
-    df: pd.DataFrame, table_name: str, report: Dict[str, List[str]]
+    df: pl.DataFrame, table_name: str, report: Dict[str, List[str]]
 ) -> bool:
     """
-    Enforces business-logic chronology for Event-Role tables.
+    Enforces business-logic chronology and resolution standards for Event-Role tables.
 
     Contract:
-    - Chronological Check: Evaluates temporal sequence (Purchase <= Approval <= Delivery).
-    - Parseability: Validates timestamp string compatibility with system formats.
+    - Resolution Verification: Asserts that all timestamps are pre-normalized to microseconds (us) by the I/O layer.
+    - Chronological Check: Evaluates temporal sequence (Purchase <= Approval <= Delivery) using clean Polars syntax.
 
     Invariants:
     - Temporal Consistency: Flags records where delivery precedes purchase as Warnings.
+    - Zero-Tolerance Resolution: Assumes compliance with the 'Normalize-at-Source' I/O strategy.
 
     Outputs:
     - Boolean: True if all temporal checks are executed.
@@ -182,39 +191,54 @@ def run_event_fact_validations(
 
         return False
 
-    parsed = {}
+    safe_parse_expr = []
 
     for col in REQUIRED_TIMESTAMPS:
-        ts = pd.to_datetime(
-            df[col],
-            format=TIMESTAMP_FORMATS[col],
-            errors="coerce",
-        )
-        parsed[col] = ts
 
-        invalid_count = ts.isna().sum()
+        # Parse only string columns
+        if col in df.columns and df.schema[col] == pl.String:
+            safe_parse_expr.append(
+                pl.col(col)
+                .str.to_datetime(format=TIMESTAMP_FORMATS[col], strict=False)
+                .alias(col)
+            )
+
+    parsed_df = df.with_columns(safe_parse_expr) if safe_parse_expr else df
+
+    unparsable_counts = parsed_df.select(
+        [
+            pl.col(col).is_null().sum().alias(col)
+            for col in REQUIRED_TIMESTAMPS
+            if col in df.columns
+        ]
+    ).row(0, named=True)
+
+    for col, invalid_count in unparsable_counts.items():
         if invalid_count > 0:
             log_warning(
                 f"{table_name}: {invalid_count} unparsable timestamp values in {col}",
                 report,
             )
 
-    purchase_ts = parsed["order_purchase_timestamp"]
-    approved_ts = parsed["order_approved_at"]
-    delivered_ts = parsed["order_delivered_timestamp"]
+    invalid_temporal_counts = parsed_df.select(
+        invalid_approval=(
+            pl.col("order_approved_at") < pl.col("order_purchase_timestamp")
+        ).sum(),
+        invalid_delivery=(
+            pl.col("order_delivered_timestamp") < pl.col("order_purchase_timestamp")
+        ).sum(),
+    ).row(0, named=True)
 
     # Check for invalid temporal ordering
-    invalid_approval = (approved_ts < purchase_ts).sum()
-    if invalid_approval > 0:
+    if invalid_temporal_counts["invalid_approval"] > 0:
         log_warning(
-            f"{table_name}: {invalid_approval} records where approval precedes purchase",
+            f"{table_name}: {invalid_temporal_counts['invalid_approval']} records where approval precedes purchase",
             report,
         )
 
-    invalid_delivery = (delivered_ts < purchase_ts).sum()
-    if invalid_delivery > 0:
+    if invalid_temporal_counts["invalid_delivery"] > 0:
         log_warning(
-            f"{table_name}: {invalid_delivery} records where delivery precedes purchase",
+            f"{table_name}: {invalid_temporal_counts['invalid_delivery'] } records where delivery precedes purchase",
             report,
         )
 
@@ -222,7 +246,7 @@ def run_event_fact_validations(
 
 
 def run_transaction_detail_validations(
-    df: pd.DataFrame, table_name: str, report: Dict[str, List[str]]
+    df: pl.DataFrame, table_name: str, report: Dict[str, List[str]]
 ) -> bool:
     """
     Enforces domain and range constraints for Transaction-Role tables.
@@ -237,13 +261,13 @@ def run_transaction_detail_validations(
     - [Operational] Logs out-of-range values to 'report["errors"]'.
     """
 
-    numeric_columns = df.select_dtypes(include=["number"]).columns.tolist()
+    negative_counts = df.select((cs.numeric() < 0).sum()).row(0, named=True)
 
-    for col in numeric_columns:
-        negative_count = (df[col] < 0).sum()
-        if negative_count > 0:
+    # Report every numeric column that contains at least one negative value
+    for col, count in negative_counts.items():
+        if count > 0:
             log_error(
-                f"{table_name}: {negative_count} negative values in numeric column `{col}`",
+                f"{table_name}: {count} negative values in numeric column `{col}`",
                 report,
             )
 
@@ -251,7 +275,7 @@ def run_transaction_detail_validations(
 
 
 def run_cross_table_validations(
-    tables: Dict[str, pd.DataFrame], report: Dict[str, List[str]]
+    tables: Dict[str, pl.DataFrame], report: Dict[str, List[str]]
 ) -> bool:
     """
     Enforces referential integrity (Foreign Key) across the dataset.
@@ -284,19 +308,16 @@ def run_cross_table_validations(
     order_items_df = tables["df_order_items"]
     payments_df = tables["df_payments"]
 
-    # Orders PK reference
-    order_id_set = set(orders_df["order_id"].dropna().unique())
+    order_id_set = set(orders_df.get_column("order_id").drop_nulls().unique())
 
-    # OrderItems to Orders integrity
-    orphan_items = ~order_items_df["order_id"].isin(order_id_set)
+    orphan_items = ~order_items_df.get_column("order_id").is_in(order_id_set)
     if orphan_items.any():
         log_warning(
             f"df_order_items: {orphan_items.sum()} orphan records referencing non-existent order_id",
             report,
         )
 
-    # Payments to Orders integrity
-    orphan_payments = ~payments_df["order_id"].isin(order_id_set)
+    orphan_payments = ~payments_df.get_column("order_id").is_in(order_id_set)
     if orphan_payments.any():
         log_warning(
             f"df_payments: {orphan_payments.sum()} orphan records referencing non-existent order_id",
diff --git a/dev-cloud-test.ps1 b/dev-cloud-test.ps1
new file mode 100644
index 0000000..8875b4a
--- /dev/null
+++ b/dev-cloud-test.ps1
@@ -0,0 +1,55 @@
+# Builds the pipeline image, pushes it, updates the dev Cloud Run job, and executes it.
+param(
+  [string]$ProjectId = "",
+  [string]$ArtifactReg = "",
+  [string]$GcpDocker = "",
+  [string]$ImageTag = "",
+  [string]$Region = "",
+  [string]$BqDatasetId = "",
+  [string]$Memory = "",
+  [string]$Cpu = "",
+  [string]$Threads = ""
+)
+
+$ErrorActionPreference = 'Stop'
+
+# 'Stop' only covers PowerShell errors; native tools (docker, gcloud) signal
+# failure through $LASTEXITCODE, so every step is checked explicitly.
+function Assert-NativeSuccess([string]$Step) {
+  if ($LASTEXITCODE -ne 0) {
+    throw "$Step failed with exit code $LASTEXITCODE"
+  }
+}
+
+$IMAGE_PATH = "$GcpDocker/$ProjectId/$ArtifactReg/$ImageTag"
+
+Write-Host ""
+Write-Host "BUILDING IMAGE LOCALLY" -ForegroundColor Blue
+
+docker build --no-cache -t $IMAGE_PATH -f data_pipeline/Dockerfile .
+Assert-NativeSuccess "docker build"
+
+Write-Host ""
+Write-Host "PUSHING IMAGE TO CLOUD REPO" -ForegroundColor Blue
+
+docker push $IMAGE_PATH
+Assert-NativeSuccess "docker push"
+
+Write-Host ""
+Write-Host "UPDATING JOB" -ForegroundColor Blue
+
+gcloud run jobs update operations-pipeline-dev `
+  --image $IMAGE_PATH `
+  --update-env-vars GCP_PROJECT=$ProjectId `
+  --update-env-vars BQ_DATASET_ID=$BqDatasetId `
+  --update-env-vars POLARS_MAX_THREADS=$Threads `
+  --region $Region `
+  --memory $Memory `
+  --cpu $Cpu
+Assert-NativeSuccess "gcloud run jobs update"
+
+Write-Host ""
+Write-Host "EXECUTING CLOUD JOB" -ForegroundColor Blue
+
+gcloud run jobs execute operations-pipeline-dev --region $Region
+Assert-NativeSuccess "gcloud run jobs execute"
\ No newline at end of file
diff --git a/dev-local-test.ps1 b/dev-local-test.ps1
new file mode 100644
index 0000000..adf3395
--- /dev/null
+++ b/dev-local-test.ps1
@@ -0,0 +1,39 @@
+# Builds the pipeline image locally and runs it with the data directory mounted under resource limits.
+param(
+    [string]$Buildtag = "",
+    [string]$Testname = "",
+    [string]$Memory = "",
+    [string]$Memswap = "",
+    [string]$Cpu = "",
+    [string]$Threads = "",
+    [string]$Data = ""
+)
+
+$ErrorActionPreference = 'Stop'
+
+# 'Stop' only covers PowerShell errors; docker signals failure through
+# $LASTEXITCODE, so every step is checked explicitly.
+function Assert-NativeSuccess([string]$Step) {
+    if ($LASTEXITCODE -ne 0) {
+        throw "$Step failed with exit code $LASTEXITCODE"
+    }
+}
+
+Write-Host ""
+Write-Host "BUILDING IMAGE LOCALLY" -ForegroundColor Blue
+
+docker build --no-cache -t $Buildtag -f data_pipeline/Dockerfile .
+Assert-NativeSuccess "docker build"
+
+Write-Host ""
+Write-Host "MOUNTING LOCAL DATA DIRECTORY AND RUN TEST" -ForegroundColor Blue
+
+docker run --rm `
+  --name $Testname `
+  -v "${Data}:/app/data" `
+  --memory=$Memory `
+  --memory-swap=$Memswap `
+  --cpus=$Cpu `
+  -e POLARS_MAX_THREADS=$Threads `
+  $Buildtag
+Assert-NativeSuccess "docker run"
\ No newline at end of file
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 1362d4f..9421ec7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -8,11 +8,16 @@ pyparsing<3.0.0
 tzdata
 
 # pipeline
-pandas==2.1.4
 polars==1.39.0
-pytest==9.0.2
 pyarrow==19.0.0
-black==24.3.0
-ruff==0.0.264
+# duckdb==1.5.2
 google-cloud-storage
-pytest-cov
\ No newline at end of file
+google-cloud-bigquery>=3.0.0
+google-cloud-bigquery-storage>=2.36.0
+
+# dev & testing
+psutil==7.2.2
+pytest==9.0.2
+pytest-cov
+black==24.3.0
+ruff==0.0.264
\ No newline at end of file
diff --git a/docker-compose.benchmark.yml b/docker-compose.benchmark.yml
deleted file mode 100644
index fb55da0..0000000
--- a/docker-compose.benchmark.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-services:
-  stress-test:
-    container_name: pipeline-memory-stress-test
-    build:
-      context: .
-      dockerfile: data_pipeline/Dockerfile
-
-    # Mount the local data folder
-    volumes:
-      - ./data:/app/data
-
-    # Provision Specs: Memory 8 Gb/ 2cpu/2 threads
-    # Memory tax = Sandbox Tax +  Import Tax + I/O Buffer Tax
-    # Max Data Size = (Total RAM- 8GB) - (Memory Tax - 1.5GB) = 6.5GB
-    mem_limit: 6.5G
-    memswap_limit: 6.5G
-    cpus: '2.0'
-    environment:
-      - POLARS_MAX_THREADS=2
diff --git a/docs/data_pipeline/assembly_stage.md b/docs/data_pipeline/assembly_stage.md
index bf42e9e..957aaae 100644
--- a/docs/data_pipeline/assembly_stage.md
+++ b/docs/data_pipeline/assembly_stage.md
@@ -12,12 +12,12 @@
 
 **Purpose**
 
-Integrates multiple normalized relational tables into a unified, analytical "Event" dataset and extracts high-fidelity "Dimension" references. It transforms raw business facts into a ready-to-model state by enforcing cardinality rules and calculating temporal performance metrics.
+Integrates multiple normalized relational tables into a unified, analytical "Event" dataset and extracts high-fidelity "Dimension" references. It transforms raw business facts into a ready-to-model state by enforcing cardinality rules, leveraging the Primitive Integer Pipeline for memory efficiency, and calculating temporal performance metrics.
 
 **Invariants**
-* **Strict Order-ID Grain:** The primary event output is guaranteed to be exactly 1 row per `order_id`. Any operation causing cardinality explosion triggers a terminal failure.
+* **Strict Order-ID Grain:** The primary event output is guaranteed to be exactly 1 row per `order_id_int`. Any operation causing cardinality explosion triggers a terminal failure.
 * **Inner-Join Priority:** To maintain analytical integrity, orders without corresponding items are purged.
-* **Temporal Determinism:** All lead times, lags, and delays are calculated as integer-day durations based on validated UTC timestamps.
+* **Temporal Determinism:** All lead times, lags, and delays are calculated as integer-day durations based on validated UTC timestamps pre-normalized to microsecond resolution.
 * **Reference Uniqueness:** Dimension reference tables (Customers, Products) are strictly deduplicated by their primary keys.
 
 **Inputs**
@@ -36,7 +36,7 @@ The **Executor** coordinates two distinct sub-orchestrations:
 ### **Workflow I: Event Assembly**
 1.  **Batch Load:** Fetches the required triplet (`orders`, `items`, `payments`) from the Silver zone.
 2.  **Merge:** Joins datasets using `merge_data`. It performs an inner join on items and a left join on payments to preserve financial data without losing order context.
-    *   **Optimization:** Employs Hash-Joins on `UInt64` keys derived from `order_id` to drastically reduce memory overhead for high-cardinality UUIDs. Utilizes pre-aggregation on payments and items to ensure a strict 1:1 grain, preventing row explosions.
+    *   **Optimization:** Employs **Integer-Joins** on pre-mapped `UInt32/UInt64` IDs (e.g., `order_id_int`) provided by the Contract Registrar to drastically reduce memory overhead. Utilizes pre-aggregation on payments and items to ensure a strict 1:1 grain, preventing row explosions.
 3.  **Derivation:** Executes `derive_fields` to calculate fulfillment lead times and extract ISO-calendar attributes.
     *   **Optimization:** Applies memory-efficient casting (e.g., `Int16` for durations, `Categorical` for repetitive strings) and drops intermediate columns early to minimize row width.
 4.  **Schema Freeze:** Projects the final `ASSEMBLE_SCHEMA` and casts all columns to `ASSEMBLE_DTYPES`.
@@ -51,7 +51,7 @@ The **Executor** coordinates two distinct sub-orchestrations:
 ## **Optimization & Memory Invariants**
 
 * **Primitive Integer Pipeline:** To operate within 4GB RAM, the pipeline converts 36-byte UUID strings into 8-byte `UInt64` hashes for joins, and 4-byte `UInt32` categoricals for payloads. This is the primary driver of memory efficiency for 36M+ row datasets.
-* **Streaming-First Join:** By deferring aggregations until after raw joins on `order_id`, we leverage Polars' streaming engine to avoid massive, materialized hash tables.
+* **Streaming-First Join:** By deferring aggregations until after raw joins on `order_id`, the pipeline leverages Polars' streaming engine to avoid massive, materialized hash tables.
 * **Low-Level Memory Reclamation:** The executor utilizes `ctypes.CDLL('libc.so.6').malloc_trim(0)` at high-water mark transitions. This forces the Linux allocator to release free memory back to the OS, preventing Cloud Run from terminating the process due to bloated (but unused) heap memory.
 * **Zero-Copy Streaming:** `sink_parquet()` is used to prevent the pipeline from fully materializing the assembly result set in memory.
 
diff --git a/docs/data_pipeline/contract_stage.md b/docs/data_pipeline/contract_stage.md
index 7dd13bd..ab7218c 100644
--- a/docs/data_pipeline/contract_stage.md
+++ b/docs/data_pipeline/contract_stage.md
@@ -3,27 +3,29 @@
 **Files:**
 * **Executor:** [`contract_executor.py`](../../data_pipeline/contract/contract_executor.py)
 * **Logic:** [`contract_logic.py`](../../data_pipeline/contract/contract_logic.py)
+* **Registrar:** [`id_registrar.py`](../../data_pipeline/contract/id_registrar.py)
 * **Registry:** [`registry.py`](../../data_pipeline/contract/registry.py)
 
-**Role:** Structural Enforcement and Subtractive Filtering.
+**Role:** Structural Enforcement, Subtractive Filtering, and Discovery-First ID Mapping.
 
 ![contract-stage-diagram](/assets/diagrams/03-contract-stage-diagram.png)
 
 ## **System Contract**
 
 **Purpose**
-
-Enforces role-based structural rules on logical tables to ensure that only "contract-compliant" records reach the Silver layer. It acts as a gate that prunes malformed data, enforces referential integrity via ID propagation, and freezes the technical schema.
+Enforces role-based structural rules and referential integrity on raw snapshots to ensure that only "contract-compliant" records reach the Silver layer. It acts as a gate that prunes malformed data, enforces referential integrity via ID propagation, and freezes the technical schema using a discovery-first integer mapping approach.
 
 **Invariants**
-* **Subtractive-Only Row Logic:** With the exception of type casting, this stage never modifies data values or "repairs" them. If a row is non-compliant, it is dropped.
-* **Grain Enforcement:** Guarantees the removal of duplicates and the enforcement of the primary key grain defined in the registry.
+* **Subtractive-Only Row Logic:** With the exception of type casting, this stage never modifies business values or "repairs" data. If a row is non-compliant, it is dropped.
+* **Structural Parity:** Every file within a logical table's contracted zone MUST share an identical schema width and data types to support high-speed vertical concatenation in the Assembly stage.
 * **ID Propagation:** If an `order_id` is invalidated (e.g., due to nulls or unparsable dates), that ID is propagated to child tables to ensure a clean cascade drop.
-* **Final Schema Freeze:** The terminal step always ensures the output contains only approved columns with strictly defined data types.
+* **Discovery-First Mapping:** Guarantees that all UUIDs are resolved and mapped to deterministic `UInt32` integers BEFORE table enforcement begins, preventing join collisions and schema drift.
+* **Final Schema Freeze:** The terminal step for every role always executes `enforce_schema` to project only required columns and cast to strictly defined types.
 
 **Inputs**
 * `run_context`: `RunContext` (Path resolution for source raw snapshots and destination contracted zone).
 * `table_name`: `str` (Logical identifier used to look up role-based rules).
+* `master_mappings`: `dict[str, pl.LazyFrame]` (The pre-resolved dictionary of UUID-to-Integer mappings).
 * `invalid_order_ids`: `set` (Blacklist of IDs from preceding tables to be dropped).
 * `valid_order_ids`: `set` (Whitelist of IDs used to ensure child-parent referential integrity).
 
@@ -31,38 +33,45 @@ Enforces role-based structural rules on logical tables to ensure that only "cont
 * **Contract Report:** `dict` (Telemetry including `initial_rows`, `final_rows`, and counts for each rule applied).
 * **Invalidated IDs:** `set` (New IDs discovered to be non-compliant during this run).
 * **Valid IDs:** `set` (Emitted specifically by the `orders` table to act as a parent whitelist).
-* **Side Effect:** Writes a schema-enforced Parquet file to the `contracted/` directory.
+* **Side Effect:** Writes a schema-enforced and integer-mapped Parquet file to the `contracted/` directory.
 
 ## **Execution Workflow**
 
-The **Executor** applies the contract through a registry-driven sequence:
+The Contract stage is split into a global Discovery phase and a table-specific Enforcement phase:
+
+### **Phase A: Global Discovery**
+1. **Discover:** Scans all raw sources (CSV/Parquet) for the unique set of UUIDs in the current run.
+2. **Lookup:** Surgically retrieves existing mappings from Cloud Storage.
+3. **Generate:** Maps truly new UUIDs to a continuous integer sequence.
+4. **Promote:** Persists new mapping deltas to local disk and synchronizes them to central storage.
 
-1.  **Role Resolution:** Identifies if the table is an `event_fact`, `transaction_detail`, or `entity_reference`.
-2.  **Logic Sequencing:** Fetches the specific list of rules (e.g., `deduplicate`, `remove_nulls`) from the `ROLE_STEPS` registry.
-3.  **Atomic Filtering:** Iteratively applies each logic function. For `event_fact` roles, it captures any `order_id` that triggers a violation.
-4.  **Cascade Cleanup:** If `invalid_order_ids` are provided, it drops child records whose parents were previously invalidated.
-5.  **Referential Gate:** If `valid_order_ids` are provided (post-orders processing), it prunes orphan records.
-6.  **Schema Freeze:** As the final operation, it executes `enforce_schema` to project only required columns and cast types.
-7.  **Persistence:** Saves the resulting compliant DataFrame to the Silver layer.
+### **Phase B: Table Enforcement**
+1. **Hydrate:** Fetches the raw snapshot from the lake's snapshot zone.
+2. **Logic Sequencing:** Fetches rules (dedupe, null-checks, cascade drops) from `ROLE_STEPS`.
+3. **Atomic Filtering:** Iteratively applies rules. For `event_fact` roles, it captures IDs triggering violations.
+4. **Structural Freeze:** Executes `enforce_schema` as the final step in the registry sequence to project the required columns.
+5. **ID Mapping:** Joins the filtered and projected DataFrame against the `master_mappings` to attach integer IDs.
+6. **Persistence:** Saves the resulting compliant and integer-mapped dataset to the Silver layer.
 
 ## **Boundaries**
 
 | This component **DOES** | This component **DOES NOT** |
 | :--- | :--- |
-| Remove rows violating structural rules (Nulls, Duplicates). | Calculate business metrics or durations. |
-| Drop child records based on parent invalidation (Cascade). | Impute missing values or "fix" bad data. |
-| Enforce chronological logic (Purchase < Delivery). | Join multiple tables (delegated to Assembly stage). |
-| Project a final schema and enforce strictly defined types. | Rename columns or change business definitions. |
-| Track exactly how many rows were lost at each rule. | Handle global orchestration of all tables. |
+| Discover UUIDs across all raw sources (CSV/Parquet) before processing. | Calculate business metrics, KPIs, or aggregates. |
+| Subtractively filter rows violating structural or temporal rules. | Impute missing values or repair malformed records. |
+| Propagate `order_id` invalidations to child tables (Cascade Drop). | Perform cross-table business joins (delegated to Assembly). |
+| Guarantee fixed-width schemas via terminal `enforce_schema`. | Alter business definitions or rename columns. |
+| Map UUIDs to UInt32 primitives for optimized joins. | Handle cross-run global state (delegated to Storage Adapter). |
 
 ## **Failure & Severity Model**
 
-### **Operational Failures (System Level)**
-* **Configuration Mismatch:** If a `table_name` is not in `TABLE_CONFIG` or `ROLE_STEPS`, the executor returns a `failed` status immediately.
-* **Schema Breach:** If `enforce_schema` is called but a required column is missing from the data, it raises a `KeyError` and halts the export.
+### **Operational Failures (Fatal)**
+* **Discovery Failure:** If mappings cannot be resolved, the pipeline halts to prevent schema corruption.
+* **Schema Breach:** Occurs when `enforce_schema` is called but a required column is missing from the source data; treated as fatal.
+* **Persistence Failure:** Occurs when disk I/O or GCS promotion fails during the write phase; treated as fatal.
 
-### **Functional Findings (Data Level)**
-* **Contract Violations:** Data issues (duplicates, nulls) are not treated as "pipeline crashes." They are treated as expected noise; the rows are removed, the count is logged, and the pipeline continues with the remaining "clean" data.
+### **Functional Findings (Warnings)**
+* **Contract Violations:** Data issues (duplicates, nulls) result in row removal and are logged in the telemetry report.
 * **Referential Cleanup:** 
-    * **Cascade:** Compromised IDs from parents (e.g., orders) trigger removal of children (e.g., items) logged under `removed_cascade_rows`.
-    * **Orphans:** Ghost records without any parent reference are logged under `removed_ghost_orphan_rows`, ensuring downstream joins in the Assembly stage are 100% clean.
\ No newline at end of file
+    * **Cascade:** Dropped child records are logged under `removed_cascade_rows`.
+    * **Orphans:** Records without parent references are dropped and logged under `removed_ghost_orphan_rows`.
diff --git a/docs/data_pipeline/pipeline_orchestrator.md b/docs/data_pipeline/pipeline_orchestrator.md
index 18806b0..eb91d28 100644
--- a/docs/data_pipeline/pipeline_orchestrator.md
+++ b/docs/data_pipeline/pipeline_orchestrator.md
@@ -31,38 +31,37 @@ Serves as the central nervous system of the pipeline. It synchronizes data betwe
 
 ## **Execution Workflow**
 
-The orchestrator manages the lifecycle in three high-level phases, featuring a defensive synchronization loop:
+The orchestrator manages the lifecycle through a strictly gated 13-step sequence, emphasizing memory efficiency and cloud-local synchronization:
 
 ### **Phase I: Environment Initialization**
-1.  **Context Resolution**: Instantiates the `RunContext`.
-2.  **Metadata Start**: Persists the initial "RUNNING" state to `run_metadata.json`.
-3.  **Ingestion**: Downloads the required raw data snapshot from the cloud to the local workspace.
-
-### **Phase II: The Defensive Cloud-Sync Loop**
-1.  **Gate I (Raw Validation)**: Asserts the health of the downloaded raw data.
-2.  **Contract Processing**: Filters rows and freezes schemas into the local `contracted/` path.
-3.  **Gate II (Revalidation)**: Defensive check to ensure the local Silver data is structurally sound.
-4.  **Silver Synchronization (Upload)**: Promotes the newly contracted data to the **Cloud Silver Storage** to ensure delta accumulation and persistence.
-5.  **Environment Purge**: Deletes the local `raw_snapshot/` and `contracted/` directories and triggers `gc.collect()` to free system memory.
-6.  **Silver Restoration (Download)**: Recreates the local `contracted/` directory and downloads the **accumulated Silver deltas** from the Cloud storage.
-7.  **Integration (Assembly)**: Merges the restored data into the Gold-layer event grain.
-8.  **Modeling (Semantic)**: Builds the final analytical modules.
-9.  **Gate III (Pre-Publish)**: Verifies the completeness of semantic artifacts.
-
-### **Phase III: Finalization & Cleanup**
-1.  **Promotion**: Atomically updates the production pointer (`latest_version.json`).
-2.  **Persistence**: Uploads all logs and metadata back to cloud storage.
-3.  **Final Purge**: Deletes the entire local `workspace_root`.
+1.  **Resolve**: Instantiates the `RunContext` and initializes background memory telemetry for real-time benchmarking.
+2.  **Hydrate (Raw)**: Synchronizes the required raw data snapshot from Cloud Storage to the local workspace.
+3.  **Initialize**: Registers the run commencement by generating `run_metadata.json` with initial "RUNNING" status.
+
+### **Phase II: Processing & Memory Reclamation**
+4.  **Validate (Raw)**: Asserts the health of the raw data snapshot; fail-fast on structural errors.
+5.  **Contract Processing**: Executes subtractive filtering and freezes schemas into the local `contracted/` path (Silver layer).
+6.  **Gate II (Revalidation)**: Defensive check to ensure contracted data meets downstream semantic requirements.
+7.  **Promote (Silver)**: Persists the newly contracted datasets to **Cloud Silver Storage**.
+8.  **Synchronize (BQ)**: Forces a metadata cache refresh for BigQuery External Tables via system procedures (`BQ.REFRESH_EXTERNAL_METADATA_CACHE`) for immediate visibility.
+9.  **Purge (Local)**: Deterministically deletes local `raw/` and `contracted/` directories and invokes `force_gc()` to reclaim RAM before the high-compute Assembly stage.
+10. **Assemble**: Flattens relational data into a unified Gold-layer event grain using the **BigQuery Storage Read API** (bypassing the need for local Silver restoration).
+11. **Modeling (Semantic)**: Builds entity-centric analytical modules (Fact/Dim tables).
+
+### **Phase III: Activation & Finalization**
+12. **Publish**: Executes final integrity gates, performs the **BigQuery View Swap** for the BI layer, and triggers the atomic pointer swap (`latest_version.json`) to activate the new version.
+13. **Finalize**: Updates terminal metadata (status, duration), uploads all telemetry/stage reports to Cloud Storage, and purges the entire local workspace.
 
 ## **Boundaries**
 
 | This component **DOES** | This component **DOES NOT** |
 | :--- | :--- |
 | Coordinate the sequence of high-level executors. | Modify rows, columns, or data values. |
-| Manage local/cloud data synchronization. | Implement business logic or aggregation rules. |
-| Manage the Silver "Upload-Purge-Download" cycle. | Define the technical schema for Fact/Dim tables. |
+| Manage local/cloud data synchronization and BQ caching. | Implement business logic or aggregation rules. |
+| Enforce the "Purge-before-Assembly" memory optimization. | Define the technical schema for Fact/Dim tables. |
 | Manage the `finally` block for resource safety. | Direct file-level I/O within a stage (Delegated). |
 | Aggregate stage-level reports into a run summary. | Perform granular row-level validation. |
+| Monitor and log real-time memory telemetry. | Execute SQL transformations directly (Delegated). |
 
 ## **Failure & Severity Model**
 
diff --git a/docs/data_pipeline/publishing_stage.md b/docs/data_pipeline/publishing_stage.md
index 65148f2..57a6380 100644
--- a/docs/data_pipeline/publishing_stage.md
+++ b/docs/data_pipeline/publishing_stage.md
@@ -10,31 +10,32 @@
 
 **Purpose**
 
-Serves as the final gate and deployment mechanism for the pipeline. It transitions validated semantic artifacts into a permanent, versioned storage layer and updates the system's "latest" pointer to ensure Business Intelligence (BI) tools consume the most recent high-quality data.
+Serves as the final gate and deployment mechanism for the pipeline. It transitions validated semantic artifacts into a permanent, versioned storage layer and updates a dual-pointer system: a `latest_version.json` manifest for automated systems and BigQuery Authorized Views for Power BI/Business Intelligence tools.
 
 **Invariants**
 * **Integrity-Gated Promotion:** Promotion to the production zone is strictly prohibited if any table defined in the `SEMANTIC_MODULES` registry is missing or inaccessible.
-* **Atomic Activation:** The update to the `latest_version.json` pointer must be atomic (e.g., using `os.replace` locally) to prevent downstream tools from reading a partially written or corrupted manifest.
+* **Atomic Multi-System Swap:** The "switch" to the new version must happen across both GCS and BigQuery. The BigQuery View swap ensures Power BI never experiences "partial data" reads during the file promotion phase.
 * **Version Immutability:** Once a run is archived in a `v{run_id}` directory, the files are treated as read-only snapshots; they are never updated or overwritten by subsequent runs.
-* **Decoupled Storage:** Supports transparent publishing across both Local Filesystems and Google Cloud Storage (GCS) via a storage adapter.
+* **SQL Decoupling:** Dashboards connect to "Stable" Views (e.g., `published_seller_weekly_fact`) which are dynamically redirected to version-specific External Tables (e.g., `seller_weekly_fact_v20260413`).
 
 **Inputs**
 * `run_context`: `RunContext` (Contains the unique `run_id` and the semantic/published path configurations).
 * `SEMANTIC_MODULES`: `Registry` (The source of truth for which artifacts must exist to pass the integrity gate).
 
 **Outputs**
-* **Publish Report:** `dict` (Telemetry for the integrity check, file promotion status, and pointer update).
+* **Publish Report:** `dict` (Telemetry for the integrity check, file promotion status, and SQL/JSON pointer updates).
 * **Versioned Artifacts:** A new directory `/published/v{run_id}/` containing the full suite of semantic Fact and Dimension tables.
+* **BigQuery Pointers:** Updated External Tables and Authorized Views reflecting the new version.
 * **Latest Pointer:** An updated `latest_version.json` file in the root of the published zone.
 
 ## **Execution Workflow**
 
-The **Executor** ensures the production release follows a fail-fast, three-phase sequence:
+The **Executor** ensures the production release follows a fail-fast, four-phase sequence:
 
 1.  **Integrity Gate:** `run_integrity_gate` scans the semantic zone to verify that 100% of the expected tables (defined in the registry) were successfully produced.
 2.  **Promotion:** `promote_semantic_version` transfers all verified artifacts from the transient run-scoped directory to a permanent versioned path (`/published/v{run_id}`).
-3.  **Metadata Generation:** Constructs a publication manifest containing the `run_id`, a timestamped `published_at` field, and temporal metadata (Year/Month/Week).
-4.  **Activation:** `activate_published_version` performs the terminal swap of the `latest_version.json` file, effectively "going live" for downstream consumers.
+3.  **SQL Sync:** `swap_bigquery_view` executes DDL commands to create versioned External Tables and atomically redirect the "Published" Views used by dashboards.
+4.  **Activation:** `activate_published_version` performs the terminal swap of the `latest_version.json` file, effectively "going live" for downstream file-system consumers.
 
 ## **Boundaries**
 
@@ -42,16 +43,13 @@ The **Executor** ensures the production release follows a fail-fast, three-phase
 | :--- | :--- |
 | Verify the physical existence of semantic artifacts. | Re-validate data quality (handled in Validation/Contract). |
 | Copy or upload files to a versioned production path. | Perform any data transformation or aggregation. |
-| Update the atomic production pointer (`latest`). | Manage historical version cleanup (Garbage collection). |
-| Provide abstraction for Local vs. Cloud storage. | Handle automated rollbacks (pointer must be reverted manually). |
+| Manage BigQuery DDL for External Tables and Views. | Manage historical version cleanup (Garbage collection). |
+| Update the atomic production pointers (SQL and JSON). | Handle automated rollbacks (pointers must be reverted manually). |
 | Capture lifecycle metadata (Publication timestamps). | Modify the contents of the `.parquet` files. |
 
 ## **Failure & Severity Model**
 
 ### **Operational Failures (System Level)**
 * **Storage Access Denied:** If the service account lacks write permissions to the published zone (Local or GCS), the lifecycle halts before activation.
-* **Network/IO Exception:** Interrupted file transfers during the promotion phase result in an immediate `failed` status, ensuring the `latest` pointer remains on the previous stable version.
-
-### **Functional Findings (Data Level)**
-* **Integrity Breach:** If a builder in the Semantic stage failed to produce even one required table, the `run_integrity_gate` will fail. This prevents "partial data" from being promoted to production.
-* **Activation Collision:** If the `latest_version.json` cannot be replaced atomically, the executor traps the error and logs a fatal failure, preserving the existing production state.
\ No newline at end of file
+* **BigQuery DDL Error:** If the SQL swap fails (e.g., dataset permissions or syntax), the `latest_version.json` is never updated, ensuring systems stay in sync.
+* **Network/IO Exception:** Interrupted file transfers during the promotion phase result in an immediate `failed` status, ensuring the pointers remain on the previous stable version.
\ No newline at end of file
diff --git a/docs/data_pipeline/semantic_stage.md b/docs/data_pipeline/semantic_stage.md
index 92ce4a5..718b9bf 100644
--- a/docs/data_pipeline/semantic_stage.md
+++ b/docs/data_pipeline/semantic_stage.md
@@ -13,7 +13,7 @@
 
 **Purpose**
 
-Transforms the unified Gold-layer "Order-Grain" event table into entity-centric Fact and Dimension modules. It performs temporal aggregations, calculates long-term performance metrics, and organizes data into a schema optimized for time-series and cohort analysis.
+Transforms the unified Gold-layer "Order-Grain" event table into entity-centric Fact and Dimension modules. It performs temporal aggregations, calculates long-term performance metrics, and leverages the Primitive Integer Pipeline for efficient, high-fidelity analytical modeling.
 
 **Invariants**
 
@@ -49,9 +49,9 @@ The **Executor** coordinates the semantic build through a modular, registry-driv
 
 ## **Optimization & Memory Invariants**
 
-* **Local Categorical Aggregation:** To optimize memory during grouping operations, builders cast high-cardinality grouping keys (e.g., `seller_id`) to `pl.Categorical` locally. This creates a temporary, localized dictionary optimized specifically for that module's aggregation plan, bypassing the need for a persistent global string cache.
-* **Narrow Aggregation Payloads:** All aggregation results (counts, sums) are immediately cast to `Int16` or `Float32` within the `agg()` block. This prevents the materialized result set from expanding in memory.
-* **Schema Hand-off:** While the building process uses `Categorical` for performance, the final output is cast back to `pl.String()` via the registry/freezing process. This ensures downstream compatibility with BI tools and prevents "dictionary leakage" between pipeline runs.
+* **Integer Key Optimization:** To optimize memory during grouping operations, builders leverage pre-mapped `UInt32/UInt64` keys (e.g., `seller_id_int`). This maintains a constant memory profile during non-blocking aggregation and eliminates the overhead of string-based hash tables.
+* **Narrow Aggregation Payloads:** All aggregation results (counts, sums) are immediately downcast to `Int16` or `Float32` within the `agg()` block. This prevents the materialized result set from expanding in memory.
+* **Metric Downcasting:** Durations, counts, and years are forced to `Int16` (2 bytes) to minimize row width during streaming.
 * **Streaming Export:** `sink_parquet()` is utilized for all fact and dimension table exports, enabling zero-copy streaming of results directly from the query plan to storage.
 
 ## **Boundaries**
@@ -61,7 +61,7 @@ The **Executor** coordinates the semantic build through a modular, registry-driv
 | Perform multi-level aggregations (Sum, Mean, Count). | Filter "bad" data (handled in Validation/Contract stages). |
 | Derive entity-level attributes (e.g., `first_order_date`). | Resolve order-item join cardinality. |
 | Align all temporal metrics to the ISO Week grain. | Mutate the "Assembled Events" source. |
-| Enforce technical schemas and data types lazily. | Manage the physical publish/pointer logic. |
+| Utilize Integer-Key grouping for constant memory. | Manage the physical publish/pointer logic. |
 | Organize data into Fact/Dimension modules via streaming. | Perform cross-module joins. |
 
 ## **Failure & Severity Model**
diff --git a/docs/data_pipeline/validation_stage.md b/docs/data_pipeline/validation_stage.md
index b98463d..b6d5a87 100644
--- a/docs/data_pipeline/validation_stage.md
+++ b/docs/data_pipeline/validation_stage.md
@@ -12,10 +12,11 @@
 
 **Purpose** 
 
-Evaluates raw datasets against declared structural contracts before any mutation or transformation occurs. It prevents "garbage-in" scenarios by detecting schema violations, structural inconsistencies, and referential integrity issues that would compromise downstream aggregation.
+Evaluates raw datasets against declared structural contracts before any mutation or transformation occurs. It prevents "garbage-in" scenarios by detecting schema violations, structural inconsistencies, and referential integrity issues. In the modern Polars-native architecture, it also serves as a verification gate for the 'Normalize-at-Source' I/O strategy.
 
 **Invariants** 
 * **Non-Mutation Guarantee:** This stage is strictly read-only. It never modifies values, removes rows, or casts types in the source data.
+* **Resolution Verification:** Asserts that all timestamps are pre-normalized to microsecond (us) resolution by the I/O layer.
 * **Severity Hierarchy:** 
     * `errors`: Fatal structural violations (e.g., missing columns, duplicate PKs).
     * `warnings`: Admissible integrity issues (e.g., orphan records, chronological anomalies).
@@ -37,10 +38,10 @@ The **Executor** coordinates the validation lifecycle through the following dete
 2.  **Data Loading:** Attempts to load each table as a DataFrame. If a table is missing, an `error` is logged to the report.
 3.  **Base Validation:** Dispatches the DataFrame to `run_base_validations` to check for:
     * Presence of required columns.
-    * Uniqueness of Primary Keys and column names.
+    * Uniqueness of Primary Keys and column names using Polars-native expressions.
     * Compliance with non-nullable constraints.
 4.  **Role-Specific Dispatch:** If base validations pass, the executor applies specialized rules:
-    * `event_fact`: Triggers `run_event_fact_validations` (temporal chronology).
+    * `event_fact`: Triggers `run_event_fact_validations` (temporal chronology and microsecond resolution verification).
     * `transaction_detail`: Triggers `run_transaction_detail_validations` (numeric range checks).
 5.  **Cross-Table Integrity:** Once all tables are processed individually, `run_cross_table_validations` evaluates Foreign Key relationships (e.g., ensuring all Items belong to an existing Order).
 
@@ -50,10 +51,10 @@ The **Executor** coordinates the validation lifecycle through the following dete
 | :--- | :--- |
 | Load logical tables from the snapshot zone. | Remove rows or filter data. |
 | Detect schema and primary key violations. | Correct or impute missing values. |
-| Evaluate timestamp validity and chronological ordering. | Deduplicate records (delegated to Contract stage). |
-| Detect numeric anomalies (negative prices/lags). | Perform data type casting. |
-| Evaluate cross-table referential integrity (orphans). | Halt the pipeline (Decision owned by global orchestrator). |
-| Produce structured, machine-readable reports. | Modify the physical state of the data lake. |
+| Verify microsecond (us) timestamp resolution. | Deduplicate records (delegated to Contract stage). |
+| Evaluate temporal chronology using clean Polars syntax. | Perform data type casting. |
+| Detect numeric anomalies (negative prices/lags). | Mutate the physical state of the data lake. |
+| Produce structured, machine-readable reports. | Halt the pipeline (Decision owned by global orchestrator). |
 
 ## **Failure & Severity Model**
 
diff --git a/docs/terraform/gcp-iac.md b/docs/terraform/gcp-iac.md
index 3523dd2..5e62211 100644
--- a/docs/terraform/gcp-iac.md
+++ b/docs/terraform/gcp-iac.md
@@ -9,6 +9,7 @@ The pipeline follows a **Trigger-Action-Archive** flow:
 3.  **Dispatch:** An Eventarc trigger detects the new file and invokes a Google Workflow (`pipeline-dispatcher`).
 4.  **Processing:** The Workflow triggers the main `operations-pipeline` Cloud Run job (2 vCPU, 8Gi RAM) for heavy-duty data processing.
 5.  **Transient Storage:** Intermediate files are stored in the **Pipeline Bucket** with a 7-day TTL on raw data to minimize costs and exposure.
+6.  **Serving Layer:** The final semantic models are published as **BigQuery External Tables** and presented via stable **Authorized Views** for Power BI and dashboard consumers.
 
 ## Prerequisites
 *   **Terraform:** Version `~> 1.5.0`
@@ -18,32 +19,47 @@ The pipeline follows a **Trigger-Action-Archive** flow:
 ## Post-Provisioning (CI/CD Handshake)
 The integration between GCP and GitHub Actions requires a one-time "Bootstrap" extraction to populate Repository Secrets. This process completes the cryptographic trust relationship established by Workload Identity Federation (WIF).
 
-### 1. Secret Injection Matrix
+### Secret Injection Matrix
 | GitHub Secret | Source / Origin | Purpose |
 | :--- | :--- | :--- |
 | `WIF_PROVIDER` | `terraform output -raw GITHUB_WIF_PROVIDER_NAME` | Logical path for the WIF identity provider handshake. |
 | `DEPLOYER_SA_EMAIL` | `github-actions-deployer@...` | Target identity for GitHub OIDC impersonation. |
 | `GCP_PROJECT_ID` | `var.project_id` | Project scoping for GCP API and resource discovery. |
 
-### 2. Bootstrapping Constraint
+### Bootstrapping Constraint
 The initial infrastructure provisioning must be executed by a maintainer with `Project IAM Admin` or `Owner` privileges. This "privileged apply" is required to establish the WIF provider and assign the administrative roles to the `github-actions-deployer` service account. Subsequent updates are autonomously managed by the CI/CD identity.
 
 ## Infrastructure Components
 
-### 1. Compute & Jobs (`jobs.tf`)
+### Compute & Jobs (`jobs.tf`)
 | Resource Name | Type | Memory | Timeout | Purpose |
 | :--- | :--- | :--- | :--- | :--- |
-| `operations-pipeline` | Cloud Run Job | 8Gi | 30m | Main Polars-based processing engine. |
+| `operations-pipeline` | Cloud Run Job | 8Gi | 30m | Main Polars-based processing engine. Includes 10Gi Local SSD mount at `/tmp`. |
 | `drive-extractor` | Cloud Run Job | 1Gi | 15m | Pulls source data from external APIs. |
 | `ops-repo` | Artifact Registry | n/a | n/a | Docker repository for pipeline images. |
 
-### 2. Storage & Lifecycle (`storage.tf`)
-| Bucket Name | Storage Class | Lifecycle Policy |
+### Storage & Lifecycle (`storage.tf`)
+| Resource Name | Type | Policy / Details |
 | :--- | :--- | :--- |
-| `ops-archival-storage` | Standard -> Coldline | Move to Coldline after 400 days; Delete after 3 years. |
-| `ops-pipeline-storage` | Standard | Delete files with prefix `raw/` after 7 days. |
+| `ops-archival-storage` | GCS Bucket | Move to Coldline after 400 days; Delete after 3 years. |
+| `ops-pipeline-storage` | GCS Bucket | Delete files with prefix `raw/` after 7 days. |
+| `seller_semantic` | BQ Dataset | **Protected:** `prevent_destroy = true`; Logical container for Seller fact/dim views. |
+| `customer_semantic` | BQ Dataset | **Protected:** `prevent_destroy = true`; Logical container for Customer fact/dim views. |
+| `product_semantic` | BQ Dataset | **Protected:** `prevent_destroy = true`; Logical container for Product fact/dim views. |
 
-### 3. Orchestration (`orchestration.tf`)
+## Infrastructure-as-Code Workarounds
+
+### Cloud Run Local SSD Strategy (Preview)
+The `operations-pipeline` job uses a **manually provisioned Local SSD** mount at `/tmp` (10Gi) to offload memory pressure from Polars streaming joins.
+*   **The Problem:** As of April 2026, the Google Terraform provider does not natively support the `DISK` medium for `empty_dir` volumes (it defaults to `MEMORY`). 
+*   **The Resolution:** Provision manually and utilize lifecycle `ignore_changes` on the `medium` attribute. This allows the job to be created with the SSD partition enabled via the CLI or UI, while preventing Terraform from "correcting" it back to RAM-based storage during subsequent runs.
+
+### BigQuery Accidental Deletion Protection
+To safeguard analytical history, all semantic datasets are configured with:
+*   `delete_contents_on_destroy = false`: Ensures data/views remain even if the resource is deleted.
+*   `prevent_destroy = true`: Forces a manual override to destroy the dataset, protecting it from `terraform destroy` or accidental refactoring.
+
+### Orchestration (`orchestration.tf`)
 *   **Cloud Scheduler:** `0 0 * * *` (Daily 12AM PHT) triggers the Extractor.
 *   **Eventarc:** Monitors `object.v1.finalized` on the Archival bucket.
 *   **Workflows:** `pipeline-dispatcher` evaluates logic to trigger the main pipeline.
@@ -52,7 +68,7 @@ The initial infrastructure provisioning must be executed by a maintainer with `P
 
 This project implements **Zero Trust** via Workload Identity Federation and granular Service Account (SA) permissions.
 
-### 1. Identity Registry
+### Identity Registry
 | Identity Name | Role/Purpose |
 | :--- | :--- |
 | `github-actions-deployer` | CI/CD automation for infra and code updates. |
@@ -61,16 +77,17 @@ This project implements **Zero Trust** via Workload Identity Federation and gran
 | `eventarc-invoker-sa` | Orchestration identity to receive events and trigger workflows. |
 | `job-invoker-sa` | Scheduler identity to trigger Cloud Run jobs. |
 
-### 2. Permission Bindings
+### Permission Bindings
 | Identity | Target | Roles | Rationale |
 | :--- | :--- | :--- | :--- |
-| **Github Deployer** | Project | `run.developer`, `workflows.editor`, `cloudscheduler.admin`, `artifactregistry.admin`, `eventarc.admin`, `storage.admin`, `resourcemanager.projectIamAdmin`, `iam.workloadIdentityPoolAdmin`, `monitoring.admin`, `iam.serviceAccountAdmin`, `iam.serviceAccountUser`, `iam.admin` | **Least Privilege:** Granular roles for managing the entire pipeline lifecycle, IAM bindings, and state management. |
+| **GitHub Deployer** | Project | `run.developer`, `workflows.editor`, `cloudscheduler.admin`, `artifactregistry.admin`, `eventarc.admin`, `storage.admin`, `resourcemanager.projectIamAdmin`, `iam.workloadIdentityPoolAdmin`, `monitoring.admin`, `iam.serviceAccountAdmin`, `iam.serviceAccountUser`, `iam.admin`, `logging.configWriter`, `bigquery.admin` | **Least Privilege:** Granular roles for managing the entire pipeline lifecycle, IAM bindings, state management, and BigQuery schemas. |
 | **Drive Extractor** | Archival/Pipeline Buckets | `roles/storage.objectAdmin` | Full CRUD for data landing and archival. |
 | **Ops Pipeline** | Pipeline Bucket | `roles/storage.objectAdmin` | Read raw data and write processed artifacts. |
+| | Project | `roles/bigquery.dataEditor`, `roles/bigquery.jobUser` | Permission to create External Tables, swap Authorized Views, and execute queries. |
 | **Event Invoker** | Project | `roles/eventarc.eventReceiver` | Receive GCS notifications. |
 | | Project | `roles/workflows.invoker` | Permission to start workflow execution. |
 
-### 3. Workload Identity Federation
+### Workload Identity Federation
 *   **Pool:** `github-pool`
 *   **Trust Policy:** Restricted to `${var.github_repo}` to prevent unauthorized repository access.
 
@@ -78,10 +95,13 @@ This project implements **Zero Trust** via Workload Identity Federation and gran
 | Name | Type | Sensitive | Description |
 | :--- | :--- | :--- | :--- |
 | `project_id` | `string` | No | Target Google Cloud Project ID. |
+| `region` | `string` | No | GCP region in which the project's resources are located. |
 | `environment` | `string` | No | Deployment environment (dev, prod). |
 | `github_repo` | `string` | No | Format: `owner/repository`. |
+| `bq_dataset_id` | `string` | No | BigQuery dataset containing externalized GCS tables. |
 | `alert_email_map` | `map` | **Yes** | Monitoring notification recipients. |
 
+
 ## State Management
 State is managed remotely in GCS to ensure consistency and locking.
 ```hcl
diff --git a/tests/shared/test_loader_exporter.py b/tests/shared/test_loader_exporter.py
index c46af77..b20d6d0 100644
--- a/tests/shared/test_loader_exporter.py
+++ b/tests/shared/test_loader_exporter.py
@@ -2,43 +2,124 @@
 # UNIT TESTS FOR loader_exporter.py
 # =============================================================================
 
-import pandas as pd
 import polars as pl
 import pytest
+from unittest.mock import MagicMock, patch
+from datetime import datetime
 from data_pipeline.shared.loader_exporter import (
+    normalize_datetimes,
+    scan_gcs_uris_from_bigquery,
     load_single_delta,
-    load_historical_table,
+    load_historical_data,
+    load_assembled_data,
     export_file,
 )
 
-
 # ------------------------------------------------------------
 # FIXTURES (SHARED TEST DATA)
 # ------------------------------------------------------------
 
 
-@pytest.fixture
-def sample_pd_df():
-    return pd.DataFrame({"a": [1, 2], "b": [3, 4]})
-
-
 @pytest.fixture
 def sample_pl_df():
     return pl.DataFrame({"a": [1, 2], "b": [3, 4]})
 
 
+# ------------------------------------------------------------
+# NORMALIZE DATETIMES
+# ------------------------------------------------------------
+
+
+def test_normalize_datetimes():
+    # Build a datetime column that uses nanosecond precision
+    df = pl.DataFrame({"ts": [datetime(2023, 1, 1)], "val": [1]}).with_columns(
+        pl.col("ts").dt.cast_time_unit("ns")
+    )
+
+    lf = df.lazy()
+    assert lf.collect_schema()["ts"].time_unit == "ns"  # type: ignore
+
+    normalized_lf = normalize_datetimes(lf)
+    assert normalized_lf.collect_schema()["ts"].time_unit == "us"  # type: ignore
+
+
+def test_normalize_datetimes_no_temporal_cols():
+    df = pl.DataFrame({"a": [1, 2], "b": ["x", "y"]})
+    lf = df.lazy()
+    normalized_lf = normalize_datetimes(lf)
+    assert normalized_lf is lf
+
+
+# ------------------------------------------------------------
+# BIGQUERY EXTERNAL TABLE SCAN (Mocked IO)
+# ------------------------------------------------------------
+
+
+def test_scan_gcs_uris_from_bigquery_success():
+    project_id = "test-project"
+    dataset_id = "test_dataset"
+    table_id = "test_table"
+
+    # Mock BigQuery Client and Result
+    mock_client = MagicMock()
+    mock_query_job = MagicMock()
+    mock_query_job.result.return_value = [
+        ["gs://bucket/file1.parquet"],
+        ["gs://bucket/file2.parquet"],
+    ]
+    mock_client.query.return_value = mock_query_job
+
+    with patch("google.cloud.bigquery.Client", return_value=mock_client), patch(
+        "polars.scan_parquet"
+    ) as mock_scan_parquet:
+
+        # Mock the Polars LazyFrame returned by scan_parquet
+        mock_lf = pl.LazyFrame({"a": [1]})
+        mock_scan_parquet.return_value = mock_lf
+
+        lf = scan_gcs_uris_from_bigquery(project_id, dataset_id, table_id)
+
+        # Check BigQuery interactions
+        mock_client.query.assert_called_once()
+        query_call = mock_client.query.call_args[0][0]
+        assert "SELECT DISTINCT _FILE_NAME" in query_call
+        assert f"`{project_id}.{dataset_id}.{table_id}`" in query_call
+
+        # Check Polars interactions
+        assert mock_scan_parquet.call_count == 2
+        mock_scan_parquet.assert_any_call("gs://bucket/file1.parquet")
+        mock_scan_parquet.assert_any_call("gs://bucket/file2.parquet")
+        assert lf is not None
+
+
+def test_scan_gcs_uris_from_bigquery_empty_results():
+    mock_client = MagicMock()
+    mock_query_job = MagicMock()
+    mock_query_job.result.return_value = []
+    mock_client.query.return_value = mock_query_job
+
+    with (
+        patch("google.cloud.bigquery.Client", return_value=mock_client),
+        pytest.raises(ValueError, match="No source URIs found"),
+    ):
+        scan_gcs_uris_from_bigquery("proj", "ds", "tbl")
+
+
+def test_scan_gcs_uris_from_bigquery_invalid_env():
+    with pytest.raises(ValueError, match="Project ID is set to"):
+        scan_gcs_uris_from_bigquery("PROJECT_ID_NOT_DETECTED", "ds", "tbl")
+
+
 # ------------------------------------------------------------
 # LOAD SINGLE DELTA
 # ------------------------------------------------------------
 
 
-def test_load_single_delta_success(tmp_path, sample_pd_df):
-    # Setup: Create multiple files with different dates
-    # load_single_delta currently uses pandas for loading
-    sample_pd_df.to_csv(tmp_path / "df_test_2023_01_01.csv", index=False)
+def test_load_single_delta_success(tmp_path, sample_pl_df):
+    sample_pl_df.write_csv(tmp_path / "df_test_2023_01_01.csv")
 
-    newer_df = pd.DataFrame({"a": [10], "b": [20]})
-    newer_df.to_parquet(tmp_path / "df_test_2023_01_02.parquet", index=False)
+    newer_df = pl.DataFrame({"a": [10], "b": [20]})
+    newer_df.write_parquet(tmp_path / "df_test_2023_01_02.parquet")
 
     log_messages = []
 
@@ -47,10 +128,10 @@ def logger(msg):
 
     df, file_name = load_single_delta(tmp_path, "df_test", log_info=logger)
 
-    # Should pick the latest one (alphabetically/chronologically sorted)
+    # Should pick the latest one (chronologically sorted)
     assert file_name == "df_test_2023_01_02"
     assert len(df) == 1
-    assert df["a"].iloc[0] == 10
+    assert df[0, "a"] == 10
     assert any("Loaded: df_test_2023_01_02.parquet" in msg for msg in log_messages)
 
 
@@ -64,8 +145,8 @@ def test_load_single_delta_no_files(tmp_path):
 # ------------------------------------------------------------
 
 
-def test_load_historical_table_success(tmp_path):
-    # Setup: Create multiple parquet files using Polars for consistency
+def test_load_historical_data_success(tmp_path):
+
     df1 = pl.DataFrame({"id": [1], "val": ["a"]})
     df2 = pl.DataFrame({"id": [2], "val": ["b"]})
 
@@ -77,24 +158,41 @@ def test_load_historical_table_success(tmp_path):
     def logger(msg):
         log_messages.append(msg)
 
-    lf_total = load_historical_table(tmp_path, "table", log_info=logger)
+    lf_total = load_historical_data(tmp_path, "table", log_info=logger)
 
-    # load_historical_table returns a LazyFrame now
     assert isinstance(lf_total, pl.LazyFrame)
     df_collected = lf_total.collect()
     assert df_collected.height == 2
     assert set(df_collected["id"].to_list()) == {1, 2}
-    assert any(
-        "Scanned: table (2 files queued for lazy evaluation)" in msg
-        for msg in log_messages
-    )
 
 
-def test_load_historical_table_no_files(tmp_path):
+def test_load_historical_data_no_files(tmp_path):
     with pytest.raises(
         FileNotFoundError, match="No Parquet files found for table_missing"
     ):
-        load_historical_table(tmp_path, "table_missing")
+        load_historical_data(base_path=tmp_path, table_name="table_missing")
+
+
+# ------------------------------------------------------------
+# LOAD ASSEMBLED DATA
+# ------------------------------------------------------------
+
+
+def test_load_assembled_data_success(tmp_path):
+    table_name = "assembled_table"
+    df = pl.DataFrame({"a": [1]})
+    df.write_parquet(tmp_path / f"{table_name}_part1.parquet")
+
+    lf = load_assembled_data(tmp_path, table_name)
+    assert isinstance(lf, pl.LazyFrame)
+    assert lf.collect().height == 1
+
+
+def test_load_assembled_data_no_files(tmp_path):
+    with pytest.raises(
+        FileNotFoundError, match="No Parquet files found for missing_assembled"
+    ):
+        load_assembled_data(tmp_path, "missing_assembled")
 
 
 # ------------------------------------------------------------
@@ -117,7 +215,6 @@ def logger(msg):
     assert output_path.exists()
     assert any("Exported file: data.parquet (2 rows)" in msg for msg in log_messages)
 
-    # Verify content using Polars
     read_df = pl.read_parquet(output_path)
     assert read_df.equals(sample_pl_df)
 
@@ -140,13 +237,12 @@ def logger(msg):
         for msg in log_messages
     )
 
-    # Verify content
     read_df = pl.read_parquet(output_path)
     assert read_df.equals(sample_pl_df)
 
 
 def test_export_file_unsupported_type(tmp_path, sample_pl_df):
-    # export_file currently doesn't check extension but rather type of DF.
+
     error_messages = []
 
     def error_logger(msg):
diff --git a/tests/test_assembly_stage.py b/tests/test_assembly_stage.py
index b690b29..b291cbe 100644
--- a/tests/test_assembly_stage.py
+++ b/tests/test_assembly_stage.py
@@ -4,6 +4,7 @@
 
 import polars as pl
 import pytest
+from pathlib import Path
 from data_pipeline.shared.run_context import RunContext
 from data_pipeline.assembly.assembly_logic import log_info, log_error, init_report
 from data_pipeline.assembly.assembly_executor import (
@@ -26,7 +27,9 @@ def valid_orders_df():
     return pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
+            "order_id_int": [1, 2],
             "customer_id": ["cos1", "cos2"],
+            "customer_id_int": [101, 102],
             "order_status": ["delivered", "delivered"],
             "order_purchase_timestamp": [
                 "2023-01-02 09:00:00",
@@ -54,7 +57,11 @@ def valid_orders_df():
             pl.col("order_delivered_timestamp").str.strptime(
                 pl.Datetime, "%Y-%m-%d %H:%M:%S"
             ),
-            pl.col("order_estimated_delivery_date").str.strptime(pl.Date, "%Y-%m-%d"),
+            pl.col("order_estimated_delivery_date")
+            .str.strptime(pl.Date, "%Y-%m-%d")
+            .cast(pl.Datetime),
+            pl.col("order_id_int").cast(pl.UInt32),
+            pl.col("customer_id_int").cast(pl.UInt32),
         ]
     )
 
@@ -64,11 +71,20 @@ def valid_order_items_df():
     return pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
+            "order_id_int": [1, 2],
             "product_id": ["prod1", "prod2"],
+            "product_id_int": [201, 202],
             "seller_id": ["seller1", "seller2"],
+            "seller_id_int": [301, 302],
             "price": [12.3, 45.6],
             "shipping_charges": [1.23, 4.56],
         }
+    ).with_columns(
+        [
+            pl.col("order_id_int").cast(pl.UInt32),
+            pl.col("product_id_int").cast(pl.UInt32),
+            pl.col("seller_id_int").cast(pl.UInt32),
+        ]
     )
 
 
@@ -77,12 +93,13 @@ def valid_payments_df():
     return pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
+            "order_id_int": [1, 2],
             "payment_sequential": [1, 2],
             "payment_type": ["credit", "cash"],
             "payment_installments": [4, 5],
             "payment_value": [100.1, 50.2],
         }
-    )
+    ).with_columns([pl.col("order_id_int").cast(pl.UInt32)])
 
 
 @pytest.fixture
@@ -90,6 +107,7 @@ def valid_customers_df():
     return pl.DataFrame(
         {
             "customer_id": ["cos1", "cos2"],
+            "customer_id_int": [101, 102],
             "customer_state": ["SP", "RJ"],
             "customer_city": ["Sao Paulo", "Rio"],
             "customer_segment": ["A", "B"],
@@ -98,6 +116,7 @@ def valid_customers_df():
     ).with_columns(
         [
             pl.col("account_creation_date").str.strptime(pl.Datetime, "%Y-%m-%d"),
+            pl.col("customer_id_int").cast(pl.UInt32),
         ]
     )
 
@@ -107,6 +126,7 @@ def valid_products_df():
     return pl.DataFrame(
         {
             "product_id": ["prod1", "prod2"],
+            "product_id_int": [201, 202],
             "product_category_name": ["tech", "home"],
             "product_weight_g": [100.0, 500.0],
             "product_length_cm": [10.0, 20.0],
@@ -115,7 +135,7 @@ def valid_products_df():
             "product_fragility_index": ["Low", "High"],
             "supplier_tier": ["Gold", "Silver"],
         }
-    )
+    ).with_columns([pl.col("product_id_int").cast(pl.UInt32)])
 
 
 @pytest.fixture
@@ -123,10 +143,14 @@ def valid_derived_df():
     df = pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
+            "order_id_int": [1, 2],
             "seller_id": ["seller1", "seller2"],
+            "seller_id_int": [301, 302],
             "customer_id": ["cos1", "cos2"],
+            "customer_id_int": [101, 102],
             "order_revenue": [100.1, 50.2],
             "product_id": ["prod1", "prod2"],
+            "product_id_int": [201, 202],
             "order_status": ["delivered", "delivered"],
             "order_purchase_timestamp": [
                 "2023-01-02 09:00:00",
@@ -163,7 +187,11 @@ def valid_derived_df():
                 pl.Datetime, "%Y-%m-%d %H:%M:%S"
             ),
             pl.col("order_estimated_delivery_date").str.strptime(pl.Date, "%Y-%m-%d"),
-            pl.col("order_date").str.strptime(pl.Date, "%Y-%m-%d"),
+            pl.col("order_date").str.strptime(pl.Date, "%Y-%m-%d").cast(pl.Datetime),
+            pl.col("order_id_int").cast(pl.UInt32),
+            pl.col("customer_id_int").cast(pl.UInt32),
+            pl.col("product_id_int").cast(pl.UInt32),
+            pl.col("seller_id_int").cast(pl.UInt32),
         ]
     )
     return df
@@ -213,7 +241,7 @@ def test_merge_data_preserve_grain(
         result = result.collect()
 
     assert result.height == 2
-    assert result.select(pl.col("order_id").is_duplicated().any()).item() == False
+    assert result.select(pl.col("order_id_int").is_duplicated().any()).item() == False
     assert "order_revenue" in result.columns
 
 
@@ -226,22 +254,28 @@ def test_merge_data_aggregates_duplicates(
         [valid_order_items_df, valid_order_items_df.slice(0, 1)]
     )
 
-    assert duplicated_items_df["order_id"][0] == duplicated_items_df["order_id"][2]
+    assert (
+        duplicated_items_df["order_id_int"][0] == duplicated_items_df["order_id_int"][2]
+    )
 
     result = merge_data(
         {
             "df_orders": valid_orders_df,
             "df_order_items": duplicated_items_df,
             "df_payments": pl.DataFrame(
-                {"order_id": ["o1", "o2"], "payment_value": [10.0, 20.0]}
-            ),
+                {
+                    "order_id": ["o1", "o2"],
+                    "order_id_int": [1, 2],
+                    "payment_value": [10.0, 20.0],
+                }
+            ).with_columns([pl.col("order_id_int").cast(pl.UInt32)]),
         }
     )
     if isinstance(result, pl.LazyFrame):
         result = result.collect()
 
     assert result.height == 2
-    assert result.select(pl.col("order_id").is_duplicated().any()).item() == False
+    assert result.select(pl.col("order_id_int").is_duplicated().any()).item() == False
 
 
 # =============================================================================
@@ -277,7 +311,7 @@ def test_freeze_schema_enforces_strict_schema_success(valid_derived_df):
 
 
 def test_freeze_schema_fails_on_missing_column(valid_derived_df):
-    missing_required_column = valid_derived_df.drop("seller_id")
+    missing_required_column = valid_derived_df.drop("seller_id_int")
     with pytest.raises(RuntimeError, match="missing required columns"):
         result = freeze_schema(missing_required_column)
         if isinstance(result, pl.LazyFrame):
@@ -298,18 +332,20 @@ def test_assemble_data_success(
     valid_products_df,
 ):
     run_id = "20230101T120000"
-    run_context = RunContext.create(base=tmp_path, run_id=run_id)
+    run_context = RunContext.create(
+        base=tmp_path, run_id=run_id, storage=tmp_path / "storage"
+    )
     run_context.initialize_directories()
+    storage_contracted_path = Path(run_context.storage_contracted_path)
+    storage_contracted_path.mkdir(parents=True, exist_ok=True)
 
-    valid_orders_df.write_parquet(run_context.contracted_path / "df_orders.parquet")
+    valid_orders_df.write_parquet(storage_contracted_path / "df_orders.parquet")
     valid_order_items_df.write_parquet(
-        run_context.contracted_path / "df_order_items.parquet"
+        storage_contracted_path / "df_order_items.parquet"
     )
-    valid_payments_df.write_parquet(run_context.contracted_path / "df_payments.parquet")
-    valid_customers_df.write_parquet(
-        run_context.contracted_path / "df_customers.parquet"
-    )
-    valid_products_df.write_parquet(run_context.contracted_path / "df_products.parquet")
+    valid_payments_df.write_parquet(storage_contracted_path / "df_payments.parquet")
+    valid_customers_df.write_parquet(storage_contracted_path / "df_customers.parquet")
+    valid_products_df.write_parquet(storage_contracted_path / "df_products.parquet")
 
     report = assemble_events(run_context)
 
@@ -327,16 +363,20 @@ def test_assemble_data_fails_on_missing_column(
     valid_payments_df,
 ):
     run_id = "20230101T120000"
-    run_context = RunContext.create(base=tmp_path, run_id=run_id)
+    run_context = RunContext.create(
+        base=tmp_path, run_id=run_id, storage=tmp_path / "storage"
+    )
     run_context.initialize_directories()
+    storage_contracted_path = Path(run_context.storage_contracted_path)
+    storage_contracted_path.mkdir(parents=True, exist_ok=True)
 
-    invalid_order_items_df = valid_order_items_df.drop("seller_id")
+    invalid_order_items_df = valid_order_items_df.drop("seller_id_int")
 
-    valid_orders_df.write_parquet(run_context.contracted_path / "df_orders.parquet")
+    valid_orders_df.write_parquet(storage_contracted_path / "df_orders.parquet")
     invalid_order_items_df.write_parquet(
-        run_context.contracted_path / "df_order_items.parquet"
+        storage_contracted_path / "df_order_items.parquet"
     )
-    valid_payments_df.write_parquet(run_context.contracted_path / "df_payments.parquet")
+    valid_payments_df.write_parquet(storage_contracted_path / "df_payments.parquet")
 
     report = assemble_events(run_context)
 
@@ -344,7 +384,7 @@ def test_assemble_data_fails_on_missing_column(
     assert report["assembled_events"]["freeze_schema"] == False
     assert any(
         "missing required columns" in error
-        or 'unable to find column "seller_id"' in error
+        or 'unable to find column "seller_id_int"' in error
         for error in report["errors"]
     )
 
@@ -356,15 +396,16 @@ def test_assemble_data_fails_on_missing_column(
 
 def test_dimension_references_uniqueness():
     df = pl.DataFrame({"id": ["1", "1", "2"], "val": ["a", "a", "b"]})
+    df_dtypes = {"id": pl.String, "val": pl.String}
 
-    result = dimension_references(df.lazy(), ["id"], ["id", "val"])
+    result = dimension_references(df.lazy(), ["id"], ["id", "val"], df_dtypes)
     if isinstance(result, pl.LazyFrame):
         result = result.collect()
     assert result.height == 2
 
     df_conflict = pl.DataFrame({"id": ["1", "1"], "val": ["a", "b"]})
 
-    result = dimension_references(df_conflict.lazy(), ["id"], ["id", "val"])
+    result = dimension_references(df_conflict.lazy(), ["id"], ["id", "val"], df_dtypes)
     if isinstance(result, pl.LazyFrame):
         result = result.collect()
     assert result.height == 1
diff --git a/tests/test_contract_stage.py b/tests/test_contract_stage.py
index ddcb53f..5243972 100644
--- a/tests/test_contract_stage.py
+++ b/tests/test_contract_stage.py
@@ -2,7 +2,7 @@
 # UNIT TESTS FOR contract_logic.py and contract_executor.py
 # =============================================================================
 
-import pandas as pd
+import polars as pl
 import pytest
 from data_pipeline.shared.run_context import RunContext
 from data_pipeline.contract.contract_executor import apply_contract
@@ -15,6 +15,12 @@
     enforce_parent_reference,
     enforce_schema,
 )
+from data_pipeline.contract.id_registrar import (
+    discover_uuids,
+    lookup_mapping_storage,
+    generate_and_persist_delta,
+    extract_entity_mappings,
+)
 
 # ------------------------------------------------------------
 # FIXTURES
@@ -23,7 +29,7 @@
 
 @pytest.fixture
 def sample_orders_df():
-    return pd.DataFrame(
+    return pl.DataFrame(
         {
             "order_id": ["o1", "o2", "o3"],
             "customer_id": ["c1", "c2", "c3"],
@@ -50,7 +56,7 @@ def sample_orders_df():
 
 @pytest.fixture
 def sample_payments_df():
-    return pd.DataFrame(
+    return pl.DataFrame(
         {
             "order_id": ["o1", "o2", "o3"],
             "payment_sequential": [1, 1, 1],
@@ -65,14 +71,14 @@ def sample_payments_df():
 
 
 def test_deduplicate_exact_events():
-    df = pd.DataFrame({"a": [1, 1, 2], "b": [2, 2, 3]})
+    df = pl.DataFrame({"a": [1, 1, 2], "b": [2, 2, 3]})
     filtered, removed = deduplicate_exact_events(df)
     assert len(filtered) == 2
     assert removed == 1
 
 
 def test_remove_unparsable_timestamps():
-    df = pd.DataFrame(
+    df = pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
             "order_purchase_timestamp": ["2026-01-01 10:00:00", "garbage"],
@@ -89,7 +95,7 @@ def test_remove_unparsable_timestamps():
 
 def test_remove_impossible_timestamps():
     # Delivered before purchase
-    df = pd.DataFrame(
+    df = pl.DataFrame(
         {
             "order_id": ["o1"],
             "order_purchase_timestamp": ["2026-03-25 10:00:00"],
@@ -104,29 +110,29 @@ def test_remove_impossible_timestamps():
 
 
 def test_cascade_drop_by_order_id():
-    df = pd.DataFrame({"order_id": ["o1", "o2", "o3"]})
+    df = pl.DataFrame({"order_id": ["o1", "o2", "o3"]})
     invalid = {"o1", "o3"}
 
     filtered, removed = cascade_drop_by_order_id(df, invalid)
 
     assert len(filtered) == 1
     assert removed == 2
-    assert filtered.iloc[0]["order_id"] == "o2"
+    assert filtered[0, "order_id"] == "o2"
 
 
 def test_enforce_parent_reference():
-    df = pd.DataFrame({"order_id": ["o1", "o2", "ghost"]})
+    df = pl.DataFrame({"order_id": ["o1", "o2", "ghost"]})
     valid = {"o1", "o2"}
 
     filtered, removed = enforce_parent_reference(df, valid)
 
     assert len(filtered) == 2
     assert removed == 1
-    assert "ghost" not in filtered["order_id"].values
+    assert "ghost" not in filtered["order_id"].to_list()
 
 
 def test_remove_rows_with_null_constraint():
-    df = pd.DataFrame({"order_id": ["o1", "o2", None, "o4"]})
+    df = pl.DataFrame({"order_id": ["o1", "o2", None, "o4"]})
     non_nullable = ["order_id"]
 
     filtered, removed, invalid_ids = remove_rows_with_null_constraint(df, non_nullable)
@@ -137,7 +143,7 @@ def test_remove_rows_with_null_constraint():
 
 
 def test_enforce_schema():
-    df = pd.DataFrame(
+    df = pl.DataFrame(
         {
             "order_id": ["o1", "o2", "o3"],
             "customer_id": ["c1", "c2", "c3"],
@@ -146,14 +152,117 @@ def test_enforce_schema():
         }
     )
     req_col = ["order_id", "customer_id", "state"]
-    dtype = {"order_id": "string", "customer_id": "string", "state": "category"}
+    dtype = {"order_id": pl.String, "customer_id": pl.String, "state": pl.Categorical}
 
     filtered, removed = enforce_schema(df, req_col, dtype)
 
     assert len(filtered) == 3
     assert removed == 1
-    assert isinstance(filtered["order_id"].dtype, pd.StringDtype)
-    assert isinstance(filtered["state"].dtype, pd.CategoricalDtype)
+    assert filtered["order_id"].dtype == pl.String
+    assert filtered["state"].dtype == pl.Categorical
+
+
+# ------------------------------------------------------------
+# ID REGISTRAR UNIT TESTS
+# ------------------------------------------------------------
+
+
+def test_discover_uuids_mixed_formats(tmp_path):
+    # Setup raw files (CSV and Parquet)
+    raw_path = tmp_path / "raw"
+    raw_path.mkdir()
+
+    # Table 1: Parquet
+    df1 = pl.DataFrame({"order_id": ["o1", "o2"]})
+    df1.write_parquet(raw_path / "df_orders_2026_04_01.parquet")
+
+    # Table 2: CSV
+    df2 = pl.DataFrame({"order_id": ["o2", "o3"]})
+    df2.write_csv(raw_path / "df_order_items_2026_04_01.csv")
+
+    tables = ["df_orders", "df_order_items"]
+    uuids = discover_uuids(raw_path, tables, "order_id")
+
+    assert uuids.len() == 3
+    assert set(uuids.to_list()) == {"o1", "o2", "o3"}
+
+
+def test_lookup_mapping_storage_uniqueness(tmp_path):
+    storage_dir = tmp_path / "storage" / "order_id"
+    storage_dir.mkdir(parents=True)
+
+    # Create history with a duplicate UUID across different deltas
+    pl.DataFrame({"order_id": ["o1"], "order_id_int": [1]}).write_parquet(
+        storage_dir / "d1.parquet"
+    )
+    pl.DataFrame({"order_id": ["o1"], "order_id_int": [1]}).write_parquet(
+        storage_dir / "d2.parquet"
+    )
+
+    storage_glob = str(storage_dir / "*.parquet")
+    batch_uuids = pl.Series("order_id", ["o1"])
+
+    known_df, max_id = lookup_mapping_storage(storage_glob, "order_id", batch_uuids)
+
+    assert known_df.height == 1  # Uniqueness check
+    assert max_id == 1
+
+
+def test_generate_and_persist_delta(tmp_path):
+    runtime_dir = tmp_path / "runtime"
+    missing = pl.Series("order_id", ["o10", "o11"])
+
+    new_df = generate_and_persist_delta(missing, 5, "order_id", runtime_dir, "run123")
+
+    assert new_df.height == 2
+    assert new_df["order_id_int"].to_list() == [6, 7]
+    assert (runtime_dir / "order_id" / "map_run123.parquet").exists()
+
+
+def test_extract_entity_mappings_orchestration(tmp_path, monkeypatch):
+    run_context = RunContext.create(base=tmp_path, storage=tmp_path / "storage")
+    run_context.initialize_directories()
+
+    # Mock all raw data files with required columns to satisfy ID_ENTITY_MAP
+    raw_path = run_context.raw_snapshot_path
+
+    pl.DataFrame({"order_id": ["o1"], "customer_id": ["c1"]}).write_parquet(
+        raw_path / "df_orders_2026.parquet"
+    )
+
+    pl.DataFrame(
+        {"order_id": ["o1"], "product_id": ["p1"], "seller_id": ["s1"]}
+    ).write_parquet(raw_path / "df_order_items_2026.parquet")
+
+    pl.DataFrame({"customer_id": ["c1"]}).write_parquet(
+        raw_path / "df_customers_2026.parquet"
+    )
+
+    pl.DataFrame({"order_id": ["o1"]}).write_parquet(
+        raw_path / "df_payments_2026.parquet"
+    )
+
+    # Write a products file as well, to be safe, although it is not strictly a source in ID_ENTITY_MAP
+    pl.DataFrame({"product_id": ["p1"]}).write_parquet(
+        raw_path / "df_products_2026.parquet"
+    )
+
+    # Mock promote to avoid GCS errors in local test
+    monkeypatch.setattr(
+        "data_pipeline.contract.id_registrar.promote_new_mapping_files", lambda *_: None
+    )
+
+    mappings = extract_entity_mappings(run_context)
+
+    assert "order_id" in mappings
+    assert "customer_id" in mappings
+    assert "product_id" in mappings
+    assert "seller_id" in mappings
+
+    # Spot-check one mapping: the first order ID should be assigned integer 1
+    result = mappings["order_id"].collect()
+    assert "order_id_int" in result.columns
+    assert result[0, "order_id_int"] == 1
 
 
 # ------------------------------------------------------------
@@ -162,66 +271,98 @@ def test_enforce_schema():
 
 
 def test_apply_contract_orders_success(tmp_path, sample_orders_df):
-    run_context = RunContext.create(base=tmp_path)
+    run_context = RunContext.create(base=tmp_path, storage=tmp_path / "storage")
     run_context.initialize_directories()
 
     suffix = "2026_03_25"
-    sample_orders_df.to_csv(
-        run_context.raw_snapshot_path / f"df_orders_{suffix}.csv", index=False
+    sample_orders_df.write_csv(
+        run_context.raw_snapshot_path / f"df_orders_{suffix}.csv"
     )
 
-    # New 3-tuple return signature
-    report, inv_ids, val_ids = apply_contract(run_context, "df_orders")
+    # Mock Discovery Mappings
+    master_mappings = {
+        "order_id": pl.DataFrame(
+            {"order_id": ["o1", "o2", "o3"], "order_id_int": [1, 2, 3]}
+        ).lazy(),
+        "customer_id": pl.DataFrame(
+            {"customer_id": ["c1", "c2", "c3"], "customer_id_int": [1, 2, 3]}
+        ).lazy(),
+    }
+
+    report, inv_ids, val_ids = apply_contract(
+        run_context, "df_orders", master_mappings=master_mappings
+    )
 
     assert report["status"] == "success"
     assert report["final_rows"] == 3
     assert len(val_ids) == 3
-    assert not inv_ids
-    assert (run_context.contracted_path / f"df_orders_{suffix}.parquet").exists()
+
+    # Check that integer columns are present
+    df_result = pl.read_parquet(
+        run_context.contracted_path / f"df_orders_{suffix}.parquet"
+    )
+    assert "order_id_int" in df_result.columns
+    assert "customer_id_int" in df_result.columns
 
 
 def test_apply_contract_cascade_and_valid_propagation(
     tmp_path, sample_orders_df, sample_payments_df
 ):
-    run_context = RunContext.create(base=tmp_path)
+    run_context = RunContext.create(base=tmp_path, storage=tmp_path / "storage")
     run_context.initialize_directories()
 
-    # o1: valid, o2: unparsable, o3: impossible
-    sample_orders_df.loc[1, "order_purchase_timestamp"] = "garbage"
-    sample_orders_df.loc[2, "order_delivered_timestamp"] = "2026-01-01 00:00:00"
+    sample_orders_df = sample_orders_df.with_columns(
+        pl.when(pl.col("order_id") == "o2")
+        .then(pl.lit("garbage"))
+        .otherwise(pl.col("order_purchase_timestamp"))
+        .alias("order_purchase_timestamp"),
+        pl.when(pl.col("order_id") == "o3")
+        .then(pl.lit("2026-01-01 00:00:00"))
+        .otherwise(pl.col("order_delivered_timestamp"))
+        .alias("order_delivered_timestamp"),
+    )
 
     suffix = "2026_03_25"
-    sample_orders_df.to_csv(
-        run_context.raw_snapshot_path / f"df_orders_{suffix}.csv", index=False
+    sample_orders_df.write_csv(
+        run_context.raw_snapshot_path / f"df_orders_{suffix}.csv"
     )
-    sample_payments_df.to_csv(
-        run_context.raw_snapshot_path / f"df_payments_{suffix}.csv", index=False
+    sample_payments_df.write_csv(
+        run_context.raw_snapshot_path / f"df_payments_{suffix}.csv"
     )
 
-    # 1. Process Orders
-    rep_o, inv_o, val_o = apply_contract(run_context, "df_orders")
+    # Mock Discovery Mappings
+    master_mappings = {
+        "order_id": pl.DataFrame(
+            {"order_id": ["o1", "o2", "o3"], "order_id_int": [1, 2, 3]}
+        ).lazy(),
+        "customer_id": pl.DataFrame(
+            {"customer_id": ["c1", "c2", "c3"], "customer_id_int": [1, 2, 3]}
+        ).lazy(),
+    }
+
+    rep_o, inv_o, val_o = apply_contract(
+        run_context, "df_orders", master_mappings=master_mappings
+    )
     assert "o2" in inv_o  # unparsable
     assert "o3" in inv_o  # impossible
     assert "o1" in val_o  # only one valid
 
-    # 2. Process Payments (should cascade drop o2, o3 and only keep o1)
     rep_p, inv_p, val_p = apply_contract(
-        run_context, "df_payments", invalid_order_ids=inv_o, valid_order_ids=val_o
+        run_context,
+        "df_payments",
+        master_mappings=master_mappings,
+        invalid_order_ids=inv_o,
+        valid_order_ids=val_o,
     )
 
     assert rep_p["removed_cascade_rows"] == 2  # o2 and o3 dropped
     assert rep_p["final_rows"] == 1
-    assert "o1" in set(
-        pd.read_parquet(run_context.contracted_path / f"df_payments_{suffix}.parquet")[
-            "order_id"
-        ]
-    )
 
 
 def test_apply_contract_unknown_table(tmp_path):
     run_context = RunContext.create(base=tmp_path)
     run_context.initialize_directories()
 
-    report, inv, val = apply_contract(run_context, "non_existent")
+    report, inv, val = apply_contract(run_context, "non_existent", master_mappings={})
     assert report["status"] == "failed"
     assert "Unknown table" in report["errors"][0]
diff --git a/tests/test_publish_stage.py b/tests/test_publish_stage.py
index d775f7d..82f5bfd 100644
--- a/tests/test_publish_stage.py
+++ b/tests/test_publish_stage.py
@@ -5,7 +5,9 @@
 import polars as pl
 import pytest
 import json
+import os
 from pathlib import Path
+from unittest.mock import MagicMock, patch
 
 from data_pipeline.shared.run_context import RunContext
 from data_pipeline.semantic.registry import SEMANTIC_MODULES
@@ -17,6 +19,7 @@
     run_integrity_gate,
     promote_semantic_version,
     activate_published_version,
+    swap_bigquery_view,
 )
 from data_pipeline.shared.modeling_configs import (
     SELLER_FACT_SCHEMA,
@@ -100,7 +103,6 @@ def setup_semantic_files(run_context, df_map):
         for table_name in module["tables"]:
             df = df_map[table_name]
             filename = f"{table_name}_{year}_{month}_{day}.parquet"
-            # df is now pl.DataFrame
             df.write_parquet(module_path / filename)
 
 
@@ -135,7 +137,7 @@ def test_run_integrity_gate_fails_on_missing_directory(tmp_path):
     run_context = RunContext.create(
         base=tmp_path, storage=tmp_path, run_id="20230101T120000"
     )
-    # Don't initialize directories or setup files
+    # Directories are deliberately left uninitialized so the gate must fail
     report = run_integrity_gate(run_context)
     assert report["status"] == "failed"
     assert "Semantic directory is missing" in report["errors"]
@@ -150,7 +152,7 @@ def test_run_integrity_gate_fails_on_semantic_file_mismatch(
     )
     run_context.initialize_directories()
 
-    # Only setup one module but incomplete
+    # Set up a single, incomplete module so the gate reports a mismatch
     module_path = run_context.semantic_path / "seller_semantic"
     module_path.mkdir(parents=True, exist_ok=True)
     valid_seller_fact.write_parquet(
@@ -159,36 +161,7 @@ def test_run_integrity_gate_fails_on_semantic_file_mismatch(
 
     report = run_integrity_gate(run_context)
     assert report["status"] == "failed"
-
-
-def test_run_integrity_gate_fails_on_empty_dataframe(
-    tmp_path,
-    valid_seller_fact,
-    valid_seller_dim,
-    valid_customer_fact,
-    valid_customer_dim,
-    valid_product_fact,
-    valid_product_dim,
-):
-    run_context = RunContext.create(
-        base=tmp_path, storage=tmp_path, run_id="20230101T120000"
-    )
-    run_context.initialize_directories()
-
-    df_map = {
-        "seller_weekly_fact": pl.DataFrame(),  # Empty
-        "seller_dim": valid_seller_dim,
-        "customer_weekly_fact": valid_customer_fact,
-        "customer_dim": valid_customer_dim,
-        "product_weekly_fact": valid_product_fact,
-        "product_dim": valid_product_dim,
-    }
-
-    setup_semantic_files(run_context, df_map)
-    report = run_integrity_gate(run_context)
-    assert report["status"] == "failed"
-    # Current implementation fails on missing columns if dataframe is empty
-    assert any("required column(s)" in error for error in report["errors"])
+    assert "Semantic module mismatch" in report["errors"]
 
 
 def test_run_integrity_gate_fails_on_missing_columns(
@@ -205,7 +178,7 @@ def test_run_integrity_gate_fails_on_missing_columns(
     )
     run_context.initialize_directories()
 
-    # Drop a column using Polars
+    # Drop a column
     df_map = {
         "seller_weekly_fact": valid_seller_fact.drop(valid_seller_fact.columns[0]),
         "seller_dim": valid_seller_dim,
@@ -232,6 +205,7 @@ def test_promote_semantic_version_success(tmp_path):
     )
     run_context.initialize_directories()
 
+    # Local promotion uses shutil.copytree
     run_context.semantic_path.mkdir(parents=True, exist_ok=True)
 
     report = promote_semantic_version(run_context)
@@ -251,12 +225,51 @@ def test_promote_semantic_version_fails_on_existing_version_directory(tmp_path):
     assert "Version directory already exists" in report["errors"]
 
 
+# ------------------------------------------------------------
+# BIGQUERY VIEW SWAP
+# ------------------------------------------------------------
+
+
+def test_swap_bigquery_view_local_skip(tmp_path):
+    run_context = RunContext.create(
+        base=tmp_path, storage=tmp_path, run_id="20230101T120000"
+    )
+    report = swap_bigquery_view(run_context)  # storage is a local tmp_path here
+    assert report["status"] == "success"
+    assert any("Skipping BigQuery swap" in info for info in report["info"])
+
+
+def test_swap_bigquery_view_gcs_success():
+    run_id = "20230101T120000"
+    storage_path = "gs://test-bucket/pipeline"
+    run_context = RunContext.create(
+        base=Path("/tmp"), storage=storage_path, run_id=run_id
+    )
+    # MagicMock stand-in returned by the patched bigquery.Client below
+    mock_client = MagicMock()
+    mock_client.project = "test-project"
+
+    with patch("google.cloud.bigquery.Client", return_value=mock_client), patch.dict(
+        os.environ, {"GCP_REGION": "us-east1"}
+    ):
+        report = swap_bigquery_view(run_context)
+
+        assert report["status"] == "success"
+        # 3 modules x 2 tables = 6 external-table DDLs + 6 view DDLs = 12 queries
+        assert mock_client.query.call_count == 12
+
+        # Inspect the first DDL issued: it must reference this run's version
+        first_call_ddl = mock_client.query.call_args_list[0][0][0]
+        assert "CREATE OR REPLACE EXTERNAL TABLE" in first_call_ddl
+        assert f"v{run_id}" in first_call_ddl
+
+
 # ------------------------------------------------------------
 # ACTIVATE VERSION
 # ------------------------------------------------------------
 
 
-def test_activate_published_version_success(tmp_path):
+def test_activate_published_version_success_local(tmp_path):
     run_context = RunContext.create(
         base=tmp_path, storage=tmp_path, run_id="20230101T120000"
     )
@@ -269,6 +282,29 @@ def test_activate_published_version_success(tmp_path):
     with open(run_context.latest_pointer_path, "r") as f:
         data = json.load(f)
         assert data["run_id"] == "20230101T120000"
+        assert "published_at" in data
+
+
+def test_activate_published_version_success_gcs():
+    storage_path = "gs://test-bucket/pipeline"
+    run_context = RunContext.create(
+        base=Path("/tmp"), storage=storage_path, run_id="20230101T120000"
+    )
+    # Wire the mocked chain: storage client -> bucket -> blob
+    mock_storage_client = MagicMock()
+    mock_bucket = MagicMock()
+    mock_blob = MagicMock()
+    mock_storage_client.bucket.return_value = mock_bucket
+    mock_bucket.blob.return_value = mock_blob
+
+    with patch("google.cloud.storage.Client", return_value=mock_storage_client):
+        report = activate_published_version(run_context)
+        assert report["status"] == "success"
+        mock_blob.upload_from_string.assert_called_once()
+        # The first positional argument of the upload is parsed as the JSON payload
+        call_args = mock_blob.upload_from_string.call_args
+        payload = json.loads(call_args[0][0])
+        assert payload["run_id"] == "20230101T120000"
 
 
 # ------------------------------------------------------------
@@ -276,7 +312,7 @@ def test_activate_published_version_success(tmp_path):
 # ------------------------------------------------------------
 
 
-def test_execute_publish_lifecycle_success(
+def test_execute_publish_lifecycle_success_local(
     tmp_path,
     valid_seller_fact,
     valid_seller_dim,
@@ -301,6 +337,7 @@ def test_execute_publish_lifecycle_success(
 
     setup_semantic_files(run_context, df_map)
 
+    # In local mode, swap_bigquery_view skips the BigQuery swap
     report = execute_publish_lifecycle(run_context)
     assert report["status"] == "success"
     assert Path(run_context.version_path).exists()
diff --git a/tests/test_run_pipeline.py b/tests/test_run_pipeline.py
index aa07665..cf8464f 100644
--- a/tests/test_run_pipeline.py
+++ b/tests/test_run_pipeline.py
@@ -185,13 +185,10 @@ def test_main_fails_on_assemble_events(monkeypatch, tmp_path):
         lambda *a, **k: ({}, set(), set()),
     )
 
-    # Mocking upload/download contracted directory to avoid real I/O
+    # Mock the contracted-directory upload to avoid real I/O
     monkeypatch.setattr(
         "data_pipeline.run_pipeline.upload_contracted_directory", lambda *_: None
     )
-    monkeypatch.setattr(
-        "data_pipeline.run_pipeline.download_contracted_datasets", lambda *_: None
-    )
 
     monkeypatch.setattr(
         "data_pipeline.run_pipeline.assemble_events",
@@ -258,13 +255,10 @@ def test_main_fails_on_build_semantic_layer(monkeypatch, tmp_path):
         lambda *a, **k: ({}, set(), set()),
     )
 
-    # Mocking upload/download contracted directory to avoid real I/O
+    # Mock the contracted-directory upload to avoid real I/O
     monkeypatch.setattr(
         "data_pipeline.run_pipeline.upload_contracted_directory", lambda *_: None
     )
-    monkeypatch.setattr(
-        "data_pipeline.run_pipeline.download_contracted_datasets", lambda *_: None
-    )
 
     monkeypatch.setattr(
         "data_pipeline.run_pipeline.assemble_events",
@@ -341,13 +335,10 @@ def test_main_fails_on_execute_publish_lifecycle(monkeypatch, tmp_path):
         lambda *a, **k: ({}, set(), set()),
     )
 
-    # Mocking upload/download contracted directory to avoid real I/O
+    # Mock the contracted-directory upload to avoid real I/O
     monkeypatch.setattr(
         "data_pipeline.run_pipeline.upload_contracted_directory", lambda *_: None
     )
-    monkeypatch.setattr(
-        "data_pipeline.run_pipeline.download_contracted_datasets", lambda *_: None
-    )
 
     monkeypatch.setattr(
         "data_pipeline.run_pipeline.assemble_events",
@@ -434,13 +425,10 @@ def test_main_success(monkeypatch, tmp_path):
         lambda *a, **k: ({}, set(), set()),
     )
 
-    # Mocking upload/download contracted directory to avoid real I/O
+    # Mock the contracted-directory upload to avoid real I/O
     monkeypatch.setattr(
         "data_pipeline.run_pipeline.upload_contracted_directory", lambda *_: None
     )
-    monkeypatch.setattr(
-        "data_pipeline.run_pipeline.download_contracted_datasets", lambda *_: None
-    )
 
     monkeypatch.setattr(
         "data_pipeline.run_pipeline.assemble_events",
diff --git a/tests/test_semantic_stage.py b/tests/test_semantic_stage.py
index 9ba084c..6e93f37 100644
--- a/tests/test_semantic_stage.py
+++ b/tests/test_semantic_stage.py
@@ -27,6 +27,7 @@ def valid_customers_df():
     return pl.DataFrame(
         {
             "customer_id": ["cos1", "cos2"],
+            "customer_id_int": [101, 102],
             "customer_state": ["SP", "RJ"],
             "customer_city": ["Sao Paulo", "Rio"],
             "customer_segment": ["A", "B"],
@@ -35,6 +36,7 @@ def valid_customers_df():
     ).with_columns(
         [
             pl.col("account_creation_date").str.strptime(pl.Datetime, "%Y-%m-%d"),
+            pl.col("customer_id_int").cast(pl.UInt32),
         ]
     )
 
@@ -44,6 +46,7 @@ def valid_products_df():
     return pl.DataFrame(
         {
             "product_id": ["prod1", "prod2"],
+            "product_id_int": [201, 202],
             "product_category_name": ["tech", "home"],
             "product_weight_g": [100.0, 500.0],
             "product_length_cm": [10.0, 20.0],
@@ -52,7 +55,7 @@ def valid_products_df():
             "product_fragility_index": ["Low", "High"],
             "supplier_tier": ["Gold", "Silver"],
         }
-    )
+    ).with_columns([pl.col("product_id_int").cast(pl.UInt32)])
 
 
 @pytest.fixture
@@ -60,10 +63,14 @@ def valid_assembled_df():
     df = pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
+            "order_id_int": [1, 2],
             "seller_id": ["seller1", "seller2"],
+            "seller_id_int": [301, 302],
             "customer_id": ["cos1", "cos2"],
+            "customer_id_int": [101, 102],
             "order_revenue": [12.34, 56.78],
             "product_id": ["prod1", "prod2"],
+            "product_id_int": [201, 202],
             "order_status": ["delivered", "delivered"],
             "order_purchase_timestamp": [
                 "2023-01-02 09:00:00",
@@ -115,6 +122,10 @@ def valid_assembled_df():
             pl.col("order_year").cast(pl.Int16),
             pl.col("order_revenue").cast(pl.Float32),
             pl.col("run_id").cast(pl.Categorical),
+            pl.col("order_id_int").cast(pl.UInt32),
+            pl.col("seller_id_int").cast(pl.UInt32),
+            pl.col("customer_id_int").cast(pl.UInt32),
+            pl.col("product_id_int").cast(pl.UInt32),
         ]
     )
     return df
@@ -151,7 +162,7 @@ def test_seller_semantic_model_grain_preserved_success(tmp_path, valid_assembled
     seller_semantic = build_seller_semantic(valid_assembled_df.lazy(), run_context)
 
     expected_fact_len = (
-        valid_assembled_df.select(["seller_id", "order_year_week"]).unique().height
+        valid_assembled_df.select(["seller_id_int", "order_year_week"]).unique().height
     )
 
     fact_df = seller_semantic["seller_weekly_fact"]
@@ -162,7 +173,7 @@ def test_seller_semantic_model_grain_preserved_success(tmp_path, valid_assembled
     dim_df = seller_semantic["seller_dim"]
     if isinstance(dim_df, pl.LazyFrame):
         dim_df = dim_df.collect()
-    expected_dim_len = valid_assembled_df["seller_id"].n_unique()
+    expected_dim_len = valid_assembled_df["seller_id_int"].n_unique()
     assert dim_df.height == expected_dim_len
 
 
diff --git a/tests/test_validation_stage.py b/tests/test_validation_stage.py
index fae8bcb..b7c4dd6 100644
--- a/tests/test_validation_stage.py
+++ b/tests/test_validation_stage.py
@@ -2,7 +2,7 @@
 # UNIT TESTS FOR validation_logic.py and validation_executor.py
 # =============================================================================
 
-import pandas as pd
+import polars as pl
 import pytest
 from data_pipeline.shared.run_context import RunContext
 from data_pipeline.validation.validation_executor import apply_validation
@@ -29,7 +29,7 @@ def empty_report():
 
 @pytest.fixture
 def valid_orders_df():
-    return pd.DataFrame(
+    return pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
             "customer_id": ["c1", "c2"],
@@ -44,7 +44,7 @@ def valid_orders_df():
 
 @pytest.fixture
 def valid_order_items_df():
-    return pd.DataFrame(
+    return pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
             "product_id": ["p1", "p2"],
@@ -56,7 +56,7 @@ def valid_order_items_df():
 
 @pytest.fixture
 def valid_payments_df():
-    return pd.DataFrame(
+    return pl.DataFrame(
         {
             "order_id": ["o1", "o2"],
             "payment_sequential": [1, 1],
@@ -67,7 +67,7 @@ def valid_payments_df():
 
 @pytest.fixture
 def valid_customers_df():
-    return pd.DataFrame(
+    return pl.DataFrame(
         {
             "customer_id": ["c1", "c2"],
             "customer_state": ["SP", "RJ"],
@@ -80,7 +80,7 @@ def valid_customers_df():
 
 @pytest.fixture
 def valid_products_df():
-    return pd.DataFrame(
+    return pl.DataFrame(
         {
             "product_id": ["p1", "p2"],
             "product_category_name": ["tech", "home"],
@@ -136,14 +136,14 @@ def test_run_base_validations_success(valid_customers_df, empty_report):
 
 
 def test_run_base_validations_empty_df(empty_report):
-    df = pd.DataFrame()
+    df = pl.DataFrame()
     ok = run_base_validations(df, "test", ["id"], ["id"], ["id"], empty_report)
     assert ok is False
     assert any("dataset is empty" in e for e in empty_report["errors"])
 
 
 def test_run_base_validations_missing_column(valid_customers_df, empty_report):
-    df = valid_customers_df.drop(columns=["customer_state"])
+    df = valid_customers_df.drop(["customer_state"])
     ok = run_base_validations(
         df,
         "df_customers",
@@ -157,21 +157,21 @@ def test_run_base_validations_missing_column(valid_customers_df, empty_report):
 
 
 def test_run_base_validations_duplicate_pk(empty_report):
-    df = pd.DataFrame({"id": ["1", "1"], "val": ["a", "b"]})
+    df = pl.DataFrame({"id": ["1", "1"], "val": ["a", "b"]})
     ok = run_base_validations(df, "test", ["id"], ["id", "val"], ["id"], empty_report)
     assert ok is False
     assert any("conflicting duplicate primary key" in e for e in empty_report["errors"])
 
 
 def test_run_base_validations_repairable_duplicate(empty_report):
-    df = pd.DataFrame({"id": ["1", "1"], "val": ["a", "a"]})
+    df = pl.DataFrame({"id": ["1", "1"], "val": ["a", "a"]})
     ok = run_base_validations(df, "test", ["id"], ["id", "val"], ["id"], empty_report)
     assert ok is True
     assert any("eligible for deduplication" in w for w in empty_report["warnings"])
 
 
 def test_run_base_validations_null_pk(empty_report):
-    df = pd.DataFrame({"id": [None, "2"], "val": ["a", "b"]})
+    df = pl.DataFrame({"id": [None, "2"], "val": ["a", "b"]})
     ok = run_base_validations(df, "test", ["id"], ["id", "val"], [], empty_report)
     assert ok is True
     assert any(
@@ -179,13 +179,6 @@ def test_run_base_validations_null_pk(empty_report):
     )
 
 
-def test_run_base_validations_duplicate_columns(empty_report):
-    df = pd.DataFrame([[1, 2]], columns=["id", "id"])
-    ok = run_base_validations(df, "test", ["id"], ["id"], [], empty_report)
-    assert ok is True
-    assert any("duplicate column names detected" in w for w in empty_report["warnings"])
-
-
 # =============================================================================
 # EVENT FACT VALIDATION TESTS
 # =============================================================================
@@ -199,14 +192,24 @@ def test_run_event_fact_validations_success(valid_orders_df, empty_report):
 
 def test_run_event_fact_validations_temporal_error(valid_orders_df, empty_report):
     # Approval before purchase
-    valid_orders_df.loc[0, "order_approved_at"] = "2026-03-24 10:00:00"
+    valid_orders_df = valid_orders_df.with_columns(
+        pl.when(pl.col("order_id") == "o1")
+        .then(pl.lit("2026-03-24 10:00:00"))
+        .otherwise(pl.col("order_approved_at"))
+        .alias("order_approved_at")
+    )
     ok = run_event_fact_validations(valid_orders_df, "df_orders", empty_report)
     assert ok is True
     assert any("approval precedes purchase" in w for w in empty_report["warnings"])
 
 
 def test_run_event_fact_validations_unparsable_ts(valid_orders_df, empty_report):
-    valid_orders_df.loc[0, "order_purchase_timestamp"] = "garbage"
+    valid_orders_df = valid_orders_df.with_columns(
+        pl.when(pl.col("order_id") == "o1")
+        .then(pl.lit("garbage"))
+        .otherwise(pl.col("order_purchase_timestamp"))
+        .alias("order_purchase_timestamp")
+    )
     ok = run_event_fact_validations(valid_orders_df, "df_orders", empty_report)
     assert ok is True
     assert any("unparsable timestamp values" in w for w in empty_report["warnings"])
@@ -218,7 +221,7 @@ def test_run_event_fact_validations_unparsable_ts(valid_orders_df, empty_report)
 
 
 def test_run_transaction_detail_validations_negative(empty_report):
-    df = pd.DataFrame({"order_id": ["o1"], "price": [-10.0]})
+    df = pl.DataFrame({"order_id": ["o1"], "price": [-10.0]})
     ok = run_transaction_detail_validations(df, "test", empty_report)
     assert ok is True
     assert any("negative values in numeric column" in e for e in empty_report["errors"])
@@ -230,9 +233,9 @@ def test_run_transaction_detail_validations_negative(empty_report):
 
 
 def test_run_cross_table_validations_orphans(empty_report):
-    orders = pd.DataFrame({"order_id": ["o1"]})
-    items = pd.DataFrame({"order_id": ["o1", "o2"]})  # o2 is orphan
-    payments = pd.DataFrame({"order_id": ["o3"]})  # o3 is orphan
+    orders = pl.DataFrame({"order_id": ["o1"]})
+    items = pl.DataFrame({"order_id": ["o1", "o2"]})  # o2 is orphan
+    payments = pl.DataFrame({"order_id": ["o3"]})  # o3 is orphan
 
     tables = {"df_orders": orders, "df_order_items": items, "df_payments": payments}
     ok = run_cross_table_validations(tables, empty_report)
@@ -258,20 +261,18 @@ def test_apply_validation_integration(
 
     # Create date-suffixed files for loader
     suffix = "2026_03_25"
-    valid_orders_df.to_csv(
-        run_context.raw_snapshot_path / f"df_orders_{suffix}.csv", index=False
-    )
-    valid_order_items_df.to_csv(
-        run_context.raw_snapshot_path / f"df_order_items_{suffix}.csv", index=False
+    valid_orders_df.write_csv(run_context.raw_snapshot_path / f"df_orders_{suffix}.csv")
+    valid_order_items_df.write_csv(
+        run_context.raw_snapshot_path / f"df_order_items_{suffix}.csv"
     )
-    valid_payments_df.to_csv(
-        run_context.raw_snapshot_path / f"df_payments_{suffix}.csv", index=False
+    valid_payments_df.write_csv(
+        run_context.raw_snapshot_path / f"df_payments_{suffix}.csv"
     )
-    valid_customers_df.to_csv(
-        run_context.raw_snapshot_path / f"df_customers_{suffix}.csv", index=False
+    valid_customers_df.write_csv(
+        run_context.raw_snapshot_path / f"df_customers_{suffix}.csv"
     )
-    valid_products_df.to_csv(
-        run_context.raw_snapshot_path / f"df_products_{suffix}.csv", index=False
+    valid_products_df.write_csv(
+        run_context.raw_snapshot_path / f"df_products_{suffix}.csv"
     )
 
     report = apply_validation(run_context)