Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GNN/RESULTS_SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ The method produces spatially coherent hotspot regions that capture all high-sev

---

## Approach: Hybrid Spatial Modeling + Clustering
### Performance Winner: Hybrid GNN Modeling + Clustering

This approach integrates multiple complementary components.

Expand Down
169 changes: 114 additions & 55 deletions GNN/dashboard.html

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions GNN/src/baseline_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,7 @@ def compare_all_baselines(self, metadata: pd.DataFrame,
Returns:
Comparison results dictionary
"""
print("\n" + "="*60)
print("Running Baseline Comparisons")
print("="*60)
print("\nBaseline Comparison")

results = {}

Expand Down
6 changes: 2 additions & 4 deletions GNN/src/data_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,7 @@ def print_split_summary(self, splits: Dict):
Args:
splits: Dictionary with split data
"""
print("\n" + "="*50)
print("Data Split Summary")
print("="*50)
print("\nData Split Summary")

if 'train' in splits:
if isinstance(splits['train'], dict):
Expand All @@ -218,5 +216,5 @@ def print_split_summary(self, splits: Dict):
pct = (n / total) * 100 if total > 0 else 0
print(f"{split_name.capitalize()}: {n} samples ({pct:.1f}%)")

print("="*50 + "\n")
print("")

6 changes: 2 additions & 4 deletions GNN/src/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,7 @@ def print_metrics(self, metrics: Dict):
Args:
metrics: Dictionary of metrics
"""
print("\n" + "="*50)
print("Evaluation Metrics")
print("="*50)
print("\nEvaluation Metrics")

if 'silhouette_score' in metrics:
print(f"Silhouette Score: {metrics['silhouette_score']:.4f}")
Expand All @@ -200,5 +198,5 @@ def print_metrics(self, metrics: Dict):
print(f" Hotspot ratio: {metrics.get('hotspot_ratio', 0):.2%}")
print(f" Avg risk in hotspots: {metrics.get('avg_risk_in_hotspots', 0):.4f}")
print(f" Avg risk outside: {metrics.get('avg_risk_outside_hotspots', 0):.4f}")
print("="*50 + "\n")
print("")

56 changes: 17 additions & 39 deletions GNN/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ def main():
print(f"Using device: {device}")

# 1. Data Preprocessing
print("\n" + "="*50)
print("Step 1: Data Preprocessing")
print("="*50)
print("\nStep 1: Data Preprocessing")
preprocessor = DataPreprocessor(config)
features, metadata = preprocessor.process()
print(f"Preprocessed {len(features)} data points")
Expand All @@ -70,24 +68,18 @@ def main():
coords = metadata[['lon', 'lat']].values

# 2. Graph Construction
print("\n" + "="*50)
print("Step 2: Graph Construction")
print("="*50)
print("\nStep 2: Graph Construction")
graph_builder = GraphBuilder(config)
graph_data = graph_builder.build_graph(coords, features, metadata)
print(f"Graph constructed: {graph_data.num_nodes} nodes, {graph_data.num_edges} edges")

# 2.5. Data Splitting
print("\n" + "="*50)
print("Step 2.5: Data Splitting")
print("="*50)
print("\nStep 2.5: Data Splitting")
data_splitter = DataSplitter(config)
graph_splits, metadata_splits = data_splitter.split_graph_data(graph_data, metadata)

# 3. Model Setup
print("\n" + "="*50)
print("Step 3: Model Setup")
print("="*50)
print("\nStep 3: Model Setup")
input_dim = features.shape[1]
model = GNNModel(input_dim, config['model'])
print(f"Model created: {config['model']['architecture']}")
Expand All @@ -99,9 +91,8 @@ def main():
model_path = Path(__file__).parent.parent / model_path

if args.train or not model_path.exists():
print("\n" + "="*50)
print("Step 4: Training")
print("="*50)
# 4. Training
print("\nStep 4: Training")
history = trainer.train(
graph_splits['train'],
metadata_splits['train'],
Expand All @@ -115,9 +106,7 @@ def main():
trainer.load_model(str(model_path))

# 5. Feature Extraction (on test set for evaluation)
print("\n" + "="*50)
print("Step 5: Feature Extraction & Aggregation")
print("="*50)
print("\nStep 5: Feature Extraction & Aggregation")
feature_extractor = FeatureExtractor(model, config, device)

# Extract embeddings on test set for evaluation
Expand All @@ -136,19 +125,15 @@ def main():
print(f"Extracted and aggregated embeddings: {aggregated_embeddings.shape} (test set)")

# 6. Hotspot Scoring (on test set)
print("\n" + "="*50)
print("Step 6: Hotspot Scoring (Test Set)")
print("="*50)
print("\nStep 6: Hotspot Scoring (Test Set)")
hotspot_scorer = HotspotScorer(config)
risk_scores, score_components = hotspot_scorer.score(
metadata_splits['test'], aggregation_metadata, aggregated_embeddings
)
print(f"Computed risk scores for {len(risk_scores)} spatial units")

# 7. Hotspot Detection (on test set)
print("\n" + "="*50)
print("Step 7: Hotspot Detection (Test Set)")
print("="*50)
print("\nStep 7: Hotspot Detection (Test Set)")
hotspot_detector = HotspotDetector(config)
aggregation_coords = aggregation_metadata[['center_lon', 'center_lat']].values
hotspot_results = hotspot_detector.detect_hotspots(
Expand All @@ -160,9 +145,8 @@ def main():
# 8. Evaluation (on test set)
metrics = {}
if args.eval:
print("\n" + "="*50)
print("Step 8: Evaluation (Test Set)")
print("="*50)
# 8. Evaluation (on test set)
print("\nStep 8: Evaluation (Test Set)")
evaluator = Evaluator(config)
metrics = evaluator.evaluate(
aggregated_embeddings,
Expand Down Expand Up @@ -214,9 +198,7 @@ def convert_to_native(obj):

# 9. Visualization (on full data for better visualization)
if args.visualize:
print("\n" + "="*50)
print("Step 9: Visualization (Full Data)")
print("="*50)
print("\nStep 9: Visualization (Full Data)")
output_dir = Path(config['paths']['output_dir'])
output_dir = Path(__file__).parent.parent / output_dir
visualizer = Visualizer(config, str(output_dir))
Expand All @@ -236,9 +218,8 @@ def convert_to_native(obj):

# 10. Create Dashboards
if args.eval:
print("\n" + "="*50)
print("Step 10: Creating Dashboards")
print("="*50)
# 10. Create Dashboards
print("\nStep 10: Creating Dashboards")
dashboard_dir = Path(__file__).parent.parent / config['paths']['output_dir'] / 'dashboards'
dashboard = ModelDashboard(str(dashboard_dir))

Expand All @@ -254,9 +235,8 @@ def convert_to_native(obj):

# 11. Baseline Comparison
if args.eval:
print("\n" + "="*50)
print("Step 11: Baseline Comparison")
print("="*50)
# 11. Baseline Comparison
print("\nStep 11: Baseline Comparison")
baseline_comparison = BaselineComparison(config)

# Prepare GNN results (using test set results)
Expand Down Expand Up @@ -318,9 +298,7 @@ def convert_for_json(obj):
from .update_results_with_baselines import update_results_summary_with_baselines
update_results_summary_with_baselines()

print("\n" + "="*50)
print("Pipeline Complete!")
print("="*50)
print("\nPipeline Complete!")


if __name__ == '__main__':
Expand Down
Binary file modified temporal_calc/__pycache__/optimized_training.cpython-313.pyc
Binary file not shown.
Binary file modified temporal_calc/checkpoints/best_transformer.pt
Binary file not shown.
10 changes: 0 additions & 10 deletions temporal_calc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
import os
from pathlib import Path

# ============================================================================
# Paths
# ============================================================================
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR.parent / "data"
OUTPUT_DIR = BASE_DIR / "outputs"
Expand All @@ -23,9 +21,7 @@
for dir_path in [OUTPUT_DIR, PLOTS_DIR, EVAL_DIR, CHECKPOINT_DIR]:
dir_path.mkdir(parents=True, exist_ok=True)

# ============================================================================
# Data Configuration
# ============================================================================
# Number of synthetic time bins to create from the attribute_id ordering.
# A higher value yields a more granular time series and more training data per neighborhood.
NUM_TIME_BINS = 200 # Increased from 50 for denser data
Expand Down Expand Up @@ -55,9 +51,7 @@
"Other": 1.0
}

# ============================================================================
# Model Configuration (Optimized for M1 Mac 8GB RAM)
# ============================================================================
MODEL_CONFIG = {
"d_model": 128, # Embedding dimension (increased from 64)
"n_heads": 8, # Number of attention heads (increased from 4)
Expand All @@ -67,9 +61,7 @@
"max_seq_len": 30, # Maximum sequence length
}

# ============================================================================
# Training Configuration
# ============================================================================
TRAIN_CONFIG = {
"seq_len": 8, # Input sequence length (reduced for more samples)
"pred_len": 1, # Prediction horizon
Expand All @@ -81,9 +73,7 @@
"seed": 42,
}

# ============================================================================
# Device Configuration
# ============================================================================
def get_device():
"""Get the best available device (MPS for M1 Mac, else CPU)."""
import torch
Expand Down
10 changes: 5 additions & 5 deletions temporal_calc/max_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def calculate_transformer_metrics(preds, actuals, tolerance=1.0):
print(f"Using device: {DEVICE}")


# ============= DATA AUGMENTATION =============
# DATA AUGMENTATION

class AugmentedDataset(Dataset):
"""Dataset with noise injection and temporal jittering."""
Expand Down Expand Up @@ -140,7 +140,7 @@ def eval(self):
self._training = False


# ============= ENHANCED TRANSFORMER =============
# ENHANCED TRANSFORMER

class MaxPerformanceTransformer(nn.Module):
"""
Expand Down Expand Up @@ -223,7 +223,7 @@ def forward(self, x):
return self.output_head(cls_output)


# ============= BIDIRECTIONAL LSTM =============
# BIDIRECTIONAL LSTM

class BiLSTMModel(nn.Module):
"""
Expand Down Expand Up @@ -273,7 +273,7 @@ def forward(self, x):
return self.output_head(context)


# ============= LEARNING RATE SCHEDULER =============
# LEARNING RATE SCHEDULER

class CosineWarmupScheduler:
"""Cosine annealing with linear warmup."""
Expand Down Expand Up @@ -304,7 +304,7 @@ def get_lr(self):
return self.optimizer.param_groups[0]['lr']


# ============= TRAINING =============
# TRAINING

def train_model(model, train_loader, val_loader, target_scaler,
model_name: str, epochs: int = 200, lr: float = 3e-4,
Expand Down
12 changes: 6 additions & 6 deletions temporal_calc/optimized_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
print(f"Using device: {DEVICE}")


# ============= 1. ENHANCED FEATURE ENGINEERING =============
# 1. ENHANCED FEATURE ENGINEERING

def add_enhanced_temporal_features(df: pd.DataFrame, target_col: str = 'accessibility_score') -> pd.DataFrame:
"""
Expand Down Expand Up @@ -114,7 +114,7 @@ def add_enhanced_temporal_features(df: pd.DataFrame, target_col: str = 'accessib
return df


# ============= 2. STANDARDSCALER NORMALIZATION =============
# 2. STANDARDSCALER NORMALIZATION

def prepare_normalized_data(df: pd.DataFrame, target_col: str = 'accessibility_score'):
"""
Expand Down Expand Up @@ -144,7 +144,7 @@ def prepare_normalized_data(df: pd.DataFrame, target_col: str = 'accessibility_s
return X_scaled, y_scaled.flatten(), feature_scaler, target_scaler, feature_cols


# ============= 3. INCREASED SEQUENCE LENGTH DATASET =============
# 3. INCREASED SEQUENCE LENGTH DATASET

class EnhancedTimeSeriesDataset(Dataset):
"""
Expand Down Expand Up @@ -194,7 +194,7 @@ def __getitem__(self, idx):
)


# ============= 4. TRANSFORMER WITH REGULARIZATION =============
# 4. TRANSFORMER WITH REGULARIZATION

class RegularizedTransformer(nn.Module):
"""
Expand Down Expand Up @@ -257,7 +257,7 @@ def forward(self, x):
return self.output_head(x)


# ============= 5. XGBOOST BASELINE =============
# 5. XGBOOST BASELINE

def train_xgboost_baseline(X_train, y_train, X_val, y_val,
target_scaler, pred_len: int = 3):
Expand Down Expand Up @@ -321,7 +321,7 @@ def train_xgboost_baseline(X_train, y_train, X_val, y_val,
return models, {'mae': mae, 'rmse': rmse, 'r2': r2}


# ============= 6. TRAINING WITH REGULARIZATION =============
# 6. TRAINING WITH REGULARIZATION

def train_transformer(model, train_loader, val_loader,
target_scaler,
Expand Down
Loading
Loading