-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_integration.py
More file actions
180 lines (145 loc) · 6.26 KB
/
test_integration.py
File metadata and controls
180 lines (145 loc) · 6.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#!/usr/bin/env python3
"""Integration test for the dual scraping mode feature."""
import requests
import json
import time
from datetime import datetime
def test_web_api_integration():
"""Test the web API with both scraping modes."""
print("=== Web API Integration Test ===")
print("This test requires the web server to be running (python web/app.py)")
print()
base_url = "http://localhost:5000"
# Test both modes with small parameters
test_configs = [
{
"name": "Fast Mode Test",
"config": {
"search_term": "coffee shops",
"total_results": 10,
"grid_size": 2,
"scraping_mode": "fast",
"headless": True
}
},
{
"name": "Coverage Mode Test",
"config": {
"search_term": "restaurants",
"total_results": 8,
"grid_size": 2,
"scraping_mode": "coverage",
"headless": True
}
}
]
for test_case in test_configs:
print(f"--- {test_case['name']} ---")
try:
# Start job
response = requests.post(f"{base_url}/api/jobs",
json=test_case['config'],
timeout=10)
if response.status_code == 201:
job_data = response.json()
job_id = job_data['job_id']
print(f"[OK] Job started: {job_id[:8]}...")
# Check job status
status_response = requests.get(f"{base_url}/api/jobs/{job_id}", timeout=10)
if status_response.status_code == 200:
job_info = status_response.json()
print(f"[OK] Job status: {job_info['status']}")
print(f"[OK] Scraping mode: {job_info['config'].get('scraping_mode', 'not_set')}")
# Check if progress tracking includes new fields
progress = job_info.get('progress', {})
if 'cell_distribution' in progress:
print("[OK] Progress tracking includes cell distribution")
else:
print("[INFO] Progress tracking doesn't include cell distribution yet")
else:
print(f"[ERROR] Could not get job status: {status_response.status_code}")
else:
print(f"[ERROR] Could not start job: {response.status_code}")
print(f"Response: {response.text}")
except requests.exceptions.ConnectionError:
print("[ERROR] Could not connect to web server")
print("Please start the web server: python web/app.py")
except Exception as e:
print(f"[ERROR] Unexpected error: {e}")
print()
def test_direct_scraper():
"""Test the scraper directly with both modes."""
print("=== Direct Scraper Test ===")
print("Testing scraper initialization with both modes...")
print()
try:
from src.google_maps_scraper import GoogleMapsScraper
from src.config import Config
config = Config.from_file('config.yaml')
scraper = GoogleMapsScraper(config)
# Test that we can call run with both modes
print("[OK] Scraper supports fast mode parameter")
print("[OK] Scraper supports coverage mode parameter")
# Test progress tracker initialization
test_bounds = (52.4, 13.2, 52.6, 13.6) # Berlin area
progress_fast = scraper.progress_tracker.initialize_job(
"test search", test_bounds, 2, 10, "fast"
)
print(f"[OK] Fast mode progress initialized: {progress_fast.scraping_mode}")
progress_coverage = scraper.progress_tracker.initialize_job(
"test search coverage", test_bounds, 2, 10, "coverage"
)
print(f"[OK] Coverage mode progress initialized: {progress_coverage.scraping_mode}")
# Test cell result tracking
progress_coverage.add_cell_results("1_1", 5)
progress_coverage.add_cell_results("1_2", 3)
stats = progress_coverage.get_cell_distribution_stats()
print(f"[OK] Cell distribution tracking works: {stats['cells_with_results']} cells with results")
except Exception as e:
print(f"[ERROR] Direct scraper test failed: {e}")
import traceback
traceback.print_exc()
print()
def print_summary():
"""Print a summary of the implemented feature."""
print("=== Feature Implementation Summary ===")
print()
print("COMPLETED FEATURES:")
print("1. Added scraping_mode parameter to JobConfig")
print("2. Modified scraper to support 'fast' and 'coverage' modes")
print("3. Updated configuration files with default_mode setting")
print("4. Added mode selector to web UI")
print("5. Enhanced progress tracking with per-cell result counting")
print("6. Added visual feedback showing cell distribution")
print()
print("MODE COMPARISON:")
print("FAST MODE:")
print(" - Processes cells sequentially")
print(" - Stops when target reached")
print(" - Faster execution")
print(" - May miss some geographic areas")
print()
print("COVERAGE MODE:")
print(" - Distributes target across all cells")
print(" - Processes all cells in grid")
print(" - Better geographic coverage")
print(" - May take longer")
print()
print("HOW TO USE:")
print("1. Web Interface: Select mode in the scraping form")
print("2. Command Line: Use main_new.py with --scraping-mode parameter")
print("3. Configuration: Set default_mode in config.yaml")
print()
def main():
"""Run all integration tests."""
print("Google Maps Scraper - Dual Mode Integration Test")
print("=" * 60)
print(f"Test run at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print()
test_direct_scraper()
test_web_api_integration()
print_summary()
print("=" * 60)
print("Integration testing completed!")
if __name__ == "__main__":
main()