-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdb_cleanup.py
More file actions
177 lines (142 loc) · 5.22 KB
/
db_cleanup.py
File metadata and controls
177 lines (142 loc) · 5.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/env python3
"""
Simple database cleanup script for benchHUB
Handles score reset and database maintenance
"""
import sqlite3
import os
import sys
def get_database_path():
"""Get the database file path"""
# Check for production database URL
db_url = os.environ.get("DATABASE_URL")
if db_url and not db_url.startswith("sqlite"):
print("❌ This script only works with local SQLite databases")
print("For production PostgreSQL, use the online admin tools")
sys.exit(1)
# Use local SQLite database
return "leaderboard.db"
def show_stats():
"""Show current database statistics"""
db_path = get_database_path()
if not os.path.exists(db_path):
print(f"❌ Database not found: {db_path}")
return
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print("📈 Database Statistics:")
print("-" * 40)
# Total count
cursor.execute("SELECT COUNT(*) FROM results")
total = cursor.fetchone()[0]
print(f"Total results: {total}")
# By category
cursor.execute("SELECT config_name, COUNT(*) FROM results GROUP BY config_name")
categories = cursor.fetchall()
print("\nBy configuration:")
for name, count in categories:
print(f" {name or 'unknown'}: {count} results")
# Score ranges
print("\nScore ranges:")
for name, _ in categories:
cursor.execute("""
SELECT MIN(reference_index), MAX(reference_index)
FROM results
WHERE config_name = ? AND reference_index > 0
""", (name,))
result = cursor.fetchone()
if result[0] is not None:
print(f" {name or 'unknown'}: {result[0]:.1f} - {result[1]:.1f}")
conn.close()
def clear_all():
"""Clear all benchmark results"""
db_path = get_database_path()
if not os.path.exists(db_path):
print(f"❌ Database not found: {db_path}")
return
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM results")
count = cursor.fetchone()[0]
print(f"🗑️ Found {count} results to delete")
if count > 0:
confirm = input(f"Are you sure you want to delete all {count} results? (yes/no): ")
if confirm.lower() == 'yes':
cursor.execute("DELETE FROM results")
conn.commit()
print(f"✅ Deleted {count} results")
else:
print("❌ Operation cancelled")
else:
print("ℹ️ Database is already empty")
conn.close()
def limit_per_category(limit=2000):
"""Keep only top N scores per category"""
db_path = get_database_path()
if not os.path.exists(db_path):
print(f"❌ Database not found: {db_path}")
return
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print(f"📊 Limiting each category to top {limit} scores...")
# Get categories
cursor.execute("SELECT DISTINCT config_name FROM results")
categories = [row[0] for row in cursor.fetchall()]
total_deleted = 0
for category in categories:
print(f"Processing category: {category or 'unknown'}")
# Count current results
cursor.execute("SELECT COUNT(*) FROM results WHERE config_name = ?", (category,))
current_count = cursor.fetchone()[0]
if current_count <= limit:
print(f" ✅ {category or 'unknown'}: {current_count} results (within limit)")
continue
# Delete excess results (keep top N by reference_index)
cursor.execute("""
DELETE FROM results
WHERE config_name = ?
AND id NOT IN (
SELECT id FROM results
WHERE config_name = ?
ORDER BY reference_index DESC
LIMIT ?
)
""", (category, category, limit))
deleted = cursor.rowcount
conn.commit()
total_deleted += deleted
print(f" 🗑️ {category or 'unknown'}: Deleted {deleted} results, kept top {limit}")
print(f"✅ Total deleted: {total_deleted} results")
conn.close()
def reset_schema():
"""Reset database schema (nuclear option)"""
db_path = get_database_path()
print("💥 NUCLEAR OPTION: Resetting entire database...")
confirm = input("This will DELETE ALL DATA. Type 'RESET' to confirm: ")
if confirm == 'RESET':
if os.path.exists(db_path):
os.remove(db_path)
print(f"✅ Database {db_path} deleted")
print("Run the API once to recreate the schema")
else:
print("❌ Operation cancelled")
def main():
if len(sys.argv) < 2:
print("Usage: python db_cleanup.py <action>")
print("Actions: stats, clear, limit [N], reset")
sys.exit(1)
action = sys.argv[1]
if action == 'stats':
show_stats()
elif action == 'clear':
clear_all()
elif action == 'limit':
limit = int(sys.argv[2]) if len(sys.argv) > 2 else 2000
limit_per_category(limit)
elif action == 'reset':
reset_schema()
else:
print(f"❌ Unknown action: {action}")
sys.exit(1)
if __name__ == "__main__":
main()