-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenhance_existing_data.py
More file actions
152 lines (119 loc) · 5.39 KB
/
enhance_existing_data.py
File metadata and controls
152 lines (119 loc) · 5.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python3
"""
Demo script to enhance existing repository data with AI-generated tags and quality assessments
"""
import json
import random
def generate_demo_tags(repo):
"""Generate demo AI tags based on repository information"""
language = repo.get('language', '').lower() if repo.get('language') else ''
name = repo.get('name', '').lower()
description = repo.get('description', '') or ''
# Common NHS/healthcare tags
healthcare_tags = ['healthcare', 'nhs', 'patient-care', 'clinical', 'medical', 'digital-health']
# Technology tags based on language
tech_tags = {
'python': ['python', 'data-science', 'automation', 'api'],
'javascript': ['javascript', 'web-app', 'frontend', 'interactive'],
'java': ['java', 'enterprise', 'backend', 'scalable'],
'c#': ['csharp', 'dotnet', 'enterprise', 'windows'],
'typescript': ['typescript', 'web-app', 'frontend', 'modern'],
'r': ['r', 'statistics', 'data-analysis', 'research'],
'sql': ['sql', 'database', 'analytics', 'reporting']
}
# Purpose tags based on name patterns
purpose_tags = []
if any(word in name for word in ['api', 'service', 'backend']):
purpose_tags.extend(['api', 'service', 'integration'])
if any(word in name for word in ['web', 'app', 'frontend', 'ui']):
purpose_tags.extend(['web-application', 'user-interface'])
if any(word in name for word in ['data', 'analytics', 'report']):
purpose_tags.extend(['data-analytics', 'reporting', 'insights'])
if any(word in name for word in ['mobile', 'ios', 'android']):
purpose_tags.extend(['mobile', 'cross-platform'])
if any(word in name for word in ['test', 'quality', 'automation']):
purpose_tags.extend(['testing', 'quality-assurance', 'automation'])
# Combine tags
tags = []
tags.extend(random.sample(healthcare_tags, min(2, len(healthcare_tags))))
if language and language in tech_tags:
tags.extend(tech_tags[language])
tags.extend(purpose_tags[:3])
# Add some random quality tags
quality_tags = ['innovative', 'scalable', 'secure', 'user-friendly', 'efficient', 'robust']
tags.extend(random.sample(quality_tags, min(2, len(quality_tags))))
return list(set(tags))[:6] # Limit to 6 unique tags
def calculate_quality_score(repo):
"""Calculate a demo quality score based on repository metrics"""
score = 50 # Base score
# Adjust based on stars
stars = repo.get('stargazers_count', 0)
score += min(stars * 5, 20)
# Adjust based on recent activity
if repo.get('pushed_at'):
score += 10
# Adjust based on description
if repo.get('description') and len(repo.get('description', '')) > 20:
score += 15
# Adjust based on language
if repo.get('language'):
score += 10
# Adjust based on size (activity indicator)
size = repo.get('size', 0)
if size > 100:
score += 10
elif size > 1000:
score += 15
# Add some randomness for demo
score += random.randint(-5, 10)
return min(max(score, 30), 95) # Clamp between 30-95
def assess_featured_status(repo, quality_score):
"""Determine if repository should be featured"""
# Feature repositories with high quality scores and recent activity
if quality_score >= 75:
return True
elif quality_score >= 65 and repo.get('stargazers_count', 0) > 0:
return True
elif repo.get('language') and quality_score >= 60:
return True
else:
return False
def enhance_repositories():
"""Enhance repository data with AI tags and assessments"""
print("Loading repository data...")
with open('data/repositories.json', 'r') as f:
repositories = json.load(f)
print(f"Enhancing {len(repositories)} repositories...")
featured_count = 0
for repo in repositories:
# Generate AI tags
generated_tags = generate_demo_tags(repo)
repo['generated_tags'] = generated_tags
# Combine with existing topics
all_tags = list(set(repo.get('topics', []) + generated_tags))
repo['all_tags'] = all_tags
# Calculate quality score
quality_score = calculate_quality_score(repo)
repo['quality_score'] = quality_score
# Assess featured status
featured = assess_featured_status(repo, quality_score)
repo['featured'] = featured
if featured:
featured_count += 1
print(f"Enhanced {repo['name']}: {len(generated_tags)} tags, quality: {quality_score}, featured: {featured}")
print(f"\nEnhancement complete!")
print(f"- Total repositories: {len(repositories)}")
print(f"- Featured repositories: {featured_count}")
print(f"- Coverage: {featured_count/len(repositories)*100:.1f}%")
# Save enhanced data
print("\nSaving enhanced data...")
with open('data/repositories.json', 'w') as f:
json.dump(repositories, f, indent=2)
print("✅ Repository data enhanced and saved!")
# Show some examples
print("\nFeatured repositories preview:")
featured_repos = [r for r in repositories if r.get('featured')][:5]
for repo in featured_repos:
print(f"- {repo['name']}: {repo['quality_score']} pts, tags: {repo['generated_tags'][:3]}")
if __name__ == "__main__":
enhance_repositories()