Agent/github_opportunity_feed.py at main · manfromnowhere143/Agent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
#!/usr/bin/env python3
"""
GitHub Opportunity Feed - Integrates with MegaAgent System
Scans GitHub for real revenue opportunities using the existing infrastructure

Targets:
- $50-500 per improved README
- $500-5000/day potential
- $15,000-150,000/month
"""

import asyncio
import aiohttp
import os
import sys
import json
import re
from datetime import datetime, timedelta
from typing import List, Dict, Optional
from dotenv import load_dotenv

# Add project root to path
sys.path.insert(0, ".")
load_dotenv()

# Import existing MegaAgent components
from src.config.settings import Settings


class GitHubOpportunityFeed:
    """Scan GitHub for popular repositories whose README could be improved.

    The scanner queries the GitHub search API for each target language,
    downloads every hit's README, scores it with simple text heuristics and
    turns the score plus the star count into a rough dollar estimate.

    Use it as an async context manager so the HTTP session is opened and
    closed properly::

        async with GitHubOpportunityFeed(settings) as feed:
            opportunities = await feed.scan_revenue_opportunities()
    """

    # Keyword groups whose presence (case-insensitive) each add 0.5 to the
    # documentation score: installation, usage, API docs, contributing,
    # license, project description.
    _ESSENTIAL_SECTION_PATTERNS = (
        r"installation|install|setup",
        r"usage|example|getting started",
        r"api|reference|documentation",
        r"contributing|contribute",
        r"license",
        r"description|about|overview",
    )

    # Markdown image syntax: ![alt](url)
    _IMAGE_RE = re.compile(r"!\[.*\]\(.*\)")

    # The TLD class is [A-Za-z]; the previous "[A-Z|a-z]" also accepted a
    # literal "|" character.
    _EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")

    # A handle must not be glued to a preceding address character (otherwise
    # the domain of every email address is reported as a handle) and must not
    # be followed by "/" (npm scopes such as "@types/node").
    _TWITTER_RE = re.compile(r"(?<![\w.%+-])@([A-Za-z0-9_]+)(?![\w/])")

    def __init__(self, settings: "Settings"):
        """Store configuration and read the GitHub token from the environment.

        Args:
            settings: Project settings object; stored on the instance but not
                otherwise used by this class. The annotation is quoted so it
                is not evaluated when the class is defined.
        """
        self.settings = settings
        self.github_token = os.getenv("GITHUB_TOKEN")
        self.base_url = "https://api.github.com"
        self.session = None

        # Revenue opportunity criteria
        self.min_stars = 100
        self.min_revenue_potential = 50
        self.target_languages = [
            "Python",
            "JavaScript",
            "TypeScript",
            "Go",
            "Rust",
            "Java",
        ]

    def _build_headers(self) -> Dict[str, str]:
        """Return the HTTP headers sent with every GitHub API request.

        The ``Authorization`` header is only included when a token is
        configured. Sending the literal string ``"token None"`` makes GitHub
        reject every request as bad credentials, whereas unauthenticated
        requests are accepted (at a lower rate limit).
        """
        headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "MegaAgent-Revenue-Scanner/1.0",
        }
        if self.github_token:
            headers["Authorization"] = f"token {self.github_token}"
        return headers

    async def __aenter__(self):
        """Open the shared HTTP session and return the scanner itself."""
        self.session = aiohttp.ClientSession(headers=self._build_headers())
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session, if one was opened."""
        if self.session:
            await self.session.close()
            # Drop the reference so a closed session is never reused.
            self.session = None

    async def scan_revenue_opportunities(self) -> List[Dict]:
        """Scan all target languages and return the best opportunities.

        Returns:
            At most 15 opportunity dicts (see ``_analyze_repository`` for the
            keys), de-duplicated by ``full_name`` and sorted by
            ``revenue_potential`` in descending order.
        """
        print("🔍 SCANNING GITHUB FOR REVENUE OPPORTUNITIES...")
        print("=" * 50)

        opportunities = []

        for language in self.target_languages:
            print(f"   📂 Scanning {language} repositories...")

            # Search strategy: two star bands per language, restricted to
            # recently pushed repositories.
            # NOTE(review): the cut-off dates are hard-coded and will go
            # stale over time.
            search_queries = [
                f"language:{language} stars:{self.min_stars}..1000 pushed:>2024-01-01",
                f"language:{language} stars:1000..5000 pushed:>2023-06-01",
            ]

            for query in search_queries:
                batch_opportunities = await self._search_repositories(query, limit=5)
                opportunities.extend(batch_opportunities)

                # Rate limiting - be respectful to GitHub API
                await asyncio.sleep(1)

        # Remove duplicates (the last occurrence of a repository wins) and
        # sort by revenue potential.
        unique_opportunities = {
            opp["full_name"]: opp for opp in opportunities if opp
        }.values()
        sorted_opportunities = sorted(
            unique_opportunities, key=lambda x: x["revenue_potential"], reverse=True
        )

        return sorted_opportunities[:15]  # Top 15 opportunities

    async def _search_repositories(self, query: str, limit: int = 5) -> List[Dict]:
        """Run one repository search and analyze every hit.

        Args:
            query: GitHub search query string (the ``q`` parameter).
            limit: Number of results requested (``per_page``).

        Returns:
            The opportunity dicts for hits that qualified. An empty list is
            returned on a non-200 response or on any error; failures are
            printed rather than raised so one bad query cannot abort the scan.
        """
        try:
            async with self.session.get(
                f"{self.base_url}/search/repositories",
                params={
                    "q": query,
                    "sort": "stars",
                    "order": "desc",
                    "per_page": limit,
                },
            ) as response:
                if response.status != 200:
                    print(f"   ⚠️ GitHub API error: {response.status}")
                    return []

                data = await response.json()
                opportunities = []

                for repo in data.get("items", []):
                    opportunity = await self._analyze_repository(repo)
                    if opportunity:
                        opportunities.append(opportunity)

                return opportunities
        except Exception as e:
            # Deliberately broad: this is the per-query error boundary.
            print(f"   ❌ Search error: {e}")
            return []

    async def _analyze_repository(self, repo: Dict) -> Optional[Dict]:
        """Turn one search result into an opportunity dict.

        Args:
            repo: A repository item from the GitHub search response.

        Returns:
            A dict describing the opportunity, or ``None`` when the README is
            missing/empty, the estimate is below ``min_revenue_potential``, or
            the analysis raised.
        """
        try:
            # Get README content
            readme_content = await self._get_readme_content(repo["full_name"])
            if not readme_content:
                return None

            stars = repo.get("stargazers_count", 0)
            forks = repo.get("forks_count", 0)
            # These fields can be JSON null, in which case .get() returns
            # None despite the default; normalise to an empty string.
            language = repo.get("language") or ""
            description = repo.get("description") or ""

            # Documentation quality score (1-10, lower = more opportunity)
            doc_score = self._score_documentation(readme_content)

            revenue_potential = self._calculate_revenue_potential(
                stars, doc_score, language
            )

            # NOTE(review): _calculate_revenue_potential never returns less
            # than 50, so with the default threshold of 50 this check always
            # passes.
            if revenue_potential >= self.min_revenue_potential:
                return {
                    "repo_name": repo["name"],
                    "full_name": repo["full_name"],
                    "description": description,
                    "stars": stars,
                    "forks": forks,
                    "language": language,
                    "html_url": repo["html_url"],
                    "doc_score": doc_score,
                    "revenue_potential": revenue_potential,
                    "readme_length": len(readme_content),
                    "opportunity_type": self._classify_opportunity(readme_content),
                    "contact_methods": self._extract_contact_info(readme_content),
                    "improvement_areas": self._identify_improvements(readme_content),
                    "last_updated": repo.get("updated_at", ""),
                    "priority_score": self._calculate_priority(
                        stars, doc_score, language
                    ),
                }
        except Exception as e:
            # Deliberately broad: one malformed repository must not abort the
            # whole batch.
            print(f"   ⚠️ Analysis error for {repo.get('name', 'unknown')}: {e}")

        return None

    async def _get_readme_content(self, full_name: str) -> Optional[str]:
        """Download and decode a repository's README.

        Args:
            full_name: Repository identifier in ``owner/name`` form.

        Returns:
            The README text, or ``None`` when the request did not return 200
            or the download/decoding failed. Bytes that are not valid UTF-8
            are replaced instead of discarding the whole README.
        """
        import base64

        try:
            async with self.session.get(
                f"{self.base_url}/repos/{full_name}/readme"
            ) as response:
                if response.status != 200:
                    return None

                data = await response.json()
                # Guard against a null "content" field, which would make
                # b64decode raise.
                encoded = data.get("content") or ""

                # The API returns the file body base64-encoded.
                return base64.b64decode(encoded).decode("utf-8", errors="replace")
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as e:
            # Network problems, invalid JSON and invalid base64
            # (binascii.Error is a ValueError) are reported, not hidden.
            print(f"   ⚠️ README fetch failed for {full_name}: {e}")

        return None

    def _score_documentation(self, readme_content: str) -> int:
        """Score documentation quality (1-10, 10=perfect, 1=terrible).

        Starts at 5, adds 0.5 per essential section keyword found, adds
        bonuses for length, code fences and images, subtracts penalties for
        very short READMEs and TODO/FIXME markers, then truncates to an int
        and clamps to the 1-10 range.
        """
        score = 5  # Start at average
        content_lower = readme_content.lower()  # lower-case once, not per pattern

        # Essential sections check
        for pattern in self._ESSENTIAL_SECTION_PATTERNS:
            if re.search(pattern, content_lower):
                score += 0.5

        # Quality indicators
        if len(readme_content) > 1000:
            score += 1
        if readme_content.count("```") >= 2:  # Code examples
            score += 1
        if self._IMAGE_RE.search(readme_content):  # Images
            score += 0.5

        # Negative indicators (more opportunity!)
        if len(readme_content) < 300:
            score -= 2
        if "TODO" in readme_content or "FIXME" in readme_content:
            score -= 1
        if readme_content.count("\n") < 15:
            score -= 1

        return max(1, min(10, int(score)))

    def _calculate_revenue_potential(
        self, stars: int, doc_score: int, language: str
    ) -> int:
        """Estimate the revenue potential in USD.

        Args:
            stars: Stargazer count of the repository.
            doc_score: Result of ``_score_documentation`` (1-10).
            language: Primary language; unknown languages use a 1.0
                multiplier.

        Returns:
            An integer clamped to the 50-500 range.
        """
        # Base value from stars (popularity)
        base_value = min(stars / 200 * 50, 250)  # $50 per 200 stars, max $250

        # Language multipliers (market demand)
        lang_multipliers = {
            "Python": 1.5,
            "JavaScript": 1.3,
            "TypeScript": 1.4,
            "Go": 1.2,
            "Rust": 1.6,
            "Java": 1.1,
        }
        multiplier = lang_multipliers.get(language, 1.0)

        # Documentation improvement potential
        improvement_value = (10 - doc_score) / 10 * 200  # Up to $200 for worst docs

        total = int((base_value + improvement_value) * multiplier)
        return max(50, min(500, total))  # Clamp between $50-500

    def _classify_opportunity(self, readme_content: str) -> str:
        """Classify the kind of work a README needs.

        The checks run in priority order and the first match wins:
        ``complete_rewrite``, ``completion_required``,
        ``missing_instructions``, ``needs_examples``, else ``enhancement``.
        """
        content_lower = readme_content.lower()

        if len(readme_content) < 200:
            return "complete_rewrite"
        if "todo" in content_lower or "fixme" in content_lower:
            return "completion_required"
        if not re.search(r"installation|install", content_lower):
            return "missing_instructions"
        if "```" not in readme_content:
            return "needs_examples"
        return "enhancement"

    def _extract_contact_info(self, readme_content: str) -> Dict:
        """Extract the first email address and @handle found in a README.

        Returns:
            A dict with an ``"email"`` and/or ``"twitter"`` key; empty when
            neither is found. The domain part of an email address and npm
            scopes such as ``@types/node`` are not reported as handles.
        """
        contacts = {}

        email_match = self._EMAIL_RE.search(readme_content)
        if email_match:
            contacts["email"] = email_match.group()

        twitter_match = self._TWITTER_RE.search(readme_content)
        if twitter_match:
            contacts["twitter"] = f"@{twitter_match.group(1)}"

        return contacts

    def _identify_improvements(self, readme_content: str) -> List[str]:
        """List specific improvement suggestions for a README.

        Returns:
            Human-readable suggestions in a fixed order, one per heuristic
            that fired.
        """
        improvements = []
        content_lower = readme_content.lower()

        if len(readme_content) < 500:
            improvements.append("Expand with detailed project description")
        if not re.search(r"installation|install", content_lower):
            improvements.append("Add clear installation instructions")
        if not re.search(r"usage|example", content_lower):
            improvements.append("Add usage examples and tutorials")
        if "```" not in readme_content:
            improvements.append("Add properly formatted code examples")
        if not self._IMAGE_RE.search(readme_content):
            improvements.append("Add screenshots or diagrams")
        if "todo" in content_lower:
            improvements.append("Complete TODO sections")
        if not re.search(r"contributing|contribute", content_lower):
            improvements.append("Add contribution guidelines")

        return improvements

    def _calculate_priority(self, stars: int, doc_score: int, language: str) -> float:
        """Calculate a 0-100 priority score for targeting.

        Weights: 40% popularity (stars, saturating at 1000), 40% documentation
        gap, 20% language demand (0.7 for languages not listed).
        """
        # Higher stars = higher priority
        star_factor = min(stars / 1000, 1.0)

        # Lower doc score = higher priority (more opportunity)
        doc_factor = (10 - doc_score) / 10

        # Language demand factor
        lang_factors = {
            "Python": 0.9,
            "JavaScript": 0.8,
            "TypeScript": 0.85,
            "Rust": 0.95,
        }
        lang_factor = lang_factors.get(language, 0.7)

        return (star_factor * 0.4 + doc_factor * 0.4 + lang_factor * 0.2) * 100


async def main():
    """Run one scan and print a report of the opportunities found.

    Prints a banner, scans GitHub through ``GitHubOpportunityFeed``, lists
    each opportunity with its key figures and up to three suggested
    improvements, then prints projected daily and monthly revenue based on
    ten improvements per day.
    """
    rule = "=" * 50
    print("🚀 MEGAAGENT GITHUB OPPORTUNITY FEED")
    print(rule)
    print("💰 Targeting: $15,000-150,000/month revenue")
    print("🎯 Strategy: Find → Improve → Get Paid")
    print(rule)

    # Settings are loaded first, then handed to the scanner; the session is
    # closed as soon as the scan finishes.
    config = Settings()
    async with GitHubOpportunityFeed(config) as feed:
        found = await feed.scan_revenue_opportunities()

    if not found:
        print("❌ No opportunities found. GitHub API might be rate limited.")
        return

    print(f"\n🎯 FOUND {len(found)} REVENUE OPPORTUNITIES!")
    print("=" * 60)

    running_total = 0
    for rank, item in enumerate(found, start=1):
        running_total += item["revenue_potential"]

        print(f"\n💰 OPPORTUNITY #{rank} - ${item['revenue_potential']}")
        print(f"   📦 {item['repo_name']} ({item['language']})")
        print(f"   ⭐ {item['stars']:,} stars | 🍴 {item['forks']:,} forks")
        print(
            f"   📊 Doc Score: {item['doc_score']}/10 | 🎯 Priority: {item['priority_score']:.1f}"
        )
        print(f"   📝 Type: {item['opportunity_type']}")
        print(f"   🔗 {item['html_url']}")

        contacts = item["contact_methods"]
        if contacts:
            print(f"   📧 Contact: {', '.join(contacts.keys())}")

        print("   🎯 Improvements needed:")
        top_three = item["improvement_areas"][:3]
        for suggestion in top_three:
            print(f"      • {suggestion}")

    # Revenue projections; `found` is known to be non-empty at this point.
    mean_value = running_total / len(found)
    per_day = mean_value * 10  # 10 improvements per day
    per_month = per_day * 30

    print("\n📊 REVENUE ANALYSIS")
    print("=" * 30)
    print(f"💵 Total Potential Found: ${running_total:,}")
    print(f"📈 Average per Opportunity: ${mean_value:.0f}")
    print(f"🎯 Daily Potential (10/day): ${per_day:,.0f}")
    print(f"📅 Monthly Potential: ${per_month:,.0f}")

    if per_month < 15000:
        print(f"\n💡 Scale needed for $15K target. Current: ${per_month:,.0f}")
    else:
        print(f"\n🎉 TARGET ACHIEVED! Monthly potential: ${per_month:,.0f}")
        print("🚀 Ready to start autonomous revenue generation!")

    print("\n🔥 GITHUB OPPORTUNITY FEED OPERATIONAL!")
    print("🎯 Next: Integrate with MegaAgent for autonomous execution")


# Run the scan only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    asyncio.run(main())