diff --git a/.tmp.driveupload/164315 b/.tmp.driveupload/164315 deleted file mode 100644 index e0e3238..0000000 --- a/.tmp.driveupload/164315 +++ /dev/null @@ -1,18 +0,0 @@ -// vite.config.ts -import { defineConfig } from "file:///Users/mike/SourceProof/FounderFinder/node_modules/vite/dist/node/index.js"; -import react from "file:///Users/mike/SourceProof/FounderFinder/node_modules/@vitejs/plugin-react/dist/index.js"; -import tailwindcss from "file:///Users/mike/SourceProof/FounderFinder/node_modules/@tailwindcss/vite/dist/index.mjs"; -import path from "path"; -var __vite_injected_original_dirname = "/Users/mike/SourceProof/FounderFinder"; -var vite_config_default = defineConfig({ - plugins: [react(), tailwindcss()], - resolve: { - alias: { - "@": path.resolve(__vite_injected_original_dirname, "./src") - } - } -}); -export { - vite_config_default as default -}; -//# sourceMappingURL=data:application/json;base64,ewogICJ2ZXJzaW9uIjogMywKICAic291cmNlcyI6IFsidml0ZS5jb25maWcudHMiXSwKICAic291cmNlUm9vdCI6ICJmaWxlOi8vL1VzZXJzL21pa2UvU291cmNlUHJvb2YvRm91bmRlckZpbmRlci8iLAogICJzb3VyY2VzQ29udGVudCI6IFsiY29uc3QgX192aXRlX2luamVjdGVkX29yaWdpbmFsX2Rpcm5hbWUgPSBcIi9Vc2Vycy9taWtlL1NvdXJjZVByb29mL0ZvdW5kZXJGaW5kZXJcIjtjb25zdCBfX3ZpdGVfaW5qZWN0ZWRfb3JpZ2luYWxfZmlsZW5hbWUgPSBcIi9Vc2Vycy9taWtlL1NvdXJjZVByb29mL0ZvdW5kZXJGaW5kZXIvdml0ZS5jb25maWcudHNcIjtjb25zdCBfX3ZpdGVfaW5qZWN0ZWRfb3JpZ2luYWxfaW1wb3J0X21ldGFfdXJsID0gXCJmaWxlOi8vL1VzZXJzL21pa2UvU291cmNlUHJvb2YvRm91bmRlckZpbmRlci92aXRlLmNvbmZpZy50c1wiO2ltcG9ydCB7IGRlZmluZUNvbmZpZyB9IGZyb20gXCJ2aXRlXCI7XG5pbXBvcnQgcmVhY3QgZnJvbSBcIkB2aXRlanMvcGx1Z2luLXJlYWN0XCI7XG5pbXBvcnQgdGFpbHdpbmRjc3MgZnJvbSBcIkB0YWlsd2luZGNzcy92aXRlXCI7XG5pbXBvcnQgcGF0aCBmcm9tIFwicGF0aFwiO1xuXG5leHBvcnQgZGVmYXVsdCBkZWZpbmVDb25maWcoe1xuICBwbHVnaW5zOiBbcmVhY3QoKSwgdGFpbHdpbmRjc3MoKV0sXG4gIHJlc29sdmU6IHtcbiAgICBhbGlhczoge1xuICAgICAgXCJAXCI6IHBhdGgucmVzb2x2ZShfX2Rpcm5hbWUsIFwiLi9zcmNcIiksXG4gICAgfSxcbiAgfSxcbn0pO1xuIl0sCiAgIm1hcHBpbmdzIjogIjtBQUFpUyxTQUFTLG9CQUFvQjtBQUM5VCxP
QUFPLFdBQVc7QUFDbEIsT0FBTyxpQkFBaUI7QUFDeEIsT0FBTyxVQUFVO0FBSGpCLElBQU0sbUNBQW1DO0FBS3pDLElBQU8sc0JBQVEsYUFBYTtBQUFBLEVBQzFCLFNBQVMsQ0FBQyxNQUFNLEdBQUcsWUFBWSxDQUFDO0FBQUEsRUFDaEMsU0FBUztBQUFBLElBQ1AsT0FBTztBQUFBLE1BQ0wsS0FBSyxLQUFLLFFBQVEsa0NBQVcsT0FBTztBQUFBLElBQ3RDO0FBQUEsRUFDRjtBQUNGLENBQUM7IiwKICAibmFtZXMiOiBbXQp9Cg== diff --git a/QA-Report-2026-02-28.html b/QA-Report-2026-02-28.html deleted file mode 100644 index 4eb61cd..0000000 --- a/QA-Report-2026-02-28.html +++ /dev/null @@ -1,685 +0,0 @@ - - - - - - SourceKit QA Report | Feb 28, 2026 - - - - - - - - - -
- -
- - -
- - -
-
-

QA Scorecard

- -
-
-
87.5%
-
Pass Rate
-
-
-
8
-
Nav Sections
-
-
-
0
-
App Console Errors
-
-
-
1
-
Bugs Found
-
-
- -
-

All 8 navigation sections render correctly with dark-mode styling. Authentication working (michael.f.rubino@gmail.com). Credit system displays 10/10 searches. Form validation enforced on empty submissions. No app-specific console errors detected.

-
- -
-

One functional bug found: History page items do not load cached results. Clicking a history entry re-fires the search API call (confirmed via network trace showing OPTIONS preflight to github-search edge function) instead of loading previously stored candidates from the database.

-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Section | Status | Notes
New Search | PASS | Both tabs (Role + Company, Job Description), form validation, example chips, credit counter
Results | PASS | Search bar, workflow stepper, example chips render correctly
History | PARTIAL | Renders correctly with date grouping and result counts. Clicking items fails to load cached results (BUG-001)
Pipeline | PASS | Kanban board, 5 stages, candidate cards with scores, sort/export buttons
Watchlist | PASS | Candidate entries, list management, filter bar, + New List button
Bulk Actions | PASS | Table view, sortable columns, score slider, AI action panel, export
Websets | PASS | Create form with query/criteria/enrichment fields, empty state renders
Settings | PASS | Outreach defaults, API keys masked, webhook URLs, integrations
-
- -
-

Environment

- - - - - - - - - - - - - - - -
Parameter | Value
URL | getsourcekit.vercel.app
Auth User | michael.f.rubino@gmail.com
Branch | main (post-PR #10 merge)
Date | February 28, 2026
Browser | Chrome (Cowork VM)
Console Errors | 22 exceptions, all from chrome-extension (aeblfdkhhhdcdjpifhhbdiojplfjncoa). Zero app errors.
-
-
- - -
-
-

Detailed Section Results

- -
-
-

PASS New Search

-

Two input tabs: "Role + Company" (Job Title + Company Name fields) and "Job Description" (URL input + text area). Example chips render: Staff ML Engineer @ Anthropic, Senior Robotics @ Figure, etc. Credit counter shows 10/10 searches remaining. "Build Sourcing Strategy" button present. Form validation tested: submitting with empty fields shows red borders + error messages ("Job title is required", "Company name is required"). Form correctly blocks submission.

-
-
-

PASS Results

-

Search bar with icon. Workflow stepper: Search > Review & Score > Enrich & Pipeline. Five example search chips: Rust systems engineers, React accessibility experts, ML infrastructure engineers, Kubernetes contributors, Security researchers. Clean layout, no rendering issues.

-
-
-

PARTIAL History

-

Date grouping (Today / Yesterday). Three history entries with query text, relative timestamps, and result counts (15, 60, 15). UI renders correctly. BUG: Clicking "Rust systems engineers" (15 results) navigates to Results page but shows "No engineers found." Clicking "Seeking a technical founder..." (60 results) produces the same behavior. Network trace confirms an OPTIONS preflight to github-search edge function, meaning history click re-fires the API instead of loading cached results. See BUG-001.

-
-
-

PASS Pipeline

-

Kanban board with 5 stages: Contacted, Not Interested, Recruiter Screen, Rejected, Moved to ATS. Filter tabs with counts. Thomas Wolf candidate card visible with score 98 and avatar. Sort by Score dropdown. Export button present. Stage columns render with proper spacing.

-
-
-

PASS Watchlist

-

Thomas Wolf entry with @thomwolf handle, "Added Feb 27." Default list tab active. + New List button. Filter bar present. Layout clean, card rendering correct.

-
-
-

PASS Bulk Actions

-

Table view with sortable columns: Name, Score, Stage, Languages. "All Stages" dropdown filter. Score range slider (0-100). AI action panel with buttons: Refine Shortlist, Draft Outreach, Search Insights, Candidate Brief, Compare Selected. Export All button at bottom. Rendering clean.

-
-
-

PASS Websets

-

Create Webset form with search query input, number of results (default 10), expandable Add Criteria section, expandable Add Enrichment section. + Create Webset button. Empty state shown when no websets exist. Form renders correctly.

-
-
-

PASS Settings

-

Outreach section: Default Target Role ("Senior Software Engineer"), Default Target Company ("Acme Inc"), Role Pitch ("Building the next-gen developer platform"). API Keys section: Exa key masked ("exa-..."), Parallel key masked ("parallel-..."). Integrations: Webhook URL and Slack Webhook URL fields. All inputs editable.

-
-
-
- -
-

Validation Tests

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Test | Action | Expected | Actual | Status
Empty form submit | Click "Build Sourcing Strategy" with empty fields | Validation errors shown, form blocked | Red borders + error messages displayed, form not submitted | PASS
Auth state | Check user session | Authenticated user displayed | michael.f.rubino@gmail.com shown | PASS
Credit display | Check search credit counter | Credit count visible | 10/10 searches displayed | PASS
Tab switching | Switch between New Search tabs | Content swaps cleanly | Role+Company and Job Description tabs switch correctly | PASS
History click | Click history entry with 15 results | Load cached candidates | Shows "No engineers found", re-fires API | FAIL
Console errors | Check browser console | No app-specific errors | 22 errors from Chrome extension only, zero from app | PASS
-
-
- - -
-
-

BUG-001: History Items Do Not Load Cached Results

- -
-

Severity: P2. History page renders correctly with query text, timestamps, and result counts. However, clicking any history entry navigates to the Results page and re-fires the search API call instead of loading previously cached results from the candidates table. The user sees "No engineers found" despite the history entry showing 15 or 60 results.

-
- - - - - - - - - - - - - - - - - -
Field | Detail
Bug ID | BUG-001
Severity | P2 (functional regression, data loss from user perspective)
Page | /history -> /results
Behavior | Clicking a history entry navigates to Results page with query pre-filled but shows "No engineers found"
Expected | History click should load cached candidates from the candidates table using the stored search_id or query_hash
Actual | History click re-triggers the github-search edge function. Network trace shows OPTIONS preflight to the search endpoint.
Reproduced | Yes, on two separate history entries (15 results and 60 results)
- -
-

Root cause hypothesis: The history item click handler passes the query string to the Results page and re-executes the search flow instead of passing a search_id/query_hash that would trigger a lookup against cached candidates. The Results page component likely has a single code path: receive query -> call edge function -> render results. It needs a second path: receive search_id -> SELECT * FROM candidates WHERE search_id = ? -> render results.

-
- -
-

Recommended Fix

- - - - - - - - - - - - - - - - - - - - - - - - - -
Step | Change
1 | History item click should pass search_id (or query_hash) as a URL param to the Results page, not just the query string
2 | Results page should check for search_id param on mount. If present, query candidates table directly instead of calling the edge function
3 | The github-search edge function should store a search_id on each search run, associating candidates with that search
4 | History entries should store search_id so they can be linked back to cached results
-
-
-
- - -
-
-

Pull Request Status

- -
-

All PRs resolved. Zero open pull requests on mrNLK/SourceProof. Main branch is current with all merged changes from PR #10.

-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PR | Title | Status | Resolution
#10 | Contributors API fixes, search pipeline fixes | MERGED | Merged to main. Contains consolidated fixes from all sprint branches.
#8 | Sprint branch | CLOSED | Superseded by #10
#3 | feat: full audit remediation - all 20 prompts | CLOSED | Superseded by #10
#2 | Beta v1 polish: 12 bug fixes from workflow test report | CLOSED | Superseded by #10
-
- -
-

Merged Changes (PR #10)

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
File | Change
supabase/functions/_shared/gate.ts | Credit gating logic with user_subscriptions table
supabase/migrations/20260228110000_create_user_subscriptions.sql | Idempotent migration for subscription table
supabase/migrations/20260227100000_increment_searches_rpc.sql | Atomic search count increment RPC
supabase/migrations/20260228100000_v2_qa_fixes.sql | Added query_hash, linkedin_fetched_at columns
src/components/DeveloperCard.tsx | Fixed type casts
src/components/SearchProgress.tsx | Fixed useEffect deps array
supabase/functions/github-search/index.ts | Fixed O(n^2) dedup with Map lookup
-
-
- -
- - - - - - - diff --git a/beta-v2-guide.html b/beta-v2-guide.html deleted file mode 100644 index 0bfcefb..0000000 --- a/beta-v2-guide.html +++ /dev/null @@ -1,936 +0,0 @@ - - - - - - - - - - - - - -
- - -
- - - - - - - - - - - - - - SourceKit - - -
BETA V2.1 / EARLY ACCESS
-
Find engineers by what they build.
-
AI-powered technical sourcing, grounded in real GitHub data + company intelligence.
-
getsourcekit.vercel.app
-
- - -
- -
What is SourceKit?
-

SourceKit is a technical sourcing tool that finds software engineers based on their actual code contributions on GitHub, not self-reported LinkedIn keywords. You enter a role and company. SourceKit builds a sourcing strategy, searches real repositories, and returns a scored pipeline of candidates in minutes. v2.1 adds Harmonic as a fifth API provider for company intelligence: real-time enrichment, poachability scoring, and similar-company expansion. The system now orchestrates five providers: Claude AI (strategy), Exa (search, enrichment, Websets), Parallel (company intel, JD parsing), GitHub (contributor mining), and Harmonic (company data, poachability, team mapping).

-
-
-
Signal Over Noise
-
Candidates are scored on real GitHub activity: commits, pull requests, and repo contributions. Ground-truth engineering signal that resumes can't replicate.
-
-
-
AI-Built Strategy
-
Claude orchestrates five API providers. Harmonic enriches every poach company with funding, headcount, and traction data. Strategy generation expands targets via similar-company discovery.
-
-
-
Minutes, Not Days
-
Job description to scored candidate pipeline in under two minutes. Company intel cards appear inline on every candidate. No tool switching.
-
-
-
- - -
- -
How is this different?
-
-
-
Traditional Sourcing
-
Search LinkedIn by keywords. Filter by title and company. Hope the resume matches reality. High noise, profile-optimized results, limited engineering signal.
-
-
-
SourceKit Sourcing
-
AI identifies key repos, Exa surfaces candidates, Parallel maps company employees, Harmonic scores company poachability, and Websets build living candidate lists. Evidence-based, not keyword-based.
-
-
-
Best Use Cases
-
Niche and senior technical roles. Passive candidates not actively job-seeking. Engineers at early-stage startups. Anyone with a meaningful open-source presence.
-
-
-
- -
- - -
- - -
- -
Architecture and Technology
-

SourceKit orchestrates five API providers through Claude AI, backed by Supabase for data persistence. Exa powers search, enrichment, synthesis, and persistent collections. Parallel handles company intelligence and JD extraction. GitHub provides contributor mining and commit-level scoring. Harmonic delivers company enrichment, poachability scoring, and similar-company expansion.

- - -
-
Presentation Layer
-
-
React + TypeScript Web App
-
Mobile-first UI / Google OAuth / Settings / Search / Pipeline / Websets / Company Intel Cards
-
-
-
-
-
AI Intelligence Layer
-
-
-
Claude AI (Anthropic)
-
Strategy / Scoring / Outreach / Orchestration
-
-
-
Exa API (5 endpoints)
-
Research / Search / Answer / findSimilar / Websets
-
-
-
Parallel API (3 endpoints)
-
Task (intel) / FindAll (map) / Search (JD parse)
-
-
-
-
-
GitHub REST + GraphQL
-
Contributor mining / Commit data / EEA scoring
-
-
-
- - - - - - - Harmonic API (6 endpoints) -
-
Enrich / Search / Similar / Employees / Poachability / Cache
-
-
-
-
-
-
Data Layer
-
-
-
Supabase (PostgreSQL)
-
Auth / RLS / User Data / Pipeline State / Webset Mappings / Harmonic Cache (7d TTL)
-
-
-
Enrichment APIs
-
LinkedIn matching / Email discovery / Profile enrichment
-
-
-
-
-
-
Infrastructure
-
-
-
Vercel
-
Hosting / Serverless Functions / Edge Network
-
-
-
Stripe
-
Billing / Usage Tracking / Tier Management
-
-
-
-
- - -
- -
Workflow: From Role to Pipeline
-
-
-
You Input
-
Role + company, JD, or job URL
-
-
-
-
AI Strategy
-
Claude builds repos, poach list, skills matrix
-
-
-
-
Harmonic Enrich
-
Poach companies enriched, expanded via similar cos
-
-
-
-
You Refine
-
Edit repos, targets, criteria
-
-
-
-
Multi-API Search
-
Exa + GitHub + Parallel scan
-
-
-
-
Scored Pipeline
-
Ranked by code + company signal
-
-
-
- - -
- -
Three Ways to Start a Search
-
-
-
1. Role + Company
-
Type a job title and target company. Claude infers the technical stack, identifies relevant repos, and builds your strategy from scratch. Example: "Staff ML Engineer" at "Anthropic"
-
-
-
2. Paste a Job Description
-
Copy and paste the full JD text. The AI parses requirements, technologies, and seniority level to generate a targeted strategy.
-
-
-
3. Paste a Job URL
-
Drop in a link from Lever, Greenhouse, or Ashby. Parallel.ai extracts the JD content and builds the strategy automatically. Works with most JS-rendered pages; if a URL fails to parse, paste the JD text directly.
-
-
-
- -
- - -
- - -
- -
Real-Time Company Data on Every Candidate
-

v2.1 integrates Harmonic's company intelligence API across the entire pipeline. Every candidate's current employer is automatically enriched with funding, headcount, traction metrics, and investors. The strategy builder expands your poach company list using similar-company discovery. A computed poachability score (0-100) tells you which companies' engineers are most likely to be receptive to outreach.

- - -
How It Works
-
-
-
1. Strategy Enrichment
-
When Claude generates a poach company list, each company is enriched via Harmonic's enrich endpoint. Funding stage, headcount, traction metrics, and investors are fetched and cached for 7 days. Companies with high-confidence "direct_competitor" URNs are used as seeds for similar-company expansion.
-
-
-
2. Similar Company Expansion
-
Harmonic's similar_companies endpoint takes seed URNs from direct competitors and returns adjacent companies you may not have considered. These are automatically deduplicated against the original list and appended. Your poach list grows from Claude's initial suggestions to a broader, data-informed set.
-
-
-
3. Poachability Scoring
-
Each enriched company receives a computed poachability score (0-100) based on six signals: engineering headcount trajectory, web traffic trends, funding stage, time since last funding round, team size, and layoff/restructuring signals from company highlights. Higher score = easier to recruit from.
-
-
-
4. Candidate Company Cards
-
On every candidate profile and pipeline card, SourceKit renders an expandable company context card. It shows the company logo, name, poachability badge, stage, headcount, funding total, key metrics with trend indicators, lead investors, and industry/technology tags. Non-blocking: the profile renders immediately, company data loads independently.
-
-
-
- - -
- -
Scoring Signals
-
-
Easy 70-100
-
Moderate 50-69
-
Hard 0-49
-
-
-
-
Eng. Headcount
-
Engineering team shrinking in 90d = +20 pts. Growing fast = -15 pts. Captures reorgs and hiring freezes.
-
-
-
Web Traffic
-
Traffic declining 30d = +15 pts. Traffic surging = -10 pts. Proxy for product momentum and morale.
-
-
-
Funding Stage
-
Pre-Seed/Seed = +10 pts (stability concerns). Series D+/Growth = -5 pts (harder to poach).
-
-
-
Funding Recency
-
No funding in 24+ months = +15 pts. Recently funded (<6mo) = -10 pts. Runway anxiety drives mobility.
-
-
-
Team Size
-
<20 employees = +5 pts. Small teams have less retention infrastructure and fewer internal moves.
-
-
-
Layoff Signals
-
Layoff/restructuring mentions in Harmonic highlights = +20 pts. Strongest single signal.
-
-
-
- -
- - -
- - -
- -
How Candidates Are Scored
-

Every candidate receives a composite score from 0 to 100 based on actual GitHub activity, weighted by relevance to the target role. The EEA scoring layer can be enhanced by Harmonic company data to boost signals for candidates at companies in relevant industries or technology domains.

-
-
Strong Match 80-100
-
Worth Reviewing 60-79
-
Lower Signal 0-59
-
-
-
-
Commit Volume and Recency
-
How actively and recently the candidate has contributed to relevant repos. Recent activity weighted more heavily.
-
-
-
Language and Stack Match
-
How well the candidate's primary languages align with what the role requires. Python for ML, Rust for systems, etc.
-
-
-
Repo Relevance + EEA
-
Contributions to high-signal repos weighted heavily. Harmonic company context can boost EEA dimensions for industry alignment.
-
-
-
- - -
- -
5-Stage Candidate Pipeline
-
- Contacted - - Not Interested - - Recruiter Screen - - Rejected - - Moved to ATS -
-
-
-
Bulk Actions
-
Select multiple candidates to compare side-by-side, generate a summary report, or batch-draft personalized outreach messages.
-
-
-
Company Intel Inline
-
Every pipeline card shows the candidate's company context: poachability badge, funding stage, headcount, and trend metrics. No extra clicks needed.
-
-
-
- - -
- -
Exa Websets: Persistent Candidate Collections
-

Websets are persistent, auto-updating candidate collections powered by Exa's neural search. Define your criteria once and get new matches continuously as new contributors emerge. v2.0 adds Import (bulk ingest from external lists) and webhook Notify (real-time pipeline sync).

-
-
-
Create + Search + Enrich
-
Build a Webset from any search or scratch. Define entity type, query, and criteria. Auto-enrich with company, role, LinkedIn, GitHub, and languages.
-
-
-
Monitor + Import
-
Schedule auto-updates for new matches. Import bulk candidates from external CSV/lists. Both paths feed the same verified collection.
-
-
-
Notify (Webhooks)
-
Real-time webhook events fire when new candidates enter a Webset. Push matches directly to your pipeline or external ATS without polling.
-
-
-
- -
- - -
- - -
- -
What to Test
-
-
1. Sign In — Log in with Google at getsourcekit.vercel.app
-
2. Build a Strategy — Try all three input paths: role + company, pasted JD, or job URL
-
3. Check Harmonic Enrichment — Verify poach companies show funding, headcount, and poachability scores
-
4. Similar Company Expansion — Confirm additional companies appear via Harmonic similar-company discovery
-
5. Edit and Run the Search — Review the repo list. Add, remove, or rearrange, then run the search.
-
6. Filter Results — Narrow by language, minimum commits, star count, and location
-
7. Review Company Context Cards — Click into candidate profiles and check the expandable company intel card
-
8. Test Exa Websets — Create a Webset from search results. Add criteria. Check enrichment data. Set up monitoring.
-
9. Pipeline and Bulk Actions — Add candidates to the pipeline. Drag between stages. Check company context inline.
-
10. Export to CSV — Export and verify that the data looks clean and complete
-
-
- - -
- -
Best Practices
-
-
1. Be Specific With Titles
-
Use "ML Infrastructure Engineer focused on training pipelines" instead of just "ML Engineer." Specificity dramatically improves repo targeting and result quality.
-
-
-
2. Always Edit the Repo List
-
The AI-generated suggestions are a starting point. Editing the repo list is the single biggest lever you have for improving result quality.
-
-
-
3. Use Poachability to Prioritize Outreach
-
Candidates at companies scoring 70+ on poachability are more likely to be open. Declining web traffic and engineering headcount are the strongest signals.
-
-
-
4. Target Hidden Gems
-
Candidates with high contributions but low follower counts tend to be under-recruited and more responsive to outreach. Prioritize them.
-
-
-
5. Use Websets for Ongoing Roles
-
For roles you hire repeatedly, set up a Webset with monitoring. New contributors matching your criteria surface automatically.
-
-
- - -
- -
Known Limitations
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Limitation | Impact | Workaround
GitHub-only sourcing | Misses engineers not active on GitHub | Use alongside LinkedIn Recruiter for full coverage
Strategy build time (15-20s) | Brief wait while AI generates the plan | Expected behavior. Full search takes 30-60s.
JS-rendered job pages | Some job URLs may still fail to parse | Paste the JD text directly instead of the URL
LinkedIn URL accuracy | Links are AI-matched, not guaranteed | Always verify the LinkedIn profile before outreach
Harmonic cache (7d TTL) | Company data may be up to 7 days stale | Cache refreshes automatically. Stale data still valuable for strategy.
Harmonic coverage gaps | Very early or stealth companies may not be in Harmonic | Strategy falls back to Claude's original poach list if enrichment fails
-
- -
- -
- - - diff --git a/builder-score-skill-patches.md b/builder-score-skill-patches.md deleted file mode 100644 index dae3287..0000000 --- a/builder-score-skill-patches.md +++ /dev/null @@ -1,89 +0,0 @@ -# Builder Score Skill Patches - -Apply these edits to the three sourcing skill files. Each section shows the file, the line to find, and the replacement. - ---- - -## 1. candidate-scorer/SKILL.md - -### Find (line ~24): -``` -- **Candidate list**: From Clay results, Exa Websets, sourcing-dossier pipeline, candidate-signal-enricher output, or manual input. Minimum fields: name, current title, current company. Better: LinkedIn URL, evidence signals, career history. -``` - -### Replace with: -``` -- **Candidate list**: From Clay results, Exa Websets, sourcing-dossier pipeline, candidate-signal-enricher output, or manual input. Minimum fields: name, current title, current company. Better: LinkedIn URL, evidence signals, career history, Builder Score. -- **Builder Score** (optional): If the candidate has a GitHub username, call the `github-code-quality` edge function (via SourceKit's `getCodeQuality(username)`) to get a 0-100 Builder Score with dimension breakdowns (AI Mastery 30%, Build Velocity 20%, Tooling 15%, Testing 10%, Documentation 8%, Community 7%). Integrate the Builder Score as a signal under Technical Signal scoring. A Builder Score of 70+ adds +0.5 to Technical Signal. A score of 40-69 is neutral. Below 40 is a -0.5 penalty. Claude Code usage detection is a strong positive signal for AI-native roles. 
-``` - -### Find (line ~93, Technical Signal row): -``` -| **Technical Signal** | Verifiable evidence of exceptional technical ability: papers, patents, shipped products, open source, competitive programming | 25% | -``` - -### Replace with: -``` -| **Technical Signal** | Verifiable evidence of exceptional technical ability: papers, patents, shipped products, open source, competitive programming, Builder Score (0-100 from GitHub code analysis) | 25% | -``` - ---- - -## 2. candidate-signal-enricher/SKILL.md - -### Find (line ~47, GitHub/Open Source row): -``` -| 3 | **GitHub / Open Source** | GitHub, npm, PyPI, crates.io | "[Name]" OR "[known username]" | -``` - -### Replace with: -``` -| 3 | **GitHub / Open Source** | GitHub, npm, PyPI, crates.io, **SourceKit Builder Score** | "[Name]" OR "[known username]". If GitHub username is known, also call `getCodeQuality(username)` for a 0-100 Builder Score with AI Mastery, Build Velocity, Tooling, Testing, Documentation, and Community dimensions. Claude Code usage and AI framework detection are included. | -``` - ---- - -## 3. elite-sourcing/SKILL.md - -### Find (line ~53-57, enrichments array): -``` - "enrichments": [ - {"description": "LinkedIn profile URL", "format": "url"}, - {"description": "Current job title", "format": "text"}, - {"description": "Current employer / company name", "format": "text"} - ], -``` - -### Replace with: -``` - "enrichments": [ - {"description": "LinkedIn profile URL", "format": "url"}, - {"description": "Current job title", "format": "text"}, - {"description": "Current employer / company name", "format": "text"}, - {"description": "GitHub username", "format": "text"} - ], -``` - -### Add after Step 3 (Enrich) section, before Step 4: - -``` -### Step 3b: Builder Score (optional, for technical roles) - -For candidates with GitHub usernames from enrichment: -1. Call SourceKit's `getCodeQuality(username)` for each candidate -2. 
Returns 0-100 Builder Score with dimension breakdowns: - - AI Mastery (30%): GenAI repo count, AI framework detection, Claude Code usage - - Build Velocity (20%): Recent commit frequency, repo activity - - Tooling (15%): CI/CD, linting, dependency management - - Testing (10%): Test file presence, test frameworks - - Documentation (8%): README quality, inline docs - - Community (7%): Stars, forks, contributor count -3. Append Builder Score to candidate data before scoring -4. Flag candidates with Claude Code usage (strong signal for AI-native roles) -``` - ---- - -## Applying - -These patches need to be applied manually since skill files are read-only from the VM. Open each SKILL.md in your editor and make the replacements above. diff --git a/candidate-scorer.skill b/candidate-scorer.skill deleted file mode 100644 index 775ada6..0000000 Binary files a/candidate-scorer.skill and /dev/null differ diff --git a/candidate-signal-enricher.skill b/candidate-signal-enricher.skill deleted file mode 100644 index ce5c92b..0000000 Binary files a/candidate-signal-enricher.skill and /dev/null differ diff --git a/docs/SourceKit Logo.svg b/docs/SourceKit Logo.svg deleted file mode 100644 index 07dc1f2..0000000 --- a/docs/SourceKit Logo.svg +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/_redirects b/docs/_redirects deleted file mode 100644 index fb543b2..0000000 --- a/docs/_redirects +++ /dev/null @@ -1,2 +0,0 @@ -/architecture /architecture.html 200 -/diagram /architecture.html 200 diff --git a/docs/architecture.html b/docs/architecture.html deleted file mode 100644 index e5d28f1..0000000 --- a/docs/architecture.html +++ /dev/null @@ -1,574 +0,0 @@ - - - - - - - SourceKit — Architecture - - - - - - -
-
- - -
-
SourceKit
-
architecture
-
-
- -
-
- -
-
-
Technical Overview / Diagram First
-

Find engineers by what they build.

-

- Four APIs orchestrated through a single role-driven strategy layer. Each handles a distinct - slice of the sourcing pipeline from semantic discovery to company intelligence. -

-
- -
- ← Docs - System - Example - Output - Principles -
- -
-
-
-

System Architecture + Workflow

-

Three layers from role input to ranked pipeline, powered by four external intelligence APIs.

-
- -
-
-
Input Layer
-

Strategy

-

Role title, JD, or hiring URL parsed into target repos, company map, and skill graph.

-
-
-
-
Discovery Layer
-

Search

-

Neural search + GitHub contributor mining scan repos, artifacts, and engineering ecosystems.

-
-
-
-
Evaluation Layer
-

Ranking

-

Contribution scoring, company context enrichment, and evidence-weighted pipeline output.

-
-
- -
API Breakdown
- -
-
-
-
- - - - -
-
-
Exa
-
Discovery + Websets
-
-
-
    -
  • Semantic repo and artifact search
  • -
  • Persistent candidate Websets
  • -
  • Auto-monitoring for new matches
  • -
  • Enrichment fields on verified items
  • -
-
- -
-
-
- - - - - - - -
-
-
Parallel
-
Strategy + Mapping
-
-
-
    -
  • JD extraction from any URL
  • -
  • Engineer-to-company mapping
  • -
  • Ecosystem graph traversal
  • -
  • Multi-hop talent intelligence
  • -
-
- -
-
-
- - - - -
-
-
GitHub
-
Evidence + Scoring
-
-
-
    -
  • Commit history and contribution graphs
  • -
  • Repo quality and star analysis
  • -
  • Builder Score (0-100) computation
  • -
  • AI framework and tooling detection
  • -
-
- -
-
-
- - - - - - - -
-
-
Harmonic
-
Company Intelligence
-
-
-
    -
  • Startup and founder metadata
  • -
  • Company relationship mapping
  • -
  • Market context enrichment
  • -
  • Target-company intelligence layer
  • -
-
-
-
- -
-
-

Example Query

-

Role input generates strategy, repos, and poach company targets.

-
- -
-
-
InputRole
-
Staff ML Engineer
-Company: Anthropic
-
- -
-
Generated StrategyRepos
-
vllm
-transformers
-pytorch
-triton
-megatron-lm
-
- -
-
Generated StrategyPoach Companies
-
OpenAI
-DeepMind
-Meta FAIR
-Cohere
-Databricks
-
- -
-
Search GoalInterpretation
-
Large-scale model training
-Inference optimization
-RLHF and alignment research
-
-
-
- -
-
-

Example Output

-

Ranked by code signal from target repos. Builder Score weights AI framework usage 3x.

-
- -
- - - - - - - - - - - - - - - -
Engineer | Primary Repo | Contributions | Score
Liang Chen | vLLM | 203 commits | 94
Priya Sharma | PyTorch | 156 commits | 91
Marcus Webb | Megatron-LM | 89 commits | 86
Yuki Tanaka | Triton | 74 commits | 83
-
-
- -
-
-

Principles

-

What drives the system design.

-
- -
-
-

Artifact > Profile

-

Repository history and code output outweigh profile metadata.

-
-
-

Evidence > Keywords

-

Signals come from technical artifacts, not thin matching logic.

-
-
-

Contribution > Claims

-

Rank by shipped work, maintenance, and real technical ownership.

-
-
-
-
-
- - - - \ No newline at end of file diff --git a/docs/beta-v2-guide.html b/docs/beta-v2-guide.html deleted file mode 100644 index c257b8f..0000000 --- a/docs/beta-v2-guide.html +++ /dev/null @@ -1,1188 +0,0 @@ - - - - - - SourceKit Guide - - - - - - - - -
-
-
-
-
-

Find engineers by what they build.

-

SourceKit ranks talent on commits, repositories, publications, technical artifacts, maintainership, and ecosystem proof. No self-reported resumes. No keyword-first sourcing.

-
-
Guide • updated for Harmonic
-
- -
-
-
Input
-

Your role

-

Role + company, pasted job description, or hiring URL.

-
-
-
Claude + Parallel
-

AI strategy

-

Builds target repos, ecosystem map, and signal criteria.

-
-
-
You
-

Refine

-

Edit repo lists, adjust targets, and tighten evidence filters.

-
-
-
Exa + GitHub + Harmonic
-

Discovery

-

Scans repositories, contribution histories, and company intelligence.

-
-
-
Websets
-

Scored pipeline

-

Candidates ranked on verified proof and ready for enrichment or outreach.

-
-
- -
- - - -
- -
-
-
-

One search surface

-

SourceKit blends role strategy, artifact discovery, and candidate evaluation into one loop. Search, score, enrich, and export from the same working surface.

-
-
-

Proof over polish

-

Commits, repo activity, maintainership, talks, papers, and ecosystem evidence all matter more than a polished keyword resume.

-
-
-

Continuous pipelines

-

Websets persist. New evidence-bearing candidates flow in automatically as repos, publications, and ecosystems evolve.

-
-
-
- -
-
-
-

Role + company

-

Type a title and company. SourceKit infers stack, identifies repo targets, and builds a search strategy.

-
-
-

Paste a JD

-

Paste the full job description. SourceKit extracts requirements, seniority, stack, and evidence criteria.

-
-
-

Paste a job URL

-

Drop in a Lever, Greenhouse, or Ashby link. Relevant job content is parsed into the strategy layer.

-
-
-
- -
-
-
-
-

Builder score

-

AI-weighted GitHub evaluation returns a 0–100 score with dimension-level breakdowns. GitHub activity is weighted by role relevance. No self-reported data.

-
-
-
-
30% AI mastery
-
20% Build velocity
-
15% Tooling
-
15% Commit quality
-
20% Docs + testing
-
-
- Claude Code - AI keywords - Framework files - Coding tools - Recent activity -
-
-
-
-
- -
-
-
-

Workflow

-

SourceKit starts with role intent, turns it into an explicit technical strategy, then moves through artifact discovery, evidence scoring, and auto-updating candidate collections.

-
-
- -
-
-
-

Strategy layer

-
    -
  • Decompose the role into concrete technical markers.
  • -
  • Define target repositories, companies, and evidence thresholds.
  • -
  • Use role-specific criteria instead of generic keyword matching.
  • -
-
-
-

Search layer

-
    -
  • Exa surfaces relevant repos, artifacts, and ecosystem signals.
  • -
  • Parallel maps engineers to companies and surrounding talent graphs.
  • -
  • GitHub pulls repository activity and contribution proof.
  • -
  • Harmonic enriches startup and company intelligence.
  • -
-
-
- -
-
-
-

Pipeline stages

-

Persistent, sortable, and built for bulk compare, export, and outreach.

-
-
-
-
Sourced
-
Contacted
-
Responded
-
Screen
-
In process
-
Close
-
-
-
-
- -
-
-
-

Integrations

-

The external intelligence layer combines semantic artifact discovery, engineer-to-company mapping, contribution analysis, and startup intelligence enrichment.

-
-
- -
-

Integrations Layer

-
-
-
-
- - - -
-
-

Exa Research API

-
-
-

Semantic discovery of repositories, papers, and technical artifacts used to define and expand candidate search surfaces.

-
- -
-
-
- - - - - - - - -
-
-

Parallel API

-
-
-

Engineer-to-company mapping plus ecosystem discovery used to connect individuals, organizations, and adjacent talent clusters.

-
- -
-
-
- - - - - - -
-
-

GitHub API

-
-
-

Repository mining, commit history, and contribution signal extraction used to score builders on real technical output.

-
- -
-
-
- - - - - -
-
-

Harmonic API

-
-
-

Startup, founder, and company intelligence enrichment used to add market context and organization-level signal to candidate sets.

-
-
-
-
- -
-
-
-

Evidence of Exceptional Ability

-

Build Websets around explicit proof of elite performance instead of loose title matching or keyword spray.

-
-
- -
-
-
-
-

Published research

-

NeurIPS, ICML, ICLR, CVPR, ACL papers, citations, and notable publication venues.

-
-
-

Open source impact

-

Top contributors, maintainers, starred projects, and long-lived repository ownership.

-
-
-

Conference signals

-

KubeCon, QCon, Strange Loop, DEF CON, and technical teaching at respected venues.

-
-
-

Industry recognition

-

Patents, major technical press, field-specific awards, and externally validated technical leadership.

-
-
-

Technical leadership

-

Architectural decisions, RFC ownership, system design authorship, and org-defining engineering work.

-
-
-

Scale + impact

-

Production ML, distributed training, infra serving 10k+ QPS, or code shipped at true system scale.

-
-
-
- -
-
-

Build EEA-driven Websets

-

Decompose the role into 3–5 concrete and verifiable signals, then build your collection around those signals directly.

-
-
-

Add enrichments

-

Surface the evidence itself: talk links, publication venues, contact data, stars, citations, commits, and company context.

-
-
-

Monitor continuously

-

New engineers who publish, ship OSS, or appear in the right ecosystems automatically re-enter the pipeline.

-
-
-
-
- -
-
-
-

Architecture

-

SourceKit is designed as a role-driven orchestration layer sitting on top of external intelligence APIs and a GitHub-based builder score.

-
-
- -
-
-
Presentation
-  React + TypeScript
-  Mobile-first / OAuth / Search / Pipeline / Websets
-
-AI intelligence layer
-  Claude AI
-  Exa + Websets
-  Parallel.ai
-  GitHub API
-  Harmonic API
-
-Data + infrastructure
-  Supabase
-  Auth / RLS / Pipeline
-  Vercel
-  Stripe
-
-Builder score
-  Commit history
-  Repository quality
-  Contribution graphs
-  Role-weighted evaluation
-
- -
-
- Exa API - Research, search, answer, findSimilar, and Websets for persistent collections and enrichment. -
-
- Parallel API - Task execution, FindAll discovery, and content-aware search across companies and engineering ecosystems. -
-
- GitHub API + Builder Score - Commits, contributors, graphs, and code quality analysis translated into role-relevant scoring. -
-
- Harmonic API - Company metadata, founder context, startup relationships, and enrichment for target-market mapping. -
-
-
-
- -
-
-
-

Webset recipes + operating notes

-

Use tightly scoped criteria and then broaden only after the evidence framework is correct.

-
-
- -
-
-

ML infra engineer

-

Top PyTorch, JAX, DeepSpeed, and systems contributors. Enrich with talks, stars, and company context.

-
-
-

Robotics software

-

ROS maintainers, Isaac contributors, and applied robotics OSS builders with verified shipped systems.

-
-
-

Staff frontend

-

Core React, Vue, and design-system contributors with production ownership and architectural depth.

-
-
-

Security engineer

-

DEF CON, Black Hat, bug bounty, CVE, and respected advisory signals with strong technical writing.

-
-
- -
- -
-
-

Be specific with titles

-

“ML infrastructure engineer focused on training pipelines” beats “ML engineer.”

-
-
-

Edit the repo list

-

The repo list is the single biggest lever for result quality. Tighten it first.

-
-
-

Target hidden gems

-

High contributions with lower follower counts often surface stronger and less recruited talent.

-
-
-

Layer enrichments later

-

Let the Webset verify matches first, then add contact data, publications, and company context.

-
-
-
- - -
- - - - diff --git a/docs/generate-pdfs.mjs b/docs/generate-pdfs.mjs deleted file mode 100644 index 37f9a34..0000000 --- a/docs/generate-pdfs.mjs +++ /dev/null @@ -1,50 +0,0 @@ -import puppeteer from 'puppeteer'; -import { fileURLToPath } from 'url'; -import { dirname, join } from 'path'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); - -async function generatePDFs() { - console.log('Launching browser...'); - const browser = await puppeteer.launch({ headless: 'new' }); - - // Full Guide (4 pages) - console.log('Generating Full Guide...'); - const guidePage = await browser.newPage(); - await guidePage.goto(`file://${join(__dirname, 'beta-v2-guide.html')}`, { - waitUntil: 'networkidle0', - timeout: 30000, - }); - await new Promise(r => setTimeout(r, 3000)); // Wait for fonts - await guidePage.pdf({ - path: join(__dirname, 'SourceKit_Beta_v2_0_Guide.pdf'), - format: 'letter', - printBackground: true, - margin: { top: 0, right: 0, bottom: 0, left: 0 }, - }); - console.log(' -> SourceKit_Beta_v2_0_Guide.pdf'); - - // TL;DR (1 page) - console.log('Generating TL;DR...'); - const tldrPage = await browser.newPage(); - await tldrPage.goto(`file://${join(__dirname, 'beta-v2-tldr.html')}`, { - waitUntil: 'networkidle0', - timeout: 30000, - }); - await new Promise(r => setTimeout(r, 3000)); - await tldrPage.pdf({ - path: join(__dirname, 'SourceKit_Beta_v2_0___TL_DR.pdf'), - format: 'letter', - printBackground: true, - margin: { top: 0, right: 0, bottom: 0, left: 0 }, - }); - console.log(' -> SourceKit_Beta_v2_0___TL_DR.pdf'); - - await browser.close(); - console.log('Done! PDFs generated in docs/'); -} - -generatePDFs().catch(err => { - console.error('Error:', err); - process.exit(1); -}); diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index 985578a..0000000 --- a/docs/index.html +++ /dev/null @@ -1,1080 +0,0 @@ - - - - - -SourceKit Docs - - - - - - -
-
- - -
-
-
-
-
Evidence-Based Technical Talent Discovery
-

Find engineers by what they actually build.

-

SourceKit converts a role, job description, or hiring link into a sourcing strategy, maps the engineering ecosystem, evaluates visible GitHub signal, and turns strong searches into live candidate pipelines.

- -
-
-
What makes it different
- Source on proof, not self-reporting. -

Commits, repos, activity, talks, publications, maintainership, and ecosystem evidence all matter more than polished keyword resumes.

-
-
- -
-
- 0 - Self-reported data -
-
- 6 - External sources -
-
- 7 - Signal dimensions -
-
- 15 - Edge functions -
-
- 6 - Pipeline stages -
-
- Live - Harmonic integration -
-
-
- -
-
-
-
Signal Over Keywords
-

Source on evidence, not resumes.

-

Start from shipped work, ecosystem position, and visible technical output rather than title matching alone.

-
-
-
Research Built In
-

Turn vague roles into search logic.

-

Translate technical hiring briefs into target repos, target companies, and ability criteria.

-
-
-
Persistent Pipelines
-

Keep strong searches alive.

-

Promote successful criteria into monitored Websets that continue surfacing the right talent.

-
-
-
- -
-
-
-
How SourceKit Works
-

One search becomes strategy, discovery, and a live talent pipeline.

-
-

This is the core system story: define the brief, generate search logic, map the ecosystem, identify visible proof, then move ranked candidates into a pipeline that can actually be worked.

-
- -
-
-
Role Input
-
Start with the hiring brief
-

Paste a JD, enter a role spec, or drop in a Lever, Greenhouse, or Ashby link.

-
- -
-
Strategy Generation
-
E · Exa Research
-
Translate the role into sourcing logic
-

Exa identifies target repos, peer companies, and ability criteria from the brief.

-
- -
-
Ecosystem Mapping
-
P · Parallel
-
Map the company and talent landscape
-

Parallel maps company intelligence, engineers at targets, and adjacent search surfaces.

-
- -
-
Candidate Discovery
-
GH · GitHub + Harmonic
-
Find builders with visible technical and company proof
-

GitHub commits, repo quality, Harmonic enrichment, and code scoring produce ranked candidates with citations.

-
- -
-
Candidate Pipeline
-
Move from shortlist to outreach
-

Ranked shortlist with similar-profile expansion, source citations, and stage movement.

-
-
-
- -
-
-
-
Harmonic Integration
-

Company graph intelligence is now part of the guide spec.

-
-

Harmonic is live in SourceKit and expands the product beyond repo and web signal. It adds structured company intelligence, people enrichment, and a stronger bridge between technical proof and company context.

-
- -
-
-
Live Activation
-

What is live now

-

The Harmonic API key is set in Supabase secrets, the cache migration is pushed, and the `harmonic-search`, `harmonic-enrich`, and `research-role` edge functions are deployed to production.

-
-
-

`harmonic-search`

-

Search companies and related entities to widen the sourcing universe around a role.

-
-
-

`harmonic-enrich`

-

Enrich discovered targets with structured company and people graph data.

-
-
-

`research-role`

-

Use live research inputs to sharpen the target company map and talent landscape.

-
-
-

`harmonic_cache`

-

Cache structured responses to reduce duplicate fetches and stabilize repeated research flows.

-
-
-
- -
-
What this adds to SourceKit
-

Harmonic makes SourceKit better at company-aware sourcing.

-

GitHub and Exa help identify builders and technical proof. Harmonic adds structured business context so SourceKit can reason about company targets, startup ecosystems, adjacent employers, and candidate-company relationships with more precision.

-

Target company discovery for ecosystem mapping. Structured enrichment connecting engineers to startups and adjacent company graphs. Stronger research loops inside role strategy and outreach logic.

-
-
-
- -
-
-
-
Builder Score
-

A premium read on public engineering output.

-
-

Builder Score is how SourceKit goes deeper than search. It evaluates visible GitHub work with weighted dimensions and returns an AI summary recruiters can use immediately.

-
- -
-
-
What It Does
-

Scan a candidate’s strongest public repos, score code signal from 0-100, and surface evidence around AI usage, build velocity, tooling, testing, documentation, and activity patterns.

-

Weighted dimensions: AI mastery 30%, build velocity 20%, tooling 15%, commit bonus 15%, testing and docs signal.

-
- -
-

Why the ranking works

-

Search relevance handles stack fit and recency. Builder Score adds deeper technical diligence so rankings feel more defensible and less like heuristics.

-

Compare shortlisted candidates, write smarter outreach, and reduce manual GitHub review.

-
-
- -
- -
-
-
-
Websets + EEA
-

Build living market maps around what elite talent looks like.

-
-

For hard technical roles, titles are weak proxies. SourceKit helps define evidence of exceptional ability, then packages that logic into persistent, monitorable talent sets.

-
- -
-
-

Persistent candidate sets

-

Define criteria once, enrich survivors, and monitor over time as new contributors emerge across the ecosystem.

-

Create a Webset from proven criteria. Enrich with contact, company, and talk signal. Monitor on a recurring schedule.

-
-
-

Evidence of exceptional ability

-

Model the top 5-10% using research output, open-source impact, conference visibility, technical leadership, and scale.

-

Published research and citations, open-source impact and maintainership, technical leadership and shipped scale.

-
-
- -
-
-

ML Infra Engineer

-

PyTorch, JAX, or DeepSpeed contribution with distributed training signal and technical depth.

-
-
-

Robotics Software

-

ROS / MoveIt / Drake contribution plus visible robotics research or ecosystem presence.

-
-
-

Staff Frontend

-

Core UI systems work, OSS contribution, and conference or thought-leadership signal.

-
-
-

AI-Native Engineer

-

High Builder Score with visible AI-native coding and framework usage patterns in public repos.

-
-
-
- -
-
-
-
Architecture
-

A layered system built for sourcing, scoring, and orchestration.

-
-

This stack is intentionally simple to explain: an operator-facing product layer, an intelligence layer for research and scoring, and an infrastructure layer for auth, data, and billing.

-
- -
-
-
-
Presentation Layer
-
-
-

React + TypeScript

-

Role input, search UI, compare, outreach, search history, pipeline, and Websets workflow.

-
-
-

Candidate Pipeline

-

Ranked profiles, source citations, stage movement, notes, and export paths for operators.

-
-
-
-
- -
-
-
Intelligence Layer
-
-
-

Claude AI

-

Role strategy, ability criteria, summaries, and Builder Score narrative.

-
-
-

Exa + Websets

-

Research, neural search, collections, enrichment, and persistent monitoring.

-
-
-

Parallel.ai

-

JD extraction, company intelligence, and ecosystem mapping.

-
-
-

Harmonic

-

Structured company graph search, company enrichment, and startup ecosystem intelligence.

-
-
-

GitHub API

-

Contributors, commits, repo mining, language patterns, and code quality signals.

-
-
-
-
- -
-
-
Data + Infrastructure
-
-
-

Supabase

-

Auth, PostgreSQL, RLS, search history, Websets data, and pipeline state.

-
-
-

Vercel

-

Frontend delivery, edge deployment, and serverless execution.

-
-
-

Stripe

-

Plans, billing, checkout flow, and entitlement enforcement.

-
-
-
-
-
-
- -
-
-
-
Operator Playbook
-

Use the product like an operator, not a keyword searcher.

-
-

The strongest SourceKit workflows come from narrowing evidence, curating the repo set, and layering enrichments after candidate quality is already established.

-
- -
-
-

Be specific with titles

-

Technical specificity sharpens the repo universe and improves search relevance immediately.

-
-
-

Edit the repo set

-

The repo list is often the highest-leverage quality control surface in the entire workflow.

-
-
-

Start narrow

-

Three tight criteria and twenty excellent matches beat one loose criterion and hundreds of noisy profiles.

-
-
-

Layer enrichments later

-

Verify quality first, then add Builder Score, contact data, publications, or outreach context.

-
-
-
- - -
-
-
- - diff --git a/docs/sourcekit-infographic.html b/docs/sourcekit-infographic.html deleted file mode 100644 index 99b0468..0000000 --- a/docs/sourcekit-infographic.html +++ /dev/null @@ -1,912 +0,0 @@ - - - - - - - - - - - -
- - -
-
- - - - - - - -
-
- Source Kit -
-
getsourcekit.vercel.app
-
-
-
Find engineers by what they build.
-
- -
- - -
-
HOW IT WORKS
-
-
-
Input
-
Your Role
-
Role + company, paste JD text, or drop in a job URL
-
-
-
-
Claude + Parallel
-
AI Strategy
-
Builds target repos, poach list, skills matrix
-
-
-
-
You
-
Refine
-
Edit repo list, add or remove targets, adjust criteria
-
-
-
-
Exa + GitHub
-
Multi-API Search
-
Exa + Parallel + GitHub scan for top contributors
-
-
-
-
Exa Websets
-
Scored Pipeline
-
Candidates ranked by code signal, ready for outreach
-
-
-
- - -
-
THREE WAYS TO START
-
-
-
Role + Company
-
Type a title and company. Claude infers the stack, identifies repos, builds strategy.
-
-
-
Paste a JD
-
Paste the full job description. AI parses requirements, tech stack, and seniority level.
-
-
-
Paste a Job URL
-
Drop a Lever, Greenhouse, or Ashby link. Parallel.ai extracts content from JS pages.
-
-
-
- - -
-
-
SCORING
-
Composite Score: 0 to 100
-
GitHub activity weighted by role relevance. No self-reported data.
-
-
-
-
Commit Volume + Recency
-
How actively and recently the candidate has contributed. Recent activity weighted higher.
-
-
-
Language + Stack Match
-
How well primary languages align with role requirements. Python for ML, Rust for systems.
-
-
-
Repo Relevance + Impact
-
High-signal repos (well-starred, actively maintained) weighted over personal projects.
-
-
-
- - -
-
PIPELINE
-
-
-
-
Sourced
-
-
-
-
Contacted
-
-
-
-
Responded
-
-
-
-
Screen
-
-
-
-
In Process
-
-
-
Drag between stages. Bulk select for compare, summary, or batch outreach. CSV export anytime.
-
- - -
-
-
BUILDER SCORE NEW
-
AI-Weighted GitHub Evaluation
-
Scans top repos. Returns 0-100 with per-dimension breakdowns.
-
-
-
-
30%
-
AI Mastery
-
-
-
20%
-
Build Velocity
-
-
-
15%
-
Tooling
-
-
-
15%
-
Commit Bonus
-
-
-
10%
-
Testing
-
-
-
8%
-
Docs
-
-
-
7%
-
Community
-
-
-
- Claude Code 3x - 46 AI Keywords - 28 Framework Files - 7 Coding Tools - No self-reported data. GitHub activity only. -
-
- -
- - -
-
EXA WEBSETS NEW IN V2.0
-
-
-
Persistent, Auto-Updating Candidate Sets
-
Define search criteria once. Get new verified matches continuously as new contributors emerge. Websets are talent pipelines that run themselves.
-
-
-
-
-
Create
-
Define query + criteria. Exa's neural search finds matches. Each verified before admission.
-
-
-
Enrich
-
Auto-extract: contact email, current company, publications, talks, GitHub stats.
-
-
-
Monitor
-
Daily or weekly schedule. New matches appended automatically. Override replaces stale.
-
-
-
Export
-
API, CSV, or feed into Clay and Parallel for enrichment and outreach sequencing.
-
-
-
-
-
- - -
-
EEA FRAMEWORK
-
-
-
Evidence of Exceptional Ability
-
Verifiable signal that puts someone in the top 5-10% of practitioners. Instead of searching for titles or keywords, build Websets around the specific evidence that defines elite performance.
-
-
-
Published Research
-
NeurIPS, ICML, ICLR, CVPR, ACL papers. H-index. Citation count.
-
-
-
Open Source Impact
-
Top-10 contributor to repos with 1K+ stars. Maintainer status.
-
-
-
Conference + Teaching
-
KubeCon, QCon, Strange Loop, DEF CON talks. Course instructor.
-
-
-
Industry Recognition
-
Patents filed. CVE author. Awards. Press coverage.
-
-
-
Technical Leadership
-
RFCs adopted. Design docs merged. Architecture decisions at scale.
-
-
-
Scale + Impact
-
Shipped to 1M+ users. Infra serving 10K+ QPS. Production ML at scale.
-
-
-
-
-
Building EEA-Driven Websets
-
-
-
Decompose the Role
-
Identify 3-5 concrete, verifiable markers of exceptional ability. These become your Webset criteria.
-
-
-
Build the Webset
-
Pass EEA signals as criteria. Exa's verification agent checks each candidate before admission.
-
-
-
Add Enrichments
-
Surface the evidence itself: conference talks, star count, publication venues, contact email.
-
-
-
Monitor
-
Weekly monitoring. New people who publish, ship OSS, or give talks auto-enter the pipeline.
-
-
-
-
-
- -
- - -
-
WEBSET RECIPES
-
-
-
ML Infra Engineer
-
- Top contributor to PyTorch, JAX, DeepSpeed (500+ stars)
- Shipped distributed training at scale
- Enrich: talks, star count, email, company -
-
-
-
Robotics Software
-
- ROS/ROS2 maintainer or MoveIt, Drake, Isaac contributor
- Published at ICRA, RSS, or CoRL
- Enrich: patents, lab, H-index, email -
-
-
-
Staff Frontend
-
- Core contributor to React, Vue, Svelte (2K+ stars)
- Speaker at React Conf, VueConf, JSConf
- Enrich: blog posts, npm packages, email -
-
-
-
Security Engineer
-
- Published CVE or security advisory author
- DEF CON, Black Hat, BSides speaker
- Enrich: CVEs, bounties, certs, email -
-
-
-
- - -
-
ARCHITECTURE + API SURFACE
-
-
-
Presentation
-
-
React + TypeScript
-
Mobile-first / OAuth / Search / Pipeline / Websets
-
-
-
-
-
AI Intelligence Layer
-
-
-
Claude AI
-
Strategy / Scoring / Builder Score
-
-
-
Exa + Websets
-
Neural search / Collections
-
-
-
Parallel.ai
-
JD extraction
-
-
-
GitHub API
-
Contributors / Commits / Code Quality
-
-
-
-
-
-
Data + Infrastructure
-
-
-
Supabase
-
Auth / RLS / Pipeline
-
-
-
Vercel
-
Edge
-
-
-
Stripe
-
Billing
-
-
-
-
-
-
-
Exa API 5 endpoints
-
- Research Deep research on any topic
- Search Neural + keyword hybrid search
- Answer Direct answers from web data
- findSimilar Find pages similar to a URL
- Websets Persistent collections, enrichment, monitoring -
-
-
-
Parallel API 3 endpoints
-
- Task Single deep-research task with citations
- FindAll Batch entity discovery from a query
- Search Web search with content extraction -
-
-
-
GitHub API + Builder Score 3 interfaces
-
- REST Contributors, commits, profiles, repo metadata
- GraphQL Contribution graphs, org membership, activity
- Builder Score On-demand code quality eval (6 dims, Claude summary) -
-
-
-
- -
- - -
-
PRO TIPS
-
-
-
Be Specific With Titles
-
"ML Infrastructure Engineer focused on training pipelines" beats "ML Engineer."
-
-
-
Always Edit the Repo List
-
AI suggestions are a starting point. Editing the repo list is the single biggest lever for result quality.
-
-
-
Target Hidden Gems
-
High contributions, low follower counts. Under-recruited and more responsive to outreach.
-
-
-
Run Multiple Searches
-
Same role across different companies surfaces different repo ecosystems and candidate pools.
-
-
-
Use Websets for Ongoing Roles
-
Set up monitoring. New contributors matching your criteria surface automatically.
-
-
-
Write Criteria Like Filters
-
"Has 50+ commits to a distributed systems project with 1K+ stars" works. "Passionate about backend" does not.
-
-
-
Start Narrow, Broaden Later
-
3 tight criteria and 20 verified matches beat 1 loose criterion and 200 unqualified results.
-
-
-
Layer Enrichments After
-
Let the Webset verify matches first, then add contact info and publications to survivors.
-
-
-
- - - - -
- - diff --git a/docs/sourcekit-infographic.html.bak b/docs/sourcekit-infographic.html.bak deleted file mode 100644 index cf21931..0000000 --- a/docs/sourcekit-infographic.html.bak +++ /dev/null @@ -1,822 +0,0 @@ - - - - - - - - - - - -
- - -
-
- - - - - - - - - -
-
- Source Kit -
-
getsourcekit.vercel.app
-
-
-
- Find engineers by what they build. -
-
- -
- - -
-
HOW IT WORKS
-
-
-
Input
-
Your Role
-
Role + company, paste JD text, or drop in a job URL
-
-
-
-
Claude + Parallel
-
AI Strategy
-
Builds target repos, poach list, skills matrix
-
-
-
-
You
-
Refine
-
Edit repo list, add or remove targets, adjust criteria
-
-
-
-
Exa + GitHub
-
Multi-API Search
-
Exa + Parallel + GitHub scan for top contributors
-
-
-
-
Exa Websets
-
Scored Pipeline
-
Candidates ranked by code signal, ready for outreach
-
-
-
- - -
-
THREE WAYS TO START
-
-
-
Role + Company
-
Type a title and company. Claude infers the stack, identifies repos, builds strategy.
-
-
-
Paste a JD
-
Paste the full job description. AI parses requirements, tech stack, and seniority level.
-
-
-
Paste a Job URL
-
Drop a Lever, Greenhouse, or Ashby link. Parallel.ai extracts content from JS pages.
-
-
-
- - -
-
-
SCORING
-
Composite Score: 0 to 100
-
GitHub activity weighted by role relevance. No self-reported data.
-
-
-
-
Commit Volume + Recency
-
How actively and recently the candidate has contributed. Recent activity weighted higher.
-
-
-
Language + Stack Match
-
How well primary languages align with role requirements. Python for ML, Rust for systems.
-
-
-
Repo Relevance + Impact
-
High-signal repos (well-starred, actively maintained) weighted over personal projects.
-
-
-
- - -
-
PIPELINE
-
-
- Sourced - Contacted - Responded - Screen - In Process -
-
Drag between stages. Bulk select for compare, summary, or batch outreach. CSV export anytime.
-
-
- - -
-
BUILDER SCORE NEW
-
-
-
AI-Weighted GitHub Evaluation
-
Scans a candidate's top repos for AI mastery, build velocity, tooling, testing, docs, and community. Returns a 0-100 score with per-dimension breakdowns. Claude Code commits weighted 3x.
-
- Claude Code 3x - AI Frameworks - Coding Tools -
-
-
-
-
-
30%
-
AI Mastery
-
-
-
20%
-
Build Velocity
-
-
-
15%
-
Tooling
-
-
-
-
-
- -
- - -
-
EXA WEBSETS NEW IN V2.0
-
-
-
Persistent, Auto-Updating Candidate Sets
-
Define search criteria once. Get new verified matches continuously as new contributors emerge. Websets are talent pipelines that run themselves.
-
-
-
-
-
Create
-
Define query + criteria. Exa's neural search finds matches. Each verified before admission.
-
-
-
Enrich
-
Auto-extract: contact email, current company, publications, talks, GitHub stats.
-
-
-
Monitor
-
Daily or weekly schedule. New matches appended automatically. Override replaces stale.
-
-
-
Export
-
API, CSV, or feed into Clay and Parallel for enrichment and outreach sequencing.
-
-
-
-
-
- - -
-
EEA FRAMEWORK
-
- -
-
Evidence of Exceptional Ability
-
Verifiable signal that puts someone in the top 5-10% of practitioners. Instead of searching for titles or keywords, build Websets around the specific evidence that defines elite performance.
- -
-
-
Published Research
-
NeurIPS, ICML, ICLR, CVPR, ACL papers. H-index. Citation count.
-
-
-
Open Source Impact
-
Top-10 contributor to repos with 1K+ stars. Maintainer status.
-
-
-
Conference + Teaching
-
KubeCon, QCon, Strange Loop, DEF CON talks. Course instructor.
-
-
-
Industry Recognition
-
Patents filed. CVE author. Awards. Press coverage.
-
-
-
Technical Leadership
-
RFCs adopted. Design docs merged. Architecture decisions at scale.
-
-
-
Scale + Impact
-
Shipped to 1M+ users. Infra serving 10K+ QPS. Production ML at scale.
-
-
-
- -
-
Building EEA-Driven Websets
-
-
-
Decompose the Role
-
Identify 3-5 concrete, verifiable markers of exceptional ability. These become your Webset criteria.
-
-
-
Build the Webset
-
Pass EEA signals as criteria. Exa's verification agent checks each candidate before admission.
-
-
-
Add Enrichments
-
Surface the evidence itself: conference talks, star count, publication venues, contact email.
-
-
-
Monitor
-
Weekly monitoring. New people who publish, ship OSS, or give talks auto-enter the pipeline.
-
-
-
-
-
- -
- - -
-
WEBSET RECIPES
-
-
-
ML Infra Engineer
-
- Top contributor to PyTorch, JAX, DeepSpeed (500+ stars)
- Shipped distributed training at scale
- Enrich: talks, star count, email, company -
-
-
-
Robotics Software
-
- ROS/ROS2 maintainer or MoveIt, Drake, Isaac contributor
- Published at ICRA, RSS, or CoRL
- Enrich: patents, lab, H-index, email -
-
-
-
Staff Frontend
-
- Core contributor to React, Vue, Svelte (2K+ stars)
- Speaker at React Conf, VueConf, JSConf
- Enrich: blog posts, npm packages, email -
-
-
-
Security Engineer
-
- Published CVE or security advisory author
- DEF CON, Black Hat, BSides speaker
- Enrich: CVEs, bounties, certs, email -
-
-
-
- - -
-
ARCHITECTURE + API SURFACE
-
- - -
-
-
Presentation Layer
-
-
-
React + TypeScript
-
Mobile-first / OAuth / Search / Pipeline / Websets
-
-
-
-
-
-
AI Intelligence Layer
-
-
-
Claude AI
-
Strategy / Scoring / Builder Score
-
-
-
Exa + Websets
-
Neural search / Collections
-
-
-
Parallel.ai
-
JD extraction
-
-
-
GitHub API
-
Contributors / Commits / Code Quality
-
-
-
-
-
-
Data + Infrastructure
-
-
-
Supabase (PostgreSQL)
-
Auth / RLS / Pipeline / Websets
-
-
-
Vercel
-
Edge / Serverless
-
-
-
Stripe
-
Billing / Tiers
-
-
-
-
- - -
-
-
Exa API 5 endpoints
-
-
Research: Deep research on any topic
-
Search: Neural + keyword hybrid search
-
Answer: Direct answers from web data
-
findSimilar: Find pages similar to a URL
-
Websets: Persistent collections with criteria, enrichment, monitoring
-
-
- -
-
Parallel API 3 endpoints
-
-
Task: Single deep-research task with citations
-
FindAll: Batch entity discovery from a query
-
Search: Web search with content extraction
-
-
- -
-
GitHub API + Builder Score 3 interfaces
-
-
REST: Contributors, commits, profiles, repo metadata
-
GraphQL: Contribution graphs, org membership, activity
-
Builder Score: On-demand code quality eval (6 dimensions, Claude summary)
-
-
-
- -
-
- -
- - -
-
PRO TIPS
-
-
-
Be Specific With Titles
-
"ML Infrastructure Engineer focused on training pipelines" beats "ML Engineer."
-
-
-
Always Edit the Repo List
-
AI suggestions are a starting point. Editing the repo list is the single biggest lever for result quality.
-
-
-
Target Hidden Gems
-
High contributions, low follower counts. Under-recruited and more responsive to outreach.
-
-
-
Run Multiple Searches
-
Same role across different companies surfaces different repo ecosystems and candidate pools.
-
-
-
Use Websets for Ongoing Roles
-
Set up monitoring. New contributors matching your criteria surface automatically.
-
-
-
Write Criteria Like Filters
-
"Has 50+ commits to a distributed systems project with 1K+ stars" works. "Passionate about backend" does not.
-
-
-
Start Narrow, Broaden Later
-
3 tight criteria and 20 verified matches beat 1 loose criterion and 200 unqualified results.
-
-
-
Layer Enrichments After
-
Let the Webset verify matches first, then add contact info and publications to survivors.
-
-
-
- - - - -
- - \ No newline at end of file diff --git a/docs/sourcekit-system-diagram.html b/docs/sourcekit-system-diagram.html deleted file mode 100644 index 472cccd..0000000 --- a/docs/sourcekit-system-diagram.html +++ /dev/null @@ -1,572 +0,0 @@ - - - - - - - SourceKit — System Diagram - - - - - - -
-
-
- -
-
SourceKit
-
system diagram
-
-
- -
-
- -
-
-
Technical Overview / Diagram First
-

Find engineers by what they build.

-

- Four APIs orchestrated through a single role-driven strategy layer. Each handles a distinct - slice of the sourcing pipeline from semantic discovery to company intelligence. -

-
- -
- System - Example - Output - Principles -
- -
-
-
-

System Architecture + Workflow

-

Three layers from role input to ranked pipeline, powered by four external intelligence APIs.

-
- -
-
-
Input Layer
-

Strategy

-

Role title, JD, or hiring URL parsed into target repos, company map, and skill graph.

-
-
-
-
Discovery Layer
-

Search

-

Neural search + GitHub contributor mining scan repos, artifacts, and engineering ecosystems.

-
-
-
-
Evaluation Layer
-

Ranking

-

Contribution scoring, company context enrichment, and evidence-weighted pipeline output.

-
-
- -
API Breakdown
- -
-
-
-
- - - - -
-
-
Exa
-
Discovery + Websets
-
-
-
    -
  • Semantic repo and artifact search
  • -
  • Persistent candidate Websets
  • -
  • Auto-monitoring for new matches
  • -
  • Enrichment fields on verified items
  • -
-
- -
-
-
- - - - - - - -
-
-
Parallel
-
Strategy + Mapping
-
-
-
    -
  • JD extraction from any URL
  • -
  • Engineer-to-company mapping
  • -
  • Ecosystem graph traversal
  • -
  • Multi-hop talent intelligence
  • -
-
- -
-
-
- - - - -
-
-
GitHub
-
Evidence + Scoring
-
-
-
    -
  • Commit history and contribution graphs
  • -
  • Repo quality and star analysis
  • -
  • Builder Score (0-100) computation
  • -
  • AI framework and tooling detection
  • -
-
- -
-
-
- - - - - - - -
-
-
Harmonic
-
Company Intelligence
-
-
-
    -
  • Startup and founder metadata
  • -
  • Company relationship mapping
  • -
  • Market context enrichment
  • -
  • Target-company intelligence layer
  • -
-
-
-
- -
-
-

Example Query

-

Role input generates strategy, repos, and poach company targets.

-
- -
-
-
Input — Role
-
Staff ML Engineer
-Company: Anthropic
-
- -
-
Generated Strategy — Repos
-
vllm
-transformers
-pytorch
-triton
-megatron-lm
-
- -
-
Generated Strategy — Poach Companies
-
OpenAI
-DeepMind
-Meta FAIR
-Cohere
-Databricks
-
- -
-
Search Goal — Interpretation
-
Large-scale model training
-Inference optimization
-RLHF and alignment research
-
-
-
- -
-
-

Example Output

-

Ranked by code signal from target repos. Builder Score weights AI framework usage 3x.

-
- -
- - - - - - - - - - - - - - - -
Engineer | Primary Repo | Contributions | Score
Liang Chen | vLLM | 203 commits | 94
Priya Sharma | PyTorch | 156 commits | 91
Marcus Webb | Megatron-LM | 89 commits | 86
Yuki Tanaka | Triton | 74 commits | 83
-
-
- -
-
-

Principles

-

What drives the system design.

-
- -
-
-

Artifact > Profile

-

Repository history and code output outweigh profile metadata.

-
-
-

Evidence > Keywords

-

Signals come from technical artifacts, not thin matching logic.

-
-
-

Contribution > Claims

-

Rank by shipped work, maintenance, and real technical ownership.

-
-
-
-
-
- - - - \ No newline at end of file diff --git a/elite-sourcing.skill b/elite-sourcing.skill deleted file mode 100644 index 10d45fb..0000000 Binary files a/elite-sourcing.skill and /dev/null differ diff --git a/poster.html b/poster.html deleted file mode 100644 index d6295c3..0000000 --- a/poster.html +++ /dev/null @@ -1,130 +0,0 @@ - - - - SourceKit Poster - - - - - - - - - - - - - - - SourceKit - Technical sourcing on GitHub signal - v1.0 - - - WORKFLOW - - - Define criteria - - - - Search repos - - - - Extract candidates - - - - Evaluate - - - - Engage - - - FEATURES - - - Repo Discovery - Exa Search across GitHub - - - Persistent Pools - Exa Websets monitoring - - - Strategy + Eval - Claude parallel analysis - - - EEA SIGNALS - - - Experience - Commits - - - 92% - Reviews - - - 78% - Issues - - - 65% - - - Expertise - Code - - - 88% - Docs - - - 71% - Tests - - - 83% - - - STACK - - - React - - TypeScript - - Tailwind - - Vite - - - Claude - - Exa - - Supabase - - Vercel - - - - sourcekit.dev - Technical sourcing on GitHub signal - - - - - - - - - - diff --git a/public/screenshots/results.png b/public/screenshots/results.png deleted file mode 100644 index d4dadde..0000000 Binary files a/public/screenshots/results.png and /dev/null differ diff --git a/public/screenshots/screenshot_eea_signals_bottom.png b/public/screenshots/screenshot_eea_signals_bottom.png deleted file mode 100644 index fecd86e..0000000 Binary files a/public/screenshots/screenshot_eea_signals_bottom.png and /dev/null differ diff --git a/public/screenshots/screenshot_eea_signals_top.png b/public/screenshots/screenshot_eea_signals_top.png deleted file mode 100644 index 7d2979d..0000000 Binary files a/public/screenshots/screenshot_eea_signals_top.png and /dev/null differ diff --git a/public/screenshots/screenshot_eea_webset.png b/public/screenshots/screenshot_eea_webset.png deleted file mode 100644 index 40c39f9..0000000 Binary files a/public/screenshots/screenshot_eea_webset.png and /dev/null 
differ diff --git a/public/screenshots/screenshot_repositories_companies.png b/public/screenshots/screenshot_repositories_companies.png deleted file mode 100644 index 2a528d3..0000000 Binary files a/public/screenshots/screenshot_repositories_companies.png and /dev/null differ diff --git a/public/screenshots/screenshot_search_analyzing.png b/public/screenshots/screenshot_search_analyzing.png deleted file mode 100644 index 4e70332..0000000 Binary files a/public/screenshots/screenshot_search_analyzing.png and /dev/null differ diff --git a/public/screenshots/screenshot_search_progress.png b/public/screenshots/screenshot_search_progress.png deleted file mode 100644 index 8afc99a..0000000 Binary files a/public/screenshots/screenshot_search_progress.png and /dev/null differ diff --git a/public/screenshots/screenshot_search_results.png b/public/screenshots/screenshot_search_results.png deleted file mode 100644 index 44ad494..0000000 Binary files a/public/screenshots/screenshot_search_results.png and /dev/null differ diff --git a/public/screenshots/screenshot_skills.png b/public/screenshots/screenshot_skills.png deleted file mode 100644 index b04b48b..0000000 Binary files a/public/screenshots/screenshot_skills.png and /dev/null differ diff --git a/public/screenshots/screenshot_strategy.png b/public/screenshots/screenshot_strategy.png deleted file mode 100644 index 8f6def3..0000000 Binary files a/public/screenshots/screenshot_strategy.png and /dev/null differ diff --git a/public/screenshots/screenshot_websets.png b/public/screenshots/screenshot_websets.png deleted file mode 100644 index 7c29b70..0000000 Binary files a/public/screenshots/screenshot_websets.png and /dev/null differ diff --git a/public/screenshots/strategy.png b/public/screenshots/strategy.png deleted file mode 100644 index 00158f7..0000000 Binary files a/public/screenshots/strategy.png and /dev/null differ diff --git a/public/screenshots/webset.png b/public/screenshots/webset.png deleted file mode 100644 index 
ce4754b..0000000 Binary files a/public/screenshots/webset.png and /dev/null differ diff --git a/sourcekit-fix-prompts.md b/sourcekit-fix-prompts.md deleted file mode 100644 index e4aa1ff..0000000 --- a/sourcekit-fix-prompts.md +++ /dev/null @@ -1,130 +0,0 @@ -# SourceKit QA Fix Prompts - -Repo: github.com/mrNLK/sourcekit-charm -Stack: React + Vite + TypeScript + shadcn/ui + Tailwind CSS + Supabase -Date: 2026-02-28 -Total bugs: 10 (2 P1, 5 P2, 3 P3) - ---- - -## P1 FIXES (do these first) - -### BUG-001: Fix Outreach Generation TypeError - -Console error: `Outreach generation failed: Error: Cannot read properties of undefined (reading 'name')` - -Clicking "Generate Outreach" in the candidate detail panel throws "Cannot read properties of undefined (reading 'name')". The error fires 4-5 times per click (event handler leak). - -Root cause: The outreach generation function reads company/role context (likely `strategy.company.name` or `role.name`) but this context is not passed when opening the candidate detail from the Results page or Pipeline. - -Fix required: -1. Find the outreach generation handler (search for "Generate Outreach" button onClick or the function that reads `.name`). -2. Add null-safe access: use optional chaining (`?.`) or provide fallback values from the candidate's own data (e.g., current company from enrichment). -3. Fix the event handler leak: ensure the click handler is attached once. If using useEffect, add a cleanup return. If using onClick prop directly, ensure the component isn't re-mounting multiple times. -4. Add a try/catch with a toast notification on failure so users see feedback instead of silent failure. -5. Add a loading state to the button while generation is in progress. 
- -Files to check: -- Component rendering the "Generate Outreach" button (likely in src/components/CandidateDetail or similar) -- The outreach generation service/function -- The search strategy context/store that should provide role + company - -Test: After fix, click "Generate Outreach" on any candidate from Results and from Pipeline. Verify: (a) no console errors, (b) loading state shows, (c) outreach text appears or error toast shows, (d) only 1 handler fires per click. - ---- - -### BUG-002: Fix First Search Race Condition - -First "Search with this strategy" click returns 0 results. Retry from Results page succeeds. - -After building a sourcing strategy, clicking "Search with this strategy" navigates to the Results page but returns "No engineers found" (0 results). Clicking the Search button again on the Results page succeeds with 20 results. - -Root cause hypothesis: The strategy state (search query, target repos, criteria) is passed via React state/context during navigation. The Results component fires the search edge function call before the state is fully populated, sending an empty or incomplete request. - -Fix required: -1. Find where "Search with this strategy" triggers navigation + search. -2. Ensure the search request only fires AFTER the strategy state is confirmed available. Options: - a. Pass strategy as URL params or route state instead of context. - b. Add a useEffect in Results that watches for strategy state and only triggers search when state is non-null. - c. Trigger the search from the strategy page BEFORE navigating, store the request promise, and resolve it on the Results page. -3. Add a retry mechanism: if search returns 0 results and strategy state is available, auto-retry once after 500ms. 
- -Files to check: -- Strategy page component (the "Search with this strategy" button handler) -- Results page component (the search trigger logic) -- The search state/context provider -- The github-search edge function call - -Test: Build a strategy for any role > click "Search with this strategy" > verify results appear on first attempt without needing to retry. Repeat 5x to confirm no intermittent failures. - ---- - -## P2 FIXES - -### BUG-003 + BUG-004: Pipeline Stage Dropdown Fixes - -Issues: -1. Stage dropdown on candidate detail (from Pipeline) only shows "Contacted" and "Not Interested". Missing: Recruiter Screen, Rejected, Moved to ATS. -2. After selecting a new stage, the dropdown button text does not update (still shows old stage), even though the change persists in Supabase. - -Fix: -1. Find the stage dropdown component. Ensure it reads ALL stage options from the same constant/enum used by the kanban columns: ["contacted", "not_interested", "recruiter_screen", "rejected", "moved_to_ats"]. -2. After the Supabase upsert succeeds, update the local React state for the selected stage. Use setState or invalidate the query cache (if using React Query / TanStack Query). - -Test: Open candidate from Pipeline > dropdown shows all 5 stages > select "Recruiter Screen" > button text updates to "Recruiter Screen" > go back to kanban > candidate appears in Recruiter Screen column. - ---- - -### BUG-005: Fix Route Persistence on Refresh - -Pressing F5 on any page (Pipeline, Results, History, etc.) always redirects to the New Search home page. - -Fix options: -1. If using react-router with BrowserRouter: ensure Vercel has a rewrite rule in vercel.json: - `{ "rewrites": [{ "source": "/(.*)", "destination": "/index.html" }] }` -2. If using in-memory state for navigation (no URL changes): switch to proper route paths (/pipeline, /results, /history, etc.) so the URL reflects the current page. -3. If using HashRouter: this should already work. 
Check if the router is wrapping the entire app correctly. - -Test: Navigate to Pipeline > press F5 > should stay on Pipeline. Repeat for Results, History, Watchlist, Settings. - ---- - -### BUG-006 + BUG-007: Outreach Error Handling + Handler Leak - -Issues: -1. No loading spinner, success message, or error toast when outreach generation runs or fails. -2. Click handler fires 4-5 times per single click. - -Fix: -1. Add loading state: set isGenerating=true on click, show spinner on button, disable button during generation. -2. On success: display generated outreach text in a text area below the button with a "Copy" button. -3. On error: show a toast/alert with a user-friendly message like "Failed to generate outreach. Please try again." -4. Fix handler leak: if using addEventListener in useEffect, return a cleanup function. Prefer onClick prop on the button element. - -Test: Click "Generate Outreach" > button shows spinner > on completion, text appears or error toast shows. Check console: only 1 handler fires per click. - ---- - -## P3 IMPROVEMENTS - -### BUG-008: History Result Count Accuracy - -History shows "46 results" for a search that displays 20 candidates on the Results page. - -Fix: When writing to search_history table, store the count of candidates that were actually processed and displayed, not the raw GitHub API match count. Alternatively, show both: "46 found, 20 displayed" to set accurate expectations. - ---- - -### BUG-009: Truncated Search Criteria Tags - -Tags like "Large language model dev..." are cut off with ellipsis but have no tooltip showing the full text. - -Fix: Add a title attribute or a Tooltip component (shadcn/ui has one) to each criteria tag showing the full text on hover. - ---- - -### BUG-010: Kanban Drag-and-Drop Verification - -Automated testing could not confirm drag-and-drop works on the kanban board. Drop zones display "Drop candidates here" text. - -Action: Manually test drag-and-drop in Chrome. If it works, no fix needed. 
If not, ensure the DnD library (likely @dnd-kit or react-beautiful-dnd) is properly configured with droppable zones and draggable items. The onDragEnd handler should call the same Supabase update as the stage dropdown. diff --git a/sourcekit-infographic.png b/sourcekit-infographic.png deleted file mode 100644 index 80d64bf..0000000 Binary files a/sourcekit-infographic.png and /dev/null differ diff --git a/sourcekit-microsite.html b/sourcekit-microsite.html deleted file mode 100644 index ec908d9..0000000 --- a/sourcekit-microsite.html +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - SourceKit / Technical sourcing on GitHub signal - - - - - - - - -
- - diff --git a/sourcekit-qa-report.html b/sourcekit-qa-report.html deleted file mode 100644 index f1d3831..0000000 --- a/sourcekit-qa-report.html +++ /dev/null @@ -1,960 +0,0 @@ - - - - - -SourceKit QA Report - - - - -
- - -
-

SourceKit QA Report

-
Comprehensive feature-by-feature testing of SourceKit at getsourcekit.vercel.app
-
- Date: 2026-02-28 - Tester: Claude (automated browser QA) - Stack: React + Vite + TS + shadcn/ui + Supabase - Repo: github.com/mrNLK/sourcekit-charm -
-
- - -
-
-
2
-
P0/P1 Bugs
-
-
-
5
-
P2 Bugs
-
-
-
3
-
P3 Issues
-
-
-
8
-
Features Tested
-
-
-
6
-
Features Passing
-
-
- - -
-
Issue Summary
-
Workflow Map
-
Detailed Findings
-
Scoring Audit
-
Fix Prompts (P0+P1)
-
Improvement Prompts (P2+P3)
-
- - -
-
-
Issue Summary Table
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ID | Severity | Feature | Issue | Repro Steps | Root Cause
BUG-001 | P1 | Outreach | Generate Outreach button throws TypeError: "Cannot read properties of undefined (reading 'name')" | Open any candidate detail panel > scroll to Outreach > click "Generate Outreach" | Outreach generator reads .name from role/company context object that is undefined. The context from the search strategy is not passed to the outreach component. Also fires 4-5x per click (event handler leak).
BUG-002 | P1 | Candidate Search | First search from strategy returns 0 results. Clicking "Search with this strategy" navigates to Results page but shows "No engineers found". Retry from Results page succeeds. | New Search > enter role+company > Build Sourcing Strategy > click "Search with this strategy" | Race condition: the strategy state (repos, query) is likely not fully transferred to the search component before the edge function call fires. Only an OPTIONS preflight was captured in network monitoring, suggesting the POST either fired before monitoring started or was aborted.
BUG-003 | P2 | Pipeline | Stage dropdown only shows 2 of 5 stages. On candidate detail (from Pipeline), the stage selector dropdown shows only "Contacted" and "Not Interested", missing Recruiter Screen, Rejected, Moved to ATS. | Pipeline > click candidate card > click stage dropdown (e.g., "Contacted") | Dropdown options are likely hardcoded to a subset or the stage list is being filtered incorrectly.
BUG-004 | P2 | Pipeline | Stage dropdown UI does not update after stage change. Clicking "Not Interested" from the dropdown changes the stage in Supabase (verified on kanban), but the button still displays "Contacted". | Pipeline > click candidate > change stage via dropdown > observe button text | The local React state for the stage button is not re-rendered after the Supabase write succeeds. Likely a missing state update or stale closure.
BUG-005 | P2 | Navigation | Page refresh always navigates to New Search. Refreshing (F5) on Pipeline, Results, or any sub-page redirects to the home/New Search page. | Navigate to Pipeline (or any page) > press F5 | React Router likely uses in-memory state for navigation instead of URL-based routes, or the route paths are not defined in the Vercel routing config (SPA fallback).
BUG-006 | P2 | Outreach | Generate Outreach handler fires multiple times per click. Console shows 4-5 identical errors per single button click, indicating the event handler is attached multiple times. | Open candidate detail > click "Generate Outreach" once > observe 4-5 console errors | Event listener is registered in a useEffect without cleanup, or the component re-renders and re-attaches handlers on each render cycle.
BUG-007 | P2 | Outreach | No user-facing error when outreach generation fails. Button shows no loading state, no error toast, no feedback. Silent failure. | Click "Generate Outreach" > observe no UI response | Error is caught and logged to console but no toast/alert is shown to the user.
BUG-008 | P3 | Search History | Result count mismatch. History shows "46 results" for "Senior ML Engineer at Anthropic" but Results page only displays 20 candidates. | History page > compare result count badge vs actual results loaded | History may be counting total GitHub API matches (before filtering/deduplication) while Results page shows post-processed results. Or multiple search attempts are aggregated.
BUG-009 | P3 | UI/UX | AI-parsed search criteria tags are truncated. Tags like "Large language model dev..." and "PyTorch or JAX with deep..." cut off without tooltip or expand option. | Results page > observe top criteria tags | CSS text-overflow: ellipsis applied without a title attribute or tooltip component.
BUG-010 | P3 | Pipeline | Drag-and-drop on kanban board not functional via automated testing. Drop zones show "Drop candidates here" but drag interaction could not be verified. Stage change only works via dropdown on detail page. | Pipeline > attempt to drag candidate card to another column | May be a testing tool limitation, or actual DnD implementation may need verification with manual testing.
-
-
- -
-
Feature Status
-
-
- Authentication (Supabase Google SSO) - PASS -
-
- Role Research / Strategy Builder - PASS -
-
- Candidate Search (GitHub via Edge Functions) - P1 - intermittent first-search failure -
-
- Candidate Enrichment (EEA scoring, AI summary) - PASS -
-
- Scoring System (weighted EEA signals) - PASS -
-
- Pipeline / Kanban Board - P2 - stage dropdown incomplete + stale UI -
-
- Candidate Detail View - PASS -
-
- Outreach Generation - P1 - TypeError, no error handling -
-
- Watchlist - PASS -
-
- Search History - PASS -
-
- Bulk Actions - PASS -
-
- Websets (Exa AI) - PASS -
-
- Settings - PASS -
-
- Data Persistence (refresh survival) - P2 - route lost on refresh -
-
-
-
- - -
-
-
Core Workflow: Role Research to Pipeline
-
-
-
Step 1
- New Search
PASS -
-
-
-
Step 2
- Enter Role + Company
PASS -
-
-
-
Step 3
- Build Strategy
PASS -
-
-
-
Step 4
- Search with Strategy
P1: First attempt fails -
-
-
-
Step 5
- View Results (20)
PASS on retry -
-
-
-
Step 6
- View Candidate Detail
PASS -
-
-
-
Step 7
- Add to Pipeline
PASS -
-
-
-
Step 8
- Change Stage
P2: Incomplete dropdown -
-
-
-
Step 9
- Generate Outreach
P1: TypeError crash -
-
-
- -
-
Secondary Workflows
-
-
- Watchlist: Add candidate > view in Watchlist > persists on refresh - PASS -
-
- History: Search > view in History > shows result count + timestamp - PASS -
-
- Bulk Actions: View pipeline candidates > table with filters + AI chat panel - PASS -
-
- Websets: Create webset form > empty state displayed correctly - PASS -
-
- Settings: Outreach defaults + API keys + integrations > all fields rendered - PASS -
-
- Validation: Empty form submission > red borders + error messages - PASS -
-
- Job Description tab: Alternative input mode with URL + text area - PASS -
-
-
-
- - -
-
-
Phase 1: Authentication
-
All auth tests passed. Google SSO via Supabase authenticated correctly. No flash of unstyled content. Dark mode rendered on load. Session persists across navigation.
-
- -
-
Phase 2A: Role Research
-
-

Happy Path: Entered "Senior ML Engineer" + "Anthropic". Strategy built in ~15 seconds with 4-step progress animation. Output included:

-
    -
  • Search Query (editable semantic description)
  • -
  • Target Repositories (10 repos with descriptions and external links, removable with X)
  • -
  • Companies to Source From (8 companies with typed tags: Competitor, Adjacent, Talent Hub)
  • -
  • Skills (12 skills, Must Have / Nice to Have, removable tags)
  • -
  • EEA Signals (8 signals, color-coded strength indicators)
  • -
  • Sticky "Ready to search?" bar at bottom
  • -
-

Edge Cases:

-
    -
  • Empty submission: Both fields show red borders + "required" messages. PASS.
  • -
  • Job Description tab: Toggle works, shows URL + text area. PASS.
  • -
  • Clear button: Resets all fields and results. PASS.
  • -
-
-
- -
-
Phase 2B: Candidate Search
-
-
- BUG-002: First "Search with this strategy" click returned 0 results. Network monitoring captured only OPTIONS preflight to github-search edge function, no POST. Retry from Results page succeeded with 20 candidates. Intermittent race condition between strategy state transfer and search execution. -
-

When working, Results page shows: AI-parsed search criteria tags, repos searched, filter bar (location, Hidden Gems, Enrich All, Language, Min Score, Export, Seniority), and candidate cards with scores, EEA badges, commit stats, language bars.

-
-
- -
-
Phase 2C: Candidate Enrichment + Detail View
-
-

Thomas Wolf (Score 98) detail panel verified:

-
    -
  • Profile header: avatar, name, score, handle, followers, join date
  • -
  • Action buttons: In Pipeline, Watchlisted, GitHub, LinkedIn
  • -
  • About: AI-generated summary of candidate background
  • -
  • EEA Section: USCIS Criteria (Original Work, Leadership, Published, Judging, Remuneration, Membership) + Supplementary Signals (Sustained, Tech Profile, Velocity, Builder, Early Mover)
  • -
  • Top Documentation Gaps: 4 specific gaps identified
  • -
  • Skills: Python, Jupyter Notebook, C++ with proportion bars
  • -
  • Notable Work: huggingface/transformers (1384 commits) + other repos
  • -
  • Outreach section with tone dropdown + Generate button (broken, see BUG-001)
  • -
  • GitHub stats footer: 79 Repos, 58 Stars, 3,388 Followers, Since 2014
  • -
-
Enrichment data renders correctly and comprehensively. EEA scoring categories with strength labels are well-designed. LinkedIn URL populated: https://www.linkedin.com/in/thom-wolf
-
-
- -
-
Phase 2E: Pipeline / Kanban
-
-

5 columns: Contacted, Not Interested, Recruiter Screen, Rejected, Moved to ATS. Stage filter tabs with counts. Candidate cards show avatar, name, score, time indicator.

-
BUG-003: Stage dropdown on detail page only shows Contacted and Not Interested (missing 3 stages).
-
BUG-004: Stage dropdown button text doesn't update after successful stage change.
-

Stage change does persist to Supabase: confirmed Thomas Wolf moved from Contacted to Not Interested column on returning to kanban view.

-
-
- -
-
Phase 3: Data Integrity
-
-
    -
  • Pipeline data persists on refresh: PASS (candidate in correct stage after F5)
  • -
  • Watchlist persists on refresh: PASS (badge count and candidate present)
  • -
  • Route state on refresh: FAIL (always redirects to New Search, BUG-005)
  • -
  • Stage changes persist: PASS (Supabase write confirmed)
  • -
  • Search history persists: PASS (previous searches visible with timestamps)
  • -
-
-
-
- - -
-
-
Scoring Formula (src/lib/scoring.ts)
-
-

The scoring system uses a weighted boolean signal model with a max score of 100.

- - - - - - - - - - - - - - - -
Signal — Weight — Source
top_company — 20 — LLM enrichment or regex match against 12 top companies
has_phd — 15 — LLM enrichment or regex /phd|doctorate|doctoral/
has_publications — 15 — LLM enrichment or regex /publication|paper|published|journal/
open_source — 10 — LLM enrichment or regex /open.?source|github stars|contributor|maintainer/
conference_speaker — 10 — LLM enrichment or regex against 7 top conferences
has_patents — 10 — LLM enrichment or regex /patents?/
leadership_role — 10 — LLM enrichment or regex /founder|cto|vp|vice president|chief|director/
top_university — 10 — LLM enrichment or regex against 11 top universities
experience_bonus — 5 — 10+ years experience
-

Max possible: 105 (capped at 100). Color coding: green (80+), yellow (60-79), orange (40-59), red (<40).

-
-
- -
-
Score Verification: Thomas Wolf
-
-

Displayed Score: 98 | EEA Score: 51

-

The main score (98) is the GitHub-based relevance score, NOT the EEA score. These are two separate scoring systems:

-
    -
  • Score (98): AI relevance score based on GitHub activity, commit count, repo relevance, follower count, language match. This score is generated by the search/enrichment pipeline, not by scoring.ts.
  • -
  • EEA (51): Evidence of Exceptional Ability score from scoring.ts. For Thomas Wolf: leadership_role (10, Co-founder) + open_source (10, transformers maintainer) + top_company (20, Hugging Face is likely matched) + experience_bonus (5, Since 2014 = 12 years) + possible other signals = plausible range 45-55.
  • -
-
Both scoring systems appear internally consistent. The dual-score display is clear in the candidate cards (main score as large number, EEA as badge).
-
-
-
- - -
-
-
Claude Code Fix Prompts for P0 + P1 Issues
- -
-
-

P1   BUG-001: Fix Outreach Generation TypeError

-
-
-

Console error: Outreach generation failed: Error: Cannot read properties of undefined (reading 'name')

-
// Prompt for Claude Code: - -Fix the outreach generation TypeError in the candidate detail panel. - -Bug: Clicking "Generate Outreach" in the candidate detail panel throws -"Cannot read properties of undefined (reading 'name')". The error fires -4-5 times per click (event handler leak). - -Root cause: The outreach generation function reads company/role context -(likely `strategy.company.name` or `role.name`) but this context is not -passed when opening the candidate detail from the Results page or Pipeline. - -Fix required: -1. Find the outreach generation handler (search for "Generate Outreach" - button onClick or the function that reads `.name`). -2. Add null-safe access: use optional chaining (`?.`) or provide fallback - values from the candidate's own data (e.g., current company from - enrichment). -3. Fix the event handler leak: ensure the click handler is attached once. - If using useEffect, add a cleanup return. If using onClick prop directly, - ensure the component isn't re-mounting multiple times. -4. Add a try/catch with a toast notification on failure so users see - feedback instead of silent failure. -5. Add a loading state to the button while generation is in progress. - -Files to check: -- Component rendering the "Generate Outreach" button (likely in - src/components/CandidateDetail or similar) -- The outreach generation service/function -- The search strategy context/store that should provide role + company - -Test: After fix, click "Generate Outreach" on any candidate from Results -and from Pipeline. Verify: (a) no console errors, (b) loading state shows, -(c) outreach text appears or error toast shows, (d) only 1 handler fires -per click.
-
-
- -
-
-

P1   BUG-002: Fix First Search Race Condition

-
-
-

First "Search with this strategy" click returns 0 results. Retry from Results page succeeds.

-
// Prompt for Claude Code: - -Fix the race condition where the first search from the strategy page -returns 0 results. - -Bug: After building a sourcing strategy, clicking "Search with this -strategy" navigates to the Results page but returns "No engineers found" -(0 results). Clicking the Search button again on the Results page -succeeds with 20 results. - -Root cause hypothesis: The strategy state (search query, target repos, -criteria) is passed via React state/context during navigation. The -Results component fires the search edge function call before the state -is fully populated, sending an empty or incomplete request. - -Fix required: -1. Find where "Search with this strategy" triggers navigation + search. -2. Ensure the search request only fires AFTER the strategy state is - confirmed available. Options: - a. Pass strategy as URL params or route state instead of context. - b. Add a useEffect in Results that watches for strategy state and - only triggers search when state is non-null. - c. Trigger the search from the strategy page BEFORE navigating, - store the request promise, and resolve it on the Results page. -3. Add a retry mechanism: if search returns 0 results and strategy - state is available, auto-retry once after 500ms. - -Files to check: -- Strategy page component (the "Search with this strategy" button handler) -- Results page component (the search trigger logic) -- The search state/context provider -- The github-search edge function call - -Test: Build a strategy for any role > click "Search with this strategy" -> verify results appear on first attempt without needing to retry. -Repeat 5x to confirm no intermittent failures.
-
-
-
-
- - -
-
-
Improvement Prompts for P2 + P3 Issues
- -
-
-

P2   BUG-003+004: Pipeline Stage Dropdown Fixes

-
-
-
Fix the pipeline stage dropdown to show all 5 stages and update UI -on stage change. - -Issues: -1. Stage dropdown on candidate detail (from Pipeline) only shows - "Contacted" and "Not Interested". Missing: Recruiter Screen, Rejected, Moved to ATS. -2. After selecting a new stage, the dropdown button text does not - update (still shows old stage), even though the change persists - in Supabase. - -Fix: -1. Find the stage dropdown component. Ensure it reads ALL stage - options from the same constant/enum used by the kanban columns: - ["contacted", "not_interested", "recruiter_screen", "rejected", "moved_to_ats"]. -2. After the Supabase upsert succeeds, update the local React state - for the selected stage. Use setState or invalidate the query cache - (if using React Query / TanStack Query). - -Test: Open candidate from Pipeline > dropdown shows all 5 stages > -select "Recruiter Screen" > button text updates to "Recruiter Screen" > go back to -kanban > candidate appears in Recruiter Screen column.
-
-
- -
-
-

P2   BUG-005: Fix Route Persistence on Refresh

-
-
-
Fix page refresh to maintain current route instead of redirecting -to New Search. - -Issue: Pressing F5 on any page (Pipeline, Results, History, etc.) -always redirects to the New Search home page. - -Fix options: -1. If using react-router with BrowserRouter: ensure Vercel has a - rewrite rule in vercel.json: - { "rewrites": [{ "source": "/(.*)", "destination": "/index.html" }] } -2. If using in-memory state for navigation (no URL changes): switch - to proper route paths (/pipeline, /results, /history, etc.) so - the URL reflects the current page. -3. If using HashRouter: this should already work. Check if the - router is wrapping the entire app correctly. - -Test: Navigate to Pipeline > press F5 > should stay on Pipeline. -Repeat for Results, History, Watchlist, Settings.
-
-
- -
-
-

P2   BUG-006+007: Outreach Error Handling + Handler Leak

-
-
-
Add error handling UI and fix event handler leak for outreach -generation. - -Issues: -1. No loading spinner, success message, or error toast when outreach - generation runs or fails. -2. Click handler fires 4-5 times per single click. - -Fix: -1. Add loading state: set isGenerating=true on click, show spinner - on button, disable button during generation. -2. On success: display generated outreach text in a text area below - the button with a "Copy" button. -3. On error: show a toast/alert with a user-friendly message like - "Failed to generate outreach. Please try again." -4. Fix handler leak: if using addEventListener in useEffect, return - a cleanup function. Prefer onClick prop on the button element. - -Test: Click "Generate Outreach" > button shows spinner > on -completion, text appears or error toast shows. Check console: only -1 handler fires per click.
-
-
- -
-
-

P3   BUG-008: History Result Count Accuracy

-
-
-
Align history result counts with actual displayed results. - -Issue: History shows "46 results" for a search that displays 20 -candidates on the Results page. - -Fix: When writing to search_history table, store the count of -candidates that were actually processed and displayed, not the raw -GitHub API match count. Alternatively, show both: "46 found, 20 -displayed" to set accurate expectations.
-
-
- -
-
-

P3   BUG-009: Truncated Search Criteria Tags

-
-
-
Add tooltips to truncated AI-parsed search criteria tags. - -Issue: Tags like "Large language model dev..." are cut off with -ellipsis but have no tooltip showing the full text. - -Fix: Add a title attribute or a Tooltip component (shadcn/ui has -one) to each criteria tag showing the full text on hover.
-
-
- -
-
-

P3   BUG-010: Kanban Drag-and-Drop Verification

-
-
-
Verify and test kanban drag-and-drop functionality manually. - -Issue: Automated testing could not confirm drag-and-drop works on -the kanban board. Drop zones display "Drop candidates here" text. - -Action: Manually test drag-and-drop in Chrome. If it works, no fix -needed. If not, ensure the DnD library (likely @dnd-kit or -react-beautiful-dnd) is properly configured with droppable zones -and draggable items. The onDragEnd handler should call the same -Supabase update as the stage dropdown.
-
-
-
-
- -
- - - - diff --git a/sourcekit-writeup-v2.md b/sourcekit-writeup-v2.md deleted file mode 100644 index 031c1ff..0000000 --- a/sourcekit-writeup-v2.md +++ /dev/null @@ -1,99 +0,0 @@ -# SourceKit Beta v2.0 - -**Find engineers by what they build, not what they claim.** - -SourceKit is a technical sourcing tool that finds software engineers through their actual code contributions, open-source activity, and verifiable technical output. No keyword matching. No LinkedIn scraping. No self-reported skills. - -You give it a role (title + company, a JD, or a job URL). It returns a scored pipeline of engineers ranked by real GitHub activity, weighted against your role requirements. - ---- - -## How It Works - -1. **Input**: Role + company name, paste a full JD, or drop in a Lever/Greenhouse/Ashby URL -2. **AI Strategy**: Claude + Parallel.ai build a target repo list, poach list, and skills matrix from the JD -3. **Refine**: You edit the repo list, add/remove targets, adjust criteria (this is the single biggest lever for result quality) -4. **Multi-API Search**: Exa neural search + Parallel company intel + GitHub contributor scan run in parallel -5. **Scored Pipeline**: Candidates ranked by code signal, ready for outreach in a Kanban board - ---- - -## What's New in v2.0 - -### Builder Score -AI-weighted GitHub code quality evaluation. 
Scans a candidate's top repositories and returns a 0-100 composite score with per-dimension breakdowns across 7 weighted dimensions: - -| Dimension | Weight | What It Measures | -|-----------|--------|-----------------| -| AI Mastery | 30% | AI/ML framework usage, model files, AI keywords, Claude Code activity | -| Build Velocity | 20% | Commit frequency, recency, contribution consistency | -| Tooling | 15% | CI/CD configs, framework files, dev tool adoption | -| Commit Bonus | 15% | Code volume, language diversity, active coding tools | -| Testing | 10% | Test file coverage, testing framework usage | -| Documentation | 8% | README quality, docs directory, inline documentation | -| Community Health | 7% | Issues, PRs, contributor guidelines, community engagement | - -No self-reported data. GitHub activity only. Each score includes a Claude-generated natural language summary explaining the signal. - -### Exa Websets -Persistent, auto-updating candidate sets. Define search criteria once. Get new verified matches continuously as new contributors emerge. - -- **Create**: Define query + criteria. Exa's neural search finds matches. Each candidate verified before admission. -- **Enrich**: Auto-extract contact email, current company, publications, conference talks, GitHub stats. -- **Monitor**: Daily or weekly schedule. New matches appended automatically. -- **Export**: API, CSV, or feed into Clay/Parallel for enrichment and outreach sequencing. - -Websets are talent pipelines that run themselves. - ---- - -## EEA Framework - -Evidence of Exceptional Ability: verifiable signal that puts someone in the top 5-10% of practitioners. Instead of searching for titles or keywords, build Websets around specific evidence that defines elite performance. 
- -**Artifact catalog**: Published research (NeurIPS, ICML, ICLR, CVPR), open-source impact (top-10 contributor to 1K+ star repos), conference talks (KubeCon, QCon, DEF CON), patents/CVEs, technical leadership (RFCs adopted, architecture at scale), and production impact (shipped to 1M+ users, 10K+ QPS infra). - ---- - -## API Surface - -### Exa API (5 endpoints) -- **Research**: Deep research on any topic -- **Search**: Neural + keyword hybrid search -- **Answer**: Direct answers from web data -- **findSimilar**: Find pages similar to a URL -- **Websets**: Persistent collections with enrichment and monitoring - -### Parallel API (3 endpoints) -- **Task**: Single deep-research task with citations -- **FindAll**: Batch entity discovery from a query -- **Search**: Web search with content extraction - -### GitHub API + Builder Score (3 interfaces) -- **REST**: Contributors, commits, profiles, repo metadata -- **GraphQL**: Contribution graphs, org membership, activity -- **Builder Score**: On-demand code quality eval (7 dimensions, Claude summary) - ---- - -## Pipeline UI - -Kanban board with 5 stages: Sourced, Contacted, Responded, Screen, In Process. Drag between stages. Bulk select for compare, summary, or batch outreach. CSV export anytime. - ---- - -## Stack - -React + TypeScript, Supabase (PostgreSQL + Edge Functions + RLS), Vercel, Stripe, Claude AI, Exa API + Websets, Parallel.ai, GitHub REST + GraphQL. 
- ---- - -## Links - -- **App**: [getsourcekit.vercel.app](https://getsourcekit.vercel.app) -- **Docs/Infographic**: [sourcekit-docs.netlify.app](https://sourcekit-docs.netlify.app) -- **Feedback**: michael.f.rubino@gmail.com - ---- - -![SourceKit Beta v2.0 Infographic](sourcekit-infographic.png) diff --git a/tasks/smoke-test-report.md b/tasks/smoke-test-report.md deleted file mode 100644 index 37aaf9e..0000000 --- a/tasks/smoke-test-report.md +++ /dev/null @@ -1,578 +0,0 @@ -# SourceKit v2.0 — Smoke Test Report & Improvement Prompts - -**Date**: 2026-02-28 -**Tested on**: getsourcekit.vercel.app (deployed) + localhost:8080 (local dev) -**Tester**: Claude Code (automated E2E) - ---- - -## Part 1: Smoke Test Results - -### Tab-by-Tab Status - -| Tab | Status | Issues Found | -|-----|--------|--------------| -| Research & Strategy | ✅ Pass | Working. JD URL parsing, strategy generation operational | -| Results (Search) | ❌ Fail | Suggestion chips produce 0 results; manual free-text search works | -| History | ✅ Pass | Shows prior searches. 0-result searches correctly omitted | -| Pipeline | ✅ Pass | Kanban loads, 5 stages visible, cards render with scores | -| Watchlist | ✅ Pass | Saved candidates display correctly | -| Bulk Actions | ✅ Pass | Table renders, action buttons visible, AI chat input works | -| Websets | ✅ Pass | Create form + empty state working. Pipeline import added (P3) | -| Settings | ✅ Pass | All 3 sections (Outreach, API Keys, Integrations) populated | - -### Critical Bugs Found - -#### BUG-1: Suggestion Chips Return 0 Results (CRITICAL) -- **Reproduction**: Click any suggestion chip (e.g., "Rust systems engineers", "ML infrastructure") -- **Expected**: 15-60 results -- **Actual**: 0 results every time -- **Root cause**: Expanded queries are descriptive prose (e.g., "Rust systems engineers with experience in low-level programming, memory safety, performance optimization…") but the AI `parseQuery()` step needs to map these to concrete GitHub repos. 
The verbose, abstract descriptions confuse the AI into either returning 0 repos or hallucinating invalid repo names. -- **Evidence**: The `parseQuery` prompt gives examples like `"ML infrastructure engineers" → pytorch/pytorch`, but the actual input it receives is 30+ words of expanded prose, not the short label. - -#### BUG-2: "Ungettable" Candidates Get Top Scores (CRITICAL) -- **Reproduction**: Search for "contributors to pytorch/pytorch and huggingface/transformers" -- **Actual**: Thomas Wolf (co-founder/CEO of Hugging Face) appears with score 98 -- **Problem**: Founders, CEOs, VCs, and executives who happen to contribute code are not recruitable for an IC role. Scoring prompt has zero filtering for unrecrutable profiles. -- **Affected data**: `followers > 10,000`, roles containing "founder", "CEO", "CTO", "VP", "Partner" - -#### BUG-3: Stale Query State After Chip Click (MEDIUM) -- **Reproduction**: Click a suggestion chip, then manually edit the search input -- **Expected**: Should search with the manually entered text -- **Actual**: May retain the chip's expandedQuery internally while showing different text in the input - ---- - -## Part 2: Complete Workflow Map & API Chain - -### Search Pipeline (Primary Workflow) -``` -User Input (SearchTab.tsx) - ↓ query string (or chip expandedQuery) - ↓ -invokeFunction('github-search', { q: query }) [src/lib/api.ts] - ↓ Supabase Edge Function - ↓ -┌─────────────────────────────────────────────────────┐ -│ github-search/index.ts │ -│ │ -│ Step 0: checkSearchGate() → user_subscriptions table │ -│ API: Supabase DB (user_subscriptions) │ -│ │ -│ Step 1: parseQuery(query) → AI identifies repos │ -│ API: Anthropic Claude Haiku (via anthropicCall) │ -│ Returns: { repos: [...], skills, location } │ -│ │ -│ Step 2: fetchContributors(repos) │ -│ API: GitHub REST v3 │ -│ - /repos/{owner}/{name}/contributors?per_page=30 │ -│ - Fallback: /search/users?q={skills} │ -│ Up to 6 repos × 30 contributors = 180 max │ -│ │ -│ Step 3: 
enrichCandidates(contributorMap) │ -│ API: GitHub REST v3 (parallel) │ -│ - /users/{username} │ -│ - /users/{username}/repos?sort=stars&per_page=30 │ -│ Caching: candidates table, 7-day TTL │ -│ Max 15 fresh fetches per search │ -│ │ -│ Step 4: scoreCandidates(candidates, query) │ -│ API: Anthropic Claude Haiku (via anthropicCall) │ -│ Batch: 25 per call, up to 3 concurrent │ -│ Caching: query_hash scoped (P5 fix) │ -│ Returns: score 0-100, summary, about, hidden_gem │ -│ │ -│ Step 5: incrementSearchCount(userId) │ -│ API: Supabase DB (user_subscriptions) │ -└─────────────────────────────────────────────────────┘ - ↓ JSON response - ↓ -SearchTab.tsx renders DeveloperCard[] -``` - -### Research Pipeline -``` -User pastes JD URL → ResearchTab.tsx - ↓ -invokeFunction('parse-jd', { url, parallel_api_key? }) - ↓ -┌──────────────────────────────────────┐ -│ parse-jd/index.ts │ -│ │ -│ Try 1: Parallel.ai Extract API │ -│ POST api.parallel.ai/v1beta/extract│ -│ (JS-rendered pages: Greenhouse, │ -│ Lever, Workable) │ -│ │ -│ Try 2: Direct HTML fetch (fallback) │ -│ fetch(url) → strip tags → text │ -│ │ -│ Then: Anthropic Claude Haiku │ -│ Extract: title, company, skills, │ -│ experience, responsibilities │ -└──────────────────────────────────────┘ - ↓ parsed JD - ↓ -invokeFunction('generate-outreach', { ... 
}) - → Anthropic Claude for strategy/outreach generation -``` - -### LinkedIn Enrichment -``` -DeveloperCard → "Find LinkedIn" button - ↓ -invokeFunction('enrich-linkedin', { username, name, location, bio }) - ↓ -┌──────────────────────────────────────┐ -│ enrich-linkedin/index.ts │ -│ │ -│ Step 1: Check cache (P8 fix) │ -│ - If linkedin_url exists AND │ -│ linkedin_fetched_at < 30 days │ -│ → Return cached immediately │ -│ │ -│ Step 2: Exa Neural Search │ -│ POST api.exa.ai/search │ -│ Query: "{name} {location} {bio}" │ -│ Category: linkedin profile │ -│ │ -│ Step 3: Anthropic AI verification │ -│ Match Exa results to candidate │ -│ Return best match URL │ -└──────────────────────────────────────┘ -``` - -### Websets Pipeline -``` -WebsetsTab → Create form - ↓ -callWebsetsApi('create', { query, count, criteria, enrichments }) - ↓ -┌──────────────────────────────────────┐ -│ exa-websets/index.ts │ -│ │ -│ Step 0: Verify user via JWT (P2 fix) │ -│ Step 1: POST api.exa.ai/websets/v0 │ -│ Create webset on Exa │ -│ Step 2: INSERT webset_mappings │ -│ (multi-tenant isolation) │ -│ List: filter by user_id │ -│ Delete: verify ownership first │ -└──────────────────────────────────────┘ -``` - -### External API Summary - -| API | Used For | Auth | Rate Limits | -|-----|----------|------|-------------| -| GitHub REST v3 | Contributors, profiles, repos | GITHUB_TOKEN (env) | 5000/hr authenticated | -| Anthropic Claude Haiku | Query parsing, scoring, outreach, JD extraction, matching | ANTHROPIC_API_KEY (env) | Token-based billing | -| Exa AI Search | LinkedIn profile matching | EXA_API_KEY (env, server-side only) | API key billing | -| Exa Websets | Entity collection + enrichment | EXA_API_KEY (env) | API key billing | -| Parallel.ai Extract | JS-rendered page scraping | parallel_api_key (user settings) | API key billing | -| Supabase | Auth, DB, Edge Functions | Service role key | Generous free tier | - ---- - -## Part 3: Claude Code Improvement Prompts - -### P9 — Fix 
Suggestion Chip Queries (CRITICAL — 0 results bug) - -``` -Fix the suggestion chip search feature that currently returns 0 results every time. - -ROOT CAUSE: The DEFAULT_SUGGESTIONS in src/components/SearchTab.tsx use long descriptive -expandedQuery strings (30+ words of prose), but the parseQuery() AI step in -supabase/functions/github-search/index.ts needs short, specific input to map to GitHub repos. - -SOLUTION: Replace the expandedQuery values with repo-hinted queries that the AI can reliably parse. - -In src/components/SearchTab.tsx, change DEFAULT_SUGGESTIONS to: - -const DEFAULT_SUGGESTIONS: SuggestionChip[] = [ - { label: "Rust systems engineers", expandedQuery: "Rust systems engineers — repos like rust-lang/rust, tokio-rs/tokio, denoland/deno" }, - { label: "React accessibility experts", expandedQuery: "React accessibility experts — repos like facebook/react, jsx-eslint/eslint-plugin-jsx-a11y, reach/reach-ui" }, - { label: "ML infrastructure", expandedQuery: "ML infrastructure engineers — repos like pytorch/pytorch, huggingface/transformers, ray-project/ray" }, - { label: "Kubernetes contributors", expandedQuery: "Kubernetes contributors — repos like kubernetes/kubernetes, helm/helm, istio/istio" }, - { label: "Security researchers", expandedQuery: "Security researchers — repos like OWASP/owasp-testing-guide, sqlmapproject/sqlmap, rapid7/metasploit-framework" }, -]; - -Also improve the parseQuery() AI system prompt (line 92 of github-search/index.ts) to handle both -short and long queries. Add to the prompt: "If the query mentions specific repos after a dash or -colon, use those. Otherwise, infer the best repos for the described role." - -VERIFICATION: Click each suggestion chip — each should return 10+ results. -``` - -### P10 — Filter Ungettable Candidates (CRITICAL — accuracy) - -``` -Add "ungettable candidate" detection to prevent founders, CEOs, and other unrecruitable people -from appearing in search results with high scores. - -CHANGES: - -1. 
In supabase/functions/github-search/index.ts, add a new Step 4b after scoring: - - function filterUngettable(candidates: any[]): any[] { - const UNRECRUTABLE_TITLES = /\b(founder|co-founder|cofounder|ceo|chief executive|cto|chief technology|coo|chief operating|vp |vice president|managing partner|general partner|venture partner|president)\b/i; - const FOLLOWER_THRESHOLD = 10000; - - return candidates.map(c => { - const bio = (c.bio || '') + ' ' + (c.about || ''); - const isUngettable = UNRECRUTABLE_TITLES.test(bio) || c.followers >= FOLLOWER_THRESHOLD; - if (isUngettable) { - c.ungettable = true; - c.ungettableReason = c.followers >= FOLLOWER_THRESHOLD - ? `${c.followers.toLocaleString()} followers — likely industry leader` - : `Bio mentions executive/founder role`; - } - return c; - }); - } - - Call this after scoreCandidates and before formatting the response. - -2. Add `ungettable` and `ungettableReason` to the response object (line ~391). - -3. In src/components/DeveloperCard.tsx (or the search results renderer): - - If `ungettable === true`: show an amber/yellow badge "⚠️ Likely Ungettable" with the reason - in a tooltip - - Do NOT remove them from results entirely — the user may still want to see them - - Sort ungettable candidates to the bottom of results - -4. Also update the scoring prompt (line 292) to add this instruction: - "If a candidate is a company founder, CEO, CTO, VP, or has >10K followers, note this in their - summary and give a lower score (cap at 60) since they are unlikely to be recruited for an IC role." - -VERIFICATION: Search for "contributors to huggingface/transformers" — Thomas Wolf should appear -with the "Likely Ungettable" badge and a score ≤ 60. -``` - -### P11 — Add GitHub Search Users API as Primary Source (HIGH — result volume) - -``` -Currently, if parseQuery() fails to identify repos (returns 0 repos), the fallback to GitHub -search/users API only kicks in inside fetchContributors(). 
This fallback is too narrow and uses -raw skill keywords. - -IMPROVE the search pipeline to use GitHub's search/users API as a PARALLEL primary source -alongside the repo contributor approach: - -In supabase/functions/github-search/index.ts: - -1. Add a new function `searchGitHubUsers()`: - - async function searchGitHubUsers(query: string, skills: string[], location: string | null): Promise }>> { - const userMap = new Map(); - - // Build GitHub search query with qualifiers - let searchQ = skills.slice(0, 3).join(' '); - if (location) searchQ += ` location:${location}`; - searchQ += ' type:user'; - - const data = await githubFetch( - `${GITHUB_API}/search/users?q=${encodeURIComponent(searchQ)}&per_page=25&sort=followers` - ); - - if (data?.items) { - for (const user of data.items) { - userMap.set(user.login, { username: user.login, commitCounts: {} }); - } - } - return userMap; - } - -2. In the serve handler (line ~377), run BOTH in parallel: - - const [contributorMap, userSearchMap] = await Promise.all([ - fetchContributors(parsedCriteria.repos, parsedCriteria.skills), - searchGitHubUsers(query, parsedCriteria.skills, parsedCriteria.location), - ]); - - // Merge: contributor data takes priority (has commit counts) - for (const [username, data] of userSearchMap) { - if (!contributorMap.has(username)) { - contributorMap.set(username, data); - } - } - -This ensures we always have candidates even if repo identification fails, and adds users who -match the skill profile but contribute to repos we didn't think to check. - -VERIFICATION: Suggestion chip searches should now return 15-25+ results even if parseQuery -misidentifies repos, because the user search provides a fallback pool. -``` - -### P12 — Improve parseQuery AI Reliability (HIGH — accuracy) - -``` -The parseQuery() function in supabase/functions/github-search/index.ts (line 88) frequently -fails to return valid repos. Improve its reliability. - -CHANGES: - -1. 
Add a curated fallback repo map for common search patterns: - - const REPO_HINTS: Record = { - 'rust': [{ owner: 'rust-lang', name: 'rust' }, { owner: 'tokio-rs', name: 'tokio' }, { owner: 'denoland', name: 'deno' }], - 'react': [{ owner: 'facebook', name: 'react' }, { owner: 'vercel', name: 'next.js' }, { owner: 'remix-run', name: 'remix' }], - 'python': [{ owner: 'python', name: 'cpython' }, { owner: 'django', name: 'django' }, { owner: 'pallets', name: 'flask' }], - 'ml': [{ owner: 'pytorch', name: 'pytorch' }, { owner: 'tensorflow', name: 'tensorflow' }, { owner: 'huggingface', name: 'transformers' }], - 'kubernetes': [{ owner: 'kubernetes', name: 'kubernetes' }, { owner: 'helm', name: 'helm' }, { owner: 'istio', name: 'istio' }], - 'security': [{ owner: 'OWASP', name: 'CheatSheetSeries' }, { owner: 'zaproxy', name: 'zaproxy' }], - 'go': [{ owner: 'golang', name: 'go' }, { owner: 'gin-gonic', name: 'gin' }, { owner: 'gofiber', name: 'fiber' }], - 'typescript': [{ owner: 'microsoft', name: 'TypeScript' }, { owner: 'trpc', name: 'trpc' }, { owner: 'colinhacks', name: 'zod' }], - 'ios': [{ owner: 'apple', name: 'swift' }, { owner: 'Alamofire', name: 'Alamofire' }, { owner: 'realm', name: 'realm-swift' }], - 'android': [{ owner: 'android', name: 'architecture-components-samples' }, { owner: 'square', name: 'retrofit' }], - }; - -2. After AI parseQuery returns, if repos.length === 0, check REPO_HINTS: - - if (parsedCriteria.repos.length === 0) { - const queryLower = query.toLowerCase(); - for (const [keyword, repos] of Object.entries(REPO_HINTS)) { - if (queryLower.includes(keyword)) { - parsedCriteria.repos = repos; - break; - } - } - } - -3. Validate AI-returned repos: after parsing, verify each repo exists with a HEAD request - (githubFetch). 
Remove 404s before passing to fetchContributors: - - parsedCriteria.repos = (await Promise.all( - parsedCriteria.repos.map(async r => { - const check = await githubFetch(`${GITHUB_API}/repos/${r.owner}/${r.name}`); - return check ? r : null; - }) - )).filter(Boolean); - -VERIFICATION: The query "Rust systems engineers" should always return rust-lang/rust, -tokio-rs/tokio repos even if the AI hallucinates others. -``` - -### P13 — Performance: Stream Search Progress via SSE (MEDIUM — speed perception) - -``` -Currently the search endpoint returns a single JSON response after ALL 4 steps complete (15-25s). -The frontend shows fake timed progress steps (P6). Replace with real server-sent progress. - -CHANGES: - -1. In supabase/functions/github-search/index.ts, convert the response to SSE: - - - Instead of building a final JSON response, stream progress events: - event: progress - data: {"step": 1, "message": "Parsed query — found 4 repos", "repos": [...]} - - event: progress - data: {"step": 2, "message": "Found 87 contributors", "count": 87} - - event: progress - data: {"step": 3, "message": "Enriched 45 profiles", "count": 45} - - event: progress - data: {"step": 4, "message": "Scored 45 candidates", "count": 45} - - event: result - data: {full results JSON} - -2. In src/lib/api.ts, add a streaming search function: - - export async function searchDevelopersStream( - query: string, - onProgress: (step: number, message: string) => void - ): Promise { - // Use EventSource or fetch + ReadableStream - // Call onProgress for each SSE "progress" event - // Return final "result" event data - } - -3. 
In src/components/SearchTab.tsx: - - Replace the timer-based progress steps with real SSE events - - Show actual repo names, contributor counts as they arrive - - Update the step display with real data instead of generic messages - -VERIFICATION: Run a search — progress steps should update with real counts -(e.g., "Found 87 contributors from pytorch/pytorch, tensorflow/tensorflow"). -``` - -### P14 — Improve Scoring Prompt Quality (MEDIUM — accuracy) - -``` -The scoring prompt in supabase/functions/github-search/index.ts (line 292) is generic. -Improve it to produce more useful, differentiated scores. - -Replace the scoring system prompt with: - -'You are an elite technical recruiter scoring GitHub contributors for a specific role. For EACH candidate, analyze: - -1. RELEVANCE (40%): How closely do their contributions, languages, and repos match the search query? A React expert contributing to Vue repos is less relevant. - -2. ACTIVITY (20%): Contribution volume and recency. Recent active contributors score higher than one-time contributors from years ago. - -3. SENIORITY SIGNALS (20%): Years on GitHub, number of repos, stars received, whether they maintain popular projects. More experience = higher score for senior roles. - -4. RECRUITABILITY (20%): Are they likely open to opportunities? Negative signals: founder/CEO/CTO titles, 10K+ followers (industry leaders), employed at FAANG with "not looking" indicators. Positive signals: recent job change, "open to work", moderate following. 
- -SCORING BANDS: -- 90-100: Perfect match — deep contributions to query-relevant repos, right seniority, likely recruitable -- 70-89: Strong match — good contributions, some alignment gaps -- 50-69: Moderate match — tangential contributions or seniority mismatch -- 30-49: Weak match — minimal relevance or clearly unrecruitable (founders/CEOs) -- 0-29: Poor match — wrong domain or bot accounts - -Return a JSON array (no markdown): [{ "username": "string", "score": 0-100, "summary": "1 line mentioning their top repos and commit counts", "about": "2-3 sentences on why they match or don't", "is_hidden_gem": true/false, "recruitable": true/false }] - -Hidden gem = high contributions but under 500 followers. -recruitable = false if they are a founder, CEO, CTO, VP, or have >10K followers.' - -Also add `recruitable` to the response format (line ~391) and surface it in DeveloperCard. - -VERIFICATION: Thomas Wolf should score ≤ 50 with recruitable: false. A regular contributor -with 200 commits and 300 followers should score 80+. -``` - -### P15 — Add Multi-Source Search via Exa (MEDIUM — result diversity) - -``` -Currently search only uses GitHub API. Add Exa semantic search as a supplementary source -to find candidates from non-GitHub signals (blog posts, talks, publications). - -CHANGES: - -1. 
In supabase/functions/github-search/index.ts, add after Step 2: - - async function searchExaForCandidates(query: string, skills: string[]): Promise { - const exaKey = Deno.env.get('EXA_API_KEY'); - if (!exaKey) return []; - - try { - const res = await fetch('https://api.exa.ai/search', { - method: 'POST', - headers: { 'x-api-key': exaKey, 'Content-Type': 'application/json' }, - body: JSON.stringify({ - query: `${query} github profile`, - numResults: 10, - includeDomains: ['github.com'], - type: 'neural', - }), - }); - const data = await res.json(); - // Extract GitHub usernames from URLs like github.com/username - return (data.results || []) - .map((r: any) => r.url?.match(/github\.com\/([^\/\?]+)/)?.[1]) - .filter(Boolean); - } catch { return []; } - } - -2. Run this in parallel with fetchContributors and merge the usernames into the contributor map. - -3. In the response, add a `sources` field showing where each candidate was found: - { source: 'contributor' | 'github_search' | 'exa' } - -VERIFICATION: Results should include candidates found via Exa that weren't in the contributor -lists, marked with their source. -``` - -### P16 — Fix Stale Query State in SearchTab (MEDIUM — UX bug) - -``` -Fix the stale expanded query bug in src/components/SearchTab.tsx. - -PROBLEM: When a user clicks a suggestion chip, the expandedQuery is stored in state. If the user -then manually types in the search input, the search may still use the old expandedQuery. - -Find the search submission handler and ensure: - -1. When a chip is clicked: set both the display query AND the internal expandedQuery -2. When the user types in the input: CLEAR the expandedQuery state so only the typed text is used -3. The search should use expandedQuery if it was set by a chip, otherwise use the raw input value - -Look for the input onChange handler and add: setExpandedQuery('') or equivalent. 
-Look for the search submit handler and use: const finalQuery = expandedQuery || inputValue - -VERIFICATION: Click "Rust systems engineers" chip, then clear the input and type "python -developers". Submit — should search for "python developers", NOT the Rust expanded query. -``` - -### P17 — Add Candidate Deduplication (LOW — data quality) - -``` -When running multiple searches, the same candidates can appear across results. Add deduplication. - -CHANGES: - -1. In supabase/functions/github-search/index.ts, after merging contributor maps (if P11 is - implemented), the Map already deduplicates by username. No backend change needed. - -2. In src/components/SearchTab.tsx (or wherever results are displayed): - - Before rendering, deduplicate by `id` (github_username) - - If a candidate appears from multiple sources, merge and keep the highest score - -3. In the pipeline (PipelineTab.tsx), when adding candidates: - - Check if github_username already exists in any pipeline stage - - If yes, show an "Already in pipeline (Stage: X)" message instead of duplicating - -VERIFICATION: Run two searches that share contributors (e.g., "pytorch contributors" and -"ML infrastructure"). Same person should not appear twice in combined results. -``` - -### P18 — Optimize GitHub API Usage (LOW — performance) - -``` -Current GitHub API usage can be optimized to reduce rate limit pressure and speed up searches. - -CHANGES: - -1. Reduce per-repo contributor fetch from 30 to 20 (line 122): - /repos/{owner}/{name}/contributors?per_page=20 - - Most repos' top 20 contributors capture 80%+ of meaningful contributors. - -2. Reduce profile repo fetch from 30 to 15 (line 182): - /users/{username}/repos?sort=stars&per_page=15 - - We only use top 3 highlights anyway. - -3. Add conditional requests with If-None-Match (ETags) for cached profiles: - - Store the ETag from GitHub responses in the candidates table - - When a cached profile is stale (past its TTL), send its stored ETag in an If-None-Match header → 304 = no change, skip processing - -4. 
Use GraphQL API for profile + repos in a single call: - Instead of 2 REST calls per user (/users/{username} + /users/{username}/repos), - use a single GraphQL query: - - query { user(login: "{username}") { - name bio location followers { totalCount } - repositories(first: 15, orderBy: {field: STARGAZERS, direction: DESC}, isFork: false) { - nodes { name description stargazerCount primaryLanguage { name } } - } - }} - - This cuts API calls in half for uncached profiles. - -VERIFICATION: Monitor x-ratelimit-remaining header in logs. Should see ~40% fewer API calls -per search. -``` - ---- - -## Part 4: Priority Matrix - -| Prompt | Severity | Effort | Impact | Dependencies | -|--------|----------|--------|--------|--------------| -| P9 — Fix suggestion chips | CRITICAL | Small | Fixes broken feature | None | -| P10 — Filter ungettable | CRITICAL | Medium | Accuracy | None | -| P11 — Add user search API | HIGH | Medium | More results | None | -| P12 — parseQuery reliability | HIGH | Medium | Accuracy | None | -| P14 — Better scoring prompt | MEDIUM | Small | Accuracy | P10 | -| P16 — Fix stale query state | MEDIUM | Small | UX bug | P9 | -| P13 — SSE progress | MEDIUM | Large | Perf perception | None | -| P15 — Exa multi-source | MEDIUM | Medium | Diversity | None | -| P17 — Deduplication | LOW | Small | Data quality | P11 | -| P18 — GitHub API optimization | LOW | Large | Performance | None | - -**Recommended execution order**: P9 → P10 → P16 → P14 → P11 → P12 → P15 → P13 → P17 → P18 diff --git a/tmp/pdfs/generate_sourcekit_summary_pdf.py b/tmp/pdfs/generate_sourcekit_summary_pdf.py deleted file mode 100644 index 9b978fe..0000000 --- a/tmp/pdfs/generate_sourcekit_summary_pdf.py +++ /dev/null @@ -1,181 +0,0 @@ -from pathlib import Path - -from reportlab.lib import colors -from reportlab.lib.pagesizes import letter -from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet -from reportlab.lib.units import inch -from reportlab.platypus import BaseDocTemplate, 
Frame, KeepTogether, ListFlowable, ListItem, PageTemplate, Paragraph, Spacer - - -OUTPUT_PATH = Path("/Users/mike/SourceProof/output/pdf/sourcekit-app-summary.pdf") - - -def build_pdf() -> None: - OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) - - doc = BaseDocTemplate( - str(OUTPUT_PATH), - pagesize=letter, - leftMargin=0.45 * inch, - rightMargin=0.45 * inch, - topMargin=0.42 * inch, - bottomMargin=0.4 * inch, - title="SourceKit App Summary", - author="Codex", - ) - - gap = 0.22 * inch - frame_width = (doc.width - gap) / 2 - frames = [ - Frame(doc.leftMargin, doc.bottomMargin, frame_width, doc.height, id="left"), - Frame(doc.leftMargin + frame_width + gap, doc.bottomMargin, frame_width, doc.height, id="right"), - ] - doc.addPageTemplates([PageTemplate(id="two-col", frames=frames)]) - - styles = getSampleStyleSheet() - title_style = ParagraphStyle( - "Title", - parent=styles["Title"], - fontName="Helvetica-Bold", - fontSize=20, - leading=22, - textColor=colors.HexColor("#111827"), - spaceAfter=6, - ) - subtitle_style = ParagraphStyle( - "Subtitle", - parent=styles["BodyText"], - fontName="Helvetica", - fontSize=8.5, - leading=10, - textColor=colors.HexColor("#4b5563"), - spaceAfter=12, - ) - section_style = ParagraphStyle( - "Section", - parent=styles["Heading2"], - fontName="Helvetica-Bold", - fontSize=10, - leading=12, - textColor=colors.HexColor("#0f766e"), - spaceBefore=4, - spaceAfter=4, - ) - body_style = ParagraphStyle( - "Body", - parent=styles["BodyText"], - fontName="Helvetica", - fontSize=8.4, - leading=10.2, - textColor=colors.HexColor("#111827"), - spaceAfter=4, - ) - bullet_style = ParagraphStyle( - "Bullet", - parent=body_style, - leftIndent=0, - firstLineIndent=0, - spaceAfter=1.5, - ) - - def bullets(items: list[str]) -> ListFlowable: - return ListFlowable( - [ - ListItem(Paragraph(item, bullet_style), leftIndent=0) - for item in items - ], - bulletType="bullet", - start="circle", - bulletFontName="Helvetica", - bulletFontSize=6, - 
leftIndent=10, - bulletOffsetY=1, - ) - - def section(title: str, content) -> KeepTogether: - items = [Paragraph(title, section_style)] - if isinstance(content, list): - items.extend(content) - else: - items.append(content) - return KeepTogether(items) - - story = [ - Paragraph("SourceKit", title_style), - Paragraph( - "Repo-based one-page summary generated from evidence in `/Users/mike/SourceProof`.", - subtitle_style, - ), - section( - "What It Is", - Paragraph( - "SourceKit is a GitHub-centric talent sourcing app for technical recruiting. " - "It turns role inputs into search strategy, ranked engineer matches, enrichment, and pipeline workflows grounded in open-source activity rather than self-reported profiles.", - body_style, - ), - ), - section( - "Who It's For", - Paragraph( - "Primary persona: technical recruiters, talent sourcers, or hiring teams looking for software engineers through GitHub and related public-web signals.", - body_style, - ), - ), - section( - "What It Does", - bullets( - [ - "Builds sourcing strategy from a role, company, or pasted job description via the `research-role` edge function.", - "Searches GitHub contributors and users, then ranks candidates with AI-backed scoring and hidden-gem filtering.", - "Stores and replays search history, cached candidates, and result sets through Supabase-backed data models.", - "Supports LinkedIn enrichment, contact discovery, and AI-generated outreach from repo evidence.", - "Tracks candidates in a recruiting pipeline, watchlist, and bulk-action workflow from the React dashboard.", - "Creates and monitors Exa-backed Websets for persistent candidate collection and batch pipeline import.", - "Applies auth, subscription gating, and Stripe checkout for trial/pro plan usage control.", - ], - ), - ), - Spacer(1, 5), - section( - "How It Works", - bullets( - [ - "Presentation: React 18 + TypeScript + Vite SPA (`src/App.tsx`, `src/pages/Index.tsx`) with tabs for research, results, history, pipeline, 
watchlist, bulk actions, websets, settings, and guide.", - "Auth/data access: frontend uses Supabase Auth and invokes Supabase Edge Functions with the signed-in user's access token (`src/lib/api.ts`, `src/services/websets.ts`).", - "AI/service layer: edge functions call Anthropic for query parsing, strategy, scoring, and outreach; GitHub APIs for contributors/profile data; Exa for search/websets; Stripe for billing.", - "Persistence: Supabase Postgres stores candidates cache, pipeline, outreach history, search history, watchlist items, settings, and user subscriptions (`supabase/full-schema.sql` plus migrations).", - "Data flow: role or JD input -> strategy generation -> GitHub/Exa retrieval -> candidate enrichment/scoring -> saved results/pipeline/websets surfaced back in the dashboard.", - ], - ), - ), - Spacer(1, 5), - section( - "How To Run", - bullets( - [ - "Install Node.js 18+.", - "Run `npm install` in `/Users/mike/SourceProof`.", - "Copy `.env.example` to `.env` and fill Supabase, GitHub, Anthropic, Exa, and Stripe keys.", - "Start the app with `npm run dev`.", - "Not found in repo: a fully local, no-external-services setup.", - ], - ), - ), - Spacer(1, 5), - section( - "Evidence Notes", - bullets( - [ - "Product description and startup steps are stated in `README.md`.", - "App shell and tab structure are visible in `src/App.tsx`, `src/pages/Index.tsx`, and `src/components/DashboardLayout.tsx`.", - "Backend service calls are evidenced in `src/lib/api.ts`, `src/services/websets.ts`, and `supabase/functions/*`.", - ], - ), - ), - ] - - doc.build(story) - - -if __name__ == "__main__": - build_pdf() diff --git a/tmp/pdfs/sourcekit-app-summary-1.png b/tmp/pdfs/sourcekit-app-summary-1.png deleted file mode 100644 index 8b89bd9..0000000 Binary files a/tmp/pdfs/sourcekit-app-summary-1.png and /dev/null differ