1+ package com .lzx .kaleido .test ;
2+
3+ import org .apache .poi .ss .usermodel .*;
4+ import org .apache .poi .xssf .usermodel .XSSFWorkbook ;
5+ import org .jsoup .Jsoup ;
6+ import org .jsoup .nodes .Document ;
7+ import org .jsoup .nodes .Element ;
8+ import org .jsoup .select .Elements ;
9+ import com .google .gson .Gson ;
10+ import com .google .gson .GsonBuilder ;
11+
12+ import java .io .FileOutputStream ;
13+ import java .io .IOException ;
14+ import java .text .SimpleDateFormat ;
15+ import java .util .*;
16+
17+ public class GitHubTrendingScraper {
18+
19+ public static void main (String [] args ) {
20+ String url = "https://github.com/trending" ;
21+ try {
22+ // Fetch the document from the URL
23+ Document doc = Jsoup .connect (url ).get ();
24+ Elements repoElements = doc .select ("article.Box-row" );
25+
26+ List <Map <String , Object >> repoList = new ArrayList <>();
27+
28+ for (Element repoElement : repoElements ) {
29+ Map <String , Object > repoData = new HashMap <>();
30+
31+ // Extract repository name
32+ String repoName = repoElement .select ("h1.h3 a" ).text ();
33+ repoData .put ("name" , repoName );
34+
35+ // Extract repository URL
36+ String repoUrl = "https://github.com" + repoElement .select ("h1.h3 a" ).attr ("href" );
37+ repoData .put ("url" , repoUrl );
38+
39+ // Extract description
40+ String description = repoElement .select ("p.col-9" ).text ();
41+ repoData .put ("description" , description );
42+
43+ // Extract language
44+ String language = repoElement .select ("[itemprop=programmingLanguage]" ).text ();
45+ repoData .put ("language" , language );
46+
47+ // Extract stars
48+ String stars = repoElement .select (".Link--muted.d-inline-block.mr-3" ).first ().text ();
49+ repoData .put ("stars" , stars );
50+
51+ // Extract today's stars
52+ String todayStars = repoElement .select (".d-inline-block.float-sm-right" ).text ().replace (" stars today" , "" );
53+ repoData .put ("today_stars" , todayStars );
54+
55+ repoList .add (repoData );
56+ }
57+
58+ // Convert list to JSON
59+ Gson gson = new GsonBuilder ().setPrettyPrinting ().create ();
60+ String json = gson .toJson (repoList );
61+ System .out .println (json );
62+
63+ // Save to Excel
64+ saveToExcel (repoList );
65+
66+ } catch (IOException e ) {
67+ e .printStackTrace ();
68+ }
69+ }
70+
71+ private static void saveToExcel (List <Map <String , Object >> repoList ) {
72+ Workbook workbook = new XSSFWorkbook ();
73+ Sheet sheet = workbook .createSheet ("Trending Repos" );
74+
75+ // Create header row
76+ Row headerRow = sheet .createRow (0 );
77+ String [] headers = {"Name" , "URL" , "Description" , "Language" , "Stars" , "Today Stars" };
78+ for (int i = 0 ; i < headers .length ; i ++) {
79+ Cell cell = headerRow .createCell (i );
80+ cell .setCellValue (headers [i ]);
81+ }
82+
83+ // Fill data
84+ int rowNum = 1 ;
85+ for (Map <String , Object > repoData : repoList ) {
86+ Row row = sheet .createRow (rowNum ++);
87+ row .createCell (0 ).setCellValue ((String ) repoData .get ("name" ));
88+ row .createCell (1 ).setCellValue ((String ) repoData .get ("url" ));
89+ row .createCell (2 ).setCellValue ((String ) repoData .get ("description" ));
90+ row .createCell (3 ).setCellValue ((String ) repoData .get ("language" ));
91+ row .createCell (4 ).setCellValue ((String ) repoData .get ("stars" ));
92+ row .createCell (5 ).setCellValue ((String ) repoData .get ("today_stars" ));
93+ }
94+
95+ // Save to file
96+ String date = new SimpleDateFormat ("yyyy-MM-dd" ).format (new Date ());
97+ try (FileOutputStream fileOut = new FileOutputStream ("trending_" + date + ".xlsx" )) {
98+ workbook .write (fileOut );
99+ } catch (IOException e ) {
100+ e .printStackTrace ();
101+ }
102+ }
103+ }
0 commit comments