-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path main.py
More file actions
206 lines (169 loc) · 5.08 KB
/
main.py
File metadata and controls
206 lines (169 loc) · 5.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python3
"""
Vinted Scraper - CLI entry point.
Scrapes marketplace listings from Vinted using ScrapingAnt API.
"""
import argparse
import os
import sys
from datetime import datetime
from config import (
CATEGORIES,
DEFAULT_DELAY,
DEFAULT_PAGES,
OUTPUT_DIR,
DEFAULT_OUTPUT_FILE
)
from scraper import VintedScraper
def parse_args():
    """Build the scraper's CLI parser and return the parsed arguments.

    Returns an ``argparse.Namespace`` with: search, category, pages, delay,
    output, json, min_price, max_price, list_categories, api_key.
    """
    ap = argparse.ArgumentParser(
        description="Scrape Vinted marketplace listings",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python main.py -s "nike shoes" Search for nike shoes
python main.py -c women -p 3 Scrape women's category, 3 pages
python main.py -s "vintage" --json Export to JSON
python main.py --list-categories List available categories
""",
    )

    # What to scrape: free-text search and/or a named category.
    ap.add_argument("-s", "--search", default="", type=str,
                    help="Search query (default: empty)")
    ap.add_argument("-c", "--category", default="", type=str,
                    help="Category to scrape (see --list-categories)")

    # Pagination and politeness controls.
    ap.add_argument("-p", "--pages", default=DEFAULT_PAGES, type=int,
                    help=f"Maximum pages to scrape (default: {DEFAULT_PAGES})")
    ap.add_argument("-d", "--delay", default=DEFAULT_DELAY, type=float,
                    help=f"Delay between requests in seconds (default: {DEFAULT_DELAY})")

    # Output selection.
    ap.add_argument("-o", "--output", default="", type=str,
                    help="Output filename (without extension)")
    ap.add_argument("--json", action="store_true",
                    help="Also export to JSON format")

    # Optional price filters; None means "no filter".
    ap.add_argument("--min-price", default=None, type=float,
                    help="Minimum price filter")
    ap.add_argument("--max-price", default=None, type=float,
                    help="Maximum price filter")

    ap.add_argument("--list-categories", action="store_true",
                    help="List available categories and exit")

    # API key falls back to the environment so it need not appear on the CLI.
    ap.add_argument("--api-key", type=str,
                    default=os.environ.get("SCRAPINGANT_API_KEY", ""),
                    help="ScrapingAnt API key (or use SCRAPINGANT_API_KEY env var)")

    return ap.parse_args()
def list_categories():
    """Print every configured category key alongside its display name."""
    print("\nAvailable categories:")
    print("-" * 40)
    for slug, meta in CATEGORIES.items():
        # 15-char padded key column, then the human-readable name.
        print(f" {slug:15} - {meta['name']}")
    print()
def _print_banner(args):
    """Print the run-configuration summary for this scrape."""
    print(f"\nVinted Scraper")
    print("=" * 50)
    if args.search:
        print(f"Search query: {args.search}")
    if args.category:
        print(f"Category: {args.category}")
    print(f"Max pages: {args.pages}")
    # Compare against None, not truthiness: an explicit --min-price 0
    # (or --max-price 0) is a real filter and should still be reported.
    if args.min_price is not None:
        print(f"Min price: ${args.min_price}")
    if args.max_price is not None:
        print(f"Max price: ${args.max_price}")
    print("=" * 50)


def _default_basename(args):
    """Derive a timestamped output basename from the search/category args."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Spaces would be awkward in filenames; cap length to keep names short.
    search_part = args.search.replace(" ", "_")[:20] if args.search else ""
    category_part = args.category if args.category else ""
    return f"{DEFAULT_OUTPUT_FILE}_{search_part or category_part}_{timestamp}"


def main():
    """CLI entry point.

    Parses arguments, validates them, runs the scrape, and exports results.

    Returns:
        int: process exit code — 0 on success (including zero results),
        1 on a usage/validation error.
    """
    args = parse_args()

    # --list-categories is informational only; short-circuit before validation.
    if args.list_categories:
        list_categories()
        return 0

    # Validate API key.
    if not args.api_key:
        print("Error: ScrapingAnt API key is required.")
        print("Set SCRAPINGANT_API_KEY environment variable or use --api-key")
        return 1

    # Validate category if provided.
    if args.category and args.category not in CATEGORIES:
        print(f"Error: Invalid category '{args.category}'")
        list_categories()
        return 1

    # At least one of search / category must be given.
    if not args.search and not args.category:
        print("Error: Please provide either --search or --category")
        print("Use --help for usage information")
        return 1

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    scraper = VintedScraper(
        api_key=args.api_key,
        delay=args.delay
    )

    _print_banner(args)

    # Scrape listings; the scraper accumulates results internally and
    # returns the number of listings collected.
    total = scraper.scrape_search(
        search_text=args.search,
        category=args.category,
        max_pages=args.pages,
        min_price=args.min_price,
        max_price=args.max_price
    )

    print(f"\n{'=' * 50}")
    print(f"Total listings scraped: {total}")
    if total == 0:
        print("No listings found.")
        return 0

    # Explicit -o wins; otherwise build a descriptive, timestamped name.
    base_name = args.output if args.output else _default_basename(args)

    # Export results (CSV always; JSON only on request).
    csv_path = os.path.join(OUTPUT_DIR, f"{base_name}.csv")
    scraper.export_csv(csv_path)
    print(f"Results exported to: {csv_path}")

    if args.json:
        json_path = os.path.join(OUTPUT_DIR, f"{base_name}.json")
        scraper.export_json(json_path)
        print(f"JSON exported to: {json_path}")

    return 0
if __name__ == "__main__":
sys.exit(main())